VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@103404

Last change on this file since 103404 was 103404, checked in by vboxsync, 10 months ago

VMM/IEM: Threaded function statistics. bugref:10376

1/* $Id: IEMAllThrdRecompiler.cpp 103404 2024-02-17 01:53:09Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
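
/*
 * Editor's note (not part of the original sources): assuming the usual VBox
 * log group string syntax, the levels listed above can be enabled selectively
 * for a debug build with something along these lines; the exact group name
 * and flag spelling are an assumption, double-check VBox/log.h:
 *
 *     VBOX_LOG="+iem_re_threaded.e.l.f.l7.l8.l12"
 *     VBOX_LOG_DEST="file=./iem-re-threaded.log"
 *     VBOX_LOG_FLAGS="time thread"
 */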
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
98 * Narrow down configs here to avoid wasting time on unused ones.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
120/**
121 * Calculates the effective address of a ModR/M memory operand, extended version
122 * for use in the recompilers.
123 *
124 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
125 *
126 * May longjmp on internal error.
127 *
128 * @return The effective address.
129 * @param pVCpu The cross context virtual CPU structure of the calling thread.
130 * @param bRm The ModRM byte.
131 * @param cbImmAndRspOffset - First byte: The size of any immediate
132 * following the effective address opcode bytes
133 * (only for RIP relative addressing).
134 * - Second byte: RSP displacement (for POP [ESP]).
135 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
136 * SIB byte (bits 39:32).
137 *
138 * @note This must be defined in a source file with matching
139 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
140 * or implemented differently...
141 */
142RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
143{
144 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
145# define SET_SS_DEF() \
146 do \
147 { \
148 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
149 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
150 } while (0)
151
152 if (!IEM_IS_64BIT_CODE(pVCpu))
153 {
154/** @todo Check the effective address size crap! */
155 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
156 {
157 uint16_t u16EffAddr;
158
159 /* Handle the disp16 form with no registers first. */
160 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
161 {
162 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
163 *puInfo = u16EffAddr;
164 }
165 else
166 {
167 /* Get the displacement. */
168 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
169 {
170 case 0: u16EffAddr = 0; break;
171 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
172 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
173 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
174 }
175 *puInfo = u16EffAddr;
176
177 /* Add the base and index registers to the disp. */
178 switch (bRm & X86_MODRM_RM_MASK)
179 {
180 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
181 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
182 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
183 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
184 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
185 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
186 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
187 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
188 }
189 }
190
191 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
192 return u16EffAddr;
193 }
194
195 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
196 uint32_t u32EffAddr;
197 uint64_t uInfo;
198
199 /* Handle the disp32 form with no registers first. */
200 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
201 {
202 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
203 uInfo = u32EffAddr;
204 }
205 else
206 {
207 /* Get the register (or SIB) value. */
208 uInfo = 0;
209 switch ((bRm & X86_MODRM_RM_MASK))
210 {
211 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
212 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
213 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
214 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
215 case 4: /* SIB */
216 {
217 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
218 uInfo = (uint64_t)bSib << 32;
219
220 /* Get the index and scale it. */
221 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
222 {
223 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
224 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
225 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
226 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
227 case 4: u32EffAddr = 0; /*none */ break;
228 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
229 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
230 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
231 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
232 }
233 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
234
235 /* add base */
236 switch (bSib & X86_SIB_BASE_MASK)
237 {
238 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
239 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
240 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
241 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
242 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
243 case 5:
244 if ((bRm & X86_MODRM_MOD_MASK) != 0)
245 {
246 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
247 SET_SS_DEF();
248 }
249 else
250 {
251 uint32_t u32Disp;
252 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
253 u32EffAddr += u32Disp;
254 uInfo |= u32Disp;
255 }
256 break;
257 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
258 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
259 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
260 }
261 break;
262 }
263 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
264 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
265 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
266 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
267 }
268
269 /* Get and add the displacement. */
270 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
271 {
272 case 0:
273 break;
274 case 1:
275 {
276 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
277 u32EffAddr += i8Disp;
278 uInfo |= (uint32_t)(int32_t)i8Disp;
279 break;
280 }
281 case 2:
282 {
283 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
284 u32EffAddr += u32Disp;
285 uInfo |= u32Disp;
286 break;
287 }
288 default:
289 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
290 }
291 }
292
293 *puInfo = uInfo;
294 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
295 return u32EffAddr;
296 }
297
298 uint64_t u64EffAddr;
299 uint64_t uInfo;
300
301 /* Handle the rip+disp32 form with no registers first. */
302 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
303 {
304 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
305 uInfo = (uint32_t)u64EffAddr;
306 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
307 }
308 else
309 {
310 /* Get the register (or SIB) value. */
311 uInfo = 0;
312 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
313 {
314 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
315 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
316 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
317 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
318 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
319 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
320 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
321 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
322 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
323 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
324 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
325 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
326 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
327 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
328 /* SIB */
329 case 4:
330 case 12:
331 {
332 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
333 uInfo = (uint64_t)bSib << 32;
334
335 /* Get the index and scale it. */
336 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
337 {
338 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
339 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
340 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
341 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
342 case 4: u64EffAddr = 0; /*none */ break;
343 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
344 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
345 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
346 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
347 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
348 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
349 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
350 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
351 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
352 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
353 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
354 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
355 }
356 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
357
358 /* add base */
359 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
360 {
361 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
362 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
363 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
364 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
365 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
366 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
367 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
368 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
369 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
370 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
371 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
372 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
373 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
374 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
375 /* complicated encodings */
376 case 5:
377 case 13:
378 if ((bRm & X86_MODRM_MOD_MASK) != 0)
379 {
380 if (!pVCpu->iem.s.uRexB)
381 {
382 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
383 SET_SS_DEF();
384 }
385 else
386 u64EffAddr += pVCpu->cpum.GstCtx.r13;
387 }
388 else
389 {
390 uint32_t u32Disp;
391 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
392 u64EffAddr += (int32_t)u32Disp;
393 uInfo |= u32Disp;
394 }
395 break;
396 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
397 }
398 break;
399 }
400 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
401 }
402
403 /* Get and add the displacement. */
404 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
405 {
406 case 0:
407 break;
408 case 1:
409 {
410 int8_t i8Disp;
411 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
412 u64EffAddr += i8Disp;
413 uInfo |= (uint32_t)(int32_t)i8Disp;
414 break;
415 }
416 case 2:
417 {
418 uint32_t u32Disp;
419 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
420 u64EffAddr += (int32_t)u32Disp;
421 uInfo |= u32Disp;
422 break;
423 }
424 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
425 }
426
427 }
428
429 *puInfo = uInfo;
430 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
431 {
432 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
433 return u64EffAddr;
434 }
435 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
436 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
437 return u64EffAddr & UINT32_MAX;
438}
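
/*
 * Editor's sketch (not original code): how the packed arguments of
 * iemOpHlpCalcRmEffAddrJmpEx() above are typically built and consumed.  All
 * variable names below are illustrative only.
 *
 *     // Byte 0: size of any immediate following the effective address bytes
 *     //         (only relevant for RIP-relative addressing).
 *     // Byte 1: extra RSP displacement, e.g. for POP [ESP].
 *     uint32_t const cbImmAndRspOffset = cbImm | ((uint32_t)cbRspAdjust << 8);
 *
 *     uint64_t uInfo = 0;
 *     RTGCPTR const GCPtrEff = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, cbImmAndRspOffset, &uInfo);
 *     uint32_t const u32Disp = (uint32_t)uInfo;          // displacement, bits 31:0
 *     uint8_t  const bSib    = (uint8_t)(uInfo >> 32);   // SIB byte, bits 39:32 (zero if none)
 */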
439
440
441/*********************************************************************************************************************************
442* Translation Block Cache. *
443*********************************************************************************************************************************/
444
445/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
446static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
447{
448 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
449 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
450 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
451 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
452 if (cMsSinceUse1 != cMsSinceUse2)
453 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
454 if (pTb1->cUsed != pTb2->cUsed)
455 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
456 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
457 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
458 return 0;
459}
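
/* Editor's note: the comparator above ranks the most recently used TBs first,
   then the most frequently used ones, and finally prefers native TBs over
   threaded ones, so the half of the chain that iemTbCacheAddWithPruning()
   keeps after sorting is the "hottest" half. */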
460
461#ifdef VBOX_STRICT
462/**
463 * Assertion helper that checks a collision list count.
464 */
465static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
466{
467 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
468 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
469 while (pTb)
470 {
471 pTb = pTb->pNext;
472 cLeft--;
473 }
474 AssertMsg(cLeft == 0,
475 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
476 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
477}
478#endif
479
480
481DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
482{
483 STAM_PROFILE_START(&pTbCache->StatPrune, a);
484
485 /*
486 * First convert the collision list to an array.
487 */
488 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
489 uintptr_t cInserted = 0;
490 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
491
492 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
493
494 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
495 {
496 apSortedTbs[cInserted++] = pTbCollision;
497 pTbCollision = pTbCollision->pNext;
498 }
499
500 /* Free any excess (impossible). */
501 if (RT_LIKELY(!pTbCollision))
502 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
503 else
504 do
505 {
506 PIEMTB pTbToFree = pTbCollision;
507 pTbCollision = pTbToFree->pNext;
508 iemTbAllocatorFree(pVCpu, pTbToFree);
509 } while (pTbCollision);
510
511 /*
512 * Sort it by most recently used and usage count.
513 */
514 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
515
516 /* We keep half the list for now. Perhaps a bit aggressive... */
517 uintptr_t const cKeep = cInserted / 2;
518
519 /* First free up the TBs we don't wish to keep (before creating the new
520 list because otherwise the free code will scan the list for each one
521 without ever finding it). */
522 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
523 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
524
525 /* Then chain the new TB together with the existing ones we want to keep,
526 and insert this list into the hash table. */
527 pTbCollision = pTb;
528 for (uintptr_t idx = 0; idx < cKeep; idx++)
529 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
530 pTbCollision->pNext = NULL;
531
532 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
533#ifdef VBOX_STRICT
534 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
535#endif
536
537 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
538}
539
540
541static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
542{
543 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
544 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
545 if (!pTbOldHead)
546 {
547 pTb->pNext = NULL;
548 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
549 }
550 else
551 {
552 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
553 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
554 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
555 {
556 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
557 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
558#ifdef VBOX_STRICT
559 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
560#endif
561 }
562 else
563 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
564 }
565}
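
/*
 * Editor's note (assumption, not from the original file): IEMTBCACHE_PTR_MAKE()
 * presumably packs the collision chain length into the low alignment bits of
 * the head pointer; the AssertCompile() in iemTbInit() that sizeof(IEMTB) has
 * no bits set under IEMTBCACHE_PTR_COUNT_MASK only makes sense that way.
 * Conceptually:
 *
 *     entry        = (PIEMTB)((uintptr_t)pTbHead | cChainLength);              // _PTR_MAKE
 *     pTbHead      = (PIEMTB)((uintptr_t)entry & ~(uintptr_t)IEMTBCACHE_PTR_COUNT_MASK); // _PTR_GET_TB
 *     cChainLength = (uintptr_t)entry & IEMTBCACHE_PTR_COUNT_MASK;             // _PTR_GET_COUNT
 *
 * The real macro definitions live in the IEM internal headers.
 */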
566
567
568/**
569 * Unlinks @a pTb from the hash table if found in it.
570 *
571 * @returns true if unlinked, false if not present.
572 * @param pTbCache The hash table.
573 * @param pTb The TB to remove.
574 */
575static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
576{
577 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
578 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
579 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
580
581 /*
582 * At the head of the collision list?
583 */
584 if (pTbHash == pTb)
585 {
586 if (!pTb->pNext)
587 pTbCache->apHash[idxHash] = NULL;
588 else
589 {
590 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
591 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
592#ifdef VBOX_STRICT
593 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
594#endif
595 }
596 return true;
597 }
598
599 /*
600 * Search the collision list.
601 */
602 PIEMTB const pTbHead = pTbHash;
603 while (pTbHash)
604 {
605 PIEMTB const pNextTb = pTbHash->pNext;
606 if (pNextTb == pTb)
607 {
608 pTbHash->pNext = pTb->pNext;
609 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
610#ifdef VBOX_STRICT
611 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
612#endif
613 return true;
614 }
615 pTbHash = pNextTb;
616 }
617 return false;
618}
619
620
621/**
622 * Looks up a TB for the given PC and flags in the cache.
623 *
624 * @returns Pointer to TB on success, NULL if not found.
625 * @param pVCpu The cross context virtual CPU structure of the
626 * calling thread.
627 * @param pTbCache The translation block cache.
628 * @param GCPhysPc The PC to look up a TB for.
629 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
630 * the lookup.
631 * @thread EMT(pVCpu)
632 */
633static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
634 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
635{
636 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
637 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
638 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
639#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
640 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
641#endif
642 while (pTb)
643 {
644 if (pTb->GCPhysPc == GCPhysPc)
645 {
646 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
647 {
648 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
649 {
650 STAM_COUNTER_INC(&pTbCache->cLookupHits);
651 AssertMsg(cLeft > 0, ("%d\n", cLeft));
652
653 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
654 pTb->cUsed++;
655#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
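                    /* Editor's note: a threaded TB gets handed to the native
                       recompiler exactly on its 16th lookup hit (cUsed == 16,
                       just incremented above); native TBs and all other use
                       counts are returned as-is. */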
656 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
657 {
658 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
659 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
660 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
661 return pTb;
662 }
663 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
664 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
665 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
666 return iemNativeRecompile(pVCpu, pTb);
667#else
668 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
669 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
670 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
671 return pTb;
672#endif
673 }
674 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
675 }
676 else
677 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
678 }
679 else
680 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
681
682 pTb = pTb->pNext;
683#ifdef VBOX_STRICT
684 cLeft--;
685#endif
686 }
687 AssertMsg(cLeft == 0, ("%d\n", cLeft));
688 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
689 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
690 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
691 return pTb;
692}
693
694
695/*********************************************************************************************************************************
696* Translation Block Allocator.
697*********************************************************************************************************************************/
698/*
699 * Translation block allocation management.
700 */
701
702#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
703# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
704 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
705# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
706 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
707# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
708 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
709#else
710# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
711 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
712# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
713 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
714# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
715 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
716#endif
717/** Makes a TB index from a chunk index and TB index within that chunk. */
718#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
719 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
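
/*
 * Editor's sketch: a worked example of the index math above using made-up
 * numbers (the real sizeof(IEMTB) differs).  If sizeof(IEMTB) were 256 bytes,
 * a 2 MiB chunk would hold cTbsPerChunk = _2M / 256 = 8192 TBs, and then:
 *
 *     idxTb = 20000  ->  idxChunk   = 20000 / 8192     = 2
 *                        idxInChunk = 20000 - 2 * 8192 = 3616
 *     IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 3616)       = 20000 again.
 *
 * With IEMTB_SIZE_IS_POWER_OF_TWO the same mapping is done with shifts and
 * masks (cChunkShift = 13 and fChunkMask = 8191 in this made-up example).
 */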
720
721
722/**
723 * Initializes the TB allocator and cache for an EMT.
724 *
725 * @returns VBox status code.
726 * @param pVM The VM handle.
727 * @param cInitialTbs The initial number of translation blocks to
728 * preallocate.
729 * @param cMaxTbs The max number of translation blocks allowed.
730 * @param cbInitialExec The initial size of the executable memory allocator.
731 * @param cbMaxExec The max size of the executable memory allocator.
732 * @param cbChunkExec The chunk size for executable memory allocator. Zero
733 * or UINT32_MAX for automatically determining this.
734 * @thread EMT
735 */
736DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
737 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
738{
739 PVMCPUCC pVCpu = VMMGetCpu(pVM);
740 Assert(!pVCpu->iem.s.pTbCacheR3);
741 Assert(!pVCpu->iem.s.pTbAllocatorR3);
742
743 /*
744 * Calculate the chunk size of the TB allocator.
745 * The minimum chunk size is 2MiB.
746 */
747 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
748 uint32_t cbPerChunk = _2M;
749 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
750#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
751 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
752 uint8_t cChunkShift = 21 - cTbShift;
753 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
754#endif
755 for (;;)
756 {
757 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
758 break;
759 cbPerChunk *= 2;
760 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
761#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
762 cChunkShift += 1;
763#endif
764 }
765
766 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
767 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
768 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
769
770 cMaxTbs = cMaxChunks * cTbsPerChunk;
771
772 /*
773 * Allocate and initialize it.
774 */
775 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
776 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
777 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
778 if (!pTbAllocator)
779 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
780 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
781 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
782 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
783 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
784 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
785 pTbAllocator->cbPerChunk = cbPerChunk;
786 pTbAllocator->cMaxTbs = cMaxTbs;
787#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
788 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
789 pTbAllocator->cChunkShift = cChunkShift;
790 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
791#endif
792
793 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
794 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
795
796 /*
797 * Allocate the initial chunks.
798 */
799 for (uint32_t idxChunk = 0; ; idxChunk++)
800 {
801 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
802 if (!paTbs)
803 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
804 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
805 cbPerChunk, idxChunk, pVCpu->idCpu);
806
807 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
808 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
809 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
810 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
811 pTbAllocator->cTotalTbs += cTbsPerChunk;
812
813 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
814 break;
815 }
816
817 /*
818 * Calculate the size of the hash table. We double the max TB count and
819 * round it up to the nearest power of two.
820 */
821 uint32_t cCacheEntries = cMaxTbs * 2;
822 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
823 {
824 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
825 cCacheEntries = RT_BIT_32(iBitTop);
826 Assert(cCacheEntries >= cMaxTbs * 2);
827 }
828
829 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
830 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
831 if (!pTbCache)
832 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
833 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
834 cbTbCache, cCacheEntries, pVCpu->idCpu);
835
836 /*
837 * Initialize it (assumes zeroed by the allocator).
838 */
839 pTbCache->uMagic = IEMTBCACHE_MAGIC;
840 pTbCache->cHash = cCacheEntries;
841 pTbCache->uHashMask = cCacheEntries - 1;
842 Assert(pTbCache->cHash > pTbCache->uHashMask);
843 pVCpu->iem.s.pTbCacheR3 = pTbCache;
844
845 /*
846 * Initialize the native executable memory allocator.
847 */
848#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
849 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
850 AssertLogRelRCReturn(rc, rc);
851#else
852 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
853#endif
854
855 return VINF_SUCCESS;
856}
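
/*
 * Editor's sketch (hypothetical, not from the original sources): ring-3 init
 * code is expected to call iemTbInit() once per EMT, roughly along the lines
 * of the following; the sizes are illustrative only and the real values come
 * from the IEM configuration.
 *
 *     int rc = iemTbInit(pVM, 32768, 1048576,      // initial / max TB counts
 *                        16 * _1M, _1G,            // initial / max exec memory
 *                        0);                       // chunk size: auto
 *     AssertLogRelRCReturn(rc, rc);
 */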
857
858
859/**
860 * Inner free worker.
861 */
862static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
863 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
864{
865 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
866 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
867 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
868 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
869
870 /*
871 * Unlink the TB from the hash table.
872 */
873 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
874
875 /*
876 * Free the TB itself.
877 */
878 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
879 {
880 case IEMTB_F_TYPE_THREADED:
881 pTbAllocator->cThreadedTbs -= 1;
882 RTMemFree(pTb->Thrd.paCalls);
883 break;
884#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
885 case IEMTB_F_TYPE_NATIVE:
886 pTbAllocator->cNativeTbs -= 1;
887 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
888 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
889 break;
890#endif
891 default:
892 AssertFailed();
893 }
894 RTMemFree(pTb->pabOpcodes);
895
896 pTb->pNext = NULL;
897 pTb->fFlags = 0;
898 pTb->GCPhysPc = UINT64_MAX;
899 pTb->Gen.uPtr = 0;
900 pTb->Gen.uData = 0;
901 pTb->cbOpcodes = 0;
902 pTb->pabOpcodes = NULL;
903
904 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
905 Assert(pTbAllocator->cInUseTbs > 0);
906
907 pTbAllocator->cInUseTbs -= 1;
908 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
909}
910
911
912/**
913 * Frees the given TB.
914 *
915 * @param pVCpu The cross context virtual CPU structure of the calling
916 * thread.
917 * @param pTb The translation block to free.
918 * @thread EMT(pVCpu)
919 */
920static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
921{
922 /*
923 * Validate state.
924 */
925 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
926 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
927 uint8_t const idxChunk = pTb->idxAllocChunk;
928 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
929 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
930 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
931
932 /*
933 * Call inner worker.
934 */
935 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
936}
937
938
939/**
940 * Schedules a native TB for freeing when it's no longer being executed and
941 * part of the caller's call stack.
942 *
943 * The TB will be removed from the translation block cache, though, so it isn't
944 * possible to execute it again and the IEMTB::pNext member can be used to link
945 * it together with other TBs awaiting freeing.
946 *
947 * @param pVCpu The cross context virtual CPU structure of the calling
948 * thread.
949 * @param pTb The translation block to schedule for freeing.
950 */
951static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
952{
953 /*
954 * Validate state.
955 */
956 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
957 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
958 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
959 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
960 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
961 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
962 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
963 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
964
965 /*
966 * Remove it from the cache and prepend it to the allocator's todo list.
967 */
968 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
969
970 pTb->pNext = pTbAllocator->pDelayedFreeHead;
971 pTbAllocator->pDelayedFreeHead = pTb;
972}
973
974
975/**
976 * Processes the delayed frees.
977 *
978 * This is called by the allocator function as well as the native recompile
979 * function before making any TB or executable memory allocations respectively.
980 */
981void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
982{
983 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
984 pTbAllocator->pDelayedFreeHead = NULL;
985 while (pTb)
986 {
987 PIEMTB const pTbNext = pTb->pNext;
988 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
989 iemTbAllocatorFree(pVCpu, pTb);
990 pTb = pTbNext;
991 }
992}
993
994
995/**
996 * Grow the translation block allocator with another chunk.
997 */
998static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
999{
1000 /*
1001 * Validate state.
1002 */
1003 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1004 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1005 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1006 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1007 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1008
1009 /*
1010 * Allocate a new chunk and add it to the allocator.
1011 */
1012 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1013 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1014 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1015
1016 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1017 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1018 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1019 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1020 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1021 pTbAllocator->cTotalTbs += cTbsPerChunk;
1022 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1023
1024 return VINF_SUCCESS;
1025}
1026
1027
1028/**
1029 * Allocates a TB from an allocator that has a free block.
1030 *
1031 * This is common code to both the fast and slow allocator code paths.
1032 */
1033DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1034{
1035 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1036
1037 int idxTb;
1038 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1039 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1040 pTbAllocator->cTotalTbs,
1041 pTbAllocator->iStartHint & ~(uint32_t)63);
1042 else
1043 idxTb = -1;
1044 if (idxTb < 0)
1045 {
1046 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1047 AssertLogRelReturn(idxTb >= 0, NULL);
1048 }
1049 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1050 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1051
1052 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1053 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1054 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1055 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1056 Assert(pTb->idxAllocChunk == idxChunk);
1057
1058 pTbAllocator->cInUseTbs += 1;
1059 if (fThreaded)
1060 pTbAllocator->cThreadedTbs += 1;
1061 else
1062 pTbAllocator->cNativeTbs += 1;
1063 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1064 return pTb;
1065}
1066
1067
1068/**
1069 * Slow path for iemTbAllocatorAlloc.
1070 */
1071static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1072{
1073 /*
1074 * With some luck we can add another chunk.
1075 */
1076 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1077 {
1078 int rc = iemTbAllocatorGrow(pVCpu);
1079 if (RT_SUCCESS(rc))
1080 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1081 }
1082
1083 /*
1084 * We have to prune stuff. Sigh.
1085 *
1086 * This requires scanning for older TBs and kicking them out. Not sure how to
1087 * best do this as we don't want to maintain any list of TBs ordered by last
1088 * usage time. But one reasonably simple approach would be that each time we
1089 * get here we continue a sequential scan of the allocation chunks,
1090 * considering just a smallish number of TBs and freeing a fixed portion of
1091 * them. Say, we consider the next 128 TBs, freeing the least recently used
1092 * out of each group of 4 TBs, resulting in 32 free TBs.
1093 */
1094 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1095 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1096 uint32_t const cTbsToPrune = 128;
1097 uint32_t const cTbsPerGroup = 4;
1098 uint32_t cFreedTbs = 0;
1099#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1100 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1101#else
1102 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1103#endif
1104 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1105 idxTbPruneFrom = 0;
1106 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1107 {
1108 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1109 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1110 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1111 uint32_t cMsAge = msNow - pTb->msLastUsed;
1112 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1113
1114 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1115 {
1116#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1117 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1118 { /* likely */ }
1119 else
1120 {
1121 idxInChunk2 = 0;
1122 idxChunk2 += 1;
1123 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1124 idxChunk2 = 0;
1125 }
1126#endif
1127 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1128 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1129 if ( cMsAge2 > cMsAge
1130 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1131 {
1132 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1133 pTb = pTb2;
1134 idxChunk = idxChunk2;
1135 idxInChunk = idxInChunk2;
1136 cMsAge = cMsAge2;
1137 }
1138 }
1139
1140 /* Free the TB. */
1141 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1142 cFreedTbs++; /* paranoia */
1143 }
1144 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1145 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1146
1147 /*
1148 * Allocate a TB from the ones we've pruned.
1149 */
1150 if (cFreedTbs)
1151 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1152 return NULL;
1153}
1154
1155
1156/**
1157 * Allocate a translation block.
1158 *
1159 * @returns Pointer to block on success, NULL if we're out and are unable to
1160 * free up an existing one (very unlikely once implemented).
1161 * @param pVCpu The cross context virtual CPU structure of the calling
1162 * thread.
1163 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1164 * For statistics.
1165 */
1166DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1167{
1168 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1169 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1170
1171 /* Free any pending TBs before we proceed. */
1172 if (!pTbAllocator->pDelayedFreeHead)
1173 { /* probably likely */ }
1174 else
1175 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1176
1177 /* If the allocator is full, take the slow code path. */
1178 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1179 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1180 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1181}
1182
1183
1184/**
1185 * This is called when we're out of space for native TBs.
1186 *
1187 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1188 * The difference is that we only prune native TBs and will only free any if
1189 * there are at least two in a group. The conditions under which we're called are
1190 * different - there will probably be free TBs in the table when we're called.
1191 * Therefore we increase the group size and max scan length, though we'll stop
1192 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1193 * up at least 8 TBs.
1194 */
1195void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1196{
1197 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1198 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1199
1200 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1201
1202 /*
1203 * Flush the delayed free list before we start freeing TBs indiscriminately.
1204 */
1205 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1206
1207 /*
1208 * Scan and free TBs.
1209 */
1210 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1211 uint32_t const cTbsToPrune = 128 * 8;
1212 uint32_t const cTbsPerGroup = 4 * 4;
1213 uint32_t cFreedTbs = 0;
1214 uint32_t cMaxInstrs = 0;
1215 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1216 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1217 {
1218 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1219 idxTbPruneFrom = 0;
1220 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1221 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1222 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1223 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1224 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1225
1226 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1227 {
1228 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1229 { /* likely */ }
1230 else
1231 {
1232 idxInChunk2 = 0;
1233 idxChunk2 += 1;
1234 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1235 idxChunk2 = 0;
1236 }
1237 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1238 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1239 {
1240 cNativeTbs += 1;
1241 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1242 if ( cMsAge2 > cMsAge
1243 || ( cMsAge2 == cMsAge
1244 && ( pTb2->cUsed < pTb->cUsed
1245 || ( pTb2->cUsed == pTb->cUsed
1246 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1247 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1248 {
1249 pTb = pTb2;
1250 idxChunk = idxChunk2;
1251 idxInChunk = idxInChunk2;
1252 cMsAge = cMsAge2;
1253 }
1254 }
1255 }
1256
1257 /* Free the TB if we found at least two native ones in this group. */
1258 if (cNativeTbs >= 2)
1259 {
1260 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1261 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1262 cFreedTbs++;
1263 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1264 break;
1265 }
1266 }
1267 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1268
1269 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1270}
1271
1272
1273/*********************************************************************************************************************************
1274* Threaded Recompiler Core *
1275*********************************************************************************************************************************/
1276
1277/**
1278 * Allocate a translation block for threaded recompilation.
1279 *
1280 * This is allocated with maxed out call table and storage for opcode bytes,
1281 * because it's only supposed to be called once per EMT to allocate the TB
1282 * pointed to by IEMCPU::pThrdCompileTbR3.
1283 *
1284 * @returns Pointer to the translation block on success, NULL on failure.
1285 * @param pVM The cross context virtual machine structure.
1286 * @param pVCpu The cross context virtual CPU structure of the calling
1287 * thread.
1288 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1289 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1290 */
1291static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1292{
1293 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1294 if (pTb)
1295 {
1296 unsigned const cCalls = 256;
1297 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1298 if (pTb->Thrd.paCalls)
1299 {
1300 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1301 if (pTb->pabOpcodes)
1302 {
1303 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1304 pTb->Thrd.cAllocated = cCalls;
1305 pTb->Thrd.cCalls = 0;
1306 pTb->cbOpcodes = 0;
1307 pTb->pNext = NULL;
1308 pTb->cUsed = 0;
1309 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1310 pTb->idxAllocChunk = UINT8_MAX;
1311 pTb->GCPhysPc = GCPhysPc;
1312 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1313 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1314 pTb->cInstructions = 0;
1315
1316 /* Init the first opcode range. */
1317 pTb->cRanges = 1;
1318 pTb->aRanges[0].cbOpcodes = 0;
1319 pTb->aRanges[0].offOpcodes = 0;
1320 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1321 pTb->aRanges[0].u2Unused = 0;
1322 pTb->aRanges[0].idxPhysPage = 0;
1323 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1324 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1325
1326 return pTb;
1327 }
1328 RTMemFree(pTb->Thrd.paCalls);
1329 }
1330 RTMemFree(pTb);
1331 }
1332 RT_NOREF(pVM);
1333 return NULL;
1334}
1335
1336
1337/**
1338 * Called on the TB that is dedicated for recompilation before it's reused.
1339 *
1340 * @param pVCpu The cross context virtual CPU structure of the calling
1341 * thread.
1342 * @param pTb The translation block to reuse.
1343 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1344 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1345 */
1346static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1347{
1348 pTb->GCPhysPc = GCPhysPc;
1349 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1350 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1351 pTb->Thrd.cCalls = 0;
1352 pTb->cbOpcodes = 0;
1353 pTb->cInstructions = 0;
1354
1355 /* Init the first opcode range. */
1356 pTb->cRanges = 1;
1357 pTb->aRanges[0].cbOpcodes = 0;
1358 pTb->aRanges[0].offOpcodes = 0;
1359 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1360 pTb->aRanges[0].u2Unused = 0;
1361 pTb->aRanges[0].idxPhysPage = 0;
1362 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1363 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1364}
1365
1366
1367/**
1368 * Used to duplicate a threaded translation block after recompilation is done.
1369 *
1370 * @returns Pointer to the translation block on success, NULL on failure.
1371 * @param pVM The cross context virtual machine structure.
1372 * @param pVCpu The cross context virtual CPU structure of the calling
1373 * thread.
1374 * @param pTbSrc The TB to duplicate.
1375 */
1376static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1377{
1378 /*
1379 * Just using the heap for now. Will make this more efficient and
1380 * complicated later, don't worry. :-)
1381 */
1382 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1383 if (pTb)
1384 {
1385 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1386 memcpy(pTb, pTbSrc, sizeof(*pTb));
1387 pTb->idxAllocChunk = idxAllocChunk;
1388
1389 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1390 Assert(cCalls > 0);
1391 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1392 if (pTb->Thrd.paCalls)
1393 {
1394 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1395 Assert(cbOpcodes > 0);
1396 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1397 if (pTb->pabOpcodes)
1398 {
1399 pTb->Thrd.cAllocated = cCalls;
1400 pTb->pNext = NULL;
1401 pTb->cUsed = 0;
1402 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1403 pTb->fFlags = pTbSrc->fFlags;
1404
1405 return pTb;
1406 }
1407 RTMemFree(pTb->Thrd.paCalls);
1408 }
1409 iemTbAllocatorFree(pVCpu, pTb);
1410 }
1411 RT_NOREF(pVM);
1412 return NULL;
1413
1414}
1415
1416
1417/**
1418 * Adds the given TB to the hash table.
1419 *
1420 * @param pVCpu The cross context virtual CPU structure of the calling
1421 * thread.
1422 * @param pTbCache The cache to add it to.
1423 * @param pTb The translation block to add.
1424 */
1425static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1426{
1427 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1428
1429 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1430 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1431 if (LogIs12Enabled())
1432 {
1433 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1434 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1435 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1436 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1437 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1438 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1439 pTb->aRanges[idxRange].idxPhysPage == 0
1440 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1441 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1442 }
1443}
1444
1445
1446/**
1447 * Called by opcode verifier functions when they detect a problem.
1448 */
1449void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1450{
1451 /* Unless it's safe, we can only immediately free a threaded TB, as we will
1452 have more code left to execute in native TBs when fSafeToFree == false. */
1453 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1454 iemTbAllocatorFree(pVCpu, pTb);
1455 else
1456 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1457}
1458
1459
1460/*
1461 * Real code.
1462 */
1463
1464#ifdef LOG_ENABLED
1465/**
1466 * Logs the current instruction.
1467 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1468 * @param pszFunction The IEM function doing the execution.
1469 * @param idxInstr The instruction number in the block.
1470 */
1471static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1472{
1473# ifdef IN_RING3
1474 if (LogIs2Enabled())
1475 {
1476 char szInstr[256];
1477 uint32_t cbInstr = 0;
1478 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1479 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1480 szInstr, sizeof(szInstr), &cbInstr);
1481
1482 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1483 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1484 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1485 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1486 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1487 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1488 " %s\n"
1489 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1490 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1491 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1492 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1493 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1494 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1495 szInstr));
1496
1497 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1498 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1499 }
1500 else
1501# endif
1502 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1503 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1504}
1505#endif /* LOG_ENABLED */
1506
1507
1508#if 0
1509static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1510{
1511 RT_NOREF(pVM, pVCpu);
1512 return rcStrict;
1513}
1514#endif
1515
1516
1517/**
1518 * Initializes the decoder state when compiling TBs.
1519 *
1520 * This presumes that fExec has already been initialized.
1521 *
1522 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
1523 * may need to be applied to them as well.
1524 *
1525 * @param pVCpu The cross context virtual CPU structure of the calling
1526 * thread.
1527 * @param fReInit Clear for the first call for a TB, set for subsequent
1528 * calls from inside the compile loop where we can skip a
1529 * couple of things.
1530 * @param fExtraFlags The extra translation block flags when @a fReInit is
1531 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1532 * checked.
1533 */
1534DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1535{
1536 /* ASSUMES: That iemInitExec was already called and that anyone changing
1537 CPU state affecting the fExec bits since then will have updated fExec! */
1538 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1539 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1540
1541 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1542
1543 /* Decoder state: */
1544 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1545 pVCpu->iem.s.enmEffAddrMode = enmMode;
1546 if (enmMode != IEMMODE_64BIT)
1547 {
1548 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1549 pVCpu->iem.s.enmEffOpSize = enmMode;
1550 }
1551 else
1552 {
1553 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1554 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1555 }
1556 pVCpu->iem.s.fPrefixes = 0;
1557 pVCpu->iem.s.uRexReg = 0;
1558 pVCpu->iem.s.uRexB = 0;
1559 pVCpu->iem.s.uRexIndex = 0;
1560 pVCpu->iem.s.idxPrefix = 0;
1561 pVCpu->iem.s.uVex3rdReg = 0;
1562 pVCpu->iem.s.uVexLength = 0;
1563 pVCpu->iem.s.fEvexStuff = 0;
1564 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1565 pVCpu->iem.s.offModRm = 0;
1566 pVCpu->iem.s.iNextMapping = 0;
1567
1568 if (!fReInit)
1569 {
1570 pVCpu->iem.s.cActiveMappings = 0;
1571 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1572 pVCpu->iem.s.fEndTb = false;
1573 pVCpu->iem.s.fTbCheckOpcodes = false;
1574 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1575 pVCpu->iem.s.fTbCrossedPage = false;
1576 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1577 pVCpu->iem.s.fTbCurInstrIsSti = false;
1578 /* Force RF clearing and TF checking on first instruction in the block
1579 as we don't really know what came before and should assume the worst: */
1580 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1581 }
1582 else
1583 {
1584 Assert(pVCpu->iem.s.cActiveMappings == 0);
1585 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1586 Assert(pVCpu->iem.s.fEndTb == false);
1587 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1588 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1589 }
1590 pVCpu->iem.s.fTbCurInstr = 0;
1591
1592#ifdef DBGFTRACE_ENABLED
1593 switch (IEM_GET_CPU_MODE(pVCpu))
1594 {
1595 case IEMMODE_64BIT:
1596 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1597 break;
1598 case IEMMODE_32BIT:
1599 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1600 break;
1601 case IEMMODE_16BIT:
1602 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1603 break;
1604 }
1605#endif
1606}
1607
1608
1609/**
1610 * Initializes the opcode fetcher when starting the compilation.
1611 *
1612 * @param pVCpu The cross context virtual CPU structure of the calling
1613 * thread.
1614 */
1615DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1616{
1617 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1618#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1619 pVCpu->iem.s.offOpcode = 0;
1620#else
1621 RT_NOREF(pVCpu);
1622#endif
1623}
1624
1625
1626/**
1627 * Re-initializes the opcode fetcher between instructions while compiling.
1628 *
1629 * @param pVCpu The cross context virtual CPU structure of the calling
1630 * thread.
1631 */
1632DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1633{
1634 if (pVCpu->iem.s.pbInstrBuf)
1635 {
1636 uint64_t off = pVCpu->cpum.GstCtx.rip;
1637 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1638 off += pVCpu->cpum.GstCtx.cs.u64Base;
1639 off -= pVCpu->iem.s.uInstrBufPc;
1640 if (off < pVCpu->iem.s.cbInstrBufTotal)
1641 {
1642 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1643 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1644 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1645 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1646 else
1647 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1648 }
1649 else
1650 {
1651 pVCpu->iem.s.pbInstrBuf = NULL;
1652 pVCpu->iem.s.offInstrNextByte = 0;
1653 pVCpu->iem.s.offCurInstrStart = 0;
1654 pVCpu->iem.s.cbInstrBuf = 0;
1655 pVCpu->iem.s.cbInstrBufTotal = 0;
1656 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1657 }
1658 }
1659 else
1660 {
1661 pVCpu->iem.s.offInstrNextByte = 0;
1662 pVCpu->iem.s.offCurInstrStart = 0;
1663 pVCpu->iem.s.cbInstrBuf = 0;
1664 pVCpu->iem.s.cbInstrBufTotal = 0;
1665#ifdef VBOX_STRICT
1666 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1667#endif
1668 }
1669#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1670 pVCpu->iem.s.offOpcode = 0;
1671#endif
1672}
1673
1674#ifdef LOG_ENABLED
1675
1676/**
1677 * Inserts a NOP call.
1678 *
1679 * This is for debugging.
1680 *
1681 * @returns true on success, false if we're out of call entries.
1682 * @param pTb The translation block being compiled.
1683 */
1684bool iemThreadedCompileEmitNop(PIEMTB pTb)
1685{
1686 /* Emit the call. */
1687 uint32_t const idxCall = pTb->Thrd.cCalls;
1688 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1689 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1690 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1691 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
1692 pCall->idxInstr = pTb->cInstructions - 1;
1693 pCall->uUnused0 = 0;
1694 pCall->offOpcode = 0;
1695 pCall->cbOpcode = 0;
1696 pCall->idxRange = 0;
1697 pCall->auParams[0] = 0;
1698 pCall->auParams[1] = 0;
1699 pCall->auParams[2] = 0;
1700 return true;
1701}
1702
1703
1704/**
1705 * Called by iemThreadedCompile if cpu state logging is desired.
1706 *
1707 * @returns true on success, false if we're out of call entries.
1708 * @param pTb The translation block being compiled.
1709 */
1710bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
1711{
1712 /* Emit the call. */
1713 uint32_t const idxCall = pTb->Thrd.cCalls;
1714 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1715 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1716 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1717 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
1718 pCall->idxInstr = pTb->cInstructions - 1;
1719 pCall->uUnused0 = 0;
1720 pCall->offOpcode = 0;
1721 pCall->cbOpcode = 0;
1722 pCall->idxRange = 0;
1723 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
1724 pCall->auParams[1] = 0;
1725 pCall->auParams[2] = 0;
1726 return true;
1727}
1728
1729#endif /* LOG_ENABLED */
1730
1731DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1732{
1733 switch (cbInstr)
1734 {
1735 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1736 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1737 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1738 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1739 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1740 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1741 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1742 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1743 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1744 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1745 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1746 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1747 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1748 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1749 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1750 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1751 }
1752}
1753
1754
1755/**
1756 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1757 *
1758 * - CS LIM check required.
1759 * - Must recheck opcode bytes.
1760 * - Previous instruction branched.
1761 * - TLB load detected, probably due to page crossing.
1762 *
1763 * @returns true if everything went well, false if we're out of space in the TB
1764 * (e.g. opcode ranges) or needs to start doing CS.LIM checks.
1765 * @param pVCpu The cross context virtual CPU structure of the calling
1766 * thread.
1767 * @param pTb The translation block being compiled.
1768 */
1769bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1770{
1771 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1772 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1773#if 0
1774 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1775 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1776#endif
1777
1778 /*
1779 * If we're not in 64-bit mode and not already checking CS.LIM we need to
1780 * see if it's needed to start checking.
1781 */
1782 bool fConsiderCsLimChecking;
1783 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1784 if ( fMode == IEM_F_MODE_X86_64BIT
1785 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1786 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1787 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1788 fConsiderCsLimChecking = false; /* already enabled or not needed */
1789 else
1790 {
1791 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1792 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1793 fConsiderCsLimChecking = true; /* likely */
1794 else
1795 {
1796 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1797 return false;
1798 }
1799 }
1800
1801 /*
1802 * Prepare the call now, even before we know if we can accept the instruction in this TB.
1803 * This allows us to amend parameters w/o making every case suffer.
1804 */
1805 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1806 uint16_t const offOpcode = pTb->cbOpcodes;
1807 uint8_t idxRange = pTb->cRanges - 1;
1808
1809 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1810 pCall->idxInstr = pTb->cInstructions;
1811 pCall->offOpcode = offOpcode;
1812 pCall->idxRange = idxRange;
1813 pCall->cbOpcode = cbInstr;
1814 pCall->auParams[0] = (uint32_t)cbInstr
1815 | (uint32_t)(pVCpu->iem.s.fExec << 8) /* liveness: Enough of fExec for IEM_F_MODE_X86_IS_FLAT. */
1816 /* The upper dword is sometimes used for cbStartPage. */;
1817 pCall->auParams[1] = idxRange;
1818 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
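    /* Note: the threaded dispatcher in iemTbExec below hands these parameters
       straight to the selected builtin, roughly:
           g_apfnIemThreadedFunctions[pCall->enmFunction](pVCpu, pCall->auParams[0],
                                                          pCall->auParams[1], pCall->auParams[2]);
       so all three auParams entries are initialized here even when a given
       builtin ignores some of them. */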
1819
1820/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1821 * gotten onto. If we do, stop */
1822
1823 /*
1824 * Case 1: We've branched (RIP changed).
1825 *
1826 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1827 * Req: 1 extra range, no extra phys.
1828 *
1829 * Sub-case 1b: Different page but no page boundary crossing, so a TLB load
1830 * is necessary (fTbCrossedPage is true).
1831 * Req: 1 extra range, probably 1 extra phys page entry.
1832 *
1833 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1834 * but in addition we cross into the following page and require
1835 * another TLB load.
1836 * Req: 2 extra ranges, probably 2 extra phys page entries.
1837 *
1838 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1839 * the following page (thus fTbCrossedPage is true).
1840 * Req: 2 extra ranges, probably 1 extra phys page entry.
1841 *
1842 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1843 * it may trigger "spuriously" from the CPU's point of view because of
1844 * physical page changes that invalidate the physical TLB and trigger a
1845 * call to the function. In theory this shouldn't be a big deal, just a
1846 * bit of a performance loss as we'll pick the LoadingTlb variants.
1847 *
1848 * Note! We do not currently optimize branching to the next instruction (sorry
1849 * 32-bit PIC code). We could maybe do that in the branching code that
1850 * sets (or not) fTbBranched.
1851 */
1852 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1853 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1854 * code. This'll require filtering out far jmps and calls, as they
1855 * load CS which should technically be considered indirect since the
1856 * GDT/LDT entry's base address can be modified independently from
1857 * the code. */
1858 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
1859 {
1860 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1861 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1862 {
1863 /* 1a + 1b - instruction fully within the branched to page. */
1864 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1865 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1866
1867 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1868 {
1869 /* Check that we've got a free range. */
1870 idxRange += 1;
1871 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1872 { /* likely */ }
1873 else
1874 {
1875 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1876 return false;
1877 }
1878 pCall->idxRange = idxRange;
1879 pCall->auParams[1] = idxRange;
1880 pCall->auParams[2] = 0;
1881
1882 /* Check that we've got a free page slot. */
1883 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1884 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1885 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1886 pTb->aRanges[idxRange].idxPhysPage = 0;
1887 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1888 || pTb->aGCPhysPages[0] == GCPhysNew)
1889 {
1890 pTb->aGCPhysPages[0] = GCPhysNew;
1891 pTb->aRanges[idxRange].idxPhysPage = 1;
1892 }
1893 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1894 || pTb->aGCPhysPages[1] == GCPhysNew)
1895 {
1896 pTb->aGCPhysPages[1] = GCPhysNew;
1897 pTb->aRanges[idxRange].idxPhysPage = 2;
1898 }
1899 else
1900 {
1901 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1902 return false;
1903 }
1904
1905 /* Finish setting up the new range. */
1906 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1907 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1908 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1909 pTb->aRanges[idxRange].u2Unused = 0;
1910 pTb->cRanges++;
1911 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
1912 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
1913 pTb->aRanges[idxRange].offOpcodes));
1914 }
1915 else
1916 {
1917 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1918 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1919 }
1920
1921 /* Determine which function we need to load & check.
1922 Note! For jumps to a new page, we'll set both fTbBranched and
1923 fTbCrossedPage to avoid unnecessary TLB work for intra-page
1924 branching. */
1925 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1926 || pVCpu->iem.s.fTbCrossedPage)
1927 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1928 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1929 : !fConsiderCsLimChecking
1930 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1931 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1932 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1933 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1934 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1935 : !fConsiderCsLimChecking
1936 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1937 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1938 else
1939 {
1940 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1941 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1942 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1943 : !fConsiderCsLimChecking
1944 ? kIemThreadedFunc_BltIn_CheckOpcodes
1945 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1946 }
1947 }
1948 else
1949 {
1950 /* 1c + 1d - instruction crosses pages. */
1951 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1952 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1953
1954 /* Lazy bird: Check that this isn't case 1c, since we've already
1955 loaded the first physical address. End the TB and
1956 make it a case 2b instead.
1957
1958 Hmm. Too much bother to detect, so just do the same
1959 with case 1d as well. */
1960#if 0 /** @todo get back to this later when we've got the actual branch code in
1961 * place. */
1962 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1963
1964 /* Check that we've got two free ranges. */
1965 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1966 { /* likely */ }
1967 else
1968 return false;
1969 idxRange += 1;
1970 pCall->idxRange = idxRange;
1971 pCall->auParams[1] = idxRange;
1972 pCall->auParams[2] = 0;
1973
1974 /* ... */
1975
1976#else
1977 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1978 return false;
1979#endif
1980 }
1981 }
1982
1983 /*
1984 * Case 2: Page crossing.
1985 *
1986 * Sub-case 2a: The instruction starts on the first byte in the next page.
1987 *
1988 * Sub-case 2b: The instruction has opcode bytes in both the current and
1989 * following page.
1990 *
1991 * Both cases require a new range table entry and probably a new physical
1992 * page entry. The difference is in which functions to emit and whether to
1993 * add bytes to the current range.
1994 */
1995 else if (pVCpu->iem.s.fTbCrossedPage)
1996 {
1997 /* Check that we've got a free range. */
1998 idxRange += 1;
1999 if (idxRange < RT_ELEMENTS(pTb->aRanges))
2000 { /* likely */ }
2001 else
2002 {
2003 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2004 return false;
2005 }
2006
2007 /* Check that we've got a free page slot. */
2008 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2009 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2010 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2011 pTb->aRanges[idxRange].idxPhysPage = 0;
2012 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2013 || pTb->aGCPhysPages[0] == GCPhysNew)
2014 {
2015 pTb->aGCPhysPages[0] = GCPhysNew;
2016 pTb->aRanges[idxRange].idxPhysPage = 1;
2017 }
2018 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2019 || pTb->aGCPhysPages[1] == GCPhysNew)
2020 {
2021 pTb->aGCPhysPages[1] = GCPhysNew;
2022 pTb->aRanges[idxRange].idxPhysPage = 2;
2023 }
2024 else
2025 {
2026 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2027 return false;
2028 }
2029
2030 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2031 {
2032 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2033 pCall->idxRange = idxRange;
2034 pCall->auParams[1] = idxRange;
2035 pCall->auParams[2] = 0;
2036
2037 /* Finish setting up the new range. */
2038 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2039 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2040 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2041 pTb->aRanges[idxRange].u2Unused = 0;
2042 pTb->cRanges++;
2043 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2044 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2045 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2046
2047 /* Determine which function we need to load & check. */
2048 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2049 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2050 : !fConsiderCsLimChecking
2051 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2052 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2053 }
2054 else
2055 {
2056 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2057 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2058 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2059 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2060
2061 /* We're good. Split the instruction over the old and new range table entries. */
2062 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2063
2064 pTb->aRanges[idxRange].offPhysPage = 0;
2065 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2066 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2067 pTb->aRanges[idxRange].u2Unused = 0;
2068 pTb->cRanges++;
2069 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2070 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2071 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2072
2073 /* Determine which function we need to load & check. */
2074 if (pVCpu->iem.s.fTbCheckOpcodes)
2075 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2076 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2077 : !fConsiderCsLimChecking
2078 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2079 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2080 else
2081 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2082 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2083 : !fConsiderCsLimChecking
2084 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2085 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2086 }
2087 }
2088
2089 /*
2090 * Regular case: No new range required.
2091 */
2092 else
2093 {
2094 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2095 if (pVCpu->iem.s.fTbCheckOpcodes)
2096 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2097 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2098 : kIemThreadedFunc_BltIn_CheckOpcodes;
2099 else
2100 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2101
2102 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2103 pTb->cbOpcodes = offOpcode + cbInstr;
2104 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2105 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2106 }
2107
2108 /*
2109 * Commit the call.
2110 */
2111 pTb->Thrd.cCalls++;
2112
2113 /*
2114 * Clear state.
2115 */
2116 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2117 pVCpu->iem.s.fTbCrossedPage = false;
2118 pVCpu->iem.s.fTbCheckOpcodes = false;
2119
2120 /*
2121 * Copy opcode bytes.
2122 */
2123 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2124 pTb->cbOpcodes = offOpcode + cbInstr;
2125 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2126
2127 return true;
2128}
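/* A quick sketch of the page indexing used above, for reference: an
   aRanges[].idxPhysPage of 0 refers to the page containing pTb->GCPhysPc,
   while 1 and 2 refer to pTb->aGCPhysPages[0] and pTb->aGCPhysPages[1]
   respectively, so a single threaded TB covers at most three distinct guest
   physical pages; when a fourth would be needed the function simply returns
   false. */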
2129
2130
2131/**
2132 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2133 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2134 *
2135 * @returns true if anything is pending, false if not.
2136 * @param pVCpu The cross context virtual CPU structure of the calling
2137 * thread.
2138 */
2139DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2140{
2141 uint64_t fCpu = pVCpu->fLocalForcedActions;
2142 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2143#if 1
2144 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2145 if (RT_LIKELY( !fCpu
2146 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2147 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2148 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2149 return false;
2150 return true;
2151#else
2152 return false;
2153#endif
2154
2155}
2156
2157
2158/**
2159 * Called by iemThreadedCompile when a block requires a mode check.
2160 *
2161 * @returns true if we should continue, false if we're out of call entries.
2162 * @param pVCpu The cross context virtual CPU structure of the calling
2163 * thread.
2164 * @param pTb The translation block being compiled.
2165 */
2166static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2167{
2168 /* Emit the call. */
2169 uint32_t const idxCall = pTb->Thrd.cCalls;
2170 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2171 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2172 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2173 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2174 pCall->idxInstr = pTb->cInstructions - 1;
2175 pCall->uUnused0 = 0;
2176 pCall->offOpcode = 0;
2177 pCall->cbOpcode = 0;
2178 pCall->idxRange = 0;
2179 pCall->auParams[0] = pVCpu->iem.s.fExec;
2180 pCall->auParams[1] = 0;
2181 pCall->auParams[2] = 0;
2182 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2183 return true;
2184}
2185
2186
2187/**
2188 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2189 * set.
2190 *
2191 * @returns true if we should continue, false if an IRQ is deliverable or a
2192 * relevant force flag is pending.
2193 * @param pVCpu The cross context virtual CPU structure of the calling
2194 * thread.
2195 * @param pTb The translation block being compiled.
2196 * @sa iemThreadedCompileCheckIrq
2197 */
2198bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2199{
2200 /*
2201 * Skip this if we've already emitted a call after the previous instruction,
2202 * or if it's the first call, as we're always checking FFs between blocks.
2203 */
2204 uint32_t const idxCall = pTb->Thrd.cCalls;
2205 if ( idxCall > 0
2206 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2207 {
2208 /* Emit the call. */
2209 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2210 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2211 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2212 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2213 pCall->idxInstr = pTb->cInstructions;
2214 pCall->uUnused0 = 0;
2215 pCall->offOpcode = 0;
2216 pCall->cbOpcode = 0;
2217 pCall->idxRange = 0;
2218 pCall->auParams[0] = 0;
2219 pCall->auParams[1] = 0;
2220 pCall->auParams[2] = 0;
2221 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2222
2223 /* Reset the IRQ check value. */
2224 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2225
2226 /*
2227 * Check for deliverable IRQs and pending force flags.
2228 */
2229 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2230 }
2231 return true; /* continue */
2232}
2233
2234
2235/**
2236 * Emits an IRQ check call and checks for pending IRQs.
2237 *
2238 * @returns true if we should continue, false if an IRQ is deliverable or a
2239 * relevant force flag is pending.
2240 * @param pVCpu The cross context virtual CPU structure of the calling
2241 * thread.
2242 * @param pTb The translation block.
2243 * @sa iemThreadedCompileBeginEmitCallsComplications
2244 */
2245static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2246{
2247 /* Check again in a little bit, unless it is immediately following an STI
2248 in which case we *must* check immediately after the next instruction
2249 as well in case it's executed with interrupt inhibition. We could
2250 otherwise miss the interrupt window. See the irq2 wait2 variant in
2251 bs3-timers-1 which is doing sti + sti + cli. */
2252 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2253 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2254 else
2255 {
2256 pVCpu->iem.s.fTbCurInstrIsSti = false;
2257 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2258 }
2259 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2260
2261 /*
2262 * Emit the call.
2263 */
2264 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2265 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2266 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2267 pCall->idxInstr = pTb->cInstructions;
2268 pCall->uUnused0 = 0;
2269 pCall->offOpcode = 0;
2270 pCall->cbOpcode = 0;
2271 pCall->idxRange = 0;
2272 pCall->auParams[0] = 0;
2273 pCall->auParams[1] = 0;
2274 pCall->auParams[2] = 0;
2275
2276 /*
2277 * Check for deliverable IRQs and pending force flags.
2278 */
2279 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2280}
2281
2282
2283/**
2284 * Compiles a new TB and executes it.
2285 *
2286 * We combine compilation and execution here as it makes for simpler code flow
2287 * in the main loop and allows interpreting while compiling if we want to
2288 * explore that option.
2289 *
2290 * @returns Strict VBox status code.
2291 * @param pVM The cross context virtual machine structure.
2292 * @param pVCpu The cross context virtual CPU structure of the calling
2293 * thread.
2294 * @param GCPhysPc The physical address corresponding to the current
2295 * RIP+CS.BASE.
2296 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2297 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2298 */
2299static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2300{
2301 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2302 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2303
2304 /*
2305 * Get the TB we use for the recompiling. This is a maxed-out TB that
2306 * we'll make a more efficient copy of when we're done compiling.
2307 */
2308 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2309 if (pTb)
2310 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2311 else
2312 {
2313 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2314 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2315 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2316 }
2317
2318 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2319 functions may get at it. */
2320 pVCpu->iem.s.pCurTbR3 = pTb;
2321
2322#if 0
2323 /* Make sure the CheckIrq condition matches the one in EM. */
2324 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2325 const uint32_t cZeroCalls = 1;
2326#else
2327 const uint32_t cZeroCalls = 0;
2328#endif
2329
2330 /*
2331 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2332 */
2333 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2334 iemThreadedCompileInitOpcodeFetching(pVCpu);
2335 VBOXSTRICTRC rcStrict;
2336 for (;;)
2337 {
2338 /* Process the next instruction. */
2339#ifdef LOG_ENABLED
2340 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2341 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2342 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2343 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2344#endif
2345 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2346 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2347
2348 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2349#if 0
2350 for (unsigned i = cCallsPrev; i < pTb->Thrd.cCalls; i++)
2351 Log8(("-> %#u/%u - %d %s\n", i, pTb->Thrd.paCalls[i].idxInstr, pTb->Thrd.paCalls[i].enmFunction,
2352 g_apszIemThreadedFunctions[pTb->Thrd.paCalls[i].enmFunction]));
2353#endif
2354 if ( rcStrict == VINF_SUCCESS
2355 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2356 && !pVCpu->iem.s.fEndTb)
2357 {
2358 Assert(pTb->Thrd.cCalls > cCallsPrev);
2359 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2360
2361 pVCpu->iem.s.cInstructions++;
2362
2363 /* Check for mode change _after_ certain CIMPL calls, so check that
2364 we continue executing with the same mode value. */
2365 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2366 { /* probable */ }
2367 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2368 { /* extremely likely */ }
2369 else
2370 break;
2371
2372#if defined(LOG_ENABLED) && 0 /* for debugging */
2373 //iemThreadedCompileEmitNop(pTb);
2374 iemThreadedCompileEmitLogCpuState(pTb);
2375#endif
2376 }
2377 else
2378 {
2379 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2380 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2381 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2382 rcStrict = VINF_SUCCESS;
2383
2384 if (pTb->Thrd.cCalls > cZeroCalls)
2385 {
2386 if (cCallsPrev != pTb->Thrd.cCalls)
2387 pVCpu->iem.s.cInstructions++;
2388 break;
2389 }
2390
2391 pVCpu->iem.s.pCurTbR3 = NULL;
2392 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2393 }
2394
2395 /* Check for IRQs? */
2396 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2397 pVCpu->iem.s.cInstrTillIrqCheck--;
2398 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2399 break;
2400
2401 /* Still space in the TB? */
2402 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2403 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
2404 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2405 else
2406 {
2407 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2408 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2409 break;
2410 }
2411 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2412 }
2413
2414 /*
2415 * Duplicate the TB into a completed one and link it.
2416 */
2417 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2418 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2419
2420 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2421
2422#ifdef IEM_COMPILE_ONLY_MODE
2423 /*
2424 * Execute the translation block.
2425 */
2426#endif
2427
2428 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2429}
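/* For orientation: IEMExecRecompiler below only calls this on a TB cache miss,
   roughly as
       pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
       rcStrict = pTb ? iemTbExec(pVCpu, pTb)
                      : iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
   so the TB added above is picked up by the lookup on the next iteration. */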
2430
2431
2432
2433/*********************************************************************************************************************************
2434* Recompiled Execution Core *
2435*********************************************************************************************************************************/
2436
2437
2438/**
2439 * Executes a translation block.
2440 *
2441 * @returns Strict VBox status code.
2442 * @param pVCpu The cross context virtual CPU structure of the calling
2443 * thread.
2444 * @param pTb The translation block to execute.
2445 */
2446static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2447{
2448 /*
2449 * Check the opcodes in the first page before starting execution.
2450 */
2451/** @todo this test should take IEMTB_F_CS_LIM_CHECKS into account or something.
2452 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2453 * altering the CS limit such that only one or two of the instruction bytes are valid.
2454 * Since it's a CS.LIM problem, the pbInstrBuf is good for the full length, and
2455 * the test succeeds if skipped, but we assert in debug builds. */
2456 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2457 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2458 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2459 { /* likely */ }
2460 else
2461 {
2462 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2463 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2464 return VINF_SUCCESS;
2465 }
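    /* (Returning VINF_SUCCESS after retiring the TB sends the caller,
        IEMExecRecompiler, around its loop again, where the cache lookup for
        this PC should now miss and trigger a fresh iemThreadedCompile.) */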
2466
2467 /*
2468 * Set the current TB so CIMPL functions may get at it.
2469 */
2470 pVCpu->iem.s.pCurTbR3 = pTb;
2471
2472 /*
2473 * Execute the block.
2474 */
2475#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2476 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2477 {
2478 pVCpu->iem.s.cTbExecNative++;
2479# ifdef LOG_ENABLED
2480 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2481# endif
2482# ifdef RT_ARCH_AMD64
2483 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2484# else
2485 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2486# endif
2487# ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2488 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2489# endif
2490 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2491 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2492 { /* likely */ }
2493 else
2494 {
2495 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2496 pVCpu->iem.s.pCurTbR3 = NULL;
2497 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2498
2499 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2500 only to break out of TB execution early. */
2501 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2502 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2503
2504 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2505 and be converted to VINF_SUCCESS or whatever is appropriate. */
2506 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2507 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu, VINF_SUCCESS));
2508
2509 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2510 }
2511 }
2512 else
2513#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2514 {
2515 /*
2516 * The threaded execution loop.
2517 */
2518 pVCpu->iem.s.cTbExecThreaded++;
2519#ifdef LOG_ENABLED
2520 uint64_t uRipPrev = UINT64_MAX;
2521#endif
2522 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2523 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2524 while (cCallsLeft-- > 0)
2525 {
2526#ifdef LOG_ENABLED
2527 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2528 {
2529 uRipPrev = pVCpu->cpum.GstCtx.rip;
2530 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2531 }
2532 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2533 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2534 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2535#endif
2536#ifdef VBOX_WITH_STATISTICS
2537 AssertCompile(RT_ELEMENTS(pVCpu->iem.s.acThreadedFuncStats) >= kIemThreadedFunc_End);
2538 pVCpu->iem.s.acThreadedFuncStats[pCallEntry->enmFunction] += 1;
2539#endif
2540 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2541 pCallEntry->auParams[0],
2542 pCallEntry->auParams[1],
2543 pCallEntry->auParams[2]);
2544 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2545 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2546 pCallEntry++;
2547 else
2548 {
2549 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2550 pVCpu->iem.s.pCurTbR3 = NULL;
2551 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2552
2553 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2554 only to break out of TB execution early. */
2555 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2556 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2557 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2558 }
2559 }
2560 }
2561
2562 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2563 pVCpu->iem.s.pCurTbR3 = NULL;
2564 return VINF_SUCCESS;
2565}
2566
2567
2568/**
2569 * This is called when the PC doesn't match the current pbInstrBuf.
2570 *
2571 * Upon return, we're ready for opcode fetching. But please note that
2572 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2573 * MMIO or unassigned).
2574 */
2575static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2576{
2577 pVCpu->iem.s.pbInstrBuf = NULL;
2578 pVCpu->iem.s.offCurInstrStart = 0;
2579 pVCpu->iem.s.offInstrNextByte = 0;
2580 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2581 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2582}
2583
2584
2585/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2586DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2587{
2588 /*
2589 * Set uCurTbStartPc to RIP and calc the effective PC.
2590 */
2591 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2592 pVCpu->iem.s.uCurTbStartPc = uPc;
2593 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2594 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2595
2596 /*
2597 * Advance within the current buffer (PAGE) when possible.
2598 */
2599 if (pVCpu->iem.s.pbInstrBuf)
2600 {
2601 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2602 if (off < pVCpu->iem.s.cbInstrBufTotal)
2603 {
2604 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2605 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2606 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2607 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2608 else
2609 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2610
2611 return pVCpu->iem.s.GCPhysInstrBuf + off;
2612 }
2613 }
2614 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2615}
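/* Example with made-up numbers: flat 32-bit code, CS.u64Base=0, RIP=0x40f123,
   and an instruction buffer covering the page at uInstrBufPc=0x40f000 /
   GCPhysInstrBuf=0x0010f000 with cbInstrBufTotal=0x1000 gives off=0x123, so the
   fast path above returns 0x0010f123 without consulting the TLB; only when off
   falls outside the buffer do we take iemGetPcWithPhysAndCodeMissed. */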
2616
2617
2618/**
2619 * Determines the extra IEMTB_F_XXX flags.
2620 *
2621 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2622 * IEMTB_F_CS_LIM_CHECKS (or zero).
2623 * @param pVCpu The cross context virtual CPU structure of the calling
2624 * thread.
2625 */
2626DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2627{
2628 uint32_t fRet = 0;
2629
2630 /*
2631 * Determine the inhibit bits.
2632 */
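    /* Note: testing IEMTB_F_INHIBIT_SHADOW/NMI directly against rflags.uBoth
       presumably relies on those TB flag bits matching the bit positions of the
       inhibit state kept in uBoth (an assumption here, not re-verified in this
       file). */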
2633 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2634 { /* typical */ }
2635 else
2636 {
2637 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2638 fRet |= IEMTB_F_INHIBIT_SHADOW;
2639 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2640 fRet |= IEMTB_F_INHIBIT_NMI;
2641 }
2642
2643 /*
2644 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2645 * likely to go invalid before the end of the translation block.
2646 */
2647 if (IEM_IS_64BIT_CODE(pVCpu))
2648 return fRet;
2649
2650 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2651 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2652 return fRet;
2653 return fRet | IEMTB_F_CS_LIM_CHECKS;
2654}
2655
2656
2657VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2658{
2659 /*
2660 * See if there is an interrupt pending in TRPM, inject it if we can.
2661 */
2662 if (!TRPMHasTrap(pVCpu))
2663 { /* likely */ }
2664 else
2665 {
2666 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2667 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2668 { /*likely */ }
2669 else
2670 return rcStrict;
2671 }
2672
2673 /*
2674 * Init the execution environment.
2675 */
2676#if 1 /** @todo this seems like a good idea, however if we ever share memory
2677 * directly with other threads on the host, it isn't necessarily... */
2678 if (pVM->cCpus == 1)
2679 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
2680 else
2681#endif
2682 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2683 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2684 { }
2685 else
2686 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2687
2688 /*
2689 * Run-loop.
2690 *
2691 * If we're using setjmp/longjmp we combine all the catching here to avoid
2692 * having to call setjmp for each block we're executing.
2693 */
2694 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2695 for (;;)
2696 {
2697 PIEMTB pTb = NULL;
2698 VBOXSTRICTRC rcStrict;
2699 IEM_TRY_SETJMP(pVCpu, rcStrict)
2700 {
2701 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
2702 for (uint32_t iIterations = 0; ; iIterations++)
2703 {
2704 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2705 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2706 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2707
2708 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2709 if (pTb)
2710 rcStrict = iemTbExec(pVCpu, pTb);
2711 else
2712 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2713 if (rcStrict == VINF_SUCCESS)
2714 {
2715 Assert(pVCpu->iem.s.cActiveMappings == 0);
2716
2717 uint64_t fCpu = pVCpu->fLocalForcedActions;
2718 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2719 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2720 | VMCPU_FF_TLB_FLUSH
2721 | VMCPU_FF_UNHALT );
2722 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2723 if (RT_LIKELY( ( !fCpu
2724 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2725 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2726 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2727 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2728 {
2729 if (RT_LIKELY( (iIterations & cPollRate) != 0
2730 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2731 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2732 else
2733 return VINF_SUCCESS;
2734 }
2735 else
2736 return VINF_SUCCESS;
2737 }
2738 else
2739 return rcStrict;
2740 }
2741 }
2742 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2743 {
2744 pVCpu->iem.s.cLongJumps++;
2745#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER_LONGJMP
2746 pVCpu->iem.s.pvTbFramePointerR3 = NULL;
2747#endif
2748 if (pVCpu->iem.s.cActiveMappings > 0)
2749 iemMemRollback(pVCpu);
2750
2751#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2752 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
2753 {
2754 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
2755 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
2756 }
2757#endif
2758
2759#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2760 /* If pTb isn't NULL we're in iemTbExec. */
2761 if (!pTb)
2762 {
2763 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2764 pTb = pVCpu->iem.s.pCurTbR3;
2765 if (pTb)
2766 {
2767 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
2768 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
2769 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
2770 }
2771 }
2772#endif
2773 return rcStrict;
2774 }
2775 IEM_CATCH_LONGJMP_END(pVCpu);
2776 }
2777}
2778