VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@ 102572

Last change on this file since 102572 was 102557, checked in by vboxsync, 12 months ago

VMM/IEM: Try deal with running out of executable memory. bugref:10371

1/* $Id: IEMAllThrdRecompiler.cpp 102557 2023-12-08 22:13:00Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
98 * Narrow down the configs here to avoid wasting time on unused ones.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
120/**
121 * Calculates the effective address of a ModR/M memory operand, extended version
122 * for use in the recompilers.
123 *
124 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
125 *
126 * May longjmp on internal error.
127 *
128 * @return The effective address.
129 * @param pVCpu The cross context virtual CPU structure of the calling thread.
130 * @param bRm The ModRM byte.
131 * @param cbImmAndRspOffset - First byte: The size of any immediate
132 * following the effective address opcode bytes
133 * (only for RIP relative addressing).
134 * - Second byte: RSP displacement (for POP [ESP]).
135 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
136 * SIB byte (bits 39:32).
137 *
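 * @remarks Purely illustrative example of the packing (the values are
 *          assumptions, not taken from any particular caller): for a
 *          RIP-relative "mov dword [mem], imm32" the caller would put 4 in the
 *          low byte of cbImmAndRspOffset (the imm32 trailing the addressing
 *          bytes) so the RIP-relative base accounts for it, and on return
 *          bits 31:0 of *puInfo hold the disp32 while bits 39:32 stay zero
 *          because no SIB byte was fetched.
 *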
138 * @note This must be defined in a source file with matching
139 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
140 * or implemented differently...
141 */
142RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
143{
144 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
145# define SET_SS_DEF() \
146 do \
147 { \
148 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
149 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
150 } while (0)
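/* Note: x86 defaults to the SS segment for BP/EBP/ESP based addressing, which
   is what the helper above implements - it only forces SS when no segment
   override prefix was decoded. */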
151
152 if (!IEM_IS_64BIT_CODE(pVCpu))
153 {
154/** @todo Check the effective address size crap! */
155 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
156 {
157 uint16_t u16EffAddr;
158
159 /* Handle the disp16 form with no registers first. */
160 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
161 {
162 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
163 *puInfo = u16EffAddr;
164 }
165 else
166 {
167 /* Get the displacement. */
168 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
169 {
170 case 0: u16EffAddr = 0; break;
171 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
172 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
173 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
174 }
175 *puInfo = u16EffAddr;
176
177 /* Add the base and index registers to the disp. */
178 switch (bRm & X86_MODRM_RM_MASK)
179 {
180 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
181 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
182 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
183 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
184 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
185 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
186 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
187 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
188 }
189 }
190
191 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
192 return u16EffAddr;
193 }
194
195 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
196 uint32_t u32EffAddr;
197 uint64_t uInfo;
198
199 /* Handle the disp32 form with no registers first. */
200 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
201 {
202 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
203 uInfo = u32EffAddr;
204 }
205 else
206 {
207 /* Get the register (or SIB) value. */
208 uInfo = 0;
209 switch ((bRm & X86_MODRM_RM_MASK))
210 {
211 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
212 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
213 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
214 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
215 case 4: /* SIB */
216 {
217 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
218 uInfo = (uint64_t)bSib << 32;
219
220 /* Get the index and scale it. */
221 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
222 {
223 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
224 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
225 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
226 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
227 case 4: u32EffAddr = 0; /*none */ break;
228 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
229 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
230 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
231 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
232 }
233 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
234
235 /* add base */
236 switch (bSib & X86_SIB_BASE_MASK)
237 {
238 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
239 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
240 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
241 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
242 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
243 case 5:
244 if ((bRm & X86_MODRM_MOD_MASK) != 0)
245 {
246 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
247 SET_SS_DEF();
248 }
249 else
250 {
251 uint32_t u32Disp;
252 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
253 u32EffAddr += u32Disp;
254 uInfo |= u32Disp;
255 }
256 break;
257 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
258 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
259 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
260 }
261 break;
262 }
263 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
264 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
265 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
266 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
267 }
268
269 /* Get and add the displacement. */
270 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
271 {
272 case 0:
273 break;
274 case 1:
275 {
276 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
277 u32EffAddr += i8Disp;
278 uInfo |= (uint32_t)(int32_t)i8Disp;
279 break;
280 }
281 case 2:
282 {
283 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
284 u32EffAddr += u32Disp;
285 uInfo |= u32Disp;
286 break;
287 }
288 default:
289 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
290 }
291 }
292
293 *puInfo = uInfo;
294 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
295 return u32EffAddr;
296 }
297
298 uint64_t u64EffAddr;
299 uint64_t uInfo;
300
301 /* Handle the rip+disp32 form with no registers first. */
302 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
303 {
304 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
305 uInfo = (uint32_t)u64EffAddr;
306 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
307 }
308 else
309 {
310 /* Get the register (or SIB) value. */
311 uInfo = 0;
312 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
313 {
314 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
315 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
316 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
317 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
318 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
319 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
320 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
321 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
322 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
323 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
324 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
325 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
326 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
327 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
328 /* SIB */
329 case 4:
330 case 12:
331 {
332 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
333 uInfo = (uint64_t)bSib << 32;
334
335 /* Get the index and scale it. */
336 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
337 {
338 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
339 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
340 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
341 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
342 case 4: u64EffAddr = 0; /*none */ break;
343 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
344 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
345 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
346 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
347 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
348 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
349 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
350 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
351 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
352 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
353 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
354 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
355 }
356 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
357
358 /* add base */
359 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
360 {
361 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
362 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
363 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
364 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
365 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
366 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
367 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
368 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
369 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
370 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
371 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
372 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
373 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
374 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
375 /* complicated encodings */
376 case 5:
377 case 13:
378 if ((bRm & X86_MODRM_MOD_MASK) != 0)
379 {
380 if (!pVCpu->iem.s.uRexB)
381 {
382 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
383 SET_SS_DEF();
384 }
385 else
386 u64EffAddr += pVCpu->cpum.GstCtx.r13;
387 }
388 else
389 {
390 uint32_t u32Disp;
391 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
392 u64EffAddr += (int32_t)u32Disp;
393 uInfo |= u32Disp;
394 }
395 break;
396 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
397 }
398 break;
399 }
400 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
401 }
402
403 /* Get and add the displacement. */
404 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
405 {
406 case 0:
407 break;
408 case 1:
409 {
410 int8_t i8Disp;
411 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
412 u64EffAddr += i8Disp;
413 uInfo |= (uint32_t)(int32_t)i8Disp;
414 break;
415 }
416 case 2:
417 {
418 uint32_t u32Disp;
419 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
420 u64EffAddr += (int32_t)u32Disp;
421 uInfo |= u32Disp;
422 break;
423 }
424 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
425 }
426
427 }
428
429 *puInfo = uInfo;
430 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
431 {
432 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
433 return u64EffAddr;
434 }
435 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
436 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
437 return u64EffAddr & UINT32_MAX;
438}
439
440
441/*********************************************************************************************************************************
442* Translation Block Cache. *
443*********************************************************************************************************************************/
444
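/* Note: each IEMTBCACHE::apHash entry is a tagged pointer: IEMTBCACHE_PTR_MAKE()
   packs the head TB pointer together with the collision chain length (presumably
   in the low pointer bits, cf. the AssertCompile on sizeof(IEMTB) and
   IEMTBCACHE_PTR_COUNT_MASK in iemTbInit), and IEMTBCACHE_PTR_GET_TB() /
   IEMTBCACHE_PTR_GET_COUNT() split them apart again.  The chain length reaching
   IEMTBCACHE_PTR_MAX_COUNT is what triggers iemTbCacheAddWithPruning(). */
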
445/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
446static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
447{
448 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
449 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
450 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
451 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
452 if (cMsSinceUse1 != cMsSinceUse2)
453 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
454 if (pTb1->cUsed != pTb2->cUsed)
455 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
456 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
457 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
458 return 0;
459}
460
461#ifdef VBOX_STRICT
462/**
463 * Assertion helper that checks a collisions list count.
464 */
465static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
466{
467 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
468 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
469 while (pTb)
470 {
471 pTb = pTb->pNext;
472 cLeft--;
473 }
474 AssertMsg(cLeft == 0,
475 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
476 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
477}
478#endif
479
480
481DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
482{
483 STAM_PROFILE_START(&pTbCache->StatPrune, a);
484
485 /*
486 * First convert the collision list to an array.
487 */
488 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
489 uintptr_t cInserted = 0;
490 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
491
492 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
493
494 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
495 {
496 apSortedTbs[cInserted++] = pTbCollision;
497 pTbCollision = pTbCollision->pNext;
498 }
499
500 /* Free any excess (impossible). */
501 if (RT_LIKELY(!pTbCollision))
502 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
503 else
504 do
505 {
506 PIEMTB pTbToFree = pTbCollision;
507 pTbCollision = pTbToFree->pNext;
508 iemTbAllocatorFree(pVCpu, pTbToFree);
509 } while (pTbCollision);
510
511 /*
512 * Sort it by most recently used and usage count.
513 */
514 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
515
516 /* We keep half the list for now. Perhaps a bit aggressive... */
517 uintptr_t const cKeep = cInserted / 2;
518
519 /* First free up the TBs we don't wish to keep (before creating the new
520 list because otherwise the free code will scan the list for each one
521 without ever finding it). */
522 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
523 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
524
525 /* Then chain the new TB together with the ones we like to keep of the
526 existing ones and insert this list into the hash table. */
527 pTbCollision = pTb;
528 for (uintptr_t idx = 0; idx < cKeep; idx++)
529 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
530 pTbCollision->pNext = NULL;
531
532 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
533#ifdef VBOX_STRICT
534 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
535#endif
536
537 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
538}
539
540
541static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
542{
543 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
544 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
545 if (!pTbOldHead)
546 {
547 pTb->pNext = NULL;
548 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
549 }
550 else
551 {
552 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
553 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
554 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
555 {
556 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
557 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
558#ifdef VBOX_STRICT
559 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
560#endif
561 }
562 else
563 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
564 }
565}
566
567
568/**
569 * Unlinks @a pTb from the hash table if found in it.
570 *
571 * @returns true if unlinked, false if not present.
572 * @param pTbCache The hash table.
573 * @param pTb The TB to remove.
574 */
575static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
576{
577 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
578 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
579 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
580
581 /*
582 * At the head of the collision list?
583 */
584 if (pTbHash == pTb)
585 {
586 if (!pTb->pNext)
587 pTbCache->apHash[idxHash] = NULL;
588 else
589 {
590 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
591 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
592#ifdef VBOX_STRICT
593 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
594#endif
595 }
596 return true;
597 }
598
599 /*
600 * Search the collision list.
601 */
602 PIEMTB const pTbHead = pTbHash;
603 while (pTbHash)
604 {
605 PIEMTB const pNextTb = pTbHash->pNext;
606 if (pNextTb == pTb)
607 {
608 pTbHash->pNext = pTb->pNext;
609 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
610#ifdef VBOX_STRICT
611 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
612#endif
613 return true;
614 }
615 pTbHash = pNextTb;
616 }
617 return false;
618}
619
620
621/**
622 * Looks up a TB for the given PC and flags in the cache.
623 *
624 * @returns Pointer to TB on success, NULL if not found.
625 * @param pVCpu The cross context virtual CPU structure of the
626 * calling thread.
627 * @param pTbCache The translation block cache.
628 * @param GCPhysPc The PC to look up a TB for.
629 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
630 * the lookup.
631 * @thread EMT(pVCpu)
632 */
633static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
634 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
635{
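 /* The lookup key combines the current execution mode flags (IEMCPU::fExec)
    with the caller supplied extras and is reduced to the bits that take part
    in TB identity (IEMTB_F_KEY_MASK); the same mask is applied to each
    candidate's fFlags in the compare below. */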
636 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
637 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
638 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
639#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
640 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
641#endif
642 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p L %d\n", fFlags, GCPhysPc, idxHash, pTb, cLeft));
643 while (pTb)
644 {
645 if (pTb->GCPhysPc == GCPhysPc)
646 {
647 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
648 {
649 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
650 {
651 STAM_COUNTER_INC(&pTbCache->cLookupHits);
652 AssertMsg(cLeft > 0, ("%d\n", cLeft));
653
654 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
655 pTb->cUsed++;
656#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
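 /* A threaded TB is handed to the native recompiler exactly once, the
    moment its use count hits 16; below and above that threshold (and for
    TBs that are already native) we simply return what we have. */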
657 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
658 return pTb;
659 return iemNativeRecompile(pVCpu, pTb);
660#else
661 return pTb;
662#endif
663 }
664 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
665 }
666 else
667 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
668 }
669 else
670 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
671
672 pTb = pTb->pNext;
673#ifdef VBOX_STRICT
674 cLeft--;
675#endif
676 }
677 AssertMsg(cLeft == 0, ("%d\n", cLeft));
678 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
679 return pTb;
680}
681
682
683/*********************************************************************************************************************************
684* Translation Block Allocator.
685*********************************************************************************************************************************/
686/*
687 * Translation block allocation management.
688 */
689
690#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
691# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
692 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
693# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
694 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
695# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
696 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
697#else
698# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
699 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
700# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
701 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
702# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
703 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
704#endif
705/** Makes a TB index from a chunk index and TB index within that chunk. */
706#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
707 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
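/* Worked example with purely hypothetical figures: assuming the power-of-two
   layout and a 512 byte IEMTB, a 2 MiB chunk holds 4096 TBs, so cChunkShift
   would be 12 and fChunkMask 0xfff; global TB index 0x2345 then decomposes
   into chunk 0x2 and in-chunk index 0x345, and IEMTBALLOC_IDX_MAKE() puts the
   two halves back together again. */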
708
709
710/**
711 * Initializes the TB allocator and cache for an EMT.
712 *
713 * @returns VBox status code.
714 * @param pVM The VM handle.
715 * @param cInitialTbs The initial number of translation blocks to
716 * preallocate.
717 * @param cMaxTbs The max number of translation blocks allowed.
718 * @param cbInitialExec The initial size of the executable memory allocator.
719 * @param cbMaxExec The max size of the executable memory allocator.
720 * @param cbChunkExec The chunk size for executable memory allocator. Zero
721 * or UINT32_MAX for automatically determining this.
722 * @thread EMT
723 */
724DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
725 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
726{
727 PVMCPUCC pVCpu = VMMGetCpu(pVM);
728 Assert(!pVCpu->iem.s.pTbCacheR3);
729 Assert(!pVCpu->iem.s.pTbAllocatorR3);
730
731 /*
732 * Calculate the chunk size of the TB allocator.
733 * The minimum chunk size is 2MiB.
734 */
735 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
736 uint32_t cbPerChunk = _2M;
737 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
738#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
739 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
740 uint8_t cChunkShift = 21 - cTbShift;
741 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
742#endif
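 /* Keep doubling the chunk size until the requested maximum TB count fits
    within the fixed number of chunk slots in IEMTBALLOCATOR::aChunks. */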
743 for (;;)
744 {
745 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
746 break;
747 cbPerChunk *= 2;
748 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
749#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
750 cChunkShift += 1;
751#endif
752 }
753
754 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
755 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
756 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
757
758 cMaxTbs = cMaxChunks * cTbsPerChunk;
759
760 /*
761 * Allocate and initialize it.
762 */
763 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
764 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
765 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
766 if (!pTbAllocator)
767 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
768 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
769 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
770 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
771 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
772 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
773 pTbAllocator->cbPerChunk = cbPerChunk;
774 pTbAllocator->cMaxTbs = cMaxTbs;
775#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
776 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
777 pTbAllocator->cChunkShift = cChunkShift;
778 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
779#endif
780
781 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
782 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
783
784 /*
785 * Allocate the initial chunks.
786 */
787 for (uint32_t idxChunk = 0; ; idxChunk++)
788 {
789 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
790 if (!paTbs)
791 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
792 "Failed to allocate %zu bytes for the #%u chunk of TBs for VCpu #%u",
793 cbPerChunk, idxChunk, pVCpu->idCpu);
794
795 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
796 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
797 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
798 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
799 pTbAllocator->cTotalTbs += cTbsPerChunk;
800
801 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
802 break;
803 }
804
805 /*
806 * Calculate the size of the hash table. We double the max TB count and
807 * round it up to the nearest power of two.
808 */
809 uint32_t cCacheEntries = cMaxTbs * 2;
810 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
811 {
812 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
813 cCacheEntries = RT_BIT_32(iBitTop);
814 Assert(cCacheEntries >= cMaxTbs * 2);
815 }
816
817 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
818 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
819 if (!pTbCache)
820 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
821 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
822 cbTbCache, cCacheEntries, pVCpu->idCpu);
823
824 /*
825 * Initialize it (assumes zeroed by the allocator).
826 */
827 pTbCache->uMagic = IEMTBCACHE_MAGIC;
828 pTbCache->cHash = cCacheEntries;
829 pTbCache->uHashMask = cCacheEntries - 1;
830 Assert(pTbCache->cHash > pTbCache->uHashMask);
831 pVCpu->iem.s.pTbCacheR3 = pTbCache;
832
833 /*
834 * Initialize the native executable memory allocator.
835 */
836#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
837 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
838 AssertLogRelRCReturn(rc, rc);
839#else
840 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
841#endif
842
843 return VINF_SUCCESS;
844}
845
846
847/**
848 * Inner free worker.
849 */
850static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
851 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
852{
853 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
854 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
855 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
856 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
857
858 /*
859 * Unlink the TB from the hash table.
860 */
861 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
862
863 /*
864 * Free the TB itself.
865 */
866 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
867 {
868 case IEMTB_F_TYPE_THREADED:
869 pTbAllocator->cThreadedTbs -= 1;
870 RTMemFree(pTb->Thrd.paCalls);
871 break;
872#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
873 case IEMTB_F_TYPE_NATIVE:
874 pTbAllocator->cNativeTbs -= 1;
875 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
876 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
877 break;
878#endif
879 default:
880 AssertFailed();
881 }
882 RTMemFree(pTb->pabOpcodes);
883
884 pTb->pNext = NULL;
885 pTb->fFlags = 0;
886 pTb->GCPhysPc = UINT64_MAX;
887 pTb->Gen.uPtr = 0;
888 pTb->Gen.uData = 0;
889 pTb->cbOpcodes = 0;
890 pTb->pabOpcodes = NULL;
891
892 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
893 Assert(pTbAllocator->cInUseTbs > 0);
894
895 pTbAllocator->cInUseTbs -= 1;
896 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
897}
898
899
900/**
901 * Frees the given TB.
902 *
903 * @param pVCpu The cross context virtual CPU structure of the calling
904 * thread.
905 * @param pTb The translation block to free.
906 * @thread EMT(pVCpu)
907 */
908static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
909{
910 /*
911 * Validate state.
912 */
913 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
914 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
915 uint8_t const idxChunk = pTb->idxAllocChunk;
916 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
917 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
918 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
919
920 /*
921 * Call inner worker.
922 */
923 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
924}
925
926
927/**
928 * Schedules a native TB for freeing when it's no longer being executed and
929 * part of the caller's call stack.
930 *
931 * The TB will be removed from the translation block cache, though, so it isn't
932 * possible to execute it again and the IEMTB::pNext member can be used to link
933 * it together with other TBs awaiting freeing.
934 *
935 * @param pVCpu The cross context virtual CPU structure of the calling
936 * thread.
937 * @param pTb The translation block to schedule for freeing.
938 */
939static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
940{
941 /*
942 * Validate state.
943 */
944 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
945 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
946 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
947 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
948 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
949 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
950 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
951 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
952
953 /*
954 * Remove it from the cache and prepend it to the allocator's todo list.
955 */
956 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
957
958 pTb->pNext = pTbAllocator->pDelayedFreeHead;
959 pTbAllocator->pDelayedFreeHead = pTb;
960}
961
962
963/**
964 * Processes the delayed frees.
965 *
966 * This is called by the allocator function as well as the native recompile
967 * function before making any TB or executable memory allocations respectively.
968 */
969void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
970{
971 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
972 pTbAllocator->pDelayedFreeHead = NULL;
973 while (pTb)
974 {
975 PIEMTB const pTbNext = pTb->pNext;
976 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
977 iemTbAllocatorFree(pVCpu, pTb);
978 pTb = pTbNext;
979 }
980}
981
982
983/**
984 * Grow the translation block allocator with another chunk.
985 */
986static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
987{
988 /*
989 * Validate state.
990 */
991 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
992 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
993 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
994 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
995 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
996
997 /*
998 * Allocate a new chunk and add it to the allocator.
999 */
1000 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1001 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1002 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1003
1004 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1005 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1006 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1007 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1008 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1009 pTbAllocator->cTotalTbs += cTbsPerChunk;
1010 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1011
1012 return VINF_SUCCESS;
1013}
1014
1015
1016/**
1017 * Allocates a TB from allocator with free block.
1018 *
1019 * This is common code to both the fast and slow allocator code paths.
1020 */
1021DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1022{
1023 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1024
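 /* Look for a clear (free) bit starting at the allocation hint, rounded down
    to a 64-bit word boundary; should nothing be free from there to the end of
    the bitmap, fall back to a full scan from the start below. */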
1025 int idxTb;
1026 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1027 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1028 pTbAllocator->cTotalTbs,
1029 pTbAllocator->iStartHint & ~(uint32_t)63);
1030 else
1031 idxTb = -1;
1032 if (idxTb < 0)
1033 {
1034 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1035 AssertLogRelReturn(idxTb >= 0, NULL);
1036 }
1037 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1038 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1039
1040 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1041 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1042 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1043 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1044 Assert(pTb->idxAllocChunk == idxChunk);
1045
1046 pTbAllocator->cInUseTbs += 1;
1047 if (fThreaded)
1048 pTbAllocator->cThreadedTbs += 1;
1049 else
1050 pTbAllocator->cNativeTbs += 1;
1051 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1052 return pTb;
1053}
1054
1055
1056/**
1057 * Slow path for iemTbAllocatorAlloc.
1058 */
1059static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1060{
1061 /*
1062 * With some luck we can add another chunk.
1063 */
1064 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1065 {
1066 int rc = iemTbAllocatorGrow(pVCpu);
1067 if (RT_SUCCESS(rc))
1068 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1069 }
1070
1071 /*
1072 * We have to prune stuff. Sigh.
1073 *
1074 * This requires scanning for older TBs and kicking them out. Not sure how to
1075 * best do this as we don't want to maintain any list of TBs ordered by last
1076 * usage time. But one reasonably simple approach would be that each time we
1077 * get here we continue a sequential scan of the allocation chunks,
1078 * considering just a smallish number of TBs and freeing a fixed portion of
1079 * them. Say, we consider the next 128 TBs, freeing the least recently used
1080 * out of each group of 4 TBs, resulting in 32 free TBs.
1081 */
1082 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1083 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1084 uint32_t const cTbsToPrune = 128;
1085 uint32_t const cTbsPerGroup = 4;
1086 uint32_t cFreedTbs = 0;
1087#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1088 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1089#else
1090 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1091#endif
1092 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1093 idxTbPruneFrom = 0;
1094 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1095 {
1096 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1097 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1098 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1099 uint32_t cMsAge = msNow - pTb->msLastUsed;
1100 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1101
1102 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1103 {
1104#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1105 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1106 { /* likely */ }
1107 else
1108 {
1109 idxInChunk2 = 0;
1110 idxChunk2 += 1;
1111 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1112 idxChunk2 = 0;
1113 }
1114#endif
1115 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1116 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1117 if ( cMsAge2 > cMsAge
1118 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1119 {
1120 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1121 pTb = pTb2;
1122 idxChunk = idxChunk2;
1123 idxInChunk = idxInChunk2;
1124 cMsAge = cMsAge2;
1125 }
1126 }
1127
1128 /* Free the TB. */
1129 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1130 cFreedTbs++; /* paranoia */
1131 }
1132 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1133 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1134
1135 /*
1136 * Allocate a TB from the ones we've pruned.
1137 */
1138 if (cFreedTbs)
1139 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1140 return NULL;
1141}
1142
1143
1144/**
1145 * Allocate a translation block.
1146 *
1147 * @returns Pointer to block on success, NULL if we're out and unable to
1148 * free up an existing one (very unlikely once implemented).
1149 * @param pVCpu The cross context virtual CPU structure of the calling
1150 * thread.
1151 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1152 * For statistics.
1153 */
1154DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1155{
1156 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1157 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1158
1159 /* Free any pending TBs before we proceed. */
1160 if (!pTbAllocator->pDelayedFreeHead)
1161 { /* probably likely */ }
1162 else
1163 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1164
1165 /* If the allocator is full, take slow code path.*/
1166 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1167 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1168 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1169}
1170
1171
1172/**
1173 * This is called when we're out of space for native TBs.
1174 *
1175 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1176 * The difference is that we only prune native TBs and will only free any if
1177 * there are at least two in a group. The conditions under which we're called are
1178 * different - there will probably be free TBs in the table when we're called.
1179 * Therefore we increase the group size and max scan length, though we'll stop
1180 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1181 * up at least 8 TBs.
1182 */
1183void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1184{
1185 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1186 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1187
1188 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1189
1190 /*
1191 * Flush the delayed free list before we start freeing TBs indiscriminately.
1192 */
1193 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1194
1195 /*
1196 * Scan and free TBs.
1197 */
1198 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1199 uint32_t const cTbsToPrune = 128 * 8;
1200 uint32_t const cTbsPerGroup = 4 * 4;
1201 uint32_t cFreedTbs = 0;
1202 uint32_t cMaxInstrs = 0;
1203 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1204 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1205 {
1206 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1207 idxTbPruneFrom = 0;
1208 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1209 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1210 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1211 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1212 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1213
1214 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1215 {
1216 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1217 { /* likely */ }
1218 else
1219 {
1220 idxInChunk2 = 0;
1221 idxChunk2 += 1;
1222 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1223 idxChunk2 = 0;
1224 }
1225 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1226 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1227 {
1228 cNativeTbs += 1;
1229 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1230 if ( cMsAge2 > cMsAge
1231 || ( cMsAge2 == cMsAge
1232 && ( pTb2->cUsed < pTb->cUsed
1233 || ( pTb2->cUsed == pTb->cUsed
1234 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1235 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1236 {
1237 pTb = pTb2;
1238 idxChunk = idxChunk2;
1239 idxInChunk = idxInChunk2;
1240 cMsAge = cMsAge2;
1241 }
1242 }
1243 }
1244
1245 /* Free the TB if we found at least two native ones in this group. */
1246 if (cNativeTbs >= 2)
1247 {
1248 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1249 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1250 cFreedTbs++;
1251 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1252 break;
1253 }
1254 }
1255 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1256
1257 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1258}
1259
1260
1261/*********************************************************************************************************************************
1262* Threaded Recompiler Core *
1263*********************************************************************************************************************************/
1264
1265/**
1266 * Allocate a translation block for threaded recompilation.
1267 *
1268 * This is allocated with maxed out call table and storage for opcode bytes,
1269 * because it's only supposed to be called once per EMT to allocate the TB
1270 * pointed to by IEMCPU::pThrdCompileTbR3.
1271 *
1272 * @returns Pointer to the translation block on success, NULL on failure.
1273 * @param pVM The cross context virtual machine structure.
1274 * @param pVCpu The cross context virtual CPU structure of the calling
1275 * thread.
1276 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1277 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1278 */
1279static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1280{
1281 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1282 if (pTb)
1283 {
1284 unsigned const cCalls = 256;
1285 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1286 if (pTb->Thrd.paCalls)
1287 {
1288 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1289 if (pTb->pabOpcodes)
1290 {
1291 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1292 pTb->Thrd.cAllocated = cCalls;
1293 pTb->Thrd.cCalls = 0;
1294 pTb->cbOpcodes = 0;
1295 pTb->pNext = NULL;
1296 pTb->cUsed = 0;
1297 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1298 pTb->idxAllocChunk = UINT8_MAX;
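 /* Heap allocated rather than taken from the TB allocator, thus not
    belonging to any chunk. */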
1299 pTb->GCPhysPc = GCPhysPc;
1300 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1301 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1302 pTb->cInstructions = 0;
1303
1304 /* Init the first opcode range. */
1305 pTb->cRanges = 1;
1306 pTb->aRanges[0].cbOpcodes = 0;
1307 pTb->aRanges[0].offOpcodes = 0;
1308 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1309 pTb->aRanges[0].u2Unused = 0;
1310 pTb->aRanges[0].idxPhysPage = 0;
1311 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1312 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1313
1314 return pTb;
1315 }
1316 RTMemFree(pTb->Thrd.paCalls);
1317 }
1318 RTMemFree(pTb);
1319 }
1320 RT_NOREF(pVM);
1321 return NULL;
1322}
1323
1324
1325/**
1326 * Called on the TB that is dedicated for recompilation before it's reused.
1327 *
1328 * @param pVCpu The cross context virtual CPU structure of the calling
1329 * thread.
1330 * @param pTb The translation block to reuse.
1331 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1332 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1333 */
1334static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1335{
1336 pTb->GCPhysPc = GCPhysPc;
1337 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1338 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1339 pTb->Thrd.cCalls = 0;
1340 pTb->cbOpcodes = 0;
1341 pTb->cInstructions = 0;
1342
1343 /* Init the first opcode range. */
1344 pTb->cRanges = 1;
1345 pTb->aRanges[0].cbOpcodes = 0;
1346 pTb->aRanges[0].offOpcodes = 0;
1347 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1348 pTb->aRanges[0].u2Unused = 0;
1349 pTb->aRanges[0].idxPhysPage = 0;
1350 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1351 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1352}
1353
1354
1355/**
1356 * Used to duplicate a threaded translation block after recompilation is done.
1357 *
1358 * @returns Pointer to the translation block on success, NULL on failure.
1359 * @param pVM The cross context virtual machine structure.
1360 * @param pVCpu The cross context virtual CPU structure of the calling
1361 * thread.
1362 * @param pTbSrc The TB to duplicate.
1363 */
1364static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1365{
1366 /*
1367 * Just using the heap for now. Will make this more efficient and
1368 * complicated later, don't worry. :-)
1369 */
1370 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1371 if (pTb)
1372 {
1373 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1374 memcpy(pTb, pTbSrc, sizeof(*pTb));
1375 pTb->idxAllocChunk = idxAllocChunk;
1376
1377 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1378 Assert(cCalls > 0);
1379 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1380 if (pTb->Thrd.paCalls)
1381 {
1382 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1383 Assert(cbOpcodes > 0);
1384 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1385 if (pTb->pabOpcodes)
1386 {
1387 pTb->Thrd.cAllocated = cCalls;
1388 pTb->pNext = NULL;
1389 pTb->cUsed = 0;
1390 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1391 pTb->fFlags = pTbSrc->fFlags;
1392
1393 return pTb;
1394 }
1395 RTMemFree(pTb->Thrd.paCalls);
1396 }
1397 iemTbAllocatorFree(pVCpu, pTb);
1398 }
1399 RT_NOREF(pVM);
1400 return NULL;
1401
1402}
1403
1404
1405/**
1406 * Adds the given TB to the hash table.
1407 *
1408 * @param pVCpu The cross context virtual CPU structure of the calling
1409 * thread.
1410 * @param pTbCache The cache to add it to.
1411 * @param pTb The translation block to add.
1412 */
1413static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1414{
1415 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1416
1417 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1418 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1419 if (LogIs12Enabled())
1420 {
1421 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1422 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1423 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1424 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1425 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1426 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1427 pTb->aRanges[idxRange].idxPhysPage == 0
1428 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1429 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1430 }
1431}
1432
1433
1434/**
1435 * Called by opcode verifier functions when they detect a problem.
1436 */
1437void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1438{
1439 /* Unless it's safe, we can only immediately free threaded TBs, as we will
1440 have more code left to execute in native TBs when fSafeToFree == false. */
1441 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1442 iemTbAllocatorFree(pVCpu, pTb);
1443 else
1444 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1445}
1446
1447
1448/*
1449 * Real code.
1450 */
1451
1452#ifdef LOG_ENABLED
1453/**
1454 * Logs the current instruction.
1455 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1456 * @param pszFunction The IEM function doing the execution.
1457 * @param idxInstr The instruction number in the block.
1458 */
1459static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1460{
1461# ifdef IN_RING3
1462 if (LogIs2Enabled())
1463 {
1464 char szInstr[256];
1465 uint32_t cbInstr = 0;
1466 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1467 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1468 szInstr, sizeof(szInstr), &cbInstr);
1469
1470 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1471 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1472 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1473 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1474 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1475 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1476 " %s\n"
1477 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1478 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1479 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1480 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1481 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1482 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1483 szInstr));
1484
1485 if (LogIs3Enabled())
1486 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL);
1487 }
1488 else
1489# endif
1490 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1491 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1492}
1493#endif /* LOG_ENABLED */
1494
1495
1496#if 0
1497static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1498{
1499 RT_NOREF(pVM, pVCpu);
1500 return rcStrict;
1501}
1502#endif
1503
1504
1505/**
1506 * Initializes the decoder state when compiling TBs.
1507 *
1508 * This presumes that fExec has already been initialized.
1509 *
1510 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so may need
1511 * to apply fixes to them as well.
1512 *
1513 * @param pVCpu The cross context virtual CPU structure of the calling
1514 * thread.
1515 * @param fReInit Clear for the first call for a TB, set for subsequent
1516 * calls from inside the compile loop where we can skip a
1517 * couple of things.
1518 * @param fExtraFlags The extra translation block flags when @a fReInit is
1519 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1520 * checked.
1521 */
1522DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1523{
1524 /* ASSUMES: That iemInitExec was already called and that anyone changing
1525 CPU state affecting the fExec bits since then will have updated fExec! */
1526 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1527 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1528
1529 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1530
1531 /* Decoder state: */
1532 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1533 pVCpu->iem.s.enmEffAddrMode = enmMode;
1534 if (enmMode != IEMMODE_64BIT)
1535 {
1536 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1537 pVCpu->iem.s.enmEffOpSize = enmMode;
1538 }
1539 else
1540 {
1541 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1542 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1543 }
1544 pVCpu->iem.s.fPrefixes = 0;
1545 pVCpu->iem.s.uRexReg = 0;
1546 pVCpu->iem.s.uRexB = 0;
1547 pVCpu->iem.s.uRexIndex = 0;
1548 pVCpu->iem.s.idxPrefix = 0;
1549 pVCpu->iem.s.uVex3rdReg = 0;
1550 pVCpu->iem.s.uVexLength = 0;
1551 pVCpu->iem.s.fEvexStuff = 0;
1552 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1553 pVCpu->iem.s.offModRm = 0;
1554 pVCpu->iem.s.iNextMapping = 0;
1555
1556 if (!fReInit)
1557 {
1558 pVCpu->iem.s.cActiveMappings = 0;
1559 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1560 pVCpu->iem.s.fEndTb = false;
1561 pVCpu->iem.s.fTbCheckOpcodes = false;
1562 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1563 pVCpu->iem.s.fTbCrossedPage = false;
1564 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1565 pVCpu->iem.s.fTbCurInstrIsSti = false;
1566 /* Force RF clearing and TF checking on first instruction in the block
1567 as we don't really know what came before and should assume the worst: */
1568 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1569 }
1570 else
1571 {
1572 Assert(pVCpu->iem.s.cActiveMappings == 0);
1573 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1574 Assert(pVCpu->iem.s.fEndTb == false);
1575 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1576 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1577 }
1578 pVCpu->iem.s.fTbCurInstr = 0;
1579
1580#ifdef DBGFTRACE_ENABLED
1581 switch (IEM_GET_CPU_MODE(pVCpu))
1582 {
1583 case IEMMODE_64BIT:
1584 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1585 break;
1586 case IEMMODE_32BIT:
1587 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1588 break;
1589 case IEMMODE_16BIT:
1590 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1591 break;
1592 }
1593#endif
1594}
1595
1596
1597/**
1598 * Initializes the opcode fetcher when starting the compilation.
1599 *
1600 * @param pVCpu The cross context virtual CPU structure of the calling
1601 * thread.
1602 */
1603DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1604{
1605 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1606#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1607 pVCpu->iem.s.offOpcode = 0;
1608#else
1609 RT_NOREF(pVCpu);
1610#endif
1611}
1612
1613
1614/**
1615 * Re-initializes the opcode fetcher between instructions while compiling.
1616 *
1617 * @param pVCpu The cross context virtual CPU structure of the calling
1618 * thread.
1619 */
1620DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1621{
1622 if (pVCpu->iem.s.pbInstrBuf)
1623 {
1624 uint64_t off = pVCpu->cpum.GstCtx.rip;
1625 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1626 off += pVCpu->cpum.GstCtx.cs.u64Base;
1627 off -= pVCpu->iem.s.uInstrBufPc;
1628 if (off < pVCpu->iem.s.cbInstrBufTotal)
1629 {
1630 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1631 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
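/* Cap the opcode fetch window at 15 bytes past the instruction start (the
   x86 maximum instruction length), unless the buffer ends sooner. */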
1632 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1633 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1634 else
1635 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1636 }
1637 else
1638 {
1639 pVCpu->iem.s.pbInstrBuf = NULL;
1640 pVCpu->iem.s.offInstrNextByte = 0;
1641 pVCpu->iem.s.offCurInstrStart = 0;
1642 pVCpu->iem.s.cbInstrBuf = 0;
1643 pVCpu->iem.s.cbInstrBufTotal = 0;
1644 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1645 }
1646 }
1647 else
1648 {
1649 pVCpu->iem.s.offInstrNextByte = 0;
1650 pVCpu->iem.s.offCurInstrStart = 0;
1651 pVCpu->iem.s.cbInstrBuf = 0;
1652 pVCpu->iem.s.cbInstrBufTotal = 0;
1653#ifdef VBOX_STRICT
1654 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1655#endif
1656 }
1657#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1658 pVCpu->iem.s.offOpcode = 0;
1659#endif
1660}
1661
1662
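/**
 * Copies up to 15 opcode bytes from the decoder's buffer into the TB.
 *
 * Presumably the unrolled switch with fall-through is preferred over memcpy so
 * the compiler emits straight-line moves for these small, bounded copies.
 *
 * @param   pVCpu   The cross context virtual CPU structure of the calling thread.
 * @param   pbDst   The destination buffer, e.g. &pTb->pabOpcodes[offOpcode].
 * @param   cbInstr The number of opcode bytes to copy, 1 to 15.
 */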
1663DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1664{
1665 switch (cbInstr)
1666 {
1667 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1668 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1669 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1670 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1671 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1672 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1673 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1674 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1675 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1676 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1677 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1678 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1679 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1680 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1681 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1682 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1683 }
1684}
1685
1686
1687/**
1688 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1689 *
1690 * - CS LIM check required.
1691 * - Must recheck opcode bytes.
1692 * - Previous instruction branched.
1693 * - TLB load detected, probably due to page crossing.
1694 *
1695 * @returns true if everything went well, false if we're out of space in the TB
1696 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
1697 * @param pVCpu The cross context virtual CPU structure of the calling
1698 * thread.
1699 * @param pTb The translation block being compiled.
1700 */
1701bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1702{
1703 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1704 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1705#if 0
1706 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1707 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1708#endif
1709
1710 /*
1711 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
1712 * see whether we should start doing so.
1713 */
1714 bool fConsiderCsLimChecking;
1715 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1716 if ( fMode == IEM_F_MODE_X86_64BIT
1717 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1718 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1719 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1720 fConsiderCsLimChecking = false; /* already enabled or not needed */
1721 else
1722 {
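/* Presumably: with a full guest page plus the maximum instruction length of
   headroom below CS.LIM (adjusted for the page offset of CS.BASE), CS.LIM
   checking can be left to the 'ConsiderCsLim' builtin variants; with less
   headroom we give up on this TB so it can be recompiled with full checks. */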
1723 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1724 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1725 fConsiderCsLimChecking = true; /* likely */
1726 else
1727 {
1728 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1729 return false;
1730 }
1731 }
1732
1733 /*
1734 * Prepare call now, even before we know if we can accept the instruction in this TB.
1735 * This allows us to amend parameters w/o making every case suffer.
1736 */
1737 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1738 uint16_t const offOpcode = pTb->cbOpcodes;
1739 uint8_t idxRange = pTb->cRanges - 1;
1740
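/* Layout used by the opcode-check builtins: auParams[0] holds the instruction
   length (page-crossing cases also put the first-page byte count into bits
   32 and up), auParams[1] the range index, and auParams[2] the offset of the
   opcodes within that range. */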
1741 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1742 pCall->idxInstr = pTb->cInstructions;
1743 pCall->offOpcode = offOpcode;
1744 pCall->idxRange = idxRange;
1745 pCall->cbOpcode = cbInstr;
1746 pCall->auParams[0] = cbInstr;
1747 pCall->auParams[1] = idxRange;
1748 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1749
1750/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1751 * gotten onto. If we do, stop */
1752
1753 /*
1754 * Case 1: We've branched (RIP changed).
1755 *
1756 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1757 * Req: 1 extra range, no extra phys.
1758 *
1759 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
1760 * necessary (fTbCrossedPage is true).
1761 * Req: 1 extra range, probably 1 extra phys page entry.
1762 *
1763 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1764 * but in addition we cross into the following page and require
1765 * another TLB load.
1766 * Req: 2 extra ranges, probably 2 extra phys page entries.
1767 *
1768 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1769 * the following page (thus fTbCrossedPage is true).
1770 * Req: 2 extra ranges, probably 1 extra phys page entry.
1771 *
1772 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1773 * it may trigger "spuriously" from the CPU's point of view because of
1774 * physical page changes that'll invalidate the physical TLB and trigger a
1775 * call to the function. In theory this shouldn't be a big deal, just a
1776 * bit of performance loss as we'll pick the LoadingTlb variants.
1777 *
1778 * Note! We do not currently optimize branching to the next instruction (sorry
1779 * 32-bit PIC code). We could maybe do that in the branching code that
1780 * sets (or not) fTbBranched.
1781 */
1782 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1783 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1784 * code. This'll require filtering out far jmps and calls, as they
1785 * load CS which should technically be considered indirect since the
1786 * GDT/LDT entry's base address can be modified independently from
1787 * the code. */
1788 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
1789 {
1790 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1791 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1792 {
1793 /* 1a + 1b - instruction fully within the branched to page. */
1794 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1795 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1796
1797 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1798 {
1799 /* Check that we've got a free range. */
1800 idxRange += 1;
1801 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1802 { /* likely */ }
1803 else
1804 {
1805 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1806 return false;
1807 }
1808 pCall->idxRange = idxRange;
1809 pCall->auParams[1] = idxRange;
1810 pCall->auParams[2] = 0;
1811
1812 /* Check that we've got a free page slot. */
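/* A TB covers at most three guest physical pages: the page of GCPhysPc
   (idxPhysPage 0) plus the two aGCPhysPages entries (idxPhysPage 1 and 2). */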
1813 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1814 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1815 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1816 pTb->aRanges[idxRange].idxPhysPage = 0;
1817 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1818 || pTb->aGCPhysPages[0] == GCPhysNew)
1819 {
1820 pTb->aGCPhysPages[0] = GCPhysNew;
1821 pTb->aRanges[idxRange].idxPhysPage = 1;
1822 }
1823 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1824 || pTb->aGCPhysPages[1] == GCPhysNew)
1825 {
1826 pTb->aGCPhysPages[1] = GCPhysNew;
1827 pTb->aRanges[idxRange].idxPhysPage = 2;
1828 }
1829 else
1830 {
1831 Log8(("%04x:%08RX64: out of aGCPhysPages entires after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1832 return false;
1833 }
1834
1835 /* Finish setting up the new range. */
1836 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1837 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1838 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1839 pTb->aRanges[idxRange].u2Unused = 0;
1840 pTb->cRanges++;
1841 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
1842 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
1843 pTb->aRanges[idxRange].offOpcodes));
1844 }
1845 else
1846 {
1847 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1848 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1849 }
1850
1851 /* Determine which function we need to load & check.
1852 Note! For jumps to a new page, we'll set both fTbBranched and
1853 fTbCrossedPage to avoid unnecessary TLB work for
1854 intra-page branching. */
1855 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1856 || pVCpu->iem.s.fTbCrossedPage)
1857 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1858 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1859 : !fConsiderCsLimChecking
1860 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1861 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1862 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1863 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1864 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1865 : !fConsiderCsLimChecking
1866 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1867 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1868 else
1869 {
1870 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1871 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1872 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1873 : !fConsiderCsLimChecking
1874 ? kIemThreadedFunc_BltIn_CheckOpcodes
1875 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1876 }
1877 }
1878 else
1879 {
1880 /* 1c + 1d - instruction crosses pages. */
1881 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1882 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1883
1884 /* Lazy bird: Check that this isn't case 1c, since we've already
1885 loaded the first physical address. End the TB and
1886 make it a case 2b instead.
1887
1888 Hmm. Too much bother to detect, so just do the same
1889 with case 1d as well. */
1890#if 0 /** @todo get back to this later when we've got the actual branch code in
1891 * place. */
1892 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1893
1894 /* Check that we've got two free ranges. */
1895 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1896 { /* likely */ }
1897 else
1898 return false;
1899 idxRange += 1;
1900 pCall->idxRange = idxRange;
1901 pCall->auParams[1] = idxRange;
1902 pCall->auParams[2] = 0;
1903
1904 /* ... */
1905
1906#else
1907 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1908 return false;
1909#endif
1910 }
1911 }
1912
1913 /*
1914 * Case 2: Page crossing.
1915 *
1916 * Sub-case 2a: The instruction starts on the first byte in the next page.
1917 *
1918 * Sub-case 2b: The instruction has opcode bytes in both the current and
1919 * following page.
1920 *
1921 * Both cases requires a new range table entry and probably a new physical
1922 * page entry. The difference is in which functions to emit and whether to
1923 * add bytes to the current range.
1924 */
1925 else if (pVCpu->iem.s.fTbCrossedPage)
1926 {
1927 /* Check that we've got a free range. */
1928 idxRange += 1;
1929 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1930 { /* likely */ }
1931 else
1932 {
1933 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1934 return false;
1935 }
1936
1937 /* Check that we've got a free page slot. */
1938 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1939 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1940 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1941 pTb->aRanges[idxRange].idxPhysPage = 0;
1942 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1943 || pTb->aGCPhysPages[0] == GCPhysNew)
1944 {
1945 pTb->aGCPhysPages[0] = GCPhysNew;
1946 pTb->aRanges[idxRange].idxPhysPage = 1;
1947 }
1948 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1949 || pTb->aGCPhysPages[1] == GCPhysNew)
1950 {
1951 pTb->aGCPhysPages[1] = GCPhysNew;
1952 pTb->aRanges[idxRange].idxPhysPage = 2;
1953 }
1954 else
1955 {
1956 Log8(("%04x:%08RX64: out of aGCPhysPages entires while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1957 return false;
1958 }
1959
1960 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
1961 {
1962 Assert(pVCpu->iem.s.offCurInstrStart == 0);
1963 pCall->idxRange = idxRange;
1964 pCall->auParams[1] = idxRange;
1965 pCall->auParams[2] = 0;
1966
1967 /* Finish setting up the new range. */
1968 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1969 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1970 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1971 pTb->aRanges[idxRange].u2Unused = 0;
1972 pTb->cRanges++;
1973 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
1974 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
1975 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
1976
1977 /* Determine which function we need to load & check. */
1978 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1979 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
1980 : !fConsiderCsLimChecking
1981 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
1982 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
1983 }
1984 else
1985 {
1986 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1987 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1988 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1989 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
1990
1991 /* We're good. Split the instruction over the old and new range table entries. */
1992 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
1993
1994 pTb->aRanges[idxRange].offPhysPage = 0;
1995 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
1996 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
1997 pTb->aRanges[idxRange].u2Unused = 0;
1998 pTb->cRanges++;
1999 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2000 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2001 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2002
2003 /* Determine which function we need to load & check. */
2004 if (pVCpu->iem.s.fTbCheckOpcodes)
2005 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2006 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2007 : !fConsiderCsLimChecking
2008 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2009 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2010 else
2011 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2012 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2013 : !fConsiderCsLimChecking
2014 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2015 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2016 }
2017 }
2018
2019 /*
2020 * Regular case: No new range required.
2021 */
2022 else
2023 {
2024 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2025 if (pVCpu->iem.s.fTbCheckOpcodes)
2026 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2027 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2028 : kIemThreadedFunc_BltIn_CheckOpcodes;
2029 else
2030 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2031
2032 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2033 pTb->cbOpcodes = offOpcode + cbInstr;
2034 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2035 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2036 }
2037
2038 /*
2039 * Commit the call.
2040 */
2041 pTb->Thrd.cCalls++;
2042
2043 /*
2044 * Clear state.
2045 */
2046 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2047 pVCpu->iem.s.fTbCrossedPage = false;
2048 pVCpu->iem.s.fTbCheckOpcodes = false;
2049
2050 /*
2051 * Copy opcode bytes.
2052 */
2053 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2054 pTb->cbOpcodes = offOpcode + cbInstr;
2055 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2056
2057 return true;
2058}
2059
2060
2061/**
2062 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2063 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2064 *
2065 * @returns true if anything is pending, false if not.
2066 * @param pVCpu The cross context virtual CPU structure of the calling
2067 * thread.
2068 */
2069DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2070{
2071 uint64_t fCpu = pVCpu->fLocalForcedActions;
2072 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2073#if 1
2074 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
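/* NMIs and SMIs always count as pending here; APIC/PIC interrupts only when
   IF is set and we're not in an interrupt shadow. */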
2075 if (RT_LIKELY( !fCpu
2076 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2077 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2078 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2079 return false;
2080 return true;
2081#else
2082 return false;
2083#endif
2084
2085}
2086
2087
2088/**
2089 * Called by iemThreadedCompile when a block requires a mode check.
2090 *
2091 * @returns true if we should continue, false if we're out of call entries.
2092 * @param pVCpu The cross context virtual CPU structure of the calling
2093 * thread.
2094 * @param pTb The translation block being compiled.
2095 */
2096static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2097{
2098 /* Emit the call. */
2099 uint32_t const idxCall = pTb->Thrd.cCalls;
2100 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2101 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2102 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2103 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2104 pCall->idxInstr = pTb->cInstructions - 1;
2105 pCall->uUnused0 = 0;
2106 pCall->offOpcode = 0;
2107 pCall->cbOpcode = 0;
2108 pCall->idxRange = 0;
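/* Record the current fExec value so the CheckMode builtin can compare it
   against the execution-time mode. */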
2109 pCall->auParams[0] = pVCpu->iem.s.fExec;
2110 pCall->auParams[1] = 0;
2111 pCall->auParams[2] = 0;
2112 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2113 return true;
2114}
2115
2116
2117/**
2118 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2119 * set.
2120 *
2121 * @returns true if we should continue, false if an IRQ is deliverable or a
2122 * relevant force flag is pending.
2123 * @param pVCpu The cross context virtual CPU structure of the calling
2124 * thread.
2125 * @param pTb The translation block being compiled.
2126 * @sa iemThreadedCompileCheckIrq
2127 */
2128bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2129{
2130 /*
2131 * Skip this if we've already emitted a call after the previous instruction
2132 * or if it's the first call, as we're always checking FFs between blocks.
2133 */
2134 uint32_t const idxCall = pTb->Thrd.cCalls;
2135 if ( idxCall > 0
2136 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2137 {
2138 /* Emit the call. */
2139 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2140 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2141 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2142 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2143 pCall->idxInstr = pTb->cInstructions;
2144 pCall->uUnused0 = 0;
2145 pCall->offOpcode = 0;
2146 pCall->cbOpcode = 0;
2147 pCall->idxRange = 0;
2148 pCall->auParams[0] = 0;
2149 pCall->auParams[1] = 0;
2150 pCall->auParams[2] = 0;
2151 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2152
2153 /* Reset the IRQ check value. */
2154 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2155
2156 /*
2157 * Check for deliverable IRQs and pending force flags.
2158 */
2159 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2160 }
2161 return true; /* continue */
2162}
2163
2164
2165/**
2166 * Emits an IRQ check call and checks for pending IRQs.
2167 *
2168 * @returns true if we should continue, false if an IRQ is deliverable or a
2169 * relevant force flag is pending.
2170 * @param pVCpu The cross context virtual CPU structure of the calling
2171 * thread.
2172 * @param pTb The translation block.
2173 * @sa iemThreadedCompileBeginEmitCallsComplications
2174 */
2175static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2176{
2177 /* Check again in a little bit, unless it is immediately following an STI
2178 in which case we *must* check immediately after the next instruction
2179 as well in case it's executed with interrupt inhibition. We could
2180 otherwise miss the interrupt window. See the irq2 wait2 variant in
2181 bs3-timers-1 which is doing sti + sti + cli. */
2182 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2183 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2184 else
2185 {
2186 pVCpu->iem.s.fTbCurInstrIsSti = false;
2187 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2188 }
2189 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2190
2191 /*
2192 * Emit the call.
2193 */
2194 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2195 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2196 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2197 pCall->idxInstr = pTb->cInstructions;
2198 pCall->uUnused0 = 0;
2199 pCall->offOpcode = 0;
2200 pCall->cbOpcode = 0;
2201 pCall->idxRange = 0;
2202 pCall->auParams[0] = 0;
2203 pCall->auParams[1] = 0;
2204 pCall->auParams[2] = 0;
2205
2206 /*
2207 * Check for deliverable IRQs and pending force flags.
2208 */
2209 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2210}
2211
2212
2213/**
2214 * Compiles a new TB and executes it.
2215 *
2216 * We combine compilation and execution here as it makes for simpler code flow
2217 * in the main loop, and it allows interpreting while compiling if we want to
2218 * explore that option.
2219 *
2220 * @returns Strict VBox status code.
2221 * @param pVM The cross context virtual machine structure.
2222 * @param pVCpu The cross context virtual CPU structure of the calling
2223 * thread.
2224 * @param GCPhysPc The physical address corresponding to the current
2225 * RIP+CS.BASE.
2226 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2227 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2228 */
2229static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2230{
2231 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2232 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2233
2234 /*
2235 * Get the TB we use for the recompiling. This is a maxed-out TB that
2236 * we'll make a more efficient copy of when we're done compiling.
2237 */
2238 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2239 if (pTb)
2240 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2241 else
2242 {
2243 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2244 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2245 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2246 }
2247
2248 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2249 functions may get at it. */
2250 pVCpu->iem.s.pCurTbR3 = pTb;
2251
2252#if 0
2253 /* Make sure the CheckIrq condition matches the one in EM. */
2254 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2255 const uint32_t cZeroCalls = 1;
2256#else
2257 const uint32_t cZeroCalls = 0;
2258#endif
2259
2260 /*
2261 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2262 */
2263 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2264 iemThreadedCompileInitOpcodeFetching(pVCpu);
2265 VBOXSTRICTRC rcStrict;
2266 for (;;)
2267 {
2268 /* Process the next instruction. */
2269#ifdef LOG_ENABLED
2270 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2271 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2272 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2273 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2274#endif
2275 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2276 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2277
2278 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2279 if ( rcStrict == VINF_SUCCESS
2280 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2281 && !pVCpu->iem.s.fEndTb)
2282 {
2283 Assert(pTb->Thrd.cCalls > cCallsPrev);
2284 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2285
2286 pVCpu->iem.s.cInstructions++;
2287
2288 /* Check for mode change _after_ certain CIMPL calls, so check that
2289 we continue executing with the same mode value. */
2290 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2291 { /* probable */ }
2292 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2293 { /* extremely likely */ }
2294 else
2295 break;
2296 }
2297 else
2298 {
2299 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2300 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2301 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2302 rcStrict = VINF_SUCCESS;
2303
2304 if (pTb->Thrd.cCalls > cZeroCalls)
2305 {
2306 if (cCallsPrev != pTb->Thrd.cCalls)
2307 pVCpu->iem.s.cInstructions++;
2308 break;
2309 }
2310
2311 pVCpu->iem.s.pCurTbR3 = NULL;
2312 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2313 }
2314
2315 /* Check for IRQs? */
2316 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2317 pVCpu->iem.s.cInstrTillIrqCheck--;
2318 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2319 break;
2320
2321 /* Still space in the TB? */
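/* Presumably the margins cover the worst case for one more instruction:
   a handful of call entries (+5) and a maximal 15-byte opcode (+16). */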
2322 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2323 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
2324 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2325 else
2326 {
2327 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2328 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2329 break;
2330 }
2331 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2332 }
2333
2334 /*
2335 * Duplicate the TB into a completed one and link it.
2336 */
2337 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2338 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2339
2340 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2341
2342#ifdef IEM_COMPILE_ONLY_MODE
2343 /*
2344 * Execute the translation block.
2345 */
2346#endif
2347
2348 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2349}
2350
2351
2352
2353/*********************************************************************************************************************************
2354* Recompiled Execution Core *
2355*********************************************************************************************************************************/
2356
2357
2358/**
2359 * Executes a translation block.
2360 *
2361 * @returns Strict VBox status code.
2362 * @param pVCpu The cross context virtual CPU structure of the calling
2363 * thread.
2364 * @param pTb The translation block to execute.
2365 */
2366static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2367{
2368 /*
2369 * Check the opcodes in the first page before starting execution.
2370 */
2371/** @todo this test should take IEMTB_F_CS_LIM_CHECKS into account or something.
2372 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2373 * altering the CS limit such that only one of the two instruction bytes is valid.
2374 * Since it's a CS.LIM problem, the pbInstrBuf is good for the full length, and
2375 * the test succeeds if skipped, but we assert in debug builds. */
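/* If the bytes no longer match, the guest has modified the code since the TB
   was compiled and the TB must be retired as obsolete. */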
2376 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2377 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2378 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2379 { /* likely */ }
2380 else
2381 {
2382 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2383 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2384 return VINF_SUCCESS;
2385 }
2386
2387 /*
2388 * Set the current TB so CIMPL functions may get at it.
2389 */
2390 pVCpu->iem.s.pCurTbR3 = pTb;
2391
2392 /*
2393 * Execute the block.
2394 */
2395#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2396 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2397 {
2398 pVCpu->iem.s.cTbExecNative++;
2399# ifdef LOG_ENABLED
2400 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2401# endif
2402# ifdef RT_ARCH_AMD64
2403 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2404# else
2405 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2406# endif
2407 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2408 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2409 { /* likely */ }
2410 else
2411 {
2412 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2413 pVCpu->iem.s.pCurTbR3 = NULL;
2414 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2415
2416 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2417 only to break out of TB execution early. */
2418 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2419 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2420
2421 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2422 and be converted to VINF_SUCCESS or whatever is appropriate. */
2423 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2424 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu));
2425
2426 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2427 }
2428 }
2429 else
2430#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2431 {
2432 /*
2433 * The threaded execution loop.
2434 */
2435 pVCpu->iem.s.cTbExecThreaded++;
2436#ifdef LOG_ENABLED
2437 uint64_t uRipPrev = UINT64_MAX;
2438#endif
2439 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2440 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2441 while (cCallsLeft-- > 0)
2442 {
2443#ifdef LOG_ENABLED
2444 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2445 {
2446 uRipPrev = pVCpu->cpum.GstCtx.rip;
2447 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2448 }
2449 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2450 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2451 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2452#endif
2453 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2454 pCallEntry->auParams[0],
2455 pCallEntry->auParams[1],
2456 pCallEntry->auParams[2]);
2457 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2458 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2459 pCallEntry++;
2460 else
2461 {
2462 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2463 pVCpu->iem.s.pCurTbR3 = NULL;
2464 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2465
2466 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2467 only to break out of TB execution early. */
2468 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2469 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2470 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2471 }
2472 }
2473 }
2474
2475 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2476 pVCpu->iem.s.pCurTbR3 = NULL;
2477 return VINF_SUCCESS;
2478}
2479
2480
2481/**
2482 * This is called when the PC doesn't match the current pbInstrBuf.
2483 *
2484 * Upon return, we're ready for opcode fetching. But please note that
2485 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2486 * MMIO or unassigned).
2487 */
2488static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2489{
2490 pVCpu->iem.s.pbInstrBuf = NULL;
2491 pVCpu->iem.s.offCurInstrStart = 0;
2492 pVCpu->iem.s.offInstrNextByte = 0;
2493 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2494 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2495}
2496
2497
2498/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2499DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2500{
2501 /*
2502 * Set uCurTbStartPc to RIP and calc the effective PC.
2503 */
2504 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2505 pVCpu->iem.s.uCurTbStartPc = uPc;
2506 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2507 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2508
2509 /*
2510 * Advance within the current buffer (PAGE) when possible.
2511 */
2512 if (pVCpu->iem.s.pbInstrBuf)
2513 {
2514 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2515 if (off < pVCpu->iem.s.cbInstrBufTotal)
2516 {
2517 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2518 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2519 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2520 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2521 else
2522 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2523
2524 return pVCpu->iem.s.GCPhysInstrBuf + off;
2525 }
2526 }
2527 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2528}
2529
2530
2531/**
2532 * Determines the extra IEMTB_F_XXX flags.
2533 *
2534 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2535 * IEMTB_F_CS_LIM_CHECKS (or zero).
2536 * @param pVCpu The cross context virtual CPU structure of the calling
2537 * thread.
2538 */
2539DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2540{
2541 uint32_t fRet = 0;
2542
2543 /*
2544 * Determine the inhibit bits.
2545 */
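/* Note: IEMTB_F_INHIBIT_SHADOW and IEMTB_F_INHIBIT_NMI presumably mirror the
   corresponding inhibit bits in rflags.uBoth, which is what makes this single
   mask test work. */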
2546 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2547 { /* typical */ }
2548 else
2549 {
2550 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2551 fRet |= IEMTB_F_INHIBIT_SHADOW;
2552 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2553 fRet |= IEMTB_F_INHIBIT_NMI;
2554 }
2555
2556 /*
2557 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2558 * likely to go invalid before the end of the translation block.
2559 */
2560 if (IEM_IS_64BIT_CODE(pVCpu))
2561 return fRet;
2562
2563 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2564 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2565 return fRet;
2566 return fRet | IEMTB_F_CS_LIM_CHECKS;
2567}
2568
2569
2570VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2571{
2572 /*
2573 * See if there is an interrupt pending in TRPM, inject it if we can.
2574 */
2575 if (!TRPMHasTrap(pVCpu))
2576 { /* likely */ }
2577 else
2578 {
2579 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2580 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2581 { /*likely */ }
2582 else
2583 return rcStrict;
2584 }
2585
2586 /*
2587 * Init the execution environment.
2588 */
2589#ifdef RT_ARCH_ARM64 /** @todo ARM64: fix unaligned locked instructions properly. @bugref{10547} */
2590 if (pVM->cCpus == 1)
2591 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
2592 else
2593#endif
2594 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2595 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2596 { }
2597 else
2598 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2599
2600 /*
2601 * Run-loop.
2602 *
2603 * If we're using setjmp/longjmp we combine all the catching here to avoid
2604 * having to call setjmp for each block we're executing.
2605 */
2606 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2607 for (;;)
2608 {
2609 PIEMTB pTb = NULL;
2610 VBOXSTRICTRC rcStrict;
2611 IEM_TRY_SETJMP(pVCpu, rcStrict)
2612 {
2613 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
2614 for (uint32_t iIterations = 0; ; iIterations++)
2615 {
2616 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2617 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2618 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2619
2620 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2621 if (pTb)
2622 rcStrict = iemTbExec(pVCpu, pTb);
2623 else
2624 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2625 if (rcStrict == VINF_SUCCESS)
2626 {
2627 Assert(pVCpu->iem.s.cActiveMappings == 0);
2628
2629 uint64_t fCpu = pVCpu->fLocalForcedActions;
2630 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2631 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2632 | VMCPU_FF_TLB_FLUSH
2633 | VMCPU_FF_UNHALT );
2634 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2635 if (RT_LIKELY( ( !fCpu
2636 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2637 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2638 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2639 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2640 {
2641 if (RT_LIKELY( (iIterations & cPollRate) != 0
2642 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2643 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2644 else
2645 return VINF_SUCCESS;
2646 }
2647 else
2648 return VINF_SUCCESS;
2649 }
2650 else
2651 return rcStrict;
2652 }
2653 }
2654 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2655 {
2656 pVCpu->iem.s.cLongJumps++;
2657 if (pVCpu->iem.s.cActiveMappings > 0)
2658 iemMemRollback(pVCpu);
2659
2660#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2661 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
2662 {
2663 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
2664 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
2665 }
2666#endif
2667
2668#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2669 /* If pTb isn't NULL we're in iemTbExec. */
2670 if (!pTb)
2671 {
2672 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2673 pTb = pVCpu->iem.s.pCurTbR3;
2674 if (pTb)
2675 {
2676 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
2677 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
2678 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
2679 }
2680 }
2681#endif
2682 return rcStrict;
2683 }
2684 IEM_CATCH_LONGJMP_END(pVCpu);
2685 }
2686}
2687