VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMAll/IEMAllThrdRecompiler.cpp@102781

Last change on this file since 102781 was 102684, checked in by vboxsync, 14 months ago

VMM/IEM: Fixed another bug related to ah,ch,dh,bh storing (AMD64 host). bugref:10371

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 110.4 KB
1/* $Id: IEMAllThrdRecompiler.cpp 102684 2023-12-21 21:36:01Z vboxsync $ */
2/** @file
3 * IEM - Instruction Decoding and Threaded Recompilation.
4 *
5 * Logging group IEM_RE_THREADED assignments:
6 * - Level 1 (Log) : Errors, exceptions, interrupts and such major events. [same as IEM]
7 * - Flow (LogFlow) : TB calls being emitted.
8 * - Level 2 (Log2) : Basic instruction execution state info. [same as IEM]
9 * - Level 3 (Log3) : More detailed execution state info. [same as IEM]
10 * - Level 4 (Log4) : Decoding mnemonics w/ EIP. [same as IEM]
11 * - Level 5 (Log5) : Decoding details. [same as IEM]
12 * - Level 6 (Log6) : TB opcode range management.
13 * - Level 7 (Log7) : TB obsoletion.
14 * - Level 8 (Log8) : TB compilation.
15 * - Level 9 (Log9) : TB exec.
16 * - Level 10 (Log10): TB block lookup.
17 * - Level 11 (Log11): TB block lookup details.
18 * - Level 12 (Log12): TB insertion.
19 */
20
21/*
22 * Copyright (C) 2011-2023 Oracle and/or its affiliates.
23 *
24 * This file is part of VirtualBox base platform packages, as
25 * available from https://www.virtualbox.org.
26 *
27 * This program is free software; you can redistribute it and/or
28 * modify it under the terms of the GNU General Public License
29 * as published by the Free Software Foundation, in version 3 of the
30 * License.
31 *
32 * This program is distributed in the hope that it will be useful, but
33 * WITHOUT ANY WARRANTY; without even the implied warranty of
34 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
35 * General Public License for more details.
36 *
37 * You should have received a copy of the GNU General Public License
38 * along with this program; if not, see <https://www.gnu.org/licenses>.
39 *
40 * SPDX-License-Identifier: GPL-3.0-only
41 */
42
43
44/*********************************************************************************************************************************
45* Header Files *
46*********************************************************************************************************************************/
47#ifndef LOG_GROUP /* defined when included by tstIEMCheckMc.cpp */
48# define LOG_GROUP LOG_GROUP_IEM_RE_THREADED
49#endif
50#define IEM_WITH_CODE_TLB_AND_OPCODE_BUF /* A bit hackish, but it's all in IEMInline.h. */
51#define VMCPU_INCL_CPUM_GST_CTX
52#include <VBox/vmm/iem.h>
53#include <VBox/vmm/cpum.h>
54#include <VBox/vmm/apic.h>
55#include <VBox/vmm/pdm.h>
56#include <VBox/vmm/pgm.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/em.h>
59#include <VBox/vmm/hm.h>
60#include <VBox/vmm/nem.h>
61#include <VBox/vmm/gim.h>
62#ifdef VBOX_WITH_NESTED_HWVIRT_SVM
63# include <VBox/vmm/em.h>
64# include <VBox/vmm/hm_svm.h>
65#endif
66#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
67# include <VBox/vmm/hmvmxinline.h>
68#endif
69#include <VBox/vmm/tm.h>
70#include <VBox/vmm/dbgf.h>
71#include <VBox/vmm/dbgftrace.h>
72#ifndef TST_IEM_CHECK_MC
73# include "IEMInternal.h"
74#endif
75#include <VBox/vmm/vmcc.h>
76#include <VBox/log.h>
77#include <VBox/err.h>
78#include <VBox/param.h>
79#include <VBox/dis.h>
80#include <VBox/disopcode-x86-amd64.h>
81#include <iprt/asm-math.h>
82#include <iprt/assert.h>
83#include <iprt/mem.h>
84#include <iprt/string.h>
85#include <iprt/sort.h>
86#include <iprt/x86.h>
87
88#ifndef TST_IEM_CHECK_MC
89# include "IEMInline.h"
90# include "IEMOpHlp.h"
91# include "IEMMc.h"
92#endif
93
94#include "IEMThreadedFunctions.h"
95
96
97/*
98 * Narrow down configs here to avoid wasting time on unused configs.
99 */
100
101#ifndef IEM_WITH_CODE_TLB
102# error The code TLB must be enabled for the recompiler.
103#endif
104
105#ifndef IEM_WITH_DATA_TLB
106# error The data TLB must be enabled for the recompiler.
107#endif
108
109#ifndef IEM_WITH_SETJMP
110# error The setjmp approach must be enabled for the recompiler.
111#endif
112
113
114/*********************************************************************************************************************************
115* Internal Functions *
116*********************************************************************************************************************************/
117static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb);
118
119
120/**
121 * Calculates the effective address of a ModR/M memory operand, extended version
122 * for use in the recompilers.
123 *
124 * Meant to be used via IEM_MC_CALC_RM_EFF_ADDR.
125 *
126 * May longjmp on internal error.
127 *
128 * @return The effective address.
129 * @param pVCpu The cross context virtual CPU structure of the calling thread.
130 * @param bRm The ModRM byte.
131 * @param cbImmAndRspOffset - First byte: The size of any immediate
132 * following the effective address opcode bytes
133 * (only for RIP relative addressing).
134 * - Second byte: RSP displacement (for POP [ESP]).
135 * @param puInfo Extra info: 32-bit displacement (bits 31:0) and
136 * SIB byte (bits 39:32).
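 * Illustrative sketch (not from the original source) of how a caller might
 * unpack @a puInfo after the call:
 * @code
 *      uint64_t uInfo;
 *      RTGCPTR const GCPtrEff = iemOpHlpCalcRmEffAddrJmpEx(pVCpu, bRm, 0, &uInfo);
 *      uint32_t const u32Disp = (uint32_t)uInfo;        /* displacement, bits 31:0 */
 *      uint8_t  const bSib    = (uint8_t)(uInfo >> 32); /* SIB byte, bits 39:32 */
 * @endcode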
137 *
138 * @note This must be defined in a source file with matching
139 * IEM_WITH_CODE_TLB_AND_OPCODE_BUF define till the define is made default
140 * or implemented differently...
141 */
142RTGCPTR iemOpHlpCalcRmEffAddrJmpEx(PVMCPUCC pVCpu, uint8_t bRm, uint32_t cbImmAndRspOffset, uint64_t *puInfo) IEM_NOEXCEPT_MAY_LONGJMP
143{
144 Log5(("iemOpHlpCalcRmEffAddrJmp: bRm=%#x\n", bRm));
145# define SET_SS_DEF() \
146 do \
147 { \
148 if (!(pVCpu->iem.s.fPrefixes & IEM_OP_PRF_SEG_MASK)) \
149 pVCpu->iem.s.iEffSeg = X86_SREG_SS; \
150 } while (0)
151
152 if (!IEM_IS_64BIT_CODE(pVCpu))
153 {
154/** @todo Check the effective address size crap! */
155 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_16BIT)
156 {
157 uint16_t u16EffAddr;
158
159 /* Handle the disp16 form with no registers first. */
160 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 6)
161 {
162 IEM_OPCODE_GET_NEXT_U16(&u16EffAddr);
163 *puInfo = u16EffAddr;
164 }
165 else
166 {
167 /* Get the displacement. */
168 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
169 {
170 case 0: u16EffAddr = 0; break;
171 case 1: IEM_OPCODE_GET_NEXT_S8_SX_U16(&u16EffAddr); break;
172 case 2: IEM_OPCODE_GET_NEXT_U16(&u16EffAddr); break;
173 default: AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_1)); /* (caller checked for these) */
174 }
175 *puInfo = u16EffAddr;
176
177 /* Add the base and index registers to the disp. */
178 switch (bRm & X86_MODRM_RM_MASK)
179 {
180 case 0: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.si; break;
181 case 1: u16EffAddr += pVCpu->cpum.GstCtx.bx + pVCpu->cpum.GstCtx.di; break;
182 case 2: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.si; SET_SS_DEF(); break;
183 case 3: u16EffAddr += pVCpu->cpum.GstCtx.bp + pVCpu->cpum.GstCtx.di; SET_SS_DEF(); break;
184 case 4: u16EffAddr += pVCpu->cpum.GstCtx.si; break;
185 case 5: u16EffAddr += pVCpu->cpum.GstCtx.di; break;
186 case 6: u16EffAddr += pVCpu->cpum.GstCtx.bp; SET_SS_DEF(); break;
187 case 7: u16EffAddr += pVCpu->cpum.GstCtx.bx; break;
188 }
189 }
190
191 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#06RX16 uInfo=%#RX64\n", u16EffAddr, *puInfo));
192 return u16EffAddr;
193 }
194
195 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
196 uint32_t u32EffAddr;
197 uint64_t uInfo;
198
199 /* Handle the disp32 form with no registers first. */
200 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
201 {
202 IEM_OPCODE_GET_NEXT_U32(&u32EffAddr);
203 uInfo = u32EffAddr;
204 }
205 else
206 {
207 /* Get the register (or SIB) value. */
208 uInfo = 0;
209 switch ((bRm & X86_MODRM_RM_MASK))
210 {
211 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
212 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
213 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
214 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
215 case 4: /* SIB */
216 {
217 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
218 uInfo = (uint64_t)bSib << 32;
219
220 /* Get the index and scale it. */
221 switch ((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK)
222 {
223 case 0: u32EffAddr = pVCpu->cpum.GstCtx.eax; break;
224 case 1: u32EffAddr = pVCpu->cpum.GstCtx.ecx; break;
225 case 2: u32EffAddr = pVCpu->cpum.GstCtx.edx; break;
226 case 3: u32EffAddr = pVCpu->cpum.GstCtx.ebx; break;
227 case 4: u32EffAddr = 0; /*none */ break;
228 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; break;
229 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
230 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
231 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
232 }
233 u32EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
234
235 /* add base */
236 switch (bSib & X86_SIB_BASE_MASK)
237 {
238 case 0: u32EffAddr += pVCpu->cpum.GstCtx.eax; break;
239 case 1: u32EffAddr += pVCpu->cpum.GstCtx.ecx; break;
240 case 2: u32EffAddr += pVCpu->cpum.GstCtx.edx; break;
241 case 3: u32EffAddr += pVCpu->cpum.GstCtx.ebx; break;
242 case 4: u32EffAddr += pVCpu->cpum.GstCtx.esp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
243 case 5:
244 if ((bRm & X86_MODRM_MOD_MASK) != 0)
245 {
246 u32EffAddr += pVCpu->cpum.GstCtx.ebp;
247 SET_SS_DEF();
248 }
249 else
250 {
251 uint32_t u32Disp;
252 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
253 u32EffAddr += u32Disp;
254 uInfo |= u32Disp;
255 }
256 break;
257 case 6: u32EffAddr += pVCpu->cpum.GstCtx.esi; break;
258 case 7: u32EffAddr += pVCpu->cpum.GstCtx.edi; break;
259 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
260 }
261 break;
262 }
263 case 5: u32EffAddr = pVCpu->cpum.GstCtx.ebp; SET_SS_DEF(); break;
264 case 6: u32EffAddr = pVCpu->cpum.GstCtx.esi; break;
265 case 7: u32EffAddr = pVCpu->cpum.GstCtx.edi; break;
266 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
267 }
268
269 /* Get and add the displacement. */
270 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
271 {
272 case 0:
273 break;
274 case 1:
275 {
276 int8_t i8Disp; IEM_OPCODE_GET_NEXT_S8(&i8Disp);
277 u32EffAddr += i8Disp;
278 uInfo |= (uint32_t)(int32_t)i8Disp;
279 break;
280 }
281 case 2:
282 {
283 uint32_t u32Disp; IEM_OPCODE_GET_NEXT_U32(&u32Disp);
284 u32EffAddr += u32Disp;
285 uInfo |= u32Disp;
286 break;
287 }
288 default:
289 AssertFailedStmt(IEM_DO_LONGJMP(pVCpu, VERR_IEM_IPE_2)); /* (caller checked for these) */
290 }
291 }
292
293 *puInfo = uInfo;
294 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RX32 uInfo=%#RX64\n", u32EffAddr, uInfo));
295 return u32EffAddr;
296 }
297
298 uint64_t u64EffAddr;
299 uint64_t uInfo;
300
301 /* Handle the rip+disp32 form with no registers first. */
302 if ((bRm & (X86_MODRM_MOD_MASK | X86_MODRM_RM_MASK)) == 5)
303 {
304 IEM_OPCODE_GET_NEXT_S32_SX_U64(&u64EffAddr);
305 uInfo = (uint32_t)u64EffAddr;
306 u64EffAddr += pVCpu->cpum.GstCtx.rip + IEM_GET_INSTR_LEN(pVCpu) + (cbImmAndRspOffset & UINT32_C(0xff));
307 }
308 else
309 {
310 /* Get the register (or SIB) value. */
311 uInfo = 0;
312 switch ((bRm & X86_MODRM_RM_MASK) | pVCpu->iem.s.uRexB)
313 {
314 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
315 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
316 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
317 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
318 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; SET_SS_DEF(); break;
319 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
320 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
321 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
322 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
323 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
324 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
325 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
326 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
327 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
328 /* SIB */
329 case 4:
330 case 12:
331 {
332 uint8_t bSib; IEM_OPCODE_GET_NEXT_U8(&bSib);
333 uInfo = (uint64_t)bSib << 32;
334
335 /* Get the index and scale it. */
336 switch (((bSib >> X86_SIB_INDEX_SHIFT) & X86_SIB_INDEX_SMASK) | pVCpu->iem.s.uRexIndex)
337 {
338 case 0: u64EffAddr = pVCpu->cpum.GstCtx.rax; break;
339 case 1: u64EffAddr = pVCpu->cpum.GstCtx.rcx; break;
340 case 2: u64EffAddr = pVCpu->cpum.GstCtx.rdx; break;
341 case 3: u64EffAddr = pVCpu->cpum.GstCtx.rbx; break;
342 case 4: u64EffAddr = 0; /*none */ break;
343 case 5: u64EffAddr = pVCpu->cpum.GstCtx.rbp; break;
344 case 6: u64EffAddr = pVCpu->cpum.GstCtx.rsi; break;
345 case 7: u64EffAddr = pVCpu->cpum.GstCtx.rdi; break;
346 case 8: u64EffAddr = pVCpu->cpum.GstCtx.r8; break;
347 case 9: u64EffAddr = pVCpu->cpum.GstCtx.r9; break;
348 case 10: u64EffAddr = pVCpu->cpum.GstCtx.r10; break;
349 case 11: u64EffAddr = pVCpu->cpum.GstCtx.r11; break;
350 case 12: u64EffAddr = pVCpu->cpum.GstCtx.r12; break;
351 case 13: u64EffAddr = pVCpu->cpum.GstCtx.r13; break;
352 case 14: u64EffAddr = pVCpu->cpum.GstCtx.r14; break;
353 case 15: u64EffAddr = pVCpu->cpum.GstCtx.r15; break;
354 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
355 }
356 u64EffAddr <<= (bSib >> X86_SIB_SCALE_SHIFT) & X86_SIB_SCALE_SMASK;
357
358 /* add base */
359 switch ((bSib & X86_SIB_BASE_MASK) | pVCpu->iem.s.uRexB)
360 {
361 case 0: u64EffAddr += pVCpu->cpum.GstCtx.rax; break;
362 case 1: u64EffAddr += pVCpu->cpum.GstCtx.rcx; break;
363 case 2: u64EffAddr += pVCpu->cpum.GstCtx.rdx; break;
364 case 3: u64EffAddr += pVCpu->cpum.GstCtx.rbx; break;
365 case 4: u64EffAddr += pVCpu->cpum.GstCtx.rsp + (cbImmAndRspOffset >> 8); SET_SS_DEF(); break;
366 case 6: u64EffAddr += pVCpu->cpum.GstCtx.rsi; break;
367 case 7: u64EffAddr += pVCpu->cpum.GstCtx.rdi; break;
368 case 8: u64EffAddr += pVCpu->cpum.GstCtx.r8; break;
369 case 9: u64EffAddr += pVCpu->cpum.GstCtx.r9; break;
370 case 10: u64EffAddr += pVCpu->cpum.GstCtx.r10; break;
371 case 11: u64EffAddr += pVCpu->cpum.GstCtx.r11; break;
372 case 12: u64EffAddr += pVCpu->cpum.GstCtx.r12; break;
373 case 14: u64EffAddr += pVCpu->cpum.GstCtx.r14; break;
374 case 15: u64EffAddr += pVCpu->cpum.GstCtx.r15; break;
375 /* complicated encodings */
376 case 5:
377 case 13:
378 if ((bRm & X86_MODRM_MOD_MASK) != 0)
379 {
380 if (!pVCpu->iem.s.uRexB)
381 {
382 u64EffAddr += pVCpu->cpum.GstCtx.rbp;
383 SET_SS_DEF();
384 }
385 else
386 u64EffAddr += pVCpu->cpum.GstCtx.r13;
387 }
388 else
389 {
390 uint32_t u32Disp;
391 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
392 u64EffAddr += (int32_t)u32Disp;
393 uInfo |= u32Disp;
394 }
395 break;
396 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
397 }
398 break;
399 }
400 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX);
401 }
402
403 /* Get and add the displacement. */
404 switch ((bRm >> X86_MODRM_MOD_SHIFT) & X86_MODRM_MOD_SMASK)
405 {
406 case 0:
407 break;
408 case 1:
409 {
410 int8_t i8Disp;
411 IEM_OPCODE_GET_NEXT_S8(&i8Disp);
412 u64EffAddr += i8Disp;
413 uInfo |= (uint32_t)(int32_t)i8Disp;
414 break;
415 }
416 case 2:
417 {
418 uint32_t u32Disp;
419 IEM_OPCODE_GET_NEXT_U32(&u32Disp);
420 u64EffAddr += (int32_t)u32Disp;
421 uInfo |= u32Disp;
422 break;
423 }
424 IEM_NOT_REACHED_DEFAULT_CASE_RET2(RTGCPTR_MAX); /* (caller checked for these) */
425 }
426
427 }
428
429 *puInfo = uInfo;
430 if (pVCpu->iem.s.enmEffAddrMode == IEMMODE_64BIT)
431 {
432 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr, uInfo));
433 return u64EffAddr;
434 }
435 Assert(pVCpu->iem.s.enmEffAddrMode == IEMMODE_32BIT);
436 Log5(("iemOpHlpCalcRmEffAddrJmp: EffAddr=%#010RGv uInfo=%#RX64\n", u64EffAddr & UINT32_MAX, uInfo));
437 return u64EffAddr & UINT32_MAX;
438}
439
440
441/*********************************************************************************************************************************
442* Translation Block Cache. *
443*********************************************************************************************************************************/
444
445/** @callback_method_impl{FNRTSORTCMP, Compare two TBs for pruning sorting purposes.} */
446static DECLCALLBACK(int) iemTbCachePruneCmpTb(void const *pvElement1, void const *pvElement2, void *pvUser)
447{
448 PCIEMTB const pTb1 = (PCIEMTB)pvElement1;
449 PCIEMTB const pTb2 = (PCIEMTB)pvElement2;
450 uint32_t const cMsSinceUse1 = (uint32_t)(uintptr_t)pvUser - pTb1->msLastUsed;
451 uint32_t const cMsSinceUse2 = (uint32_t)(uintptr_t)pvUser - pTb2->msLastUsed;
452 if (cMsSinceUse1 != cMsSinceUse2)
453 return cMsSinceUse1 < cMsSinceUse2 ? -1 : 1;
454 if (pTb1->cUsed != pTb2->cUsed)
455 return pTb1->cUsed > pTb2->cUsed ? -1 : 1;
456 if ((pTb1->fFlags & IEMTB_F_TYPE_MASK) != (pTb2->fFlags & IEMTB_F_TYPE_MASK))
457 return (pTb1->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE ? -1 : 1;
458 return 0;
459}
460
461#ifdef VBOX_STRICT
462/**
463 * Assertion helper that checks a collisions list count.
464 */
465static void iemTbCacheAssertCorrectCount(PIEMTBCACHE pTbCache, uint32_t idxHash, const char *pszOperation)
466{
467 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
468 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
469 while (pTb)
470 {
471 pTb = pTb->pNext;
472 cLeft--;
473 }
474 AssertMsg(cLeft == 0,
475 ("idxHash=%#x cLeft=%d; entry count=%d; %s\n",
476 idxHash, cLeft, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]), pszOperation));
477}
478#endif
479
480
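/**
 * Worker for iemTbCacheAdd used when the hash bucket is already full.
 *
 * Sorts the colliding TBs by recency and usage, frees the least interesting
 * half, and inserts @a pTb at the head of the remaining chain.
 */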
481DECL_NO_INLINE(static, void) iemTbCacheAddWithPruning(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb, uint32_t idxHash)
482{
483 STAM_PROFILE_START(&pTbCache->StatPrune, a);
484
485 /*
486 * First convert the collision list to an array.
487 */
488 PIEMTB apSortedTbs[IEMTBCACHE_PTR_MAX_COUNT];
489 uintptr_t cInserted = 0;
490 PIEMTB pTbCollision = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
491
492 pTbCache->apHash[idxHash] = NULL; /* Must NULL the entry before trying to free anything. */
493
494 while (pTbCollision && cInserted < RT_ELEMENTS(apSortedTbs))
495 {
496 apSortedTbs[cInserted++] = pTbCollision;
497 pTbCollision = pTbCollision->pNext;
498 }
499
500 /* Free any excess (impossible). */
501 if (RT_LIKELY(!pTbCollision))
502 Assert(cInserted == RT_ELEMENTS(apSortedTbs));
503 else
504 do
505 {
506 PIEMTB pTbToFree = pTbCollision;
507 pTbCollision = pTbToFree->pNext;
508 iemTbAllocatorFree(pVCpu, pTbToFree);
509 } while (pTbCollision);
510
511 /*
512 * Sort it by most recently used and usage count.
513 */
514 RTSortApvShell((void **)apSortedTbs, cInserted, iemTbCachePruneCmpTb, (void *)(uintptr_t)pVCpu->iem.s.msRecompilerPollNow);
515
516 /* We keep half the list for now. Perhaps a bit aggressive... */
517 uintptr_t const cKeep = cInserted / 2;
518
519 /* First free up the TBs we don't wish to keep (before creating the new
520 list because otherwise the free code will scan the list for each one
521 without ever finding it). */
522 for (uintptr_t idx = cKeep; idx < cInserted; idx++)
523 iemTbAllocatorFree(pVCpu, apSortedTbs[idx]);
524
525 /* Then chain the new TB together with the existing ones we want to keep
526 and insert this list into the hash table. */
527 pTbCollision = pTb;
528 for (uintptr_t idx = 0; idx < cKeep; idx++)
529 pTbCollision = pTbCollision->pNext = apSortedTbs[idx];
530 pTbCollision->pNext = NULL;
531
532 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cKeep + 1);
533#ifdef VBOX_STRICT
534 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add w/ pruning");
535#endif
536
537 STAM_PROFILE_STOP(&pTbCache->StatPrune, a);
538}
539
540
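/**
 * Adds @a pTb to the hash table, delegating to iemTbCacheAddWithPruning when
 * the colliding chain has reached IEMTBCACHE_PTR_MAX_COUNT entries.
 *
 * @param pVCpu The cross context virtual CPU structure of the calling
 * thread.
 * @param pTbCache The translation block cache.
 * @param pTb The translation block to add.
 */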
541static void iemTbCacheAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
542{
543 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
544 PIEMTB const pTbOldHead = pTbCache->apHash[idxHash];
545 if (!pTbOldHead)
546 {
547 pTb->pNext = NULL;
548 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, 1); /** @todo could make 1 implicit... */
549 }
550 else
551 {
552 STAM_REL_COUNTER_INC(&pTbCache->cCollisions);
553 uintptr_t cCollisions = IEMTBCACHE_PTR_GET_COUNT(pTbOldHead);
554 if (cCollisions < IEMTBCACHE_PTR_MAX_COUNT)
555 {
556 pTb->pNext = IEMTBCACHE_PTR_GET_TB(pTbOldHead);
557 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb, cCollisions + 1);
558#ifdef VBOX_STRICT
559 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "add");
560#endif
561 }
562 else
563 iemTbCacheAddWithPruning(pVCpu, pTbCache, pTb, idxHash);
564 }
565}
566
567
568/**
569 * Unlinks @a pTb from the hash table if found in it.
570 *
571 * @returns true if unlinked, false if not present.
572 * @param pTbCache The hash table.
573 * @param pTb The TB to remove.
574 */
575static bool iemTbCacheRemove(PIEMTBCACHE pTbCache, PIEMTB pTb)
576{
577 uint32_t const idxHash = IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc);
578 PIEMTB pTbHash = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
579 uint32_t volatile cLength = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]); RT_NOREF(cLength);
580
581 /*
582 * At the head of the collision list?
583 */
584 if (pTbHash == pTb)
585 {
586 if (!pTb->pNext)
587 pTbCache->apHash[idxHash] = NULL;
588 else
589 {
590 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTb->pNext,
591 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
592#ifdef VBOX_STRICT
593 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #1");
594#endif
595 }
596 return true;
597 }
598
599 /*
600 * Search the collision list.
601 */
602 PIEMTB const pTbHead = pTbHash;
603 while (pTbHash)
604 {
605 PIEMTB const pNextTb = pTbHash->pNext;
606 if (pNextTb == pTb)
607 {
608 pTbHash->pNext = pTb->pNext;
609 pTbCache->apHash[idxHash] = IEMTBCACHE_PTR_MAKE(pTbHead, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - 1);
610#ifdef VBOX_STRICT
611 iemTbCacheAssertCorrectCount(pTbCache, idxHash, "remove #2");
612#endif
613 return true;
614 }
615 pTbHash = pNextTb;
616 }
617 return false;
618}
619
620
621/**
622 * Looks up a TB for the given PC and flags in the cache.
623 *
624 * @returns Pointer to TB on success, NULL if not found.
625 * @param pVCpu The cross context virtual CPU structure of the
626 * calling thread.
627 * @param pTbCache The translation block cache.
628 * @param GCPhysPc The PC to look up a TB for.
629 * @param fExtraFlags The extra flags to join with IEMCPU::fExec for
630 * the lookup.
631 * @thread EMT(pVCpu)
632 */
633static PIEMTB iemTbCacheLookup(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache,
634 RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
635{
636 uint32_t const fFlags = ((pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags) & IEMTB_F_KEY_MASK;
637 uint32_t const idxHash = IEMTBCACHE_HASH_NO_KEY_MASK(pTbCache, fFlags, GCPhysPc);
638 PIEMTB pTb = IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]);
639#if defined(VBOX_STRICT) || defined(LOG_ENABLED)
640 int cLeft = IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]);
641#endif
642 while (pTb)
643 {
644 if (pTb->GCPhysPc == GCPhysPc)
645 {
646 if ((pTb->fFlags & IEMTB_F_KEY_MASK) == fFlags)
647 {
648 if (pTb->x86.fAttr == (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u)
649 {
650 STAM_COUNTER_INC(&pTbCache->cLookupHits);
651 AssertMsg(cLeft > 0, ("%d\n", cLeft));
652
653 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
654 pTb->cUsed++;
655#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
656 if ((pTb->fFlags & IEMTB_F_TYPE_NATIVE) || pTb->cUsed != 16)
657 {
658 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
659 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
660 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
661 return pTb;
662 }
663 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d) - recompiling\n",
664 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
665 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
666 return iemNativeRecompile(pVCpu, pTb);
667#else
668 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: %p (@ %d / %d)\n",
669 fFlags, GCPhysPc, idxHash, pTb, IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) - cLeft,
670 IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
671 return pTb;
672#endif
673 }
674 Log11(("TB miss: CS: %#x, wanted %#x\n", pTb->x86.fAttr, (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u));
675 }
676 else
677 Log11(("TB miss: fFlags: %#x, wanted %#x\n", pTb->fFlags, fFlags));
678 }
679 else
680 Log11(("TB miss: GCPhysPc: %#x, wanted %#x\n", pTb->GCPhysPc, GCPhysPc));
681
682 pTb = pTb->pNext;
683#ifdef VBOX_STRICT
684 cLeft--;
685#endif
686 }
687 AssertMsg(cLeft == 0, ("%d\n", cLeft));
688 STAM_REL_COUNTER_INC(&pTbCache->cLookupMisses);
689 Log10(("TB lookup: fFlags=%#x GCPhysPc=%RGp idxHash=%#x: NULL - (%p L %d)\n", fFlags, GCPhysPc, idxHash,
690 IEMTBCACHE_PTR_GET_TB(pTbCache->apHash[idxHash]), IEMTBCACHE_PTR_GET_COUNT(pTbCache->apHash[idxHash]) ));
691 return pTb;
692}
693
694
695/*********************************************************************************************************************************
696* Translation Block Allocator. *
697*********************************************************************************************************************************/
698/*
699 * Translation block allocation management.
700 */
701
702#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
703# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
704 ((a_idxTb) >> (a_pTbAllocator)->cChunkShift)
705# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
706 ((a_idxTb) & (a_pTbAllocator)->fChunkMask)
707# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
708 ((uint32_t)(a_idxChunk) << (a_pTbAllocator)->cChunkShift)
709#else
710# define IEMTBALLOC_IDX_TO_CHUNK(a_pTbAllocator, a_idxTb) \
711 ((a_idxTb) / (a_pTbAllocator)->cTbsPerChunk)
712# define IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(a_pTbAllocator, a_idxTb, a_idxChunk) \
713 ((a_idxTb) - (a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
714# define IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) \
715 ((uint32_t)(a_idxChunk) * (a_pTbAllocator)->cTbsPerChunk)
716#endif
717/** Makes a TB index from a chunk index and TB index within that chunk. */
718#define IEMTBALLOC_IDX_MAKE(a_pTbAllocator, a_idxChunk, a_idxInChunk) \
719 (IEMTBALLOC_IDX_FOR_CHUNK(a_pTbAllocator, a_idxChunk) + (a_idxInChunk))
720
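/*
 * Worked example (illustrative numbers, not from the source): with
 * cTbsPerChunk = 2048 in the non-power-of-two configuration, TB index 5000
 * maps to chunk 2 (5000 / 2048) and in-chunk index 904 (5000 - 2 * 2048),
 * and IEMTBALLOC_IDX_MAKE(pTbAllocator, 2, 904) yields 5000 again.
 */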
721
722/**
723 * Initializes the TB allocator and cache for an EMT.
724 *
725 * @returns VBox status code.
726 * @param pVM The VM handle.
727 * @param cInitialTbs The initial number of translation blocks to
728 * preallocate.
729 * @param cMaxTbs The max number of translation blocks allowed.
730 * @param cbInitialExec The initial size of the executable memory allocator.
731 * @param cbMaxExec The max size of the executable memory allocator.
732 * @param cbChunkExec The chunk size for executable memory allocator. Zero
733 * or UINT32_MAX for automatically determining this.
734 * @thread EMT
735 */
736DECLCALLBACK(int) iemTbInit(PVMCC pVM, uint32_t cInitialTbs, uint32_t cMaxTbs,
737 uint64_t cbInitialExec, uint64_t cbMaxExec, uint32_t cbChunkExec)
738{
739 PVMCPUCC pVCpu = VMMGetCpu(pVM);
740 Assert(!pVCpu->iem.s.pTbCacheR3);
741 Assert(!pVCpu->iem.s.pTbAllocatorR3);
742
743 /*
744 * Calculate the chunk size of the TB allocator.
745 * The minimum chunk size is 2MiB.
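 * (Illustrative numbers, not from the source: if sizeof(IEMTB) were 256
 * bytes, a 2 MiB chunk would hold 8192 TBs; the loop below doubles
 * cbPerChunk until cMaxTbs fits within RT_ELEMENTS(aChunks) chunks.)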
746 */
747 AssertCompile(!(sizeof(IEMTB) & IEMTBCACHE_PTR_COUNT_MASK));
748 uint32_t cbPerChunk = _2M;
749 uint32_t cTbsPerChunk = _2M / sizeof(IEMTB);
750#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
751 uint8_t const cTbShift = ASMBitFirstSetU32((uint32_t)sizeof(IEMTB)) - 1;
752 uint8_t cChunkShift = 21 - cTbShift;
753 AssertCompile(RT_BIT_32(21) == _2M); Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
754#endif
755 for (;;)
756 {
757 if (cMaxTbs <= cTbsPerChunk * (uint64_t)RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks))
758 break;
759 cbPerChunk *= 2;
760 cTbsPerChunk = cbPerChunk / sizeof(IEMTB);
761#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
762 cChunkShift += 1;
763#endif
764 }
765
766 uint32_t cMaxChunks = (cMaxTbs + cTbsPerChunk - 1) / cTbsPerChunk;
767 Assert(cMaxChunks * cTbsPerChunk >= cMaxTbs);
768 Assert(cMaxChunks <= RT_ELEMENTS(pVCpu->iem.s.pTbAllocatorR3->aChunks));
769
770 cMaxTbs = cMaxChunks * cTbsPerChunk;
771
772 /*
773 * Allocate and initialize it.
774 */
775 uint32_t const c64BitWords = RT_ALIGN_32(cMaxTbs, 64) / 64;
776 size_t const cbTbAllocator = RT_UOFFSETOF_DYN(IEMTBALLOCATOR, bmAllocated[c64BitWords]);
777 PIEMTBALLOCATOR const pTbAllocator = (PIEMTBALLOCATOR)RTMemAllocZ(cbTbAllocator);
778 if (!pTbAllocator)
779 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
780 "Failed to allocate %zu bytes (max %u TBs) for the TB allocator of VCpu #%u",
781 cbTbAllocator, cMaxTbs, pVCpu->idCpu);
782 pTbAllocator->uMagic = IEMTBALLOCATOR_MAGIC;
783 pTbAllocator->cMaxChunks = (uint8_t)cMaxChunks;
784 pTbAllocator->cTbsPerChunk = cTbsPerChunk;
785 pTbAllocator->cbPerChunk = cbPerChunk;
786 pTbAllocator->cMaxTbs = cMaxTbs;
787#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
788 pTbAllocator->fChunkMask = cTbsPerChunk - 1;
789 pTbAllocator->cChunkShift = cChunkShift;
790 Assert(RT_BIT_32(cChunkShift) == cTbsPerChunk);
791#endif
792
793 memset(pTbAllocator->bmAllocated, 0xff, c64BitWords * sizeof(uint64_t)); /* Mark all as allocated, clear as chunks are added. */
794 pVCpu->iem.s.pTbAllocatorR3 = pTbAllocator;
795
796 /*
797 * Allocate the initial chunks.
798 */
799 for (uint32_t idxChunk = 0; ; idxChunk++)
800 {
801 PIEMTB const paTbs = pTbAllocator->aChunks[idxChunk].paTbs = (PIEMTB)RTMemPageAllocZ(cbPerChunk);
802 if (!paTbs)
803 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
804 "Failed to initial %zu bytes for the #%u chunk of TBs for VCpu #%u",
805 cbPerChunk, idxChunk, pVCpu->idCpu);
806
807 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
808 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
809 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
810 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
811 pTbAllocator->cTotalTbs += cTbsPerChunk;
812
813 if ((idxChunk + 1) * cTbsPerChunk >= cInitialTbs)
814 break;
815 }
816
817 /*
818 * Calculate the size of the hash table. We double the max TB count and
819 * round it up to the nearest power of two.
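 * E.g. (illustrative numbers only): cMaxTbs = 24576 would double to 49152
 * and round up to 65536 hash table entries.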
820 */
821 uint32_t cCacheEntries = cMaxTbs * 2;
822 if (!RT_IS_POWER_OF_TWO(cCacheEntries))
823 {
824 uint8_t const iBitTop = ASMBitFirstSetU32(cCacheEntries);
825 cCacheEntries = RT_BIT_32(iBitTop);
826 Assert(cCacheEntries >= cMaxTbs * 2);
827 }
828
829 size_t const cbTbCache = RT_UOFFSETOF_DYN(IEMTBCACHE, apHash[cCacheEntries]);
830 PIEMTBCACHE const pTbCache = (PIEMTBCACHE)RTMemAllocZ(cbTbCache);
831 if (!pTbCache)
832 return VMSetError(pVM, VERR_NO_MEMORY, RT_SRC_POS,
833 "Failed to allocate %zu bytes (%u entries) for the TB cache of VCpu #%u",
834 cbTbCache, cCacheEntries, pVCpu->idCpu);
835
836 /*
837 * Initialize it (assumes zeroed by the allocator).
838 */
839 pTbCache->uMagic = IEMTBCACHE_MAGIC;
840 pTbCache->cHash = cCacheEntries;
841 pTbCache->uHashMask = cCacheEntries - 1;
842 Assert(pTbCache->cHash > pTbCache->uHashMask);
843 pVCpu->iem.s.pTbCacheR3 = pTbCache;
844
845 /*
846 * Initialize the native executable memory allocator.
847 */
848#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
849 int rc = iemExecMemAllocatorInit(pVCpu, cbMaxExec, cbInitialExec, cbChunkExec);
850 AssertLogRelRCReturn(rc, rc);
851#else
852 RT_NOREF(cbMaxExec, cbInitialExec, cbChunkExec);
853#endif
854
855 return VINF_SUCCESS;
856}
857
858
859/**
860 * Inner free worker.
861 */
862static void iemTbAllocatorFreeInner(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator,
863 PIEMTB pTb, uint32_t idxChunk, uint32_t idxInChunk)
864{
865 Assert(idxChunk < pTbAllocator->cAllocatedChunks);
866 Assert(idxInChunk < pTbAllocator->cTbsPerChunk);
867 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[idxChunk].paTbs) == idxInChunk);
868 Assert(ASMBitTest(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk)));
869
870 /*
871 * Unlink the TB from the hash table.
872 */
873 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
874
875 /*
876 * Free the TB itself.
877 */
878 switch (pTb->fFlags & IEMTB_F_TYPE_MASK)
879 {
880 case IEMTB_F_TYPE_THREADED:
881 pTbAllocator->cThreadedTbs -= 1;
882 RTMemFree(pTb->Thrd.paCalls);
883 break;
884#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
885 case IEMTB_F_TYPE_NATIVE:
886 pTbAllocator->cNativeTbs -= 1;
887 iemExecMemAllocatorFree(pVCpu, pTb->Native.paInstructions,
888 pTb->Native.cInstructions * sizeof(pTb->Native.paInstructions[0]));
889 break;
890#endif
891 default:
892 AssertFailed();
893 }
894 RTMemFree(pTb->pabOpcodes);
895
896 pTb->pNext = NULL;
897 pTb->fFlags = 0;
898 pTb->GCPhysPc = UINT64_MAX;
899 pTb->Gen.uPtr = 0;
900 pTb->Gen.uData = 0;
901 pTb->cbOpcodes = 0;
902 pTb->pabOpcodes = NULL;
903
904 ASMBitClear(&pTbAllocator->bmAllocated, IEMTBALLOC_IDX_MAKE(pTbAllocator, idxChunk, idxInChunk));
905 Assert(pTbAllocator->cInUseTbs > 0);
906
907 pTbAllocator->cInUseTbs -= 1;
908 STAM_REL_COUNTER_INC(&pTbAllocator->StatFrees);
909}
910
911
912/**
913 * Frees the given TB.
914 *
915 * @param pVCpu The cross context virtual CPU structure of the calling
916 * thread.
917 * @param pTb The translation block to free.
918 * @thread EMT(pVCpu)
919 */
920static void iemTbAllocatorFree(PVMCPUCC pVCpu, PIEMTB pTb)
921{
922 /*
923 * Validate state.
924 */
925 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
926 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
927 uint8_t const idxChunk = pTb->idxAllocChunk;
928 AssertLogRelReturnVoid(idxChunk < pTbAllocator->cAllocatedChunks);
929 uintptr_t const idxInChunk = pTb - pTbAllocator->aChunks[idxChunk].paTbs;
930 AssertLogRelReturnVoid(idxInChunk < pTbAllocator->cTbsPerChunk);
931
932 /*
933 * Call inner worker.
934 */
935 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, (uint32_t)idxInChunk);
936}
937
938
939/**
940 * Schedules a native TB for freeing when it's no longer being executed and
941 * part of the caller's call stack.
942 *
943 * The TB will be removed from the translation block cache, though, so it isn't
944 * possible to execute it again, and the IEMTB::pNext member can be used to link
945 * it together with other TBs awaiting freeing.
946 *
947 * @param pVCpu The cross context virtual CPU structure of the calling
948 * thread.
949 * @param pTb The translation block to schedule for freeing.
950 */
951static void iemTbAlloctorScheduleForFree(PVMCPUCC pVCpu, PIEMTB pTb)
952{
953 /*
954 * Validate state.
955 */
956 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
957 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
958 Assert(pTb->idxAllocChunk < pTbAllocator->cAllocatedChunks);
959 Assert((uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs) < pTbAllocator->cTbsPerChunk);
960 Assert(ASMBitTest(&pTbAllocator->bmAllocated,
961 IEMTBALLOC_IDX_MAKE(pTbAllocator, pTb->idxAllocChunk,
962 (uintptr_t)(pTb - pTbAllocator->aChunks[pTb->idxAllocChunk].paTbs))));
963 Assert((pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE);
964
965 /*
966 * Remove it from the cache and prepend it to the allocator's todo list.
967 */
968 iemTbCacheRemove(pVCpu->iem.s.pTbCacheR3, pTb);
969
970 pTb->pNext = pTbAllocator->pDelayedFreeHead;
971 pTbAllocator->pDelayedFreeHead = pTb;
972}
973
974
975/**
976 * Processes the delayed frees.
977 *
978 * This is called by the allocator function as well as the native recompile
979 * function before making any TB or executable memory allocations respectively.
980 */
981void iemTbAllocatorProcessDelayedFrees(PVMCPUCC pVCpu, PIEMTBALLOCATOR pTbAllocator)
982{
983 PIEMTB pTb = pTbAllocator->pDelayedFreeHead;
984 pTbAllocator->pDelayedFreeHead = NULL;
985 while (pTb)
986 {
987 PIEMTB const pTbNext = pTb->pNext;
988 Assert(pVCpu->iem.s.pCurTbR3 != pTb);
989 iemTbAllocatorFree(pVCpu, pTb);
990 pTb = pTbNext;
991 }
992}
993
994
995/**
996 * Grow the translation block allocator with another chunk.
997 */
998static int iemTbAllocatorGrow(PVMCPUCC pVCpu)
999{
1000 /*
1001 * Validate state.
1002 */
1003 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1004 AssertReturn(pTbAllocator, VERR_WRONG_ORDER);
1005 AssertReturn(pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC, VERR_INVALID_MAGIC);
1006 uint32_t const idxChunk = pTbAllocator->cAllocatedChunks;
1007 AssertReturn(idxChunk < pTbAllocator->cMaxChunks, VERR_OUT_OF_RESOURCES);
1008
1009 /*
1010 * Allocate a new chunk and add it to the allocator.
1011 */
1012 PIEMTB const paTbs = (PIEMTB)RTMemPageAllocZ(pTbAllocator->cbPerChunk);
1013 AssertLogRelReturn(paTbs, VERR_NO_PAGE_MEMORY);
1014 pTbAllocator->aChunks[idxChunk].paTbs = paTbs;
1015
1016 uint32_t const cTbsPerChunk = pTbAllocator->cTbsPerChunk;
1017 for (uint32_t iTb = 0; iTb < cTbsPerChunk; iTb++)
1018 paTbs[iTb].idxAllocChunk = idxChunk; /* This is not strictly necessary... */
1019 ASMBitClearRange(pTbAllocator->bmAllocated, idxChunk * cTbsPerChunk, (idxChunk + 1) * cTbsPerChunk);
1020 pTbAllocator->cAllocatedChunks = (uint16_t)(idxChunk + 1);
1021 pTbAllocator->cTotalTbs += cTbsPerChunk;
1022 pTbAllocator->iStartHint = idxChunk * cTbsPerChunk;
1023
1024 return VINF_SUCCESS;
1025}
1026
1027
1028/**
1029 * Allocates a TB from an allocator that has a free block.
1030 *
1031 * This is common code to both the fast and slow allocator code paths.
1032 */
1033DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAllocCore(PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1034{
1035 Assert(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs);
1036
1037 int idxTb;
1038 if (pTbAllocator->iStartHint < pTbAllocator->cTotalTbs)
1039 idxTb = ASMBitNextClear(pTbAllocator->bmAllocated,
1040 pTbAllocator->cTotalTbs,
1041 pTbAllocator->iStartHint & ~(uint32_t)63);
1042 else
1043 idxTb = -1;
1044 if (idxTb < 0)
1045 {
1046 idxTb = ASMBitFirstClear(pTbAllocator->bmAllocated, pTbAllocator->cTotalTbs);
1047 AssertLogRelReturn(idxTb >= 0, NULL);
1048 }
1049 Assert((uint32_t)idxTb < pTbAllocator->cTotalTbs);
1050 ASMBitSet(pTbAllocator->bmAllocated, idxTb);
1051
1052 /** @todo shift/mask optimization for power of two IEMTB sizes. */
1053 uint32_t const idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTb);
1054 uint32_t const idxTbInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTb, idxChunk);
1055 PIEMTB const pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxTbInChunk];
1056 Assert(pTb->idxAllocChunk == idxChunk);
1057
1058 pTbAllocator->cInUseTbs += 1;
1059 if (fThreaded)
1060 pTbAllocator->cThreadedTbs += 1;
1061 else
1062 pTbAllocator->cNativeTbs += 1;
1063 STAM_REL_COUNTER_INC(&pTbAllocator->StatAllocs);
1064 return pTb;
1065}
1066
1067
1068/**
1069 * Slow path for iemTbAllocatorAlloc.
1070 */
1071static PIEMTB iemTbAllocatorAllocSlow(PVMCPUCC pVCpu, PIEMTBALLOCATOR const pTbAllocator, bool fThreaded)
1072{
1073 /*
1074 * With some luck we can add another chunk.
1075 */
1076 if (pTbAllocator->cAllocatedChunks < pTbAllocator->cMaxChunks)
1077 {
1078 int rc = iemTbAllocatorGrow(pVCpu);
1079 if (RT_SUCCESS(rc))
1080 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1081 }
1082
1083 /*
1084 * We have to prune stuff. Sigh.
1085 *
1086 * This requires scanning for older TBs and kicking them out. Not sure how to
1087 * best do this as we don't want to maintain any list of TBs ordered by last
1088 * usage time. But one reasonably simple approach would be that each time we
1089 * get here we continue a sequential scan of the allocation chunks,
1090 * considering just a smallish number of TBs and freeing a fixed portion of
1091 * them. Say, we consider the next 128 TBs, freeing the least recently used
1092 * out of each group of 4 TBs, resulting in 32 free TBs.
1093 */
1094 STAM_PROFILE_START(&pTbAllocator->StatPrune, a);
1095 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1096 uint32_t const cTbsToPrune = 128;
1097 uint32_t const cTbsPerGroup = 4;
1098 uint32_t cFreedTbs = 0;
1099#ifdef IEMTB_SIZE_IS_POWER_OF_TWO
1100 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom & ~(uint32_t)(cTbsToPrune - 1); /* Stay within a chunk! */
1101#else
1102 uint32_t idxTbPruneFrom = pTbAllocator->iPruneFrom;
1103#endif
1104 if (idxTbPruneFrom >= pTbAllocator->cMaxTbs)
1105 idxTbPruneFrom = 0;
1106 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1107 {
1108 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1109 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1110 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1111 uint32_t cMsAge = msNow - pTb->msLastUsed;
1112 Assert(pTb->fFlags & IEMTB_F_TYPE_MASK);
1113
1114 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1115 {
1116#ifndef IEMTB_SIZE_IS_POWER_OF_TWO
1117 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1118 { /* likely */ }
1119 else
1120 {
1121 idxInChunk2 = 0;
1122 idxChunk2 += 1;
1123 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1124 idxChunk2 = 0;
1125 }
1126#endif
1127 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1128 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1129 if ( cMsAge2 > cMsAge
1130 || (cMsAge2 == cMsAge && pTb2->cUsed < pTb->cUsed))
1131 {
1132 Assert(pTb2->fFlags & IEMTB_F_TYPE_MASK);
1133 pTb = pTb2;
1134 idxChunk = idxChunk2;
1135 idxInChunk = idxInChunk2;
1136 cMsAge = cMsAge2;
1137 }
1138 }
1139
1140 /* Free the TB. */
1141 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1142 cFreedTbs++; /* paranoia */
1143 }
1144 pTbAllocator->iPruneFrom = idxTbPruneFrom;
1145 STAM_PROFILE_STOP(&pTbAllocator->StatPrune, a);
1146
1147 /*
1148 * Allocate a TB from the ones we've pruned.
1149 */
1150 if (cFreedTbs)
1151 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1152 return NULL;
1153}
1154
1155
1156/**
1157 * Allocate a translation block.
1158 *
1159 * @returns Pointer to block on success, NULL if we're out and are unable to
1160 * free up an existing one (very unlikely once implemented).
1161 * @param pVCpu The cross context virtual CPU structure of the calling
1162 * thread.
1163 * @param fThreaded Set if threaded TB being allocated, clear if native TB.
1164 * For statistics.
1165 */
1166DECL_FORCE_INLINE(PIEMTB) iemTbAllocatorAlloc(PVMCPUCC pVCpu, bool fThreaded)
1167{
1168 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1169 Assert(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1170
1171 /* Free any pending TBs before we proceed. */
1172 if (!pTbAllocator->pDelayedFreeHead)
1173 { /* probably likely */ }
1174 else
1175 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1176
1177 /* If the allocator is full, take the slow code path. */
1178 if (RT_LIKELY(pTbAllocator->cInUseTbs < pTbAllocator->cTotalTbs))
1179 return iemTbAllocatorAllocCore(pTbAllocator, fThreaded);
1180 return iemTbAllocatorAllocSlow(pVCpu, pTbAllocator, fThreaded);
1181}
1182
1183
1184/**
1185 * This is called when we're out of space for native TBs.
1186 *
1187 * This uses a variation on the pruning in iemTbAllocatorAllocSlow.
1188 * The difference is that we only prune native TBs and will only free any if
1189 * there are at least two in a group. The conditions under which we're called are
1190 * different - there will probably be free TBs in the table when we're called.
1191 * Therefore we increase the group size and max scan length, though we'll stop
1192 * scanning once we've reached the requested size (@a cNeededInstrs) and freed
1193 * up at least 8 TBs.
1194 */
1195void iemTbAllocatorFreeupNativeSpace(PVMCPUCC pVCpu, uint32_t cNeededInstrs)
1196{
1197 PIEMTBALLOCATOR const pTbAllocator = pVCpu->iem.s.pTbAllocatorR3;
1198 AssertReturnVoid(pTbAllocator && pTbAllocator->uMagic == IEMTBALLOCATOR_MAGIC);
1199
1200 STAM_REL_PROFILE_START(&pTbAllocator->StatPruneNative, a);
1201
1202 /*
1203 * Flush the delayed free list before we start freeing TBs indiscriminately.
1204 */
1205 iemTbAllocatorProcessDelayedFrees(pVCpu, pTbAllocator);
1206
1207 /*
1208 * Scan and free TBs.
1209 */
1210 uint32_t const msNow = pVCpu->iem.s.msRecompilerPollNow;
1211 uint32_t const cTbsToPrune = 128 * 8;
1212 uint32_t const cTbsPerGroup = 4 * 4;
1213 uint32_t cFreedTbs = 0;
1214 uint32_t cMaxInstrs = 0;
1215 uint32_t idxTbPruneFrom = pTbAllocator->iPruneNativeFrom & ~(uint32_t)(cTbsPerGroup - 1);
1216 for (uint32_t i = 0; i < cTbsToPrune; i += cTbsPerGroup, idxTbPruneFrom += cTbsPerGroup)
1217 {
1218 if (idxTbPruneFrom >= pTbAllocator->cTotalTbs)
1219 idxTbPruneFrom = 0;
1220 uint32_t idxChunk = IEMTBALLOC_IDX_TO_CHUNK(pTbAllocator, idxTbPruneFrom);
1221 uint32_t idxInChunk = IEMTBALLOC_IDX_TO_INDEX_IN_CHUNK(pTbAllocator, idxTbPruneFrom, idxChunk);
1222 PIEMTB pTb = &pTbAllocator->aChunks[idxChunk].paTbs[idxInChunk];
1223 uint32_t cMsAge = pTb->fFlags & IEMTB_F_TYPE_NATIVE ? msNow - pTb->msLastUsed : msNow;
1224 uint8_t cNativeTbs = (pTb->fFlags & IEMTB_F_TYPE_NATIVE) != 0;
1225
1226 for (uint32_t j = 1, idxChunk2 = idxChunk, idxInChunk2 = idxInChunk + 1; j < cTbsPerGroup; j++, idxInChunk2++)
1227 {
1228 if (idxInChunk2 < pTbAllocator->cTbsPerChunk)
1229 { /* likely */ }
1230 else
1231 {
1232 idxInChunk2 = 0;
1233 idxChunk2 += 1;
1234 if (idxChunk2 >= pTbAllocator->cAllocatedChunks)
1235 idxChunk2 = 0;
1236 }
1237 PIEMTB const pTb2 = &pTbAllocator->aChunks[idxChunk2].paTbs[idxInChunk2];
1238 if (pTb2->fFlags & IEMTB_F_TYPE_NATIVE)
1239 {
1240 cNativeTbs += 1;
1241 uint32_t const cMsAge2 = msNow - pTb2->msLastUsed;
1242 if ( cMsAge2 > cMsAge
1243 || ( cMsAge2 == cMsAge
1244 && ( pTb2->cUsed < pTb->cUsed
1245 || ( pTb2->cUsed == pTb->cUsed
1246 && pTb2->Native.cInstructions > pTb->Native.cInstructions)))
1247 || !(pTb->fFlags & IEMTB_F_TYPE_NATIVE))
1248 {
1249 pTb = pTb2;
1250 idxChunk = idxChunk2;
1251 idxInChunk = idxInChunk2;
1252 cMsAge = cMsAge2;
1253 }
1254 }
1255 }
1256
1257 /* Free the TB if we found at least two native ones in this group. */
1258 if (cNativeTbs >= 2)
1259 {
1260 cMaxInstrs = RT_MAX(cMaxInstrs, pTb->Native.cInstructions);
1261 iemTbAllocatorFreeInner(pVCpu, pTbAllocator, pTb, idxChunk, idxInChunk);
1262 cFreedTbs++;
1263 if (cFreedTbs >= 8 && cMaxInstrs >= cNeededInstrs)
1264 break;
1265 }
1266 }
1267 pTbAllocator->iPruneNativeFrom = idxTbPruneFrom;
1268
1269 STAM_REL_PROFILE_STOP(&pTbAllocator->StatPruneNative, a);
1270}
1271
1272
1273/*********************************************************************************************************************************
1274* Threaded Recompiler Core *
1275*********************************************************************************************************************************/
1276
1277/**
1278 * Allocate a translation block for threaded recompilation.
1279 *
1280 * This is allocated with maxed out call table and storage for opcode bytes,
1281 * because it's only supposed to be called once per EMT to allocate the TB
1282 * pointed to by IEMCPU::pThrdCompileTbR3.
1283 *
1284 * @returns Pointer to the translation block on success, NULL on failure.
1285 * @param pVM The cross context virtual machine structure.
1286 * @param pVCpu The cross context virtual CPU structure of the calling
1287 * thread.
1288 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1289 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1290 */
1291static PIEMTB iemThreadedTbAlloc(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1292{
1293 PIEMTB pTb = (PIEMTB)RTMemAllocZ(sizeof(IEMTB));
1294 if (pTb)
1295 {
1296 unsigned const cCalls = 256;
1297 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemAlloc(sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1298 if (pTb->Thrd.paCalls)
1299 {
1300 pTb->pabOpcodes = (uint8_t *)RTMemAlloc(cCalls * 16);
1301 if (pTb->pabOpcodes)
1302 {
1303 pVCpu->iem.s.cbOpcodesAllocated = cCalls * 16;
1304 pTb->Thrd.cAllocated = cCalls;
1305 pTb->Thrd.cCalls = 0;
1306 pTb->cbOpcodes = 0;
1307 pTb->pNext = NULL;
1308 pTb->cUsed = 0;
1309 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1310 pTb->idxAllocChunk = UINT8_MAX;
1311 pTb->GCPhysPc = GCPhysPc;
1312 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1313 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1314 pTb->cInstructions = 0;
1315
1316 /* Init the first opcode range. */
1317 pTb->cRanges = 1;
1318 pTb->aRanges[0].cbOpcodes = 0;
1319 pTb->aRanges[0].offOpcodes = 0;
1320 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1321 pTb->aRanges[0].u2Unused = 0;
1322 pTb->aRanges[0].idxPhysPage = 0;
1323 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1324 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1325
1326 return pTb;
1327 }
1328 RTMemFree(pTb->Thrd.paCalls);
1329 }
1330 RTMemFree(pTb);
1331 }
1332 RT_NOREF(pVM);
1333 return NULL;
1334}
1335
1336
1337/**
1338 * Called on the TB that is dedicated for recompilation before it's reused.
1339 *
1340 * @param pVCpu The cross context virtual CPU structure of the calling
1341 * thread.
1342 * @param pTb The translation block to reuse.
1343 * @param GCPhysPc The physical address corresponding to RIP + CS.BASE.
1344 * @param fExtraFlags Extra flags (IEMTB_F_XXX).
1345 */
1346static void iemThreadedTbReuse(PVMCPUCC pVCpu, PIEMTB pTb, RTGCPHYS GCPhysPc, uint32_t fExtraFlags)
1347{
1348 pTb->GCPhysPc = GCPhysPc;
1349 pTb->fFlags = (pVCpu->iem.s.fExec & IEMTB_F_IEM_F_MASK) | fExtraFlags;
1350 pTb->x86.fAttr = (uint16_t)pVCpu->cpum.GstCtx.cs.Attr.u;
1351 pTb->Thrd.cCalls = 0;
1352 pTb->cbOpcodes = 0;
1353 pTb->cInstructions = 0;
1354
1355 /* Init the first opcode range. */
1356 pTb->cRanges = 1;
1357 pTb->aRanges[0].cbOpcodes = 0;
1358 pTb->aRanges[0].offOpcodes = 0;
1359 pTb->aRanges[0].offPhysPage = GCPhysPc & GUEST_PAGE_OFFSET_MASK;
1360 pTb->aRanges[0].u2Unused = 0;
1361 pTb->aRanges[0].idxPhysPage = 0;
1362 pTb->aGCPhysPages[0] = NIL_RTGCPHYS;
1363 pTb->aGCPhysPages[1] = NIL_RTGCPHYS;
1364}
1365
1366
1367/**
1368 * Used to duplicate a threaded translation block after recompilation is done.
1369 *
1370 * @returns Pointer to the translation block on success, NULL on failure.
1371 * @param pVM The cross context virtual machine structure.
1372 * @param pVCpu The cross context virtual CPU structure of the calling
1373 * thread.
1374 * @param pTbSrc The TB to duplicate.
1375 */
1376static PIEMTB iemThreadedTbDuplicate(PVMCC pVM, PVMCPUCC pVCpu, PCIEMTB pTbSrc)
1377{
1378 /*
1379 * Just using the heap for now. Will make this more efficient and
1380 * complicated later, don't worry. :-)
1381 */
1382 PIEMTB pTb = iemTbAllocatorAlloc(pVCpu, true /*fThreaded*/);
1383 if (pTb)
1384 {
1385 uint8_t const idxAllocChunk = pTb->idxAllocChunk;
1386 memcpy(pTb, pTbSrc, sizeof(*pTb));
1387 pTb->idxAllocChunk = idxAllocChunk;
1388
1389 unsigned const cCalls = pTbSrc->Thrd.cCalls;
1390 Assert(cCalls > 0);
1391 pTb->Thrd.paCalls = (PIEMTHRDEDCALLENTRY)RTMemDup(pTbSrc->Thrd.paCalls, sizeof(IEMTHRDEDCALLENTRY) * cCalls);
1392 if (pTb->Thrd.paCalls)
1393 {
1394 unsigned const cbOpcodes = pTbSrc->cbOpcodes;
1395 Assert(cbOpcodes > 0);
1396 pTb->pabOpcodes = (uint8_t *)RTMemDup(pTbSrc->pabOpcodes, cbOpcodes);
1397 if (pTb->pabOpcodes)
1398 {
1399 pTb->Thrd.cAllocated = cCalls;
1400 pTb->pNext = NULL;
1401 pTb->cUsed = 0;
1402 pTb->msLastUsed = pVCpu->iem.s.msRecompilerPollNow;
1403 pTb->fFlags = pTbSrc->fFlags;
1404
1405 return pTb;
1406 }
1407 RTMemFree(pTb->Thrd.paCalls);
1408 }
1409 iemTbAllocatorFree(pVCpu, pTb);
1410 }
1411 RT_NOREF(pVM);
1412 return NULL;
1413
1414}
1415
1416
1417/**
1418 * Adds the given TB to the hash table.
1419 *
1420 * @param pVCpu The cross context virtual CPU structure of the calling
1421 * thread.
1422 * @param pTbCache The cache to add it to.
1423 * @param pTb The translation block to add.
1424 */
1425static void iemThreadedTbAdd(PVMCPUCC pVCpu, PIEMTBCACHE pTbCache, PIEMTB pTb)
1426{
1427 iemTbCacheAdd(pVCpu, pTbCache, pTb);
1428
1429 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedInstr, pTb->cInstructions);
1430 STAM_REL_PROFILE_ADD_PERIOD(&pVCpu->iem.s.StatTbThreadedCalls, pTb->Thrd.cCalls);
1431 if (LogIs12Enabled())
1432 {
1433 Log12(("TB added: %p %RGp LB %#x fl=%#x idxHash=%#x cRanges=%u cInstr=%u cCalls=%u\n",
1434 pTb, pTb->GCPhysPc, pTb->cbOpcodes, pTb->fFlags, IEMTBCACHE_HASH(pTbCache, pTb->fFlags, pTb->GCPhysPc),
1435 pTb->cRanges, pTb->cInstructions, pTb->Thrd.cCalls));
1436 for (uint8_t idxRange = 0; idxRange < pTb->cRanges; idxRange++)
1437 Log12((" range#%u: offPg=%#05x offOp=%#04x LB %#04x pg#%u=%RGp\n", idxRange, pTb->aRanges[idxRange].offPhysPage,
1438 pTb->aRanges[idxRange].offOpcodes, pTb->aRanges[idxRange].cbOpcodes, pTb->aRanges[idxRange].idxPhysPage,
1439 pTb->aRanges[idxRange].idxPhysPage == 0
1440 ? pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK
1441 : pTb->aGCPhysPages[pTb->aRanges[idxRange].idxPhysPage - 1]));
1442 }
1443}
1444
1445
1446/**
1447 * Called by opcode verifier functions when they detect a problem.
1448 */
1449void iemThreadedTbObsolete(PVMCPUCC pVCpu, PIEMTB pTb, bool fSafeToFree)
1450{
1451 /* Unless it's safe, we can only immediately free threaded TBs, as we will
1452 have more code left to execute in native TBs when fSafeToFree == false. */
1453 if (fSafeToFree || (pTb->fFlags & IEMTB_F_TYPE_THREADED))
1454 iemTbAllocatorFree(pVCpu, pTb);
1455 else
1456 iemTbAlloctorScheduleForFree(pVCpu, pTb);
1457}
1458
1459
1460/*
1461 * Real code.
1462 */
1463
1464#ifdef LOG_ENABLED
1465/**
1466 * Logs the current instruction.
1467 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1468 * @param pszFunction The IEM function doing the execution.
1469 * @param idxInstr The instruction number in the block.
1470 */
1471static void iemThreadedLogCurInstr(PVMCPUCC pVCpu, const char *pszFunction, uint32_t idxInstr) RT_NOEXCEPT
1472{
1473# ifdef IN_RING3
1474 if (LogIs2Enabled())
1475 {
1476 char szInstr[256];
1477 uint32_t cbInstr = 0;
1478 DBGFR3DisasInstrEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, 0, 0,
1479 DBGF_DISAS_FLAGS_CURRENT_GUEST | DBGF_DISAS_FLAGS_DEFAULT_MODE,
1480 szInstr, sizeof(szInstr), &cbInstr);
1481
1482 PCX86FXSTATE pFpuCtx = &pVCpu->cpum.GstCtx.XState.x87;
1483 Log2(("**** %s fExec=%x pTb=%p cUsed=%u #%u\n"
1484 " eax=%08x ebx=%08x ecx=%08x edx=%08x esi=%08x edi=%08x\n"
1485 " eip=%08x esp=%08x ebp=%08x iopl=%d tr=%04x\n"
1486 " cs=%04x ss=%04x ds=%04x es=%04x fs=%04x gs=%04x efl=%08x\n"
1487 " fsw=%04x fcw=%04x ftw=%02x mxcsr=%04x/%04x\n"
1488 " %s\n"
1489 , pszFunction, pVCpu->iem.s.fExec, pVCpu->iem.s.pCurTbR3, pVCpu->iem.s.pCurTbR3 ? pVCpu->iem.s.pCurTbR3->cUsed : 0, idxInstr,
1490 pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ebx, pVCpu->cpum.GstCtx.ecx, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.esi, pVCpu->cpum.GstCtx.edi,
1491 pVCpu->cpum.GstCtx.eip, pVCpu->cpum.GstCtx.esp, pVCpu->cpum.GstCtx.ebp, pVCpu->cpum.GstCtx.eflags.Bits.u2IOPL, pVCpu->cpum.GstCtx.tr.Sel,
1492 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.ds.Sel, pVCpu->cpum.GstCtx.es.Sel,
1493 pVCpu->cpum.GstCtx.fs.Sel, pVCpu->cpum.GstCtx.gs.Sel, pVCpu->cpum.GstCtx.eflags.u,
1494 pFpuCtx->FSW, pFpuCtx->FCW, pFpuCtx->FTW, pFpuCtx->MXCSR, pFpuCtx->MXCSR_MASK,
1495 szInstr));
1496
1497 /*if (LogIs3Enabled()) - this outputs an insane amount of stuff, so disabled.
1498 DBGFR3InfoEx(pVCpu->pVMR3->pUVM, pVCpu->idCpu, "cpumguest", "verbose", NULL); */
1499 }
1500 else
1501# endif
1502 LogFlow(("%s: cs:rip=%04x:%08RX64 ss:rsp=%04x:%08RX64 EFL=%06x\n", pszFunction, pVCpu->cpum.GstCtx.cs.Sel,
1503 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.ss.Sel, pVCpu->cpum.GstCtx.rsp, pVCpu->cpum.GstCtx.eflags.u));
1504}
1505#endif /* LOG_ENABLED */
1506
1507
1508#if 0
1509static VBOXSTRICTRC iemThreadedCompileLongJumped(PVMCC pVM, PVMCPUCC pVCpu, VBOXSTRICTRC rcStrict)
1510{
1511 RT_NOREF(pVM, pVCpu);
1512 return rcStrict;
1513}
1514#endif
1515
1516
1517/**
1518 * Initializes the decoder state when compiling TBs.
1519 *
1520 * This presumes that fExec has already been initialized.
1521 *
1522 * This is very similar to iemInitDecoder() and iemReInitDecoder(), so fixes
1523 * applied here may need to be applied to them as well.
1524 *
1525 * @param pVCpu The cross context virtual CPU structure of the calling
1526 * thread.
1527 * @param fReInit Clear for the first call for a TB, set for subsequent
1528 * calls from inside the compile loop where we can skip a
1529 * couple of things.
1530 * @param fExtraFlags The extra translation block flags when @a fReInit is
1531 * true, otherwise ignored. Only IEMTB_F_INHIBIT_SHADOW is
1532 * checked.
1533 */
1534DECL_FORCE_INLINE(void) iemThreadedCompileInitDecoder(PVMCPUCC pVCpu, bool const fReInit, uint32_t const fExtraFlags)
1535{
1536 /* ASSUMES: That iemInitExec was already called and that anyone changing
1537 CPU state affecting the fExec bits since then will have updated fExec! */
1538 AssertMsg((pVCpu->iem.s.fExec & ~IEM_F_USER_OPTS) == iemCalcExecFlags(pVCpu),
1539 ("fExec=%#x iemCalcExecModeFlags=%#x\n", pVCpu->iem.s.fExec, iemCalcExecFlags(pVCpu)));
1540
1541 IEMMODE const enmMode = IEM_GET_CPU_MODE(pVCpu);
1542
1543 /* Decoder state: */
1544 pVCpu->iem.s.enmDefAddrMode = enmMode; /** @todo check if this is correct... */
1545 pVCpu->iem.s.enmEffAddrMode = enmMode;
1546 if (enmMode != IEMMODE_64BIT)
1547 {
1548 pVCpu->iem.s.enmDefOpSize = enmMode; /** @todo check if this is correct... */
1549 pVCpu->iem.s.enmEffOpSize = enmMode;
1550 }
1551 else
1552 {
1553 pVCpu->iem.s.enmDefOpSize = IEMMODE_32BIT;
1554 pVCpu->iem.s.enmEffOpSize = IEMMODE_32BIT;
1555 }
1556 pVCpu->iem.s.fPrefixes = 0;
1557 pVCpu->iem.s.uRexReg = 0;
1558 pVCpu->iem.s.uRexB = 0;
1559 pVCpu->iem.s.uRexIndex = 0;
1560 pVCpu->iem.s.idxPrefix = 0;
1561 pVCpu->iem.s.uVex3rdReg = 0;
1562 pVCpu->iem.s.uVexLength = 0;
1563 pVCpu->iem.s.fEvexStuff = 0;
1564 pVCpu->iem.s.iEffSeg = X86_SREG_DS;
1565 pVCpu->iem.s.offModRm = 0;
1566 pVCpu->iem.s.iNextMapping = 0;
1567
1568 if (!fReInit)
1569 {
1570 pVCpu->iem.s.cActiveMappings = 0;
1571 pVCpu->iem.s.rcPassUp = VINF_SUCCESS;
1572 pVCpu->iem.s.fEndTb = false;
1573 pVCpu->iem.s.fTbCheckOpcodes = false;
1574 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
1575 pVCpu->iem.s.fTbCrossedPage = false;
1576 pVCpu->iem.s.cInstrTillIrqCheck = !(fExtraFlags & IEMTB_F_INHIBIT_SHADOW) ? 32 : 0;
1577 pVCpu->iem.s.fTbCurInstrIsSti = false;
1578 /* Force RF clearing and TF checking on first instruction in the block
1579 as we don't really know what came before and should assume the worst: */
1580 pVCpu->iem.s.fTbPrevInstr = IEM_CIMPL_F_RFLAGS | IEM_CIMPL_F_END_TB;
1581 }
1582 else
1583 {
1584 Assert(pVCpu->iem.s.cActiveMappings == 0);
1585 Assert(pVCpu->iem.s.rcPassUp == VINF_SUCCESS);
1586 Assert(pVCpu->iem.s.fEndTb == false);
1587 Assert(pVCpu->iem.s.fTbCrossedPage == false);
1588 pVCpu->iem.s.fTbPrevInstr = pVCpu->iem.s.fTbCurInstr;
1589 }
1590 pVCpu->iem.s.fTbCurInstr = 0;
1591
1592#ifdef DBGFTRACE_ENABLED
1593 switch (IEM_GET_CPU_MODE(pVCpu))
1594 {
1595 case IEMMODE_64BIT:
1596 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I64/%u %08llx", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.rip);
1597 break;
1598 case IEMMODE_32BIT:
1599 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I32/%u %04x:%08x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1600 break;
1601 case IEMMODE_16BIT:
1602 RTTraceBufAddMsgF(pVCpu->CTX_SUFF(pVM)->CTX_SUFF(hTraceBuf), "I16/%u %04x:%04x", IEM_GET_CPL(pVCpu), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.eip);
1603 break;
1604 }
1605#endif
1606}
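
/*
 * Illustrative sketch, not part of the original file: the call pattern the
 * decoder init above is written for, as used by iemThreadedCompile() further
 * down.  The first call passes fReInit=false together with the extra TB flags;
 * the calls from inside the compile loop pass fReInit=true and a zero flag
 * value, which is then ignored.
 */
#if 0
    iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags); /* once, before the first instruction */
    for (;;)
    {
        /* ... decode one instruction and emit its threaded calls ... */
        iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);        /* between instructions, cheaper */
    }
#endif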
1607
1608
1609/**
1610 * Initializes the opcode fetcher when starting the compilation.
1611 *
1612 * @param pVCpu The cross context virtual CPU structure of the calling
1613 * thread.
1614 */
1615DECL_FORCE_INLINE(void) iemThreadedCompileInitOpcodeFetching(PVMCPUCC pVCpu)
1616{
1617 /* Almost everything is done by iemGetPcWithPhysAndCode() already. We just need to initialize the index into abOpcode. */
1618#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1619 pVCpu->iem.s.offOpcode = 0;
1620#else
1621 RT_NOREF(pVCpu);
1622#endif
1623}
1624
1625
1626/**
1627 * Re-initializes the opcode fetcher between instructions while compiling.
1628 *
1629 * @param pVCpu The cross context virtual CPU structure of the calling
1630 * thread.
1631 */
1632DECL_FORCE_INLINE(void) iemThreadedCompileReInitOpcodeFetching(PVMCPUCC pVCpu)
1633{
1634 if (pVCpu->iem.s.pbInstrBuf)
1635 {
1636 uint64_t off = pVCpu->cpum.GstCtx.rip;
1637 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
1638 off += pVCpu->cpum.GstCtx.cs.u64Base;
1639 off -= pVCpu->iem.s.uInstrBufPc;
1640 if (off < pVCpu->iem.s.cbInstrBufTotal)
1641 {
1642 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
1643 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
1644 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
1645 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
1646 else
1647 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
1648 }
1649 else
1650 {
1651 pVCpu->iem.s.pbInstrBuf = NULL;
1652 pVCpu->iem.s.offInstrNextByte = 0;
1653 pVCpu->iem.s.offCurInstrStart = 0;
1654 pVCpu->iem.s.cbInstrBuf = 0;
1655 pVCpu->iem.s.cbInstrBufTotal = 0;
1656 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1657 }
1658 }
1659 else
1660 {
1661 pVCpu->iem.s.offInstrNextByte = 0;
1662 pVCpu->iem.s.offCurInstrStart = 0;
1663 pVCpu->iem.s.cbInstrBuf = 0;
1664 pVCpu->iem.s.cbInstrBufTotal = 0;
1665#ifdef VBOX_STRICT
1666 pVCpu->iem.s.GCPhysInstrBuf = NIL_RTGCPHYS;
1667#endif
1668 }
1669#ifdef IEM_WITH_CODE_TLB_AND_OPCODE_BUF
1670 pVCpu->iem.s.offOpcode = 0;
1671#endif
1672}
1673
1674#ifdef LOG_ENABLED
1675
1676/**
1677 * Inserts a NOP call.
1678 *
1679 * This is for debugging.
1680 *
1681 * @returns true on success, false if we're out of call entries.
1682 * @param pTb The translation block being compiled.
1683 */
1684bool iemThreadedCompileEmitNop(PIEMTB pTb)
1685{
1686 /* Emit the call. */
1687 uint32_t const idxCall = pTb->Thrd.cCalls;
1688 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1689 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1690 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1691 pCall->enmFunction = kIemThreadedFunc_BltIn_Nop;
1692 pCall->idxInstr = pTb->cInstructions - 1;
1693 pCall->uUnused0 = 0;
1694 pCall->offOpcode = 0;
1695 pCall->cbOpcode = 0;
1696 pCall->idxRange = 0;
1697 pCall->auParams[0] = 0;
1698 pCall->auParams[1] = 0;
1699 pCall->auParams[2] = 0;
1700 return true;
1701}
1702
1703
1704/**
1705 * Called by iemThreadedCompile if cpu state logging is desired.
1706 *
1707 * @returns true on success, false if we're out of call entries.
1708 * @param pTb The translation block being compiled.
1709 */
1710bool iemThreadedCompileEmitLogCpuState(PIEMTB pTb)
1711{
1712 /* Emit the call. */
1713 uint32_t const idxCall = pTb->Thrd.cCalls;
1714 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
1715 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
1716 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
1717 pCall->enmFunction = kIemThreadedFunc_BltIn_LogCpuState;
1718 pCall->idxInstr = pTb->cInstructions - 1;
1719 pCall->uUnused0 = 0;
1720 pCall->offOpcode = 0;
1721 pCall->cbOpcode = 0;
1722 pCall->idxRange = 0;
1723 pCall->auParams[0] = RT_MAKE_U16(pCall->idxInstr, idxCall); /* currently not used, but whatever */
1724 pCall->auParams[1] = 0;
1725 pCall->auParams[2] = 0;
1726 return true;
1727}
1728
1729#endif /* LOG_ENABLED */
1730
1731DECLINLINE(void) iemThreadedCopyOpcodeBytesInline(PCVMCPUCC pVCpu, uint8_t *pbDst, uint8_t cbInstr)
1732{
1733 switch (cbInstr)
1734 {
1735 default: AssertMsgFailed(("%#x\n", cbInstr)); RT_FALL_THROUGH();
1736 case 15: pbDst[14] = pVCpu->iem.s.abOpcode[14]; RT_FALL_THROUGH();
1737 case 14: pbDst[13] = pVCpu->iem.s.abOpcode[13]; RT_FALL_THROUGH();
1738 case 13: pbDst[12] = pVCpu->iem.s.abOpcode[12]; RT_FALL_THROUGH();
1739 case 12: pbDst[11] = pVCpu->iem.s.abOpcode[11]; RT_FALL_THROUGH();
1740 case 11: pbDst[10] = pVCpu->iem.s.abOpcode[10]; RT_FALL_THROUGH();
1741 case 10: pbDst[9] = pVCpu->iem.s.abOpcode[9]; RT_FALL_THROUGH();
1742 case 9: pbDst[8] = pVCpu->iem.s.abOpcode[8]; RT_FALL_THROUGH();
1743 case 8: pbDst[7] = pVCpu->iem.s.abOpcode[7]; RT_FALL_THROUGH();
1744 case 7: pbDst[6] = pVCpu->iem.s.abOpcode[6]; RT_FALL_THROUGH();
1745 case 6: pbDst[5] = pVCpu->iem.s.abOpcode[5]; RT_FALL_THROUGH();
1746 case 5: pbDst[4] = pVCpu->iem.s.abOpcode[4]; RT_FALL_THROUGH();
1747 case 4: pbDst[3] = pVCpu->iem.s.abOpcode[3]; RT_FALL_THROUGH();
1748 case 3: pbDst[2] = pVCpu->iem.s.abOpcode[2]; RT_FALL_THROUGH();
1749 case 2: pbDst[1] = pVCpu->iem.s.abOpcode[1]; RT_FALL_THROUGH();
1750 case 1: pbDst[0] = pVCpu->iem.s.abOpcode[0]; break;
1751 }
1752}
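
/*
 * Illustrative sketch, not part of the original file: the unrolled switch
 * above is functionally a bounded copy of the 1..15 opcode bytes of the
 * current instruction, i.e. roughly equivalent to the following, but kept
 * unrolled so the compiler turns it into straight-line stores on this hot path.
 */
#if 0
    Assert(cbInstr >= 1 && cbInstr <= 15);
    memcpy(pbDst, pVCpu->iem.s.abOpcode, cbInstr);
#endif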
1753
1754
1755/**
1756 * Called by IEM_MC2_BEGIN_EMIT_CALLS() under one of these conditions:
1757 *
1758 * - CS LIM check required.
1759 * - Must recheck opcode bytes.
1760 * - Previous instruction branched.
1761 * - TLB load detected, probably due to page crossing.
1762 *
1763 * @returns true if everything went well, false if we're out of space in the TB
1764 * (e.g. opcode ranges) or we need to start doing CS.LIM checks.
1765 * @param pVCpu The cross context virtual CPU structure of the calling
1766 * thread.
1767 * @param pTb The translation block being compiled.
1768 */
1769bool iemThreadedCompileBeginEmitCallsComplications(PVMCPUCC pVCpu, PIEMTB pTb)
1770{
1771 Log6(("%04x:%08RX64: iemThreadedCompileBeginEmitCallsComplications\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1772 Assert((pVCpu->iem.s.GCPhysInstrBuf & GUEST_PAGE_OFFSET_MASK) == 0);
1773#if 0
1774 if (pVCpu->cpum.GstCtx.rip >= 0xc0000000 && !LogIsEnabled())
1775 RTLogChangeFlags(NULL, 0, RTLOGFLAGS_DISABLED);
1776#endif
1777
1778 /*
1779 * If we're not in 64-bit mode and not already checking CS.LIM, we need to
1780 * see whether we should start doing so.
1781 */
1782 bool fConsiderCsLimChecking;
1783 uint32_t const fMode = pVCpu->iem.s.fExec & IEM_F_MODE_MASK;
1784 if ( fMode == IEM_F_MODE_X86_64BIT
1785 || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS)
1786 || fMode == IEM_F_MODE_X86_32BIT_PROT_FLAT
1787 || fMode == IEM_F_MODE_X86_32BIT_FLAT)
1788 fConsiderCsLimChecking = false; /* already enabled or not needed */
1789 else
1790 {
1791 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
1792 if (offFromLim >= GUEST_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
1793 fConsiderCsLimChecking = true; /* likely */
1794 else
1795 {
1796 Log8(("%04x:%08RX64: Needs CS.LIM checks (%#RX64)\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, offFromLim));
1797 return false;
1798 }
1799 }
1800
1801 /*
1802 * Prepare the call now, even before we know if we can accept the instruction in this TB.
1803 * This allows us to amend parameters w/o making every case suffer.
1804 */
1805 uint8_t const cbInstr = IEM_GET_INSTR_LEN(pVCpu);
1806 uint16_t const offOpcode = pTb->cbOpcodes;
1807 uint8_t idxRange = pTb->cRanges - 1;
1808
1809 PIEMTHRDEDCALLENTRY const pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls];
1810 pCall->idxInstr = pTb->cInstructions;
1811 pCall->offOpcode = offOpcode;
1812 pCall->idxRange = idxRange;
1813 pCall->cbOpcode = cbInstr;
1814 pCall->auParams[0] = cbInstr;
1815 pCall->auParams[1] = idxRange;
1816 pCall->auParams[2] = offOpcode - pTb->aRanges[idxRange].offOpcodes;
1817
1818/** @todo check if we require IEMTB_F_CS_LIM_CHECKS for any new page we've
1819 * gotten onto. If we do, stop */
1820
1821 /*
1822 * Case 1: We've branched (RIP changed).
1823 *
1824 * Sub-case 1a: Same page, no TLB load (fTbCrossedPage is false).
1825 * Req: 1 extra range, no extra phys.
1826 *
1827 * Sub-case 1b: Different page but no page boundary crossing, so TLB load
1828 * necessary (fTbCrossedPage is true).
1829 * Req: 1 extra range, probably 1 extra phys page entry.
1830 *
1831 * Sub-case 1c: Different page, so TLB load necessary (fTbCrossedPage is true),
1832 * but in addition we cross into the following page and require
1833 * another TLB load.
1834 * Req: 2 extra ranges, probably 2 extra phys page entries.
1835 *
1836 * Sub-case 1d: Same page, so no initial TLB load necessary, but we cross into
1837 * the following page (thus fTbCrossedPage is true).
1838 * Req: 2 extra ranges, probably 1 extra phys page entry.
1839 *
1840 * Note! The setting of fTbCrossedPage is done by iemOpcodeFetchBytesJmp, but
1841 * it may trigger "spuriously" from the CPU's point of view because of
1842 * physical page changes that'll invalidate the physical TLB and trigger a
1843 * call to the function. In theory this shouldn't be a big deal, just a
1844 * bit of a performance loss as we'll pick the LoadingTlb variants.
1845 *
1846 * Note! We do not currently optimize branching to the next instruction (sorry
1847 * 32-bit PIC code). We could maybe do that in the branching code that
1848 * sets (or not) fTbBranched.
1849 */
1850 /** @todo Optimize 'jmp .next_instr' and 'call .next_instr'. Seen the jmp
1851 * variant in win 3.1 code and the call variant in 32-bit linux PIC
1852 * code. This'll require filtering out far jmps and calls, as they
1853 * load CS which should technically be considered indirect since the
1854 * GDT/LDT entry's base address can be modified independently from
1855 * the code. */
1856 if (pVCpu->iem.s.fTbBranched != IEMBRANCHED_F_NO)
1857 {
1858 if ( !pVCpu->iem.s.fTbCrossedPage /* 1a */
1859 || pVCpu->iem.s.offCurInstrStart >= 0 /* 1b */ )
1860 {
1861 /* 1a + 1b - instruction fully within the branched to page. */
1862 Assert(pVCpu->iem.s.offCurInstrStart >= 0);
1863 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr <= GUEST_PAGE_SIZE);
1864
1865 if (!(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_ZERO))
1866 {
1867 /* Check that we've got a free range. */
1868 idxRange += 1;
1869 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1870 { /* likely */ }
1871 else
1872 {
1873 Log8(("%04x:%08RX64: out of ranges after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1874 return false;
1875 }
1876 pCall->idxRange = idxRange;
1877 pCall->auParams[1] = idxRange;
1878 pCall->auParams[2] = 0;
1879
1880 /* Check that we've got a free page slot. */
1881 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
1882 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
1883 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
1884 pTb->aRanges[idxRange].idxPhysPage = 0;
1885 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
1886 || pTb->aGCPhysPages[0] == GCPhysNew)
1887 {
1888 pTb->aGCPhysPages[0] = GCPhysNew;
1889 pTb->aRanges[idxRange].idxPhysPage = 1;
1890 }
1891 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
1892 || pTb->aGCPhysPages[1] == GCPhysNew)
1893 {
1894 pTb->aGCPhysPages[1] = GCPhysNew;
1895 pTb->aRanges[idxRange].idxPhysPage = 2;
1896 }
1897 else
1898 {
1899 Log8(("%04x:%08RX64: out of aGCPhysPages entries after branch\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1900 return false;
1901 }
1902
1903 /* Finish setting up the new range. */
1904 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
1905 pTb->aRanges[idxRange].offOpcodes = offOpcode;
1906 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
1907 pTb->aRanges[idxRange].u2Unused = 0;
1908 pTb->cRanges++;
1909 Log6(("%04x:%08RX64: new range #%u same page: offPhysPage=%#x offOpcodes=%#x\n",
1910 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].offPhysPage,
1911 pTb->aRanges[idxRange].offOpcodes));
1912 }
1913 else
1914 {
1915 Log8(("%04x:%08RX64: zero byte jump\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1916 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
1917 }
1918
1919 /* Determine which function we need to load & check.
1920 Note! For jumps to a new page, we'll set both fTbBranched and
1921 fTbCrossedPage to avoid unnecessary TLB work for intra-page
1922 branching. */
1923 if ( (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_INDIRECT | IEMBRANCHED_F_FAR)) /* Far is basically indirect. */
1924 || pVCpu->iem.s.fTbCrossedPage)
1925 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1926 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesLoadingTlb
1927 : !fConsiderCsLimChecking
1928 ? kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlb
1929 : kIemThreadedFunc_BltIn_CheckOpcodesLoadingTlbConsiderCsLim;
1930 else if (pVCpu->iem.s.fTbBranched & (IEMBRANCHED_F_CONDITIONAL | /* paranoia: */ IEMBRANCHED_F_DIRECT))
1931 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1932 ? kIemThreadedFunc_BltIn_CheckCsLimAndPcAndOpcodes
1933 : !fConsiderCsLimChecking
1934 ? kIemThreadedFunc_BltIn_CheckPcAndOpcodes
1935 : kIemThreadedFunc_BltIn_CheckPcAndOpcodesConsiderCsLim;
1936 else
1937 {
1938 Assert(pVCpu->iem.s.fTbBranched & IEMBRANCHED_F_RELATIVE);
1939 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
1940 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
1941 : !fConsiderCsLimChecking
1942 ? kIemThreadedFunc_BltIn_CheckOpcodes
1943 : kIemThreadedFunc_BltIn_CheckOpcodesConsiderCsLim;
1944 }
1945 }
1946 else
1947 {
1948 /* 1c + 1d - instruction crosses pages. */
1949 Assert(pVCpu->iem.s.offCurInstrStart < 0);
1950 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
1951
1952 /* Lazy bird: Check that this isn't case 1c, since we've already
1953 loaded the first physical address. End the TB and
1954 make it a case 2b instead.
1955
1956 Hmm. Too much bother to detect, so just do the same
1957 with case 1d as well. */
1958#if 0 /** @todo get back to this later when we've got the actual branch code in
1959 * place. */
1960 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
1961
1962 /* Check that we've got two free ranges. */
1963 if (idxRange + 2 < RT_ELEMENTS(pTb->aRanges))
1964 { /* likely */ }
1965 else
1966 return false;
1967 idxRange += 1;
1968 pCall->idxRange = idxRange;
1969 pCall->auParams[1] = idxRange;
1970 pCall->auParams[2] = 0;
1971
1972 /* ... */
1973
1974#else
1975 Log8(("%04x:%08RX64: complicated post-branch condition, ending TB.\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
1976 return false;
1977#endif
1978 }
1979 }
1980
1981 /*
1982 * Case 2: Page crossing.
1983 *
1984 * Sub-case 2a: The instruction starts on the first byte in the next page.
1985 *
1986 * Sub-case 2b: The instruction has opcode bytes in both the current and
1987 * following page.
1988 *
1989 * Both cases requires a new range table entry and probably a new physical
1990 * page entry. The difference is in which functions to emit and whether to
1991 * add bytes to the current range.
1992 */
1993 else if (pVCpu->iem.s.fTbCrossedPage)
1994 {
1995 /* Check that we've got a free range. */
1996 idxRange += 1;
1997 if (idxRange < RT_ELEMENTS(pTb->aRanges))
1998 { /* likely */ }
1999 else
2000 {
2001 Log8(("%04x:%08RX64: out of ranges while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2002 return false;
2003 }
2004
2005 /* Check that we've got a free page slot. */
2006 AssertCompile(RT_ELEMENTS(pTb->aGCPhysPages) == 2);
2007 RTGCPHYS const GCPhysNew = pVCpu->iem.s.GCPhysInstrBuf & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK;
2008 if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
2009 pTb->aRanges[idxRange].idxPhysPage = 0;
2010 else if ( pTb->aGCPhysPages[0] == NIL_RTGCPHYS
2011 || pTb->aGCPhysPages[0] == GCPhysNew)
2012 {
2013 pTb->aGCPhysPages[0] = GCPhysNew;
2014 pTb->aRanges[idxRange].idxPhysPage = 1;
2015 }
2016 else if ( pTb->aGCPhysPages[1] == NIL_RTGCPHYS
2017 || pTb->aGCPhysPages[1] == GCPhysNew)
2018 {
2019 pTb->aGCPhysPages[1] = GCPhysNew;
2020 pTb->aRanges[idxRange].idxPhysPage = 2;
2021 }
2022 else
2023 {
2024 Log8(("%04x:%08RX64: out of aGCPhysPages entries while crossing page\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2025 return false;
2026 }
2027
2028 if (((pTb->aRanges[idxRange - 1].offPhysPage + pTb->aRanges[idxRange - 1].cbOpcodes) & GUEST_PAGE_OFFSET_MASK) == 0)
2029 {
2030 Assert(pVCpu->iem.s.offCurInstrStart == 0);
2031 pCall->idxRange = idxRange;
2032 pCall->auParams[1] = idxRange;
2033 pCall->auParams[2] = 0;
2034
2035 /* Finish setting up the new range. */
2036 pTb->aRanges[idxRange].offPhysPage = pVCpu->iem.s.offCurInstrStart;
2037 pTb->aRanges[idxRange].offOpcodes = offOpcode;
2038 pTb->aRanges[idxRange].cbOpcodes = cbInstr;
2039 pTb->aRanges[idxRange].u2Unused = 0;
2040 pTb->cRanges++;
2041 Log6(("%04x:%08RX64: new range #%u new page (a) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2042 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2043 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2044
2045 /* Determine which function we need to load & check. */
2046 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2047 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNewPageLoadingTlb
2048 : !fConsiderCsLimChecking
2049 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlb
2050 : kIemThreadedFunc_BltIn_CheckOpcodesOnNewPageLoadingTlbConsiderCsLim;
2051 }
2052 else
2053 {
2054 Assert(pVCpu->iem.s.offCurInstrStart < 0);
2055 Assert(pVCpu->iem.s.offCurInstrStart + cbInstr > 0);
2056 uint8_t const cbStartPage = (uint8_t)-pVCpu->iem.s.offCurInstrStart;
2057 pCall->auParams[0] |= (uint64_t)cbStartPage << 32;
2058
2059 /* We're good. Split the instruction over the old and new range table entries. */
2060 pTb->aRanges[idxRange - 1].cbOpcodes += cbStartPage;
2061
2062 pTb->aRanges[idxRange].offPhysPage = 0;
2063 pTb->aRanges[idxRange].offOpcodes = offOpcode + cbStartPage;
2064 pTb->aRanges[idxRange].cbOpcodes = cbInstr - cbStartPage;
2065 pTb->aRanges[idxRange].u2Unused = 0;
2066 pTb->cRanges++;
2067 Log6(("%04x:%08RX64: new range #%u new page (b) %u/%RGp: offPhysPage=%#x offOpcodes=%#x\n",
2068 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, idxRange, pTb->aRanges[idxRange].idxPhysPage, GCPhysNew,
2069 pTb->aRanges[idxRange].offPhysPage, pTb->aRanges[idxRange].offOpcodes));
2070
2071 /* Determine which function we need to load & check. */
2072 if (pVCpu->iem.s.fTbCheckOpcodes)
2073 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2074 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesAcrossPageLoadingTlb
2075 : !fConsiderCsLimChecking
2076 ? kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlb
2077 : kIemThreadedFunc_BltIn_CheckOpcodesAcrossPageLoadingTlbConsiderCsLim;
2078 else
2079 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2080 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodesOnNextPageLoadingTlb
2081 : !fConsiderCsLimChecking
2082 ? kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlb
2083 : kIemThreadedFunc_BltIn_CheckOpcodesOnNextPageLoadingTlbConsiderCsLim;
2084 }
2085 }
2086
2087 /*
2088 * Regular case: No new range required.
2089 */
2090 else
2091 {
2092 Assert(pVCpu->iem.s.fTbCheckOpcodes || (pTb->fFlags & IEMTB_F_CS_LIM_CHECKS));
2093 if (pVCpu->iem.s.fTbCheckOpcodes)
2094 pCall->enmFunction = pTb->fFlags & IEMTB_F_CS_LIM_CHECKS
2095 ? kIemThreadedFunc_BltIn_CheckCsLimAndOpcodes
2096 : kIemThreadedFunc_BltIn_CheckOpcodes;
2097 else
2098 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckCsLim;
2099
2100 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2101 pTb->cbOpcodes = offOpcode + cbInstr;
2102 pTb->aRanges[idxRange].cbOpcodes += cbInstr;
2103 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2104 }
2105
2106 /*
2107 * Commit the call.
2108 */
2109 pTb->Thrd.cCalls++;
2110
2111 /*
2112 * Clear state.
2113 */
2114 pVCpu->iem.s.fTbBranched = IEMBRANCHED_F_NO;
2115 pVCpu->iem.s.fTbCrossedPage = false;
2116 pVCpu->iem.s.fTbCheckOpcodes = false;
2117
2118 /*
2119 * Copy opcode bytes.
2120 */
2121 iemThreadedCopyOpcodeBytesInline(pVCpu, &pTb->pabOpcodes[offOpcode], cbInstr);
2122 pTb->cbOpcodes = offOpcode + cbInstr;
2123 Assert(pTb->cbOpcodes <= pVCpu->iem.s.cbOpcodesAllocated);
2124
2125 return true;
2126}
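
/*
 * Illustrative sketch, not part of the original file: the physical page
 * bookkeeping duplicated in the branch and page-crossing cases above follows
 * a fixed scheme - idxPhysPage 0 is the TB's own first page (the GCPhysPc
 * page), while the two aGCPhysPages[] entries back idxPhysPage 1 and 2.  A
 * hypothetical helper doing the same lookup/assignment might look like this
 * (returning UINT8_MAX when all slots are taken by other pages, i.e. the
 * caller must end the TB):
 */
#if 0
static uint8_t iemThreadedTbFindOrAddPhysPageSketch(PIEMTB pTb, RTGCPHYS GCPhysNew)
{
    if ((pTb->GCPhysPc & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK) == GCPhysNew)
        return 0;
    for (uint8_t i = 0; i < RT_ELEMENTS(pTb->aGCPhysPages); i++)
        if (   pTb->aGCPhysPages[i] == NIL_RTGCPHYS
            || pTb->aGCPhysPages[i] == GCPhysNew)
        {
            pTb->aGCPhysPages[i] = GCPhysNew;
            return (uint8_t)(i + 1);
        }
    return UINT8_MAX; /* out of page slots */
}
#endif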
2127
2128
2129/**
2130 * Worker for iemThreadedCompileBeginEmitCallsComplications and
2131 * iemThreadedCompileCheckIrq that checks for pending deliverable events.
2132 *
2133 * @returns true if anything is pending, false if not.
2134 * @param pVCpu The cross context virtual CPU structure of the calling
2135 * thread.
2136 */
2137DECL_FORCE_INLINE(bool) iemThreadedCompileIsIrqOrForceFlagPending(PVMCPUCC pVCpu)
2138{
2139 uint64_t fCpu = pVCpu->fLocalForcedActions;
2140 fCpu &= VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI;
2141#if 1
2142 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2143 if (RT_LIKELY( !fCpu
2144 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2145 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2146 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))) ))
2147 return false;
2148 return true;
2149#else
2150 return false;
2151#endif
2152
2153}
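
/*
 * Illustrative note, not part of the original file: the inverted condition
 * above boils down to "something is deliverable", i.e. the function returns
 * true when an NMI or SMI is pending, or when an APIC/PIC interrupt is
 * pending while RFLAGS.IF is set and the CPU is not in an interrupt shadow.
 */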
2154
2155
2156/**
2157 * Called by iemThreadedCompile when a block requires a mode check.
2158 *
2159 * @returns true if we should continue, false if we're out of call entries.
2160 * @param pVCpu The cross context virtual CPU structure of the calling
2161 * thread.
2162 * @param pTb The translation block being compiled.
2163 */
2164static bool iemThreadedCompileEmitCheckMode(PVMCPUCC pVCpu, PIEMTB pTb)
2165{
2166 /* Emit the call. */
2167 uint32_t const idxCall = pTb->Thrd.cCalls;
2168 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2169 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2170 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2171 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckMode;
2172 pCall->idxInstr = pTb->cInstructions - 1;
2173 pCall->uUnused0 = 0;
2174 pCall->offOpcode = 0;
2175 pCall->cbOpcode = 0;
2176 pCall->idxRange = 0;
2177 pCall->auParams[0] = pVCpu->iem.s.fExec;
2178 pCall->auParams[1] = 0;
2179 pCall->auParams[2] = 0;
2180 LogFunc(("%04x:%08RX64 fExec=%#x\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->iem.s.fExec));
2181 return true;
2182}
2183
2184
2185/**
2186 * Called by IEM_MC2_BEGIN_EMIT_CALLS() when IEM_CIMPL_F_CHECK_IRQ_BEFORE is
2187 * set.
2188 *
2189 * @returns true if we should continue, false if an IRQ is deliverable or a
2190 * relevant force flag is pending.
2191 * @param pVCpu The cross context virtual CPU structure of the calling
2192 * thread.
2193 * @param pTb The translation block being compiled.
2194 * @sa iemThreadedCompileCheckIrq
2195 */
2196bool iemThreadedCompileEmitIrqCheckBefore(PVMCPUCC pVCpu, PIEMTB pTb)
2197{
2198 /*
2199 * Skip this if we've already emitted a call after the previous instruction,
2200 * or if it's the first call, as we're always checking FFs between blocks.
2201 */
2202 uint32_t const idxCall = pTb->Thrd.cCalls;
2203 if ( idxCall > 0
2204 && pTb->Thrd.paCalls[idxCall - 1].enmFunction != kIemThreadedFunc_BltIn_CheckIrq)
2205 {
2206 /* Emit the call. */
2207 AssertReturn(idxCall < pTb->Thrd.cAllocated, false);
2208 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[idxCall];
2209 pTb->Thrd.cCalls = (uint16_t)(idxCall + 1);
2210 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2211 pCall->idxInstr = pTb->cInstructions;
2212 pCall->uUnused0 = 0;
2213 pCall->offOpcode = 0;
2214 pCall->cbOpcode = 0;
2215 pCall->idxRange = 0;
2216 pCall->auParams[0] = 0;
2217 pCall->auParams[1] = 0;
2218 pCall->auParams[2] = 0;
2219 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2220
2221 /* Reset the IRQ check value. */
2222 pVCpu->iem.s.cInstrTillIrqCheck = !CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) ? 32 : 0;
2223
2224 /*
2225 * Check for deliverable IRQs and pending force flags.
2226 */
2227 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2228 }
2229 return true; /* continue */
2230}
2231
2232
2233/**
2234 * Emits an IRQ check call and checks for pending IRQs.
2235 *
2236 * @returns true if we should continue, false if an IRQ is deliverable or a
2237 * relevant force flag is pending.
2238 * @param pVCpu The cross context virtual CPU structure of the calling
2239 * thread.
2240 * @param pTb The translation block.
2241 * @sa iemThreadedCompileBeginEmitCallsComplications
2242 */
2243static bool iemThreadedCompileCheckIrqAfter(PVMCPUCC pVCpu, PIEMTB pTb)
2244{
2245 /* Check again in a little bit, unless it is immediately following an STI
2246 in which case we *must* check immediately after the next instruction
2247 as well in case it's executed with interrupt inhibition. We could
2248 otherwise miss the interrupt window. See the irq2 wait2 variant in
2249 bs3-timers-1 which is doing sti + sti + cli. */
2250 if (!pVCpu->iem.s.fTbCurInstrIsSti)
2251 pVCpu->iem.s.cInstrTillIrqCheck = 32;
2252 else
2253 {
2254 pVCpu->iem.s.fTbCurInstrIsSti = false;
2255 pVCpu->iem.s.cInstrTillIrqCheck = 0;
2256 }
2257 LogFunc(("%04x:%08RX64\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
2258
2259 /*
2260 * Emit the call.
2261 */
2262 AssertReturn(pTb->Thrd.cCalls < pTb->Thrd.cAllocated, false);
2263 PIEMTHRDEDCALLENTRY pCall = &pTb->Thrd.paCalls[pTb->Thrd.cCalls++];
2264 pCall->enmFunction = kIemThreadedFunc_BltIn_CheckIrq;
2265 pCall->idxInstr = pTb->cInstructions;
2266 pCall->uUnused0 = 0;
2267 pCall->offOpcode = 0;
2268 pCall->cbOpcode = 0;
2269 pCall->idxRange = 0;
2270 pCall->auParams[0] = 0;
2271 pCall->auParams[1] = 0;
2272 pCall->auParams[2] = 0;
2273
2274 /*
2275 * Check for deliverable IRQs and pending force flags.
2276 */
2277 return !iemThreadedCompileIsIrqOrForceFlagPending(pVCpu);
2278}
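
/*
 * Illustrative note, not part of the original file: the guest sequence from
 * bs3-timers-1 that the STI special case above caters for.  The first STI only
 * lifts interrupt inhibition after the *next* instruction has executed, so the
 * only window in which an IRQ can be taken is right after the second STI;
 * checking only every 32 instructions could miss it entirely:
 *
 *      sti     ; IF=1, but interrupts are inhibited for one more instruction
 *      sti     ; inhibition from the first STI ends here - must check IRQs now
 *      cli     ; IF=0 again, the window is closed
 */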
2279
2280
2281/**
2282 * Compiles a new TB and executes it.
2283 *
2284 * We combine compilation and execution here as it makes for simpler code flow
2285 * in the main loop and allows interpreting while compiling if we want to
2286 * explore that option.
2287 *
2288 * @returns Strict VBox status code.
2289 * @param pVM The cross context virtual machine structure.
2290 * @param pVCpu The cross context virtual CPU structure of the calling
2291 * thread.
2292 * @param GCPhysPc The physical address corresponding to the current
2293 * RIP+CS.BASE.
2294 * @param fExtraFlags Extra translation block flags: IEMTB_F_INHIBIT_SHADOW,
2295 * IEMTB_F_INHIBIT_NMI, IEMTB_F_CS_LIM_CHECKS.
2296 */
2297static VBOXSTRICTRC iemThreadedCompile(PVMCC pVM, PVMCPUCC pVCpu, RTGCPHYS GCPhysPc, uint32_t fExtraFlags) IEM_NOEXCEPT_MAY_LONGJMP
2298{
2299 Assert(!(fExtraFlags & IEMTB_F_TYPE_MASK));
2300 fExtraFlags |= IEMTB_F_TYPE_THREADED;
2301
2302 /*
2303 * Get the TB we use for the recompiling. This is a maxed-out TB that
2304 * we'll make a more efficient copy of when we're done compiling.
2305 */
2306 PIEMTB pTb = pVCpu->iem.s.pThrdCompileTbR3;
2307 if (pTb)
2308 iemThreadedTbReuse(pVCpu, pTb, GCPhysPc, fExtraFlags);
2309 else
2310 {
2311 pTb = iemThreadedTbAlloc(pVM, pVCpu, GCPhysPc, fExtraFlags);
2312 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2313 pVCpu->iem.s.pThrdCompileTbR3 = pTb;
2314 }
2315
2316 /* Set the current TB so iemThreadedCompileLongJumped and the CIMPL
2317 functions may get at it. */
2318 pVCpu->iem.s.pCurTbR3 = pTb;
2319
2320#if 0
2321 /* Make sure the CheckIrq condition matches the one in EM. */
2322 iemThreadedCompileCheckIrqAfter(pVCpu, pTb);
2323 const uint32_t cZeroCalls = 1;
2324#else
2325 const uint32_t cZeroCalls = 0;
2326#endif
2327
2328 /*
2329 * Now for the recompilation. (This mimics IEMExecLots in many ways.)
2330 */
2331 iemThreadedCompileInitDecoder(pVCpu, false /*fReInit*/, fExtraFlags);
2332 iemThreadedCompileInitOpcodeFetching(pVCpu);
2333 VBOXSTRICTRC rcStrict;
2334 for (;;)
2335 {
2336 /* Process the next instruction. */
2337#ifdef LOG_ENABLED
2338 iemThreadedLogCurInstr(pVCpu, "CC", pTb->cInstructions);
2339 uint16_t const uCsLog = pVCpu->cpum.GstCtx.cs.Sel;
2340 uint64_t const uRipLog = pVCpu->cpum.GstCtx.rip;
2341 Assert(uCsLog != 0 || uRipLog > 0x400 || !IEM_IS_REAL_OR_V86_MODE(pVCpu)); /* Detect executing RM interrupt table. */
2342#endif
2343 uint8_t b; IEM_OPCODE_GET_FIRST_U8(&b);
2344 uint16_t const cCallsPrev = pTb->Thrd.cCalls;
2345
2346 rcStrict = FNIEMOP_CALL(g_apfnIemThreadedRecompilerOneByteMap[b]);
2347 if ( rcStrict == VINF_SUCCESS
2348 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS
2349 && !pVCpu->iem.s.fEndTb)
2350 {
2351 Assert(pTb->Thrd.cCalls > cCallsPrev);
2352 Assert(pTb->Thrd.cCalls - cCallsPrev < 5);
2353
2354 pVCpu->iem.s.cInstructions++;
2355
2356 /* Check for mode change _after_ certain CIMPL calls, so check that
2357 we continue executing with the same mode value. */
2358 if (!(pVCpu->iem.s.fTbCurInstr & (IEM_CIMPL_F_MODE | IEM_CIMPL_F_XCPT | IEM_CIMPL_F_VMEXIT)))
2359 { /* probable */ }
2360 else if (RT_LIKELY(iemThreadedCompileEmitCheckMode(pVCpu, pTb)))
2361 { /* extremely likely */ }
2362 else
2363 break;
2364
2365#if defined(LOG_ENABLED) && 0 /* for debugging */
2366 //iemThreadedCompileEmitNop(pTb);
2367 iemThreadedCompileEmitLogCpuState(pTb);
2368#endif
2369 }
2370 else
2371 {
2372 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, rc=%d\n",
2373 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, VBOXSTRICTRC_VAL(rcStrict)));
2374 if (rcStrict == VINF_IEM_RECOMPILE_END_TB)
2375 rcStrict = VINF_SUCCESS;
2376
2377 if (pTb->Thrd.cCalls > cZeroCalls)
2378 {
2379 if (cCallsPrev != pTb->Thrd.cCalls)
2380 pVCpu->iem.s.cInstructions++;
2381 break;
2382 }
2383
2384 pVCpu->iem.s.pCurTbR3 = NULL;
2385 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2386 }
2387
2388 /* Check for IRQs? */
2389 if (pVCpu->iem.s.cInstrTillIrqCheck > 0)
2390 pVCpu->iem.s.cInstrTillIrqCheck--;
2391 else if (!iemThreadedCompileCheckIrqAfter(pVCpu, pTb))
2392 break;
2393
2394 /* Still space in the TB? */
2395 if ( pTb->Thrd.cCalls + 5 < pTb->Thrd.cAllocated
2396 && pTb->cbOpcodes + 16 <= pVCpu->iem.s.cbOpcodesAllocated)
2397 iemThreadedCompileInitDecoder(pVCpu, true /*fReInit*/, 0);
2398 else
2399 {
2400 Log8(("%04x:%08RX64: End TB - %u instr, %u calls, %u opcode bytes - full\n",
2401 uCsLog, uRipLog, pTb->cInstructions, pTb->Thrd.cCalls, pTb->cbOpcodes));
2402 break;
2403 }
2404 iemThreadedCompileReInitOpcodeFetching(pVCpu);
2405 }
2406
2407 /*
2408 * Duplicate the TB into a completed one and link it.
2409 */
2410 pTb = iemThreadedTbDuplicate(pVM, pVCpu, pTb);
2411 AssertReturn(pTb, VERR_IEM_TB_ALLOC_FAILED);
2412
2413 iemThreadedTbAdd(pVCpu, pVCpu->iem.s.pTbCacheR3, pTb);
2414
2415#ifdef IEM_COMPILE_ONLY_MODE
2416 /*
2417 * Execute the translation block.
2418 */
2419#endif
2420
2421 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2422}
2423
2424
2425
2426/*********************************************************************************************************************************
2427* Recompiled Execution Core *
2428*********************************************************************************************************************************/
2429
2430
2431/**
2432 * Executes a translation block.
2433 *
2434 * @returns Strict VBox status code.
2435 * @param pVCpu The cross context virtual CPU structure of the calling
2436 * thread.
2437 * @param pTb The translation block to execute.
2438 */
2439static VBOXSTRICTRC iemTbExec(PVMCPUCC pVCpu, PIEMTB pTb) IEM_NOEXCEPT_MAY_LONGJMP
2440{
2441 /*
2442 * Check the opcodes in the first page before starting execution.
2443 */
2444/** @todo this test should take IEMTB_F_CS_LIM_CHECKS into account or something.
2445 * The 'near jmp+call' test in bs3-cpu-basic-2 triggers the 2nd assertion here by
2446 * altering the CS limit such that only one of the two instruction bytes is valid.
2447 * Since it's a CS.LIM problem, the pbInstrBuf is good for the full length, and
2448 * the test succeeds if skipped, but we assert in debug builds. */
2449 Assert(!(pVCpu->iem.s.GCPhysInstrBuf & (RTGCPHYS)GUEST_PAGE_OFFSET_MASK));
2450 Assert(pTb->aRanges[0].cbOpcodes <= pVCpu->iem.s.cbInstrBufTotal - pVCpu->iem.s.offInstrNextByte);
2451 if (memcmp(pTb->pabOpcodes, &pVCpu->iem.s.pbInstrBuf[pTb->aRanges[0].offPhysPage], pTb->aRanges[0].cbOpcodes) == 0)
2452 { /* likely */ }
2453 else
2454 {
2455 Log7(("TB obsolete: %p GCPhys=%RGp\n", pTb, pTb->GCPhysPc));
2456 iemThreadedTbObsolete(pVCpu, pTb, true /*fSafeToFree*/);
2457 return VINF_SUCCESS;
2458 }
2459
2460 /*
2461 * Set the current TB so CIMPL functions may get at it.
2462 */
2463 pVCpu->iem.s.pCurTbR3 = pTb;
2464
2465 /*
2466 * Execute the block.
2467 */
2468#ifdef VBOX_WITH_IEM_NATIVE_RECOMPILER
2469 if (pTb->fFlags & IEMTB_F_TYPE_NATIVE)
2470 {
2471 pVCpu->iem.s.cTbExecNative++;
2472# ifdef LOG_ENABLED
2473 iemThreadedLogCurInstr(pVCpu, "EXn", 0);
2474# endif
2475# ifdef RT_ARCH_AMD64
2476 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu);
2477# else
2478 VBOXSTRICTRC const rcStrict = ((PFNIEMTBNATIVE)pTb->Native.paInstructions)(pVCpu, &pVCpu->cpum.GstCtx);
2479# endif
2480 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2481 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2482 { /* likely */ }
2483 else
2484 {
2485 /* pVCpu->iem.s.cInstructions is incremented by iemNativeHlpExecStatusCodeFiddling. */
2486 pVCpu->iem.s.pCurTbR3 = NULL;
2487 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2488
2489 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2490 only to break out of TB execution early. */
2491 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2492 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2493
2494 /* VINF_IEM_REEXEC_FINISH_WITH_FLAGS needs to receive special treatment
2495 and be converted to VINF_SUCCESS or whatever is appropriate. */
2496 if (rcStrict == VINF_IEM_REEXEC_FINISH_WITH_FLAGS)
2497 return iemExecStatusCodeFiddling(pVCpu, iemFinishInstructionWithFlagsSet(pVCpu));
2498
2499 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2500 }
2501 }
2502 else
2503#endif /* VBOX_WITH_IEM_NATIVE_RECOMPILER */
2504 {
2505 /*
2506 * The threaded execution loop.
2507 */
2508 pVCpu->iem.s.cTbExecThreaded++;
2509#ifdef LOG_ENABLED
2510 uint64_t uRipPrev = UINT64_MAX;
2511#endif
2512 PCIEMTHRDEDCALLENTRY pCallEntry = pTb->Thrd.paCalls;
2513 uint32_t cCallsLeft = pTb->Thrd.cCalls;
2514 while (cCallsLeft-- > 0)
2515 {
2516#ifdef LOG_ENABLED
2517 if (pVCpu->cpum.GstCtx.rip != uRipPrev)
2518 {
2519 uRipPrev = pVCpu->cpum.GstCtx.rip;
2520 iemThreadedLogCurInstr(pVCpu, "EXt", pTb->Thrd.cCalls - cCallsLeft - 1);
2521 }
2522 Log9(("%04x:%08RX64: #%d/%d - %d %s\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
2523 pTb->Thrd.cCalls - cCallsLeft - 1, pCallEntry->idxInstr, pCallEntry->enmFunction,
2524 g_apszIemThreadedFunctions[pCallEntry->enmFunction]));
2525#endif
2526 VBOXSTRICTRC const rcStrict = g_apfnIemThreadedFunctions[pCallEntry->enmFunction](pVCpu,
2527 pCallEntry->auParams[0],
2528 pCallEntry->auParams[1],
2529 pCallEntry->auParams[2]);
2530 if (RT_LIKELY( rcStrict == VINF_SUCCESS
2531 && pVCpu->iem.s.rcPassUp == VINF_SUCCESS /** @todo this isn't great. */))
2532 pCallEntry++;
2533 else
2534 {
2535 pVCpu->iem.s.cInstructions += pCallEntry->idxInstr; /* This may be one short, but better than zero. */
2536 pVCpu->iem.s.pCurTbR3 = NULL;
2537 STAM_REL_COUNTER_INC(&pVCpu->iem.s.StatTbExecBreaks);
2538
2539 /* VINF_IEM_REEXEC_BREAK should be treated as VINF_SUCCESS as it's
2540 only to break out of TB execution early. */
2541 if (rcStrict == VINF_IEM_REEXEC_BREAK)
2542 return iemExecStatusCodeFiddling(pVCpu, VINF_SUCCESS);
2543 return iemExecStatusCodeFiddling(pVCpu, rcStrict);
2544 }
2545 }
2546 }
2547
2548 pVCpu->iem.s.cInstructions += pTb->cInstructions;
2549 pVCpu->iem.s.pCurTbR3 = NULL;
2550 return VINF_SUCCESS;
2551}
2552
2553
2554/**
2555 * This is called when the PC doesn't match the current pbInstrBuf.
2556 *
2557 * Upon return, we're ready for opcode fetching. But please note that
2558 * pbInstrBuf can be NULL iff the memory doesn't have readable backing (i.e.
2559 * MMIO or unassigned).
2560 */
2561static RTGCPHYS iemGetPcWithPhysAndCodeMissed(PVMCPUCC pVCpu)
2562{
2563 pVCpu->iem.s.pbInstrBuf = NULL;
2564 pVCpu->iem.s.offCurInstrStart = 0;
2565 pVCpu->iem.s.offInstrNextByte = 0;
2566 iemOpcodeFetchBytesJmp(pVCpu, 0, NULL);
2567 return pVCpu->iem.s.GCPhysInstrBuf + pVCpu->iem.s.offCurInstrStart;
2568}
2569
2570
2571/** @todo need private inline decl for throw/nothrow matching IEM_WITH_SETJMP? */
2572DECL_FORCE_INLINE_THROW(RTGCPHYS) iemGetPcWithPhysAndCode(PVMCPUCC pVCpu)
2573{
2574 /*
2575 * Set uCurTbStartPc to RIP and calc the effective PC.
2576 */
2577 uint64_t uPc = pVCpu->cpum.GstCtx.rip;
2578 pVCpu->iem.s.uCurTbStartPc = uPc;
2579 Assert(pVCpu->cpum.GstCtx.cs.u64Base == 0 || !IEM_IS_64BIT_CODE(pVCpu));
2580 uPc += pVCpu->cpum.GstCtx.cs.u64Base;
2581
2582 /*
2583 * Advance within the current buffer (PAGE) when possible.
2584 */
2585 if (pVCpu->iem.s.pbInstrBuf)
2586 {
2587 uint64_t off = uPc - pVCpu->iem.s.uInstrBufPc;
2588 if (off < pVCpu->iem.s.cbInstrBufTotal)
2589 {
2590 pVCpu->iem.s.offInstrNextByte = (uint32_t)off;
2591 pVCpu->iem.s.offCurInstrStart = (uint16_t)off;
2592 if ((uint16_t)off + 15 <= pVCpu->iem.s.cbInstrBufTotal)
2593 pVCpu->iem.s.cbInstrBuf = (uint16_t)off + 15;
2594 else
2595 pVCpu->iem.s.cbInstrBuf = pVCpu->iem.s.cbInstrBufTotal;
2596
2597 return pVCpu->iem.s.GCPhysInstrBuf + off;
2598 }
2599 }
2600 return iemGetPcWithPhysAndCodeMissed(pVCpu);
2601}
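
/*
 * Illustrative note, not part of the original file, with hypothetical example
 * values: if the current code page is mapped with
 *      uInstrBufPc     = 0x00401000  (flat address the buffer starts at)
 *      GCPhysInstrBuf  = 0x12345000  (guest physical address of that page)
 *      cbInstrBufTotal = 0x1000
 * then a flat PC of 0x00401234 gives off = 0x234 < 0x1000, so the fast path
 * above returns 0x12345000 + 0x234 = 0x12345234 without consulting the TLB,
 * while any PC outside that window takes the iemGetPcWithPhysAndCodeMissed()
 * path and re-fetches the mapping.
 */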
2602
2603
2604/**
2605 * Determines the extra IEMTB_F_XXX flags.
2606 *
2607 * @returns A mix of IEMTB_F_INHIBIT_SHADOW, IEMTB_F_INHIBIT_NMI and
2608 * IEMTB_F_CS_LIM_CHECKS (or zero).
2609 * @param pVCpu The cross context virtual CPU structure of the calling
2610 * thread.
2611 */
2612DECL_FORCE_INLINE(uint32_t) iemGetTbFlagsForCurrentPc(PVMCPUCC pVCpu)
2613{
2614 uint32_t fRet = 0;
2615
2616 /*
2617 * Determine the inhibit bits.
2618 */
2619 if (!(pVCpu->cpum.GstCtx.rflags.uBoth & (IEMTB_F_INHIBIT_SHADOW | IEMTB_F_INHIBIT_NMI)))
2620 { /* typical */ }
2621 else
2622 {
2623 if (CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx))
2624 fRet |= IEMTB_F_INHIBIT_SHADOW;
2625 if (CPUMAreInterruptsInhibitedByNmiEx(&pVCpu->cpum.GstCtx))
2626 fRet |= IEMTB_F_INHIBIT_NMI;
2627 }
2628
2629 /*
2630 * Return IEMTB_F_CS_LIM_CHECKS if the current PC is invalid or if it is
2631 * likely to go invalid before the end of the translation block.
2632 */
2633 if (IEM_IS_64BIT_CODE(pVCpu))
2634 return fRet;
2635
2636 int64_t const offFromLim = (int64_t)pVCpu->cpum.GstCtx.cs.u32Limit - (int64_t)pVCpu->cpum.GstCtx.eip;
2637 if (offFromLim >= X86_PAGE_SIZE + 16 - (int32_t)(pVCpu->cpum.GstCtx.cs.u64Base & GUEST_PAGE_OFFSET_MASK))
2638 return fRet;
2639 return fRet | IEMTB_F_CS_LIM_CHECKS;
2640}
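
/*
 * Illustrative note, not part of the original file, with hypothetical example
 * values: the heuristic above wants roughly a guest page plus a maximum-length
 * instruction (16 bytes) of headroom below CS.LIM before it omits limit
 * checking.  With cs.u32Limit=0x5fff, a page-aligned cs.u64Base and eip=0x5100,
 * offFromLim is 0xeff, which is below 0x1000 + 16, so the TB is compiled with
 * IEMTB_F_CS_LIM_CHECKS; at eip=0x1000 the margin is 0x4fff and the checks are
 * omitted.
 */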
2641
2642
2643VMM_INT_DECL(VBOXSTRICTRC) IEMExecRecompiler(PVMCC pVM, PVMCPUCC pVCpu)
2644{
2645 /*
2646 * See if there is an interrupt pending in TRPM, inject it if we can.
2647 */
2648 if (!TRPMHasTrap(pVCpu))
2649 { /* likely */ }
2650 else
2651 {
2652 VBOXSTRICTRC rcStrict = iemExecInjectPendingTrap(pVCpu);
2653 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
2654 { /*likely */ }
2655 else
2656 return rcStrict;
2657 }
2658
2659 /*
2660 * Init the execution environment.
2661 */
2662#ifdef RT_ARCH_ARM64 /** @todo ARM64: fix unaligned locked instructions properly. @bugref{10547} */
2663 if (pVM->cCpus == 1)
2664 iemInitExec(pVCpu, IEM_F_X86_DISREGARD_LOCK /*fExecOpts*/);
2665 else
2666#endif
2667 iemInitExec(pVCpu, 0 /*fExecOpts*/);
2668 if (RT_LIKELY(pVCpu->iem.s.msRecompilerPollNow != 0))
2669 { }
2670 else
2671 pVCpu->iem.s.msRecompilerPollNow = (uint32_t)(TMVirtualGetNoCheck(pVM) / RT_NS_1MS);
2672
2673 /*
2674 * Run-loop.
2675 *
2676 * If we're using setjmp/longjmp we combine all the catching here to avoid
2677 * having to call setjmp for each block we're executing.
2678 */
2679 PIEMTBCACHE const pTbCache = pVCpu->iem.s.pTbCacheR3;
2680 for (;;)
2681 {
2682 PIEMTB pTb = NULL;
2683 VBOXSTRICTRC rcStrict;
2684 IEM_TRY_SETJMP(pVCpu, rcStrict)
2685 {
2686 uint32_t const cPollRate = 511; /* EM.cpp passes 4095 to IEMExecLots, so an eighth of that seems reasonable for now. */
2687 for (uint32_t iIterations = 0; ; iIterations++)
2688 {
2689 /* Translate PC to physical address, we'll need this for both lookup and compilation. */
2690 RTGCPHYS const GCPhysPc = iemGetPcWithPhysAndCode(pVCpu);
2691 uint32_t const fExtraFlags = iemGetTbFlagsForCurrentPc(pVCpu);
2692
2693 pTb = iemTbCacheLookup(pVCpu, pTbCache, GCPhysPc, fExtraFlags);
2694 if (pTb)
2695 rcStrict = iemTbExec(pVCpu, pTb);
2696 else
2697 rcStrict = iemThreadedCompile(pVM, pVCpu, GCPhysPc, fExtraFlags);
2698 if (rcStrict == VINF_SUCCESS)
2699 {
2700 Assert(pVCpu->iem.s.cActiveMappings == 0);
2701
2702 uint64_t fCpu = pVCpu->fLocalForcedActions;
2703 fCpu &= VMCPU_FF_ALL_MASK & ~( VMCPU_FF_PGM_SYNC_CR3
2704 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2705 | VMCPU_FF_TLB_FLUSH
2706 | VMCPU_FF_UNHALT );
2707 /** @todo this isn't even close to the NMI/IRQ conditions in EM. */
2708 if (RT_LIKELY( ( !fCpu
2709 || ( !(fCpu & ~(VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
2710 && ( !pVCpu->cpum.GstCtx.rflags.Bits.u1IF
2711 || CPUMIsInInterruptShadow(&pVCpu->cpum.GstCtx) )) )
2712 && !VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK) ))
2713 {
2714 if (RT_LIKELY( (iIterations & cPollRate) != 0
2715 || !TMTimerPollBoolWith32BitMilliTS(pVM, pVCpu, &pVCpu->iem.s.msRecompilerPollNow)))
2716 pTb = NULL; /* Clear it before looping so iemTbCacheLookup can safely do native recompilation. */
2717 else
2718 return VINF_SUCCESS;
2719 }
2720 else
2721 return VINF_SUCCESS;
2722 }
2723 else
2724 return rcStrict;
2725 }
2726 }
2727 IEM_CATCH_LONGJMP_BEGIN(pVCpu, rcStrict);
2728 {
2729 pVCpu->iem.s.cLongJumps++;
2730 if (pVCpu->iem.s.cActiveMappings > 0)
2731 iemMemRollback(pVCpu);
2732
2733#ifdef IEMNATIVE_WITH_INSTRUCTION_COUNTING
2734 if (pTb && (pTb->fFlags & IEMTB_F_TYPE_MASK) == IEMTB_F_TYPE_NATIVE)
2735 {
2736 Assert(pVCpu->iem.s.idxTbCurInstr < pTb->cInstructions);
2737 pVCpu->iem.s.cInstructions += pVCpu->iem.s.idxTbCurInstr;
2738 }
2739#endif
2740
2741#if 0 /** @todo do we need to clean up anything? If not, we can drop the pTb = NULL some lines up and change the scope. */
2742 /* If pTb isn't NULL we're in iemTbExec. */
2743 if (!pTb)
2744 {
2745 /* If pCurTbR3 is NULL, we're in iemGetPcWithPhysAndCode.*/
2746 pTb = pVCpu->iem.s.pCurTbR3;
2747 if (pTb)
2748 {
2749 if (pTb == pVCpu->iem.s.pThrdCompileTbR3)
2750 return iemThreadedCompileLongJumped(pVM, pVCpu, rcStrict);
2751 Assert(pTb != pVCpu->iem.s.pNativeCompileTbR3);
2752 }
2753 }
2754#endif
2755 return rcStrict;
2756 }
2757 IEM_CATCH_LONGJMP_END(pVCpu);
2758 }
2759}
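
/*
 * Illustrative note, not part of the original file: cPollRate above is a power
 * of two minus one (511 = 0x1ff), so (iIterations & cPollRate) != 0 skips the
 * timer poll on all but every 512th iteration of the inner loop, keeping
 * TMTimerPollBoolWith32BitMilliTS() off the hot path.
 */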
2760