VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@ 107882

Last change on this file since 107882 was 107854, checked in by vboxsync, 4 weeks ago

x86.h,VMM: More AMD CPUID bits; addressed some old todos related to these; fixed bugs in svm & vmx world switcher (sanity checks, ++). jiraref:VBP-947 bugref:10738

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 282.0 KB
1/* $Id: HMVMXR0.cpp 107854 2025-01-18 23:59:26Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2024 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_HM
33#define VMCPU_INCL_CPUM_GST_CTX
34#include <iprt/x86.h>
35#include <iprt/asm-amd64-x86.h>
36#include <iprt/thread.h>
37#include <iprt/mem.h>
38#include <iprt/mp.h>
39
40#include <VBox/vmm/pdmapi.h>
41#include <VBox/vmm/dbgf.h>
42#include <VBox/vmm/iem.h>
43#include <VBox/vmm/iom.h>
44#include <VBox/vmm/tm.h>
45#include <VBox/vmm/em.h>
46#include <VBox/vmm/gcm.h>
47#include <VBox/vmm/gim.h>
48#include <VBox/vmm/pdmapic.h>
49#include "HMInternal.h"
50#include <VBox/vmm/vmcc.h>
51#include <VBox/vmm/hmvmxinline.h>
52#include "HMVMXR0.h"
53#include "VMXInternal.h"
54#include "dtrace/VBoxVMM.h"
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
60#ifdef DEBUG_ramshankar
61# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
62# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
63# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
64# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
65# define HMVMX_ALWAYS_CLEAN_TRANSIENT
66# define HMVMX_ALWAYS_CHECK_GUEST_STATE
67# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
68# define HMVMX_ALWAYS_TRAP_PF
69# define HMVMX_ALWAYS_FLUSH_TLB
70# define HMVMX_ALWAYS_SWAP_EFER
71#endif
72
73/** Enables the fAlwaysInterceptMovDRx related code. */
74#define VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX 1
75
76
77/*********************************************************************************************************************************
78* Structures and Typedefs *
79*********************************************************************************************************************************/
80/**
81 * VMX page allocation information.
82 */
83typedef struct
84{
 85 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
 86 uint32_t uPadding0; /**< Padding to ensure an array of these structs is aligned to a multiple of 8. */
87 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
88 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
89} VMXPAGEALLOCINFO;
90/** Pointer to VMX page-allocation info. */
91typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
92/** Pointer to a const VMX page-allocation info. */
93typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
94AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
95
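/*
 * Illustrative usage sketch (not part of the original source): a caller describes each page it
 * wants with one VMXPAGEALLOCINFO entry and receives the addresses through the pointers it
 * supplies. The names below (fDemoFeature, HCPhysAlways, pvAlways, ...) are invented for this
 * example; the real callers are hmR0VmxAllocVmcsInfo() and hmR0VmxStructsAlloc() further down.
 *
 *     RTHCPHYS   HCPhysAlways = NIL_RTHCPHYS, HCPhysOptional = NIL_RTHCPHYS;
 *     RTR0PTR    pvAlways     = NIL_RTR0PTR,  pvOptional     = NIL_RTR0PTR;
 *     bool const fDemoFeature = true;    // e.g. derived from a CPU feature check
 *     VMXPAGEALLOCINFO aAllocInfo[] =
 *     {
 *         { true,         0, &HCPhysAlways,   &pvAlways   },   // always allocated
 *         { fDemoFeature, 0, &HCPhysOptional, &pvOptional },   // allocated only when the feature is present
 *     };
 *     RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
 *     int rc = hmR0VmxPagesAllocZ(&hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
 */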
96
97/*********************************************************************************************************************************
98* Internal Functions *
99*********************************************************************************************************************************/
100static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
101static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
102
103
104/*********************************************************************************************************************************
105* Global Variables *
106*********************************************************************************************************************************/
107/** The DR6 value after writing zero to the register.
108 * Set by VMXR0GlobalInit(). */
109static uint64_t g_fDr6Zeroed = 0;
110
111
112/**
113 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
114 * @returns @c true if it's part of LBR stack, @c false otherwise.
115 *
116 * @param pVM The cross context VM structure.
117 * @param idMsr The MSR.
118 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
119 * Optional, can be NULL.
120 *
121 * @remarks Must only be called when LBR is enabled.
122 */
123DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
124{
125 Assert(pVM->hmr0.s.vmx.fLbr);
126 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
127 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
128 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
129 if (idxMsr < cLbrStack)
130 {
131 if (pidxMsr)
132 *pidxMsr = idxMsr;
133 return true;
134 }
135 return false;
136}
137
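/*
 * Worked example (illustrative; the MSR numbers are an assumption for the sketch): with
 * idLbrFromIpMsrFirst = 0x680 and idLbrFromIpMsrLast = 0x69f (a 32-entry LBR stack), a query
 * for idMsr = 0x683 gives idxMsr = 0x683 - 0x680 = 3 < 32, so the function returns true and
 * stores 3 in *pidxMsr. A query for idMsr = 0x6a0 gives idxMsr = 0x20, which is not below
 * cLbrStack (0x20), so it returns false. The unsigned subtraction also rejects MSRs below the
 * first LBR MSR, since those wrap around to very large idxMsr values.
 */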
138
139/**
140 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
141 * @returns @c true if it's part of LBR stack, @c false otherwise.
142 *
143 * @param pVM The cross context VM structure.
144 * @param idMsr The MSR.
145 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
146 * Optional, can be NULL.
147 *
148 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
149 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
150 */
151DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
152{
153 Assert(pVM->hmr0.s.vmx.fLbr);
154 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
155 {
156 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
157 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
158 if (idxMsr < cLbrStack)
159 {
160 if (pidxMsr)
161 *pidxMsr = idxMsr;
162 return true;
163 }
164 }
165 return false;
166}
167
168
169/**
170 * Gets the active (in use) VMCS info. object for the specified VCPU.
171 *
172 * This is either the guest or nested-guest VMCS info. and need not necessarily
173 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
174 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
175 * current VMCS while returning to ring-3. However, the VMCS info. object for that
176 * VMCS would still be active and returned here so that we could dump the VMCS
177 * fields to ring-3 for diagnostics. This function is thus only used to
178 * distinguish between the nested-guest or guest VMCS.
179 *
180 * @returns The active VMCS information.
181 * @param pVCpu The cross context virtual CPU structure.
182 *
183 * @thread EMT.
184 * @remarks This function may be called with preemption or interrupts disabled!
185 */
186DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
187{
188 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
189 return &pVCpu->hmr0.s.vmx.VmcsInfo;
190 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
191}
192
193
194/**
195 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
196 * area.
197 *
198 * @returns @c true if it's different, @c false otherwise.
199 * @param pVmcsInfo The VMCS info. object.
200 */
201DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
202{
203 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
204 && pVmcsInfo->pvGuestMsrStore);
205}
206
207
208/**
209 * Sets the given Processor-based VM-execution controls.
210 *
211 * @param pVmxTransient The VMX-transient structure.
212 * @param uProcCtls The Processor-based VM-execution controls to set.
213 */
214static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
215{
216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
217 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
218 {
219 pVmcsInfo->u32ProcCtls |= uProcCtls;
220 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
221 AssertRC(rc);
222 }
223}
224
225
226/**
227 * Removes the given Processor-based VM-execution controls.
228 *
229 * @param pVCpu The cross context virtual CPU structure.
230 * @param pVmxTransient The VMX-transient structure.
231 * @param uProcCtls The Processor-based VM-execution controls to remove.
232 *
233 * @remarks When executing a nested-guest, this will not remove any of the specified
234 * controls if the nested hypervisor has set any one of them.
235 */
236static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
237{
238 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
239 if (pVmcsInfo->u32ProcCtls & uProcCtls)
240 {
241#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
242 if ( !pVmxTransient->fIsNestedGuest
243 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
244#else
245 NOREF(pVCpu);
246 if (!pVmxTransient->fIsNestedGuest)
247#endif
248 {
249 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
250 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
251 AssertRC(rc);
252 }
253 }
254}
255
256
257/**
258 * Sets the TSC offset for the current VMCS.
259 *
260 * @param uTscOffset The TSC offset to set.
261 * @param pVmcsInfo The VMCS info. object.
262 */
263static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
264{
265 if (pVmcsInfo->u64TscOffset != uTscOffset)
266 {
267 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
268 AssertRC(rc);
269 pVmcsInfo->u64TscOffset = uTscOffset;
270 }
271}
272
273
274/**
275 * Loads the VMCS specified by the VMCS info. object.
276 *
277 * @returns VBox status code.
278 * @param pVmcsInfo The VMCS info. object.
279 *
280 * @remarks Can be called with interrupts disabled.
281 */
282static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
283{
284 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
285 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
286
287 return VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
288}
289
290
291/**
292 * Clears the VMCS specified by the VMCS info. object.
293 *
294 * @returns VBox status code.
295 * @param pVmcsInfo The VMCS info. object.
296 *
297 * @remarks Can be called with interrupts disabled.
298 */
299static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
300{
301 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
302 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
303
304 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
305 if (RT_SUCCESS(rc))
306 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
307 return rc;
308}
309
310
311/**
312 * Checks whether the MSR belongs to the set of guest MSRs that we restore
313 * lazily while leaving VT-x.
314 *
315 * @returns true if it does, false otherwise.
316 * @param pVCpu The cross context virtual CPU structure.
317 * @param idMsr The MSR to check.
318 */
319static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
320{
321 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
322 {
323 switch (idMsr)
324 {
325 case MSR_K8_LSTAR:
326 case MSR_K6_STAR:
327 case MSR_K8_SF_MASK:
328 case MSR_K8_KERNEL_GS_BASE:
329 return true;
330 }
331 }
332 return false;
333}
334
335
336/**
337 * Loads a set of guest MSRs to allow read/write passthru to the guest.
338 *
339 * The name of this function is slightly confusing. This function does NOT
340 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
341 * common prefix for functions dealing with "lazy restoration" of the shared
342 * MSRs.
343 *
344 * @param pVCpu The cross context virtual CPU structure.
345 *
346 * @remarks No-long-jump zone!!!
347 */
348static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
349{
350 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
351 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
352
353 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
354 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
355 {
356 /*
357 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
358 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
359 * we can skip a few MSR writes.
360 *
361 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
362 * guest MSR values in the guest-CPU context might be different to what's currently
363 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
364 * CPU, see @bugref{8728}.
365 */
366 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
367 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
368 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
369 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
370 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
371 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
372 {
373#ifdef VBOX_STRICT
374 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
375 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
376 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
377 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
378#endif
379 }
380 else
381 {
382 /* Avoid raising #GP caused by writing illegal values to these MSRs. */
383 if ( X86_IS_CANONICAL(pCtx->msrKERNELGSBASE)
384 && X86_IS_CANONICAL(pCtx->msrLSTAR))
385 {
386 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
387 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
388 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
389 /* The system call flag mask register isn't as benign and accepting of all
390 values as the above, so mask it to avoid #GP'ing on corrupted input. */
391 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
392 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
393 }
394 else
395 AssertMsgFailed(("Incompatible lazily-loaded guest MSR values\n"));
396 }
397 }
398 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
399}
400
401
402/**
403 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
404 *
405 * @returns @c true if found, @c false otherwise.
406 * @param pVmcsInfo The VMCS info. object.
407 * @param idMsr The MSR to find.
408 */
409static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
410{
411 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
412 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
413 Assert(pMsrs);
414 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
415 for (uint32_t i = 0; i < cMsrs; i++)
416 {
417 if (pMsrs[i].u32Msr == idMsr)
418 return true;
419 }
420 return false;
421}
422
423
424/**
425 * Performs lazy restoration of the set of host MSRs if they were previously
426 * loaded with guest MSR values.
427 *
428 * @param pVCpu The cross context virtual CPU structure.
429 *
430 * @remarks No-long-jump zone!!!
431 * @remarks The guest MSRs should have been saved back into the guest-CPU
432 * context by vmxHCImportGuestState()!!!
433 */
434static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
435{
436 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
437 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
438
439 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
440 {
441 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
442 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
443 {
444 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
445 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
446 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
447 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
448 }
449 }
450 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
451}
452
453
454/**
455 * Sets pfnStartVm to the best suited variant.
456 *
457 * This must be called whenever anything changes relative to the hmR0VmxStartVm
458 * variant selection:
459 * - pVCpu->hm.s.fLoadSaveGuestXcr0
460 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
461 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
462 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
463 * - Perhaps: CPUMCTX.fXStateMask (windows only)
464 *
465 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
466 * can be changed at runtime.
467 */
468static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
469{
470 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
471 {
472 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
473 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
474 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
475 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
476 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
477 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
478 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
479 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
480 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
481 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
482 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
483 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
484 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
485 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
486 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
487 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
488 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
489 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
490 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
491 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
492 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
493 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
494 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
495 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
496 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
497 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
498 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
499 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
500 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
501 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
502 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
503 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
504 };
505 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
506 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
507 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
508 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
509 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
510 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
511 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
512 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
513}
514
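/*
 * Worked example (illustrative only): a VCPU that needs XCR0 switching and an IBPB on
 * VM-entry, but no L1D/MDS flushing and no IBPB on VM-exit, yields
 *     idx = 1 (Xcr0) | 2 (IbpbEntry) = 3,
 * which selects hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit
 * from the table above (entry 3, counting from zero).
 */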
515
516/**
517 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
518 * stack.
519 *
520 * @returns Strict VBox status code (i.e. informational status codes too).
521 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
522 * @param pVCpu The cross context virtual CPU structure.
523 * @param uValue The value to push to the guest stack.
524 */
525static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
526{
527 /*
528 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
529 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
530 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
531 */
532 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
533 if (pCtx->sp == 1)
534 return VINF_EM_RESET;
535 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
536 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
537 AssertRC(rc);
538 return rc;
539}
540
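/*
 * Worked example (illustrative): with SS.base = 0x20000 and SP = 0x0000, the push first wraps
 * SP to 0xfffe (16-bit wraparound is the expected behaviour, see the segment-wraparound note
 * above) and then writes the 2-byte value to guest-physical 0x20000 + 0xfffe. Only the
 * pathological SP = 1 case is rejected with VINF_EM_RESET, matching the triple-fault behaviour
 * described in the function documentation.
 */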
541
542/**
543 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
544 * unreferenced local parameters in the template code...
545 */
546DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
547{
548 RT_NOREF(pVCpu);
549 return VMXWriteVmcs16(uFieldEnc, u16Val);
550}
551
552
553/**
554 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
555 * unreferenced local parameters in the template code...
556 */
557DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
558{
559 RT_NOREF(pVCpu);
560 return VMXWriteVmcs32(uFieldEnc, u32Val);
561}
562
563
564/**
565 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
566 * unreferenced local parameters in the template code...
567 */
568DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
569{
570 RT_NOREF(pVCpu);
571 return VMXWriteVmcs64(uFieldEnc, u64Val);
572}
573
574
575/**
576 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
577 * unreferenced local parameters in the template code...
578 */
579DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
580{
581 RT_NOREF(pVCpu);
582 return VMXReadVmcs16(uFieldEnc, pu16Val);
583}
584
585
586/**
587 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
588 * unreferenced local parameters in the template code...
589 */
590DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
591{
592 RT_NOREF(pVCpu);
593 return VMXReadVmcs32(uFieldEnc, pu32Val);
594}
595
596
597/**
598 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
599 * unreferenced local parameters in the template code...
600 */
601DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
602{
603 RT_NOREF(pVCpu);
604 return VMXReadVmcs64(uFieldEnc, pu64Val);
605}
606
607
608/*
609 * Instantiate the code we share with the NEM darwin backend.
610 */
611#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
612#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
613
614#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
615#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
616#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
617#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
618
619#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
620#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
621#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
622#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
623
624#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
625#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
626#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
627#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
628
629#include "../VMMAll/VMXAllTemplate.cpp.h"
630
631#undef VMX_VMCS_WRITE_16
632#undef VMX_VMCS_WRITE_32
633#undef VMX_VMCS_WRITE_64
634#undef VMX_VMCS_WRITE_NW
635
636#undef VMX_VMCS_READ_16
637#undef VMX_VMCS_READ_32
638#undef VMX_VMCS_READ_64
639#undef VMX_VMCS_READ_NW
640
641#undef VM_IS_VMX_PREEMPT_TIMER_USED
642#undef VM_IS_VMX_NESTED_PAGING
643#undef VM_IS_VMX_UNRESTRICTED_GUEST
644#undef VCPU_2_VMXSTATS
645#undef VCPU_2_VMXSTATE
646
647
648/**
649 * Updates the VM's last error record.
650 *
651 * If there was a VMX instruction error, reads the error data from the VMCS and
652 * updates VCPU's last error record as well.
653 *
654 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
655 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
656 * VERR_VMX_INVALID_VMCS_FIELD.
657 * @param rc The error code.
658 */
659static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
660{
661 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
662 || rc == VERR_VMX_UNABLE_TO_START_VM)
663 {
664 AssertPtrReturnVoid(pVCpu);
665 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
666 }
667 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
668}
669
670
671/**
672 * Enters VMX root mode operation on the current CPU.
673 *
674 * @returns VBox status code.
675 * @param pHostCpu The HM physical-CPU structure.
676 * @param pVM The cross context VM structure. Can be
677 * NULL, after a resume.
678 * @param HCPhysCpuPage Physical address of the VMXON region.
679 * @param pvCpuPage Pointer to the VMXON region.
680 */
681static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
682{
683 Assert(pHostCpu);
684 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
685 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
686 Assert(pvCpuPage);
687 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
688
689 if (pVM)
690 {
691 /* Write the VMCS revision identifier to the VMXON region. */
692 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
693 }
694
695 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
696 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
697
698 /* Enable the VMX bit in CR4 if necessary. */
699 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
700
701 /* Record whether VMXE was already set prior to us enabling it above. */
702 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
703
704 /* Enter VMX root mode. */
705 int rc = VMXEnable(HCPhysCpuPage);
706 if (RT_FAILURE(rc))
707 {
708 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
709 if (!pHostCpu->fVmxeAlreadyEnabled)
710 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
711
712 if (pVM)
713 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
714 }
715
716 /* Restore interrupts. */
717 ASMSetFlags(fEFlags);
718 return rc;
719}
720
721
722/**
723 * Exits VMX root mode operation on the current CPU.
724 *
725 * @returns VBox status code.
726 * @param pHostCpu The HM physical-CPU structure.
727 */
728static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
729{
730 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
731
732 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
733 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
734
735 /* If we're for some reason not in VMX root mode, then don't leave it. */
736 RTCCUINTREG const uHostCr4 = ASMGetCR4();
737
738 int rc;
739 if (uHostCr4 & X86_CR4_VMXE)
740 {
741 /* Exit VMX root mode and clear the VMX bit in CR4. */
742 VMXDisable();
743
744 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
745 if (!pHostCpu->fVmxeAlreadyEnabled)
746 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
747
748 rc = VINF_SUCCESS;
749 }
750 else
751 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
752
753 /* Restore interrupts. */
754 ASMSetFlags(fEFlags);
755 return rc;
756}
757
758
759/**
759 * Allocates pages as specified by an array of VMX page allocation info
761 * objects.
762 *
762 * The pages' contents are zeroed after allocation.
764 *
765 * @returns VBox status code.
766 * @param phMemObj Where to return the handle to the allocation.
767 * @param paAllocInfo The pointer to the first element of the VMX
768 * page-allocation info object array.
769 * @param cEntries The number of elements in the @a paAllocInfo array.
770 */
771static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
772{
773 *phMemObj = NIL_RTR0MEMOBJ;
774
775 /* Figure out how many pages to allocate. */
776 uint32_t cPages = 0;
777 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
778 cPages += !!paAllocInfo[iPage].fValid;
779
780 /* Allocate the pages. */
781 if (cPages)
782 {
783 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
784 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
785 if (RT_FAILURE(rc))
786 return rc;
787
788 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
789 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
790 RT_BZERO(pvFirstPage, cbPages);
791
792 uint32_t iPage = 0;
793 for (uint32_t i = 0; i < cEntries; i++)
794 if (paAllocInfo[i].fValid)
795 {
796 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
797 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
798 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
799 AssertPtr(pvPage);
800
801 Assert(paAllocInfo[iPage].pHCPhys);
802 Assert(paAllocInfo[iPage].ppVirt);
803 *paAllocInfo[iPage].pHCPhys = HCPhysPage;
804 *paAllocInfo[iPage].ppVirt = pvPage;
805
806 /* Move to next page. */
807 ++iPage;
808 }
809
810 /* Make sure all valid (requested) pages have been assigned. */
811 Assert(iPage == cPages);
812 }
813 return VINF_SUCCESS;
814}
815
816
817/**
818 * Frees pages allocated using hmR0VmxPagesAllocZ.
819 *
820 * @param phMemObj Pointer to the memory object handle. Will be set to
821 * NIL.
822 */
823DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
824{
825 /* We can cleanup wholesale since it's all one allocation. */
826 if (*phMemObj != NIL_RTR0MEMOBJ)
827 {
828 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
829 *phMemObj = NIL_RTR0MEMOBJ;
830 }
831}
832
833
834/**
835 * Initializes a VMCS info. object.
836 *
837 * @param pVmcsInfo The VMCS info. object.
838 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
839 */
840static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
841{
842 RT_ZERO(*pVmcsInfo);
843 RT_ZERO(*pVmcsInfoShared);
844
845 pVmcsInfo->pShared = pVmcsInfoShared;
846 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
847 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
848 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
849 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
850 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
851 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
852 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
853 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
854 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
855 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
856 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
857 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
858}
859
860
861/**
862 * Frees the VT-x structures for a VMCS info. object.
863 *
864 * @param pVmcsInfo The VMCS info. object.
865 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
866 */
867static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
868{
869 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
870 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
871}
872
873
874/**
875 * Allocates the VT-x structures for a VMCS info. object.
876 *
877 * @returns VBox status code.
878 * @param pVCpu The cross context virtual CPU structure.
879 * @param pVmcsInfo The VMCS info. object.
880 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
881 *
882 * @remarks The caller is expected to take care of any and all allocation failures.
883 * This function will not perform any cleanup for failures half-way
884 * through.
885 */
886static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
887{
888 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
889
890 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
891 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
892 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
893 VMXPAGEALLOCINFO aAllocInfo[] =
894 {
895 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
896 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
897 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
898 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
899 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
900 };
901
902 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
903 if (RT_FAILURE(rc))
904 return rc;
905
906 /*
907 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas,
908 * because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
909 */
910 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
911 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
912 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
913 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
914
915 /*
916 * Get the virtual-APIC page rather than allocating it again.
917 */
918 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
919 {
920 if (!fIsNstGstVmcs)
921 {
922 if (PDMHasApic(pVM))
923 {
924 rc = PDMR0ApicGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
925 if (RT_FAILURE(rc))
926 return rc;
927 Assert(pVmcsInfo->pbVirtApic);
928 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
929 }
930 }
931 else
932 {
933 /* These are set up later while merging the nested-guest VMCS. */
934 Assert(pVmcsInfo->pbVirtApic == NULL);
935 Assert(pVmcsInfo->HCPhysVirtApic == NIL_RTHCPHYS);
936 }
937 }
938
939 return VINF_SUCCESS;
940}
941
942
943/**
944 * Free all VT-x structures for the VM.
945 *
946 * @param pVM The cross context VM structure.
947 */
948static void hmR0VmxStructsFree(PVMCC pVM)
949{
950 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
951#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
952 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
953 {
954 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
955 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
956 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
957 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
958 }
959#endif
960
961 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
962 {
963 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
964 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
965#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
966 if (pVM->cpum.ro.GuestFeatures.fVmx)
967 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
968#endif
969 }
970}
971
972
973/**
974 * Allocate all VT-x structures for the VM.
975 *
976 * @returns IPRT status code.
977 * @param pVM The cross context VM structure.
978 *
979 * @remarks This function will clean up on memory allocation failures.
980 */
981static int hmR0VmxStructsAlloc(PVMCC pVM)
982{
983 /*
984 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
985 * The VMCS size cannot be more than 4096 bytes.
986 *
987 * See Intel spec. Appendix A.1 "Basic VMX Information".
988 */
989 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
990 if (cbVmcs <= X86_PAGE_4K_SIZE)
991 { /* likely */ }
992 else
993 {
994 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
995 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
996 }
997
998 /*
999 * Allocate per-VM VT-x structures.
1000 */
1001 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
1002 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
1003 VMXPAGEALLOCINFO aAllocInfo[] =
1004 {
1005 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
1006 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
1007 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
1008#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1009 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
1010#endif
1011 };
1012
1013 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
1014 if (RT_SUCCESS(rc))
1015 {
1016#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1017 /* Allocate the shadow VMCS-fields array. */
1018 if (fUseVmcsShadowing)
1019 {
1020 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
1021 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
1022 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1023 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1024 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1025 rc = VERR_NO_MEMORY;
1026 }
1027#endif
1028
1029 /*
1030 * Allocate per-VCPU VT-x structures.
1031 */
1032 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1033 {
1034 /* Allocate the guest VMCS structures. */
1035 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1036 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1037
1038#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1039 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1040 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1041 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1042#endif
1043 }
1044 if (RT_SUCCESS(rc))
1045 return VINF_SUCCESS;
1046 }
1047 hmR0VmxStructsFree(pVM);
1048 return rc;
1049}
1050
1051
1052/**
1053 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1054 *
1055 * @param pVM The cross context VM structure.
1056 */
1057static void hmR0VmxStructsInit(PVMCC pVM)
1058{
1059 /* Paranoia. */
1060 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1061#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1062 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1063#endif
1064
1065 /*
1066 * Initialize members up-front so we can cleanup en masse on allocation failures.
1067 */
1068#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1069 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1070#endif
1071 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1072 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1073 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1074 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1075 {
1076 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1077 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1078 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1079 }
1080}
1081
1082#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1083/**
1084 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1085 *
1086 * @returns @c true if the MSR is intercepted, @c false otherwise.
1087 * @param pbMsrBitmap The MSR bitmap.
1088 * @param offMsr The MSR byte offset.
1089 * @param iBit The bit offset from the byte offset.
1090 */
1091DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1092{
1093 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1094 return ASMBitTest(pbMsrBitmap, (offMsr << 3) + iBit);
1095}
1096#endif
1097
1098/**
1099 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1100 *
1101 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1102 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1103 * VMX execution of the nested-guest, but only if the nested-guest is also not intercepting
1104 * read/write access to this MSR.
1105 *
1106 * @param pVCpu The cross context virtual CPU structure.
1107 * @param pVmcsInfo The VMCS info. object.
1108 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1109 * @param idMsr The MSR value.
1110 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1111 * include both a read -and- a write permission!
1112 *
1113 * @sa CPUMGetVmxMsrPermission.
1114 * @remarks Can be called with interrupts disabled.
1115 */
1116static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1117{
1118 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1119 Assert(pbMsrBitmap);
1120 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1121
1122 /*
1123 * MSR-bitmap Layout:
1124 * Byte index MSR range Interpreted as
1125 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1126 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1127 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1128 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1129 *
1130 * A bit corresponding to an MSR within the above range causes a VM-exit
1131 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1132 * these ranges, it always causes a VM-exit.
1133 *
1134 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1135 */
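    /*
     * Worked example (illustrative): for idMsr = 0xc0000080 (MSR_K6_EFER) the high-MSR range
     * applies, so offMsr = 0x400 and iBit = 0x80. The read intercept therefore lives at bit
     * (0x400 << 3) + 0x80, i.e. byte 0x410, bit 0, of the bitmap, and the write intercept at
     * byte 0xc10, bit 0 (offBitmapWrite + offMsr = 0xc00).
     */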
1136 uint16_t const offBitmapRead = 0;
1137 uint16_t const offBitmapWrite = 0x800;
1138 uint16_t offMsr;
1139 int32_t iBit;
1140 if (idMsr <= UINT32_C(0x00001fff))
1141 {
1142 offMsr = 0;
1143 iBit = idMsr;
1144 }
1145 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1146 {
1147 offMsr = 0x400;
1148 iBit = idMsr - UINT32_C(0xc0000000);
1149 }
1150 else
1151 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1152
1153 /*
1154 * Set the MSR read permission.
1155 */
1156 uint16_t const offMsrRead = offBitmapRead + offMsr;
1157 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1158 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1159 {
1160#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1161 bool const fClear = !fIsNstGstVmcs ? true
1162 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1163#else
1164 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1165 bool const fClear = true;
1166#endif
1167 if (fClear)
1168 ASMBitClear(pbMsrBitmap, (offMsrRead << 3) + iBit);
1169 }
1170 else
1171 ASMBitSet(pbMsrBitmap, (offMsrRead << 3) + iBit);
1172
1173 /*
1174 * Set the MSR write permission.
1175 */
1176 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1177 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1178 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1179 {
1180#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1181 bool const fClear = !fIsNstGstVmcs ? true
1182 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1183#else
1184 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1185 bool const fClear = true;
1186#endif
1187 if (fClear)
1188 ASMBitClear(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1189 }
1190 else
1191 ASMBitSet(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1192}
1193
1194
1195/**
1196 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1197 * area.
1198 *
1199 * @returns VBox status code.
1200 * @param pVCpu The cross context virtual CPU structure.
1201 * @param pVmcsInfo The VMCS info. object.
1202 * @param cMsrs The number of MSRs.
1203 */
1204static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1205{
1206 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1207 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1208 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1209 {
1210 /* Commit the MSR counts to the VMCS and update the cache. */
1211 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1212 {
1213 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1214 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1215 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1216 pVmcsInfo->cEntryMsrLoad = cMsrs;
1217 pVmcsInfo->cExitMsrStore = cMsrs;
1218 pVmcsInfo->cExitMsrLoad = cMsrs;
1219 }
1220 return VINF_SUCCESS;
1221 }
1222
1223 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1224 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1225 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1226}
1227
1228
1229/**
1230 * Adds a new (or updates the value of an existing) guest/host MSR
1231 * pair to be swapped during the world-switch as part of the
1232 * auto-load/store MSR area in the VMCS.
1233 *
1234 * @returns VBox status code.
1235 * @param pVCpu The cross context virtual CPU structure.
1236 * @param pVmxTransient The VMX-transient structure.
1237 * @param idMsr The MSR.
1238 * @param uGuestMsrValue Value of the guest MSR.
1239 * @param fSetReadWrite Whether to set the guest read/write access of this
1240 * MSR (thus not causing a VM-exit).
1241 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1242 * necessary.
1243 */
1244static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1245 bool fSetReadWrite, bool fUpdateHostMsr)
1246{
1247 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1248 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1249 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1250 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1251 uint32_t i;
1252
1253 /* Paranoia. */
1254 Assert(pGuestMsrLoad);
1255
1256#ifndef DEBUG_bird
1257 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1258#endif
1259
1260 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1261 for (i = 0; i < cMsrs; i++)
1262 {
1263 if (pGuestMsrLoad[i].u32Msr == idMsr)
1264 break;
1265 }
1266
1267 bool fAdded = false;
1268 if (i == cMsrs)
1269 {
1270 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1271 ++cMsrs;
1272 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1273 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1274
1275 /* Set the guest to read/write this MSR without causing VM-exits. */
1276 if ( fSetReadWrite
1277 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1278 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1279
1280 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1281 fAdded = true;
1282 }
1283
1284 /* Update the MSR value for the newly added or already existing MSR. */
1285 pGuestMsrLoad[i].u32Msr = idMsr;
1286 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1287
1288 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1289 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1290 {
1291 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1292 pGuestMsrStore[i].u32Msr = idMsr;
1293 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1294 }
1295
1296 /* Update the corresponding slot in the host MSR area. */
1297 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1298 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1299 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1300 pHostMsr[i].u32Msr = idMsr;
1301
1302 /*
1303 * Only if the caller requests to update the host MSR value AND we've newly added the
1304 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1305 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1306 *
1307 * We do this for performance reasons since reading MSRs may be quite expensive.
1308 */
1309 if (fAdded)
1310 {
1311 if (fUpdateHostMsr)
1312 {
1313 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1314 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1315 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1316 }
1317 else
1318 {
1319 /* Someone else can do the work. */
1320 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1321 }
1322 }
1323 return VINF_SUCCESS;
1324}
1325
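/*
 * Illustrative usage sketch (not from the original source): adding a guest/host MSR pair, here
 * MSR_K8_TSC_AUX with a hypothetical guest value, so the CPU swaps it on every world switch.
 * Passing true for fSetReadWrite lets the guest access the MSR without VM-exits, and passing
 * false for fUpdateHostMsr defers reading the host value to hmR0VmxUpdateAutoLoadHostMsrs().
 * Whether and where such a call is actually made is decided elsewhere in this file and in the
 * shared template code.
 *
 *     uint64_t const uGuestTscAux = ...;  // guest TSC_AUX value obtained from CPUM
 *     int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, uGuestTscAux,
 *                                         true,   // fSetReadWrite
 *                                         false); // fUpdateHostMsr
 */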
1326
1327/**
1328 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1329 * auto-load/store MSR area in the VMCS.
1330 *
1331 * @returns VBox status code.
1332 * @param pVCpu The cross context virtual CPU structure.
1333 * @param pVmxTransient The VMX-transient structure.
1334 * @param idMsr The MSR.
1335 */
1336static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1337{
1338 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1339 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1340 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1341 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1342
1343#ifndef DEBUG_bird
1344 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1345#endif
1346
1347 for (uint32_t i = 0; i < cMsrs; i++)
1348 {
1349 /* Find the MSR. */
1350 if (pGuestMsrLoad[i].u32Msr == idMsr)
1351 {
1352 /*
1353 * If it's the last MSR, we only need to reduce the MSR count.
1354 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1355 */
1356 if (i < cMsrs - 1)
1357 {
1358 /* Remove it from the VM-entry MSR-load area. */
1359 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1360 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1361
1362 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1363 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1364 {
1365 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1366 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1367 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1368 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1369 }
1370
1371 /* Remove it from the VM-exit MSR-load area. */
1372 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1373 Assert(pHostMsr[i].u32Msr == idMsr);
1374 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1375 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1376 }
1377
1378 /* Reduce the count to reflect the removed MSR and bail. */
1379 --cMsrs;
1380 break;
1381 }
1382 }
1383
1384 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1385 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1386 {
1387 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1388 AssertRCReturn(rc, rc);
1389
1390 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1391 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1392 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1393
1394 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1395 return VINF_SUCCESS;
1396 }
1397
1398 return VERR_NOT_FOUND;
1399}
1400
1401
1402/**
1403 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1404 *
1405 * @param pVCpu The cross context virtual CPU structure.
1406 * @param pVmcsInfo The VMCS info. object.
1407 *
1408 * @remarks No-long-jump zone!!!
1409 */
1410static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1411{
1412 RT_NOREF(pVCpu);
1413 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1414
1415 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1416 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1417 Assert(pHostMsrLoad);
1418 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1419 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1420 for (uint32_t i = 0; i < cMsrs; i++)
1421 {
1422 /*
1423 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1424 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1425 */
1426 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1427 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1428 else
1429 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1430 }
1431}
1432
1433
1434/**
1435 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1436 * perform lazy restoration of the host MSRs while leaving VT-x.
1437 *
1438 * @param pVCpu The cross context virtual CPU structure.
1439 *
1440 * @remarks No-long-jump zone!!!
1441 */
1442static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1443{
1444 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1445
1446 /*
1447 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1448 */
1449 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1450 {
1451 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1452 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1453 {
1454 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1455 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1456 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1457 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1458 }
1459 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1460 }
1461}
1462
1463
1464#ifdef VBOX_STRICT
1465
1466/**
1467 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1468 *
1469 * @param pVmcsInfo The VMCS info. object.
1470 */
1471static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1472{
1473 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1474
1475 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1476 {
1477 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1478 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1479 uint64_t uVmcsEferMsrVmcs;
1480 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1481 AssertRC(rc);
1482
1483 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1484 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1485 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1486 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1487 }
1488}
1489
1490
1491/**
1492 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1493 * VMCS are correct.
1494 *
1495 * @param pVCpu The cross context virtual CPU structure.
1496 * @param pVmcsInfo The VMCS info. object.
1497 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1498 */
1499static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1500{
1501 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1502
1503 /* Read the various MSR-area counts from the VMCS. */
1504 uint32_t cEntryLoadMsrs;
1505 uint32_t cExitStoreMsrs;
1506 uint32_t cExitLoadMsrs;
1507 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1508 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1509 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1510
1511 /* Verify all the MSR counts are the same. */
1512 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1513 Assert(cExitStoreMsrs == cExitLoadMsrs);
1514 uint32_t const cMsrs = cExitLoadMsrs;
1515
1516 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1517 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1518
1519 /* Verify the MSR counts are within the allocated page size. */
1520 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1521
1522 /* Verify the relevant contents of the MSR areas match. */
1523 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1524 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1525 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1526 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1527 for (uint32_t i = 0; i < cMsrs; i++)
1528 {
1529 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1530 if (fSeparateExitMsrStorePage)
1531 {
1532 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1533 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1534 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1535 }
1536
1537 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1538 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1539 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1540
1541 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1542 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1543 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1544 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1545
1546 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1547 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1548 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1549 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1550
1551 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1552 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1553 {
1554 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1555 if (fIsEferMsr)
1556 {
1557 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1558 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1559 }
1560 else
1561 {
1562 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1563 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1564 if ( pVM->hmr0.s.vmx.fLbr
1565 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1566 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1567 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1568 {
1569 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1570 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1571 pGuestMsrLoad->u32Msr, cMsrs));
1572 }
1573 else if (!fIsNstGstVmcs)
1574 {
1575 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1576 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1577 }
1578 else
1579 {
1580 /*
1581 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1582 * execute a nested-guest with MSR passthrough.
1583 *
1584 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1585 * allow passthrough too.
1586 */
1587 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1588 Assert(pvMsrBitmapNstGst);
1589 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1590 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1591 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1592 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1593 }
1594 }
1595 }
1596
1597 /* Move to the next MSR. */
1598 pHostMsrLoad++;
1599 pGuestMsrLoad++;
1600 pGuestMsrStore++;
1601 }
1602}
1603
1604#endif /* VBOX_STRICT */
1605
1606/**
1607 * Flushes the TLB using EPT.
1608 *
1609 * @param pVCpu The cross context virtual CPU structure of the calling
1610 * EMT. Can be NULL depending on @a enmTlbFlush.
1611 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1612 * enmTlbFlush.
1613 * @param enmTlbFlush Type of flush.
1614 *
1615 * @remarks Caller is responsible for making sure this function is called only
1616 * when NestedPaging is supported and providing @a enmTlbFlush that is
1617 * supported by the CPU.
1618 * @remarks Can be called with interrupts disabled.
1619 */
1620static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1621{
1622 uint64_t au64Descriptor[2];
1623 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1624 au64Descriptor[0] = 0;
1625 else
1626 {
1627 Assert(pVCpu);
1628 Assert(pVmcsInfo);
1629 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1630 }
1631 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1632
1633 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1634 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1635
1636 if ( RT_SUCCESS(rc)
1637 && pVCpu)
1638 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1639}
1640
1641
1642/**
1643 * Flushes the TLB using VPID.
1644 *
1645 * @param pVCpu The cross context virtual CPU structure of the calling
1646 * EMT. Can be NULL depending on @a enmTlbFlush.
1647 * @param enmTlbFlush Type of flush.
1648 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1649 * on @a enmTlbFlush).
1650 *
1651 * @remarks Can be called with interrupts disabled.
1652 */
1653static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1654{
1655 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1656
1657 uint64_t au64Descriptor[2];
1658 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1659 {
1660 au64Descriptor[0] = 0;
1661 au64Descriptor[1] = 0;
1662 }
1663 else
1664 {
1665 AssertPtr(pVCpu);
1666 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1667 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1668 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1669 au64Descriptor[1] = GCPtr;
1670 }
1671
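    /* Note: The INVVPID descriptor layout (Intel spec. "INVVPID - Invalidate Translations Based on VPID")
       is: VPID in bits 15:0 of the first quadword (bits 63:16 reserved, MBZ) and the linear address in
       the second quadword; the latter is only consulted for the individual-address flush type. */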
1672 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1673 AssertMsg(rc == VINF_SUCCESS,
1674 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1675
1676 if ( RT_SUCCESS(rc)
1677 && pVCpu)
1678 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1679 NOREF(rc);
1680}
1681
1682
1683/**
1684 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1685 * otherwise there is nothing really to invalidate.
1686 *
1687 * @returns VBox status code.
1688 * @param pVCpu The cross context virtual CPU structure.
1689 * @param GCVirt Guest virtual address of the page to invalidate.
1690 */
1691VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1692{
1693 AssertPtr(pVCpu);
1694 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1695
1696 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1697 {
1698 /*
1699          * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
1700          * the EPT case. See @bugref{6043} and @bugref{6177}.
1701          *
1702          * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1703          * as this function may be called in a loop with individual addresses.
1704 */
1705 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1706 if (pVM->hmr0.s.vmx.fVpid)
1707 {
1708 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1709 {
1710 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1711 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1712 }
1713 else
1714 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1715 }
1716 else if (pVM->hmr0.s.fNestedPaging)
1717 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1718 }
1719
1720 return VINF_SUCCESS;
1721}
1722
1723
1724/**
1725 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1726 * case where neither EPT nor VPID is supported by the CPU.
1727 *
1728 * @param pHostCpu The HM physical-CPU structure.
1729 * @param pVCpu The cross context virtual CPU structure.
1730 *
1731 * @remarks Called with interrupts disabled.
1732 */
1733static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1734{
1735 AssertPtr(pVCpu);
1736 AssertPtr(pHostCpu);
1737
1738 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1739
1740 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1741 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1742 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1743 pVCpu->hmr0.s.fForceTLBFlush = false;
1744 return;
1745}
1746
1747
1748/**
1749 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1750 *
1751 * @param pHostCpu The HM physical-CPU structure.
1752 * @param pVCpu The cross context virtual CPU structure.
1753 * @param pVmcsInfo The VMCS info. object.
1754 *
1755 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1756 *          nomenclature. The reason is to avoid confusion in compare statements,
1757 *          since the host-CPU copies are named "ASID".
1758 *
1759 * @remarks Called with interrupts disabled.
1760 */
1761static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1762{
1763#ifdef VBOX_WITH_STATISTICS
1764 bool fTlbFlushed = false;
1765# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1766# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1767 if (!fTlbFlushed) \
1768 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1769 } while (0)
1770#else
1771# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1772# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1773#endif
1774
1775 AssertPtr(pVCpu);
1776 AssertPtr(pHostCpu);
1777 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1778
1779 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1780 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1781 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1782 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1783
1784 /*
1785 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1786 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1787 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1788 * cannot reuse the current ASID anymore.
1789 */
1790 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1791 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1792 {
1793 ++pHostCpu->uCurrentAsid;
1794 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1795 {
1796 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1797 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1798 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1799 }
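        /* Bumping cTlbFlushes above is what makes the "cTlbFlushes != pHostCpu->cTlbFlushes" check at
           the top of this function (and its siblings) fire for every other vCPU that last ran on this
           host CPU, forcing them onto fresh VPIDs as well. */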
1800
1801 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1802 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1803 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1804
1805 /*
1806 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1807 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1808 */
1809 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1810 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1811 HMVMX_SET_TAGGED_TLB_FLUSHED();
1812 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1813 }
1814 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1815 {
1816 /*
1817          * Changes to the EPT paging structure by the VMM require flushing-by-EPT as the CPU
1818          * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
1819          * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1820          * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1821          * mappings; see @bugref{6568}.
1822 *
1823 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1824 */
1825 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1826 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1827 HMVMX_SET_TAGGED_TLB_FLUSHED();
1828 }
1829 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1830 {
1831 /*
1832          * The nested-guest specifies its own guest-physical address to use as the APIC-access
1833          * address, which requires flushing EPT-derived translations cached in the TLB.
1834 *
1835 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1836 */
1837 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1838 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1839 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1840 HMVMX_SET_TAGGED_TLB_FLUSHED();
1841 }
1842
1843
1844 pVCpu->hmr0.s.fForceTLBFlush = false;
1845 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1846
1847 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1848 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1849 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1850 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1851 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1852 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1853 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1854 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1855 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1856
1857 /* Update VMCS with the VPID. */
1858 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1859 AssertRC(rc);
1860
1861#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1862}
1863
1864
1865/**
1866 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1867 *
1868 * @param pHostCpu The HM physical-CPU structure.
1869 * @param pVCpu The cross context virtual CPU structure.
1870 * @param pVmcsInfo The VMCS info. object.
1871 *
1872 * @remarks Called with interrupts disabled.
1873 */
1874static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1875{
1876 AssertPtr(pVCpu);
1877 AssertPtr(pHostCpu);
1878 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1879 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1880 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1881
1882 /*
1883 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1884 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1885 */
1886 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1887 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1888 {
1889 pVCpu->hmr0.s.fForceTLBFlush = true;
1890 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1891 }
1892
1893 /* Check for explicit TLB flushes. */
1894 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1895 {
1896 pVCpu->hmr0.s.fForceTLBFlush = true;
1897 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1898 }
1899
1900 /* Check for TLB flushes while switching to/from a nested-guest. */
1901 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1902 {
1903 pVCpu->hmr0.s.fForceTLBFlush = true;
1904 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1905 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1906 }
1907
1908 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1909 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1910
1911 if (pVCpu->hmr0.s.fForceTLBFlush)
1912 {
1913 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1914 pVCpu->hmr0.s.fForceTLBFlush = false;
1915 }
1916}
1917
1918
1919/**
1920 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1921 *
1922 * @param pHostCpu The HM physical-CPU structure.
1923 * @param pVCpu The cross context virtual CPU structure.
1924 *
1925 * @remarks Called with interrupts disabled.
1926 */
1927static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1928{
1929 AssertPtr(pVCpu);
1930 AssertPtr(pHostCpu);
1931 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1932 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1933 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1934
1935 /*
1936 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1937 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1938 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1939 * cannot reuse the current ASID anymore.
1940 */
1941 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1942 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1943 {
1944 pVCpu->hmr0.s.fForceTLBFlush = true;
1945 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1946 }
1947
1948 /* Check for explicit TLB flushes. */
1949 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1950 {
1951 /*
1952 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1953 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1954 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1955 * include fExplicitFlush's too) - an obscure corner case.
1956 */
1957 pVCpu->hmr0.s.fForceTLBFlush = true;
1958 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1959 }
1960
1961 /* Check for TLB flushes while switching to/from a nested-guest. */
1962 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1963 {
1964 pVCpu->hmr0.s.fForceTLBFlush = true;
1965 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1966 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1967 }
1968
1969 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1970 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1971 if (pVCpu->hmr0.s.fForceTLBFlush)
1972 {
1973 ++pHostCpu->uCurrentAsid;
1974 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1975 {
1976 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1977 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1978 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1979 }
1980
1981 pVCpu->hmr0.s.fForceTLBFlush = false;
1982 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1983 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1984 if (pHostCpu->fFlushAsidBeforeUse)
1985 {
1986 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1987 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1988 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1989 {
1990 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1991 pHostCpu->fFlushAsidBeforeUse = false;
1992 }
1993 else
1994 {
1995 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1996 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1997 }
1998 }
1999 }
2000
2001 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
2002 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
2003 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
2004 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
2005 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
2006 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
2007 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
2008
2009 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
2010 AssertRC(rc);
2011}
2012
2013
2014/**
2015 * Flushes the guest TLB entry based on CPU capabilities.
2016 *
2017 * @param pHostCpu The HM physical-CPU structure.
2018 * @param pVCpu The cross context virtual CPU structure.
2019 * @param pVmcsInfo The VMCS info. object.
2020 *
2021 * @remarks Called with interrupts disabled.
2022 */
2023static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2024{
2025#ifdef HMVMX_ALWAYS_FLUSH_TLB
2026 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2027#endif
2028 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2029 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2030 {
2031 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2032 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2033 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2034 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2035 default:
2036 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2037 break;
2038 }
2039 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2040}
2041
2042
2043/**
2044 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2045 * TLB entries from the host TLB before VM-entry.
2046 *
2047 * @returns VBox status code.
2048 * @param pVM The cross context VM structure.
2049 */
2050static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2051{
2052 /*
2053 * Determine optimal flush type for nested paging.
2054     * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2055     * unrestricted guest execution (see hmR3InitFinalizeR0()).
2056 */
2057 if (pVM->hmr0.s.fNestedPaging)
2058 {
2059 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2060 {
2061 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2062 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2063 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2064 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2065 else
2066 {
2067                 /* Shouldn't happen. EPT is supported but no suitable flush types are supported. */
2068 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2069 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2070 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2071 }
2072
2073 /* Make sure the write-back cacheable memory type for EPT is supported. */
2074 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2075 {
2076 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2077 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2078 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2079 }
2080
2081 /* EPT requires a page-walk length of 4. */
2082 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2083 {
2084 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2085 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2086 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2087 }
2088 }
2089 else
2090 {
2091 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2092 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2093 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2094 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2095 }
2096 }
2097
2098 /*
2099 * Determine optimal flush type for VPID.
2100 */
2101 if (pVM->hmr0.s.vmx.fVpid)
2102 {
2103 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2104 {
2105 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2106 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2107 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2108 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2109 else
2110 {
2111                 /* Neither SINGLE- nor ALL-context flush types for VPID are supported by the CPU. Ignore VPID capability. */
2112 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2113 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2114 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2115 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2116 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2117 pVM->hmr0.s.vmx.fVpid = false;
2118 }
2119 }
2120 else
2121 {
2122 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2123             Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2124 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2125 pVM->hmr0.s.vmx.fVpid = false;
2126 }
2127 }
2128
2129 /*
2130 * Setup the handler for flushing tagged-TLBs.
2131 */
2132 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2133 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2134 else if (pVM->hmr0.s.fNestedPaging)
2135 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2136 else if (pVM->hmr0.s.vmx.fVpid)
2137 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2138 else
2139 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2140
2141
2142 /*
2143 * Copy out the result to ring-3.
2144 */
2145 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2146 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2147 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2148 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2149 return VINF_SUCCESS;
2150}
2151
2152
2153/**
2154 * Sets up the LBR MSR ranges based on the host CPU.
2155 *
2156 * @returns VBox status code.
2157 * @param pVM The cross context VM structure.
2158 *
2159 * @sa nemR3DarwinSetupLbrMsrRange
2160 */
2161static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2162{
2163 Assert(pVM->hmr0.s.vmx.fLbr);
2164 uint32_t idLbrFromIpMsrFirst;
2165 uint32_t idLbrFromIpMsrLast;
2166 uint32_t idLbrToIpMsrFirst;
2167 uint32_t idLbrToIpMsrLast;
2168 uint32_t idLbrTosMsr;
2169
2170 /*
2171 * Determine the LBR MSRs supported for this host CPU family and model.
2172 *
2173 * See Intel spec. 17.4.8 "LBR Stack".
2174 * See Intel "Model-Specific Registers" spec.
2175 */
2176 uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8)
2177 | g_CpumHostFeatures.s.uModel;
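    /* For instance, a family 0x6, model 0x5E host (a Skylake-class CPU, assuming the usual Intel
       family/model numbering) yields uFamilyModel = 0x065E and selects the 32-deep LBR stack case below. */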
2178 switch (uFamilyModel)
2179 {
2180 case 0x0f01: case 0x0f02:
2181 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2182 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2183 idLbrToIpMsrFirst = 0x0;
2184 idLbrToIpMsrLast = 0x0;
2185 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2186 break;
2187
2188 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2189 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2190 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2191 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2192 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2193 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2194 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2195 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2196 break;
2197
2198 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2199 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2200 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2201 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2202 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2203 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2204 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2205 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2206 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2207 break;
2208
2209 case 0x0617: case 0x061d: case 0x060f:
2210 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2211 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2212 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2213 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2214 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2215 break;
2216
2217 /* Atom and related microarchitectures we don't care about:
2218 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2219 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2220 case 0x0636: */
2221 /* All other CPUs: */
2222 default:
2223 {
2224 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2225 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2226 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2227 }
2228 }
2229
2230 /*
2231 * Validate.
2232 */
2233 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2234 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2235 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2236 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2237 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2238 {
2239 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2240 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2241 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2242 }
2243 NOREF(pVCpu0);
2244
2245 /*
2246     * Update the LBR info. in the VM struct. for use later.
2247 */
2248 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2249
2250 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2251 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2252
2253 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2254 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2255 return VINF_SUCCESS;
2256}
2257
2258#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2259
2260/**
2261 * Sets up the shadow VMCS fields arrays.
2262 *
2263 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2264 * executing the guest.
2265 *
2266 * @returns VBox status code.
2267 * @param pVM The cross context VM structure.
2268 */
2269static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2270{
2271 /*
2272 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2273 * when the host does not support it.
2274 */
2275 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2276 if ( !fGstVmwriteAll
2277 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2278 { /* likely. */ }
2279 else
2280 {
2281 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2282 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2283 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2284 }
2285
2286 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2287 uint32_t cRwFields = 0;
2288 uint32_t cRoFields = 0;
2289 for (uint32_t i = 0; i < cVmcsFields; i++)
2290 {
2291 VMXVMCSFIELD VmcsField;
2292 VmcsField.u = g_aVmcsFields[i];
2293
2294 /*
2295 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2296 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2297 * in the shadow VMCS fields array as they would be redundant.
2298 *
2299 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2300 * we must not include it in the shadow VMCS fields array. Guests attempting to
2301 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2302 * the required behavior.
2303 */
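        /* (As a reminder, the "HIGH" encoding of a 64-bit VMCS field is simply its "FULL" encoding with
           bit 0, the access-type bit, set; only the FULL form is kept in the arrays.) */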
2304 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2305 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2306 {
2307 /*
2308 * Read-only fields are placed in a separate array so that while syncing shadow
2309 * VMCS fields later (which is more performance critical) we can avoid branches.
2310 *
2311 * However, if the guest can write to all fields (including read-only fields),
2312 * we treat it a as read/write field. Otherwise, writing to these fields would
2313             * we treat it as a read/write field. Otherwise, writing to these fields would
2314 */
2315 if ( fGstVmwriteAll
2316 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2317 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2318 else
2319 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2320 }
2321 }
2322
2323 /* Update the counts. */
2324 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2325 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2326 return VINF_SUCCESS;
2327}
2328
2329
2330/**
2331 * Sets up the VMREAD and VMWRITE bitmaps.
2332 *
2333 * @param pVM The cross context VM structure.
2334 */
2335static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2336{
2337 /*
2338 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2339 */
2340 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2341 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2342 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2343 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2344 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
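    /* Each bit position in these 4K bitmaps corresponds to the low 15 bits of a VMCS field encoding:
       a set bit makes the matching VMREAD/VMWRITE cause a VM-exit, a clear bit lets the access go to
       the shadow VMCS. Hence the "& 0x7fff" when clearing bits below. */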
2345
2346 /*
2347 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2348 * VMREAD and VMWRITE bitmaps.
2349 */
2350 {
2351 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2352 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2353 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2354 {
2355 uint32_t const uVmcsField = paShadowVmcsFields[i];
2356 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2357 Assert(uVmcsField >> 3 < cbBitmap);
2358 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2359 ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff);
2360 }
2361 }
2362
2363 /*
2364 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2365 * if the host supports VMWRITE to all supported VMCS fields.
2366 */
2367 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2368 {
2369 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2370 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2371 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2372 {
2373 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2374 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2375 Assert(uVmcsField >> 3 < cbBitmap);
2376 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2377 }
2378 }
2379}
2380
2381#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2382
2383/**
2384 * Sets up the virtual-APIC page address for the VMCS.
2385 *
2386 * @param pVmcsInfo The VMCS info. object.
2387 */
2388DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2389{
2390 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2391 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2392 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2393 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2394 AssertRC(rc);
2395}
2396
2397
2398/**
2399 * Sets up the MSR-bitmap address for the VMCS.
2400 *
2401 * @param pVmcsInfo The VMCS info. object.
2402 */
2403DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2404{
2405 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2406 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2407 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2408 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2409 AssertRC(rc);
2410}
2411
2412
2413/**
2414 * Sets up the APIC-access page address for the VMCS.
2415 *
2416 * @param pVCpu The cross context virtual CPU structure.
2417 */
2418DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2419{
2420 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2421 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2422 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2423 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2424 AssertRC(rc);
2425}
2426
2427#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2428
2429/**
2430 * Sets up the VMREAD bitmap address for the VMCS.
2431 *
2432 * @param pVCpu The cross context virtual CPU structure.
2433 */
2434DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2435{
2436 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2437 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2438 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2439 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2440 AssertRC(rc);
2441}
2442
2443
2444/**
2445 * Sets up the VMWRITE bitmap address for the VMCS.
2446 *
2447 * @param pVCpu The cross context virtual CPU structure.
2448 */
2449DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2450{
2451 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2452 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2453 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2454 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2455 AssertRC(rc);
2456}
2457
2458#endif
2459
2460/**
2461 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2462 * in the VMCS.
2463 *
2464 * @returns VBox status code.
2465 * @param pVmcsInfo The VMCS info. object.
2466 */
2467DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2468{
2469 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2470 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2471 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2472
2473 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2474 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2475 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2476
2477 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2478 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2479 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2480
2481 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2482 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2483 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2484 return VINF_SUCCESS;
2485}
2486
2487
2488/**
2489 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2490 *
2491 * @param pVCpu The cross context virtual CPU structure.
2492 * @param pVmcsInfo The VMCS info. object.
2493 */
2494static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2495{
2496 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2497
2498 /*
2499 * By default, ensure guest attempts to access any MSR cause VM-exits.
2500 * This shall later be relaxed for specific MSRs as necessary.
2501 *
2502 * Note: For nested-guests, the entire bitmap will be merged prior to
2503 * executing the nested-guest using hardware-assisted VMX and hence there
2504 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2505 */
2506 Assert(pVmcsInfo->pvMsrBitmap);
2507 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
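    /* The 4K MSR bitmap is split into four 1KB regions: read-low (MSRs 0x0..0x1fff), read-high
       (0xc0000000..0xc0001fff), write-low and write-high. Filling it with all ones intercepts both
       reads and writes of every covered MSR; accesses to MSRs outside these ranges always VM-exit. */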
2508
2509 /*
2510 * The guest can access the following MSRs (read, write) without causing
2511 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2512 */
2513 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2514 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2515 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2516 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2517 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2518 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2519
2520 /*
2521     * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2522     * associated with them. We never need to intercept access (writes need to be
2523     * executed without causing a VM-exit, reads will #GP fault anyway).
2524     *
2525     * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2526     * read/write it. We swap the guest/host MSR value using the
2527     * auto-load/store MSR area. Since things keep getting added here, we should
2528     * technically intercept writes to prevent illegal bits from being set (raise
2529     * #GP), but we don't currently do so for performance raisins/laziness.
2530 */
2531 if (pVM->cpum.ro.GuestFeatures.fIbpb /* && g_CpumHostFeatures.s.fIbpb*/)
2532 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2533 if (pVM->cpum.ro.GuestFeatures.fFlushCmd && g_CpumHostFeatures.s.fFlushCmd)
2534 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2535 if (pVM->cpum.ro.GuestFeatures.fIbrs && g_CpumHostFeatures.s.fIbrs)
2536 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2537
2538 /*
2539 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2540 * required for 64-bit guests.
2541 */
2542 if (pVM->hmr0.s.fAllow64BitGuests)
2543 {
2544 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2545 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2546 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2547 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2548 }
2549
2550 /*
2551 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2552 */
2553#ifdef VBOX_STRICT
2554 Assert(pVmcsInfo->pvMsrBitmap);
2555 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2556 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2557#endif
2558}
2559
2560
2561/**
2562 * Sets up pin-based VM-execution controls in the VMCS.
2563 *
2564 * @returns VBox status code.
2565 * @param pVCpu The cross context virtual CPU structure.
2566 * @param pVmcsInfo The VMCS info. object.
2567 */
2568static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2569{
2570 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2571 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2572 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2573
2574 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2575 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2576
2577 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2578 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2579
2580 /* Enable the VMX-preemption timer. */
2581 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2582 {
2583 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2584 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2585 }
2586
2587#if 0
2588 /* Enable posted-interrupt processing. */
2589 if (pVM->hm.s.fPostedIntrs)
2590 {
2591 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2592 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2593 fVal |= VMX_PIN_CTLS_POSTED_INT;
2594 }
2595#endif
2596
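    /* fZap is the allowed-1 mask from the capability MSR: any bit of fVal that falls outside it is a
       control we rely on but the CPU forces to zero, which is exactly what the check below detects. */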
2597 if ((fVal & fZap) != fVal)
2598 {
2599 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2600 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2601 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2602 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2603 }
2604
2605 /* Commit it to the VMCS and update our cache. */
2606 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2607 AssertRC(rc);
2608 pVmcsInfo->u32PinCtls = fVal;
2609
2610 return VINF_SUCCESS;
2611}
2612
2613
2614/**
2615 * Sets up secondary processor-based VM-execution controls in the VMCS.
2616 *
2617 * @returns VBox status code.
2618 * @param pVCpu The cross context virtual CPU structure.
2619 * @param pVmcsInfo The VMCS info. object.
2620 */
2621static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2622{
2623 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2624 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2625 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2626
2627 /* WBINVD causes a VM-exit. */
2628 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2629 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2630
2631 /* Enable EPT (aka nested-paging). */
2632 if (pVM->hmr0.s.fNestedPaging)
2633 fVal |= VMX_PROC_CTLS2_EPT;
2634
2635     /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2636        by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2637 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2638 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2639 fVal |= VMX_PROC_CTLS2_INVPCID;
2640
2641 /* Enable VPID. */
2642 if (pVM->hmr0.s.vmx.fVpid)
2643 fVal |= VMX_PROC_CTLS2_VPID;
2644
2645 /* Enable unrestricted guest execution. */
2646 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2647 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2648
2649#if 0
2650 if (pVM->hm.s.fVirtApicRegs)
2651 {
2652 /* Enable APIC-register virtualization. */
2653 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2654 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2655
2656 /* Enable virtual-interrupt delivery. */
2657 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2658 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2659 }
2660#endif
2661
2662 /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
2663 where the TPR shadow resides. */
2664     /** @todo VIRT_X2APIC support; it's mutually exclusive with this, so it must be
2665      *        done dynamically. */
2666 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2667 {
2668 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2669 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2670 }
2671
2672     /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2673        by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2674 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2675 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2676 fVal |= VMX_PROC_CTLS2_RDTSCP;
2677
2678 /* Enable Pause-Loop exiting. */
2679 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2680 && pVM->hm.s.vmx.cPleGapTicks
2681 && pVM->hm.s.vmx.cPleWindowTicks)
2682 {
2683 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2684
2685 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2686 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2687 }
2688
2689 if ((fVal & fZap) != fVal)
2690 {
2691 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2692 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2693 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2694 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2695 }
2696
2697 /* Commit it to the VMCS and update our cache. */
2698 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2699 AssertRC(rc);
2700 pVmcsInfo->u32ProcCtls2 = fVal;
2701
2702 return VINF_SUCCESS;
2703}
2704
2705
2706/**
2707 * Sets up processor-based VM-execution controls in the VMCS.
2708 *
2709 * @returns VBox status code.
2710 * @param pVCpu The cross context virtual CPU structure.
2711 * @param pVmcsInfo The VMCS info. object.
2712 */
2713static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2714{
2715 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2716 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2717 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2718
2719 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2720 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2721 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2722 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2723 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2724 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2725 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2726
2727     /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not -always- required to be set or cleared. */
2728 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2729 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2730 {
2731 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2732 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2733 }
2734
2735 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2736 if (!pVM->hmr0.s.fNestedPaging)
2737 {
2738 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2739 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2740 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2741 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2742 }
2743
2744 /* Use TPR shadowing if supported by the CPU. */
2745 if ( PDMHasApic(pVM)
2746 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2747 {
2748 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2749 /* CR8 writes cause a VM-exit based on TPR threshold. */
2750 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2751 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2752 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2753 }
2754 else
2755 {
2756 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2757 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2758 if (pVM->hmr0.s.fAllow64BitGuests)
2759 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2760 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2761 }
2762
2763 /* Use MSR-bitmaps if supported by the CPU. */
2764 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2765 {
2766 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2767 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2768 }
2769
2770 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2771 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2772 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2773
2774 if ((fVal & fZap) != fVal)
2775 {
2776 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2777 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2778 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2779 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2780 }
2781
2782 /* Commit it to the VMCS and update our cache. */
2783 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2784 AssertRC(rc);
2785 pVmcsInfo->u32ProcCtls = fVal;
2786
2787 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2788 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2789 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2790
2791 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2792 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2793 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2794
2795 /* Sanity check, should not really happen. */
2796 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2797 { /* likely */ }
2798 else
2799 {
2800 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2801 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2802 }
2803
2804 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2805 return VINF_SUCCESS;
2806}
2807
2808
2809/**
2810 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2811 * Processor-based VM-execution) control fields in the VMCS.
2812 *
2813 * @returns VBox status code.
2814 * @param pVCpu The cross context virtual CPU structure.
2815 * @param pVmcsInfo The VMCS info. object.
2816 */
2817static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2818{
2819#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2820 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2821 {
2822 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2823 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2824 }
2825#endif
2826
2827 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2828 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2829 AssertRC(rc);
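    /* Setting the link pointer to all ones (which is what NIL_RTHCPHYS amounts to) tells the CPU there
       is no linked shadow VMCS and skips the related VM-entry checks; it only needs to hold a real,
       page-aligned shadow VMCS address once VMCS shadowing is enabled. */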
2830
2831 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2832 if (RT_SUCCESS(rc))
2833 {
2834 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2835 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2836
2837 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2838 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2839
2840 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2841 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2842
2843 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2844 {
2845 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2846 AssertRC(rc);
2847 }
2848 return VINF_SUCCESS;
2849 }
2850 else
2851 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2852 return rc;
2853}
2854
2855
2856/**
2857 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2858 *
2859 * We shall set up those exception intercepts that don't change during the
2860 * lifetime of the VM here. The rest are done dynamically while loading the
2861 * guest state.
2862 *
2863 * @param pVCpu The cross context virtual CPU structure.
2864 * @param pVmcsInfo The VMCS info. object.
2865 */
2866static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2867{
2868 /*
2869 * The following exceptions are always intercepted:
2870 *
2871 * #AC - To prevent the guest from hanging the CPU and for dealing with
2872 * split-lock detecting host configs.
2873 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2874 * recursive #DBs can cause a CPU hang.
2875 * #PF - To sync our shadow page tables when nested-paging is not used.
2876 */
2877 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2878 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2879 | RT_BIT(X86_XCPT_DB)
2880 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
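    /* E.g. without nested paging this evaluates to RT_BIT(17) | RT_BIT(1) | RT_BIT(14) = 0x24002;
       with nested paging the #PF bit is dropped, giving 0x20002. */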
2881
2882 /* Commit it to the VMCS. */
2883 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2884 AssertRC(rc);
2885
2886 /* Update our cache of the exception bitmap. */
2887 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2888}
2889
2890
2891#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2892/**
2893 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2894 *
2895 * @returns VBox status code.
2896 * @param pVmcsInfo The VMCS info. object.
2897 */
2898static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2899{
2900 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2901 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2902 AssertRC(rc);
2903
2904 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2905 if (RT_SUCCESS(rc))
2906 {
2907 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2908 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2909
2910 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2911 Assert(!pVmcsInfo->u64Cr0Mask);
2912 Assert(!pVmcsInfo->u64Cr4Mask);
2913 return VINF_SUCCESS;
2914 }
2915     LogRelFunc(("Failed to set up the nested-guest VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2916 return rc;
2917}
2918#endif
2919
2920
2921/**
2922 * Selector implementation for the VMX start-VM function (pVCpu->hmr0.s.vmx.pfnStartVm).
2923 */
2924static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2925{
2926 hmR0VmxUpdateStartVmFunction(pVCpu);
2927 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2928}
2929
2930
2931/**
2932 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2933 * VMX.
2934 *
2935 * @returns VBox status code.
2936 * @param pVCpu The cross context virtual CPU structure.
2937 * @param pVmcsInfo The VMCS info. object.
2938 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2939 */
2940static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2941{
2942 Assert(pVmcsInfo->pvVmcs);
2943 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2944
2945 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2946 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2947 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2948
2949 LogFlowFunc(("\n"));
2950
2951 /*
2952 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2953 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2954 */
2955 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2956 if (RT_SUCCESS(rc))
2957 {
2958 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2959 if (RT_SUCCESS(rc))
2960 {
2961 /*
2962 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2963 * The host is always 64-bit since we no longer support 32-bit hosts.
2964 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2965 */
2966 if (!fIsNstGstVmcs)
2967 {
2968 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2969 if (RT_SUCCESS(rc))
2970 {
2971 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2972 if (RT_SUCCESS(rc))
2973 {
2974 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2975 if (RT_SUCCESS(rc))
2976 {
2977 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2978#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2979 /*
2980 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2981 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2982 * making it fit for use when VMCS shadowing is later enabled.
2983 */
2984 if (pVmcsInfo->pvShadowVmcs)
2985 {
2986 VMXVMCSREVID VmcsRevId;
2987 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2988 VmcsRevId.n.fIsShadowVmcs = 1;
2989 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2990 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2991 if (RT_SUCCESS(rc))
2992 { /* likely */ }
2993 else
2994 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2995 }
2996#endif
2997 }
2998 else
2999 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
3000 }
3001 else
3002 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
3003 }
3004 else
3005 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
3006 }
3007 else
3008 {
3009#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3010 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
3011 if (RT_SUCCESS(rc))
3012 { /* likely */ }
3013 else
3014 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
3015#else
3016 AssertFailed();
3017#endif
3018 }
3019 }
3020 else
3021 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc));
3022 }
3023 else
3024        LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc));
3025
3026 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3027 if (RT_SUCCESS(rc))
3028 {
3029 rc = hmR0VmxClearVmcs(pVmcsInfo);
3030 if (RT_SUCCESS(rc))
3031 { /* likely */ }
3032 else
3033            LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", pszVmcs, rc));
3034 }
3035
3036 /*
3037 * Update the last-error record both for failures and success, so we
3038 * can propagate the status code back to ring-3 for diagnostics.
3039 */
3040 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3041 NOREF(pszVmcs);
3042 return rc;
3043}
3044
3045
3046/**
3047 * Does global VT-x initialization (called during module initialization).
3048 *
3049 * @returns VBox status code.
3050 */
3051VMMR0DECL(int) VMXR0GlobalInit(void)
3052{
3053#ifdef HMVMX_USE_FUNCTION_TABLE
3054 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3055# ifdef VBOX_STRICT
3056 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3057 Assert(g_aVMExitHandlers[i].pfn);
3058# endif
3059#endif
3060
3061 /*
3062 * For detecting whether DR6.RTM is writable or not (done in VMXR0InitVM).
3063 */
3064 RTTHREADPREEMPTSTATE Preempt = RTTHREADPREEMPTSTATE_INITIALIZER;
3065 RTThreadPreemptDisable(&Preempt);
3066 RTCCUINTXREG const fSavedDr6 = ASMGetDR6();
3067 ASMSetDR6(0);
3068 RTCCUINTXREG const fZeroDr6 = ASMGetDR6();
3069 ASMSetDR6(fSavedDr6);
3070 RTThreadPreemptRestore(&Preempt);
3071
3072 g_fDr6Zeroed = fZeroDr6;
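    /* g_fDr6Zeroed holds what the CPU reads back after DR6 is written with zero; VMXR0InitVM
       compares it against X86_DR6_RA1_MASK to tell whether DR6.RTM is writable on this host. */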
3073
3074 return VINF_SUCCESS;
3075}
3076
3077
3078/**
3079 * Does global VT-x termination (called during module termination).
3080 */
3081VMMR0DECL(void) VMXR0GlobalTerm()
3082{
3083 /* Nothing to do currently. */
3084}
3085
3086
3087/**
3088 * Sets up and activates VT-x on the current CPU.
3089 *
3090 * @returns VBox status code.
3091 * @param pHostCpu The HM physical-CPU structure.
3092 * @param pVM The cross context VM structure. Can be
3093 * NULL after a host resume operation.
3094 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3095 * fEnabledByHost is @c true).
3096 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3097 * @a fEnabledByHost is @c true).
3098 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3099 * enable VT-x on the host.
3100 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3101 */
3102VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3103 PCSUPHWVIRTMSRS pHwvirtMsrs)
3104{
3105 AssertPtr(pHostCpu);
3106 AssertPtr(pHwvirtMsrs);
3107 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3108
3109 /* Enable VT-x if it's not already enabled by the host. */
3110 if (!fEnabledByHost)
3111 {
3112 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3113 if (RT_FAILURE(rc))
3114 return rc;
3115 }
3116
3117 /*
3118     * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3119 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3120 * invalidated when flushing by VPID.
3121 */
3122 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3123 {
3124 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3125 pHostCpu->fFlushAsidBeforeUse = false;
3126 }
3127 else
3128 pHostCpu->fFlushAsidBeforeUse = true;
3129
3130 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3131 ++pHostCpu->cTlbFlushes;
3132
3133 return VINF_SUCCESS;
3134}
3135
3136
3137/**
3138 * Deactivates VT-x on the current CPU.
3139 *
3140 * @returns VBox status code.
3141 * @param pHostCpu The HM physical-CPU structure.
3142 * @param pvCpuPage Pointer to the VMXON region.
3143 * @param HCPhysCpuPage Physical address of the VMXON region.
3144 *
3145 * @remarks This function should never be called when SUPR0EnableVTx() or
3146 * similar was used to enable VT-x on the host.
3147 */
3148VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3149{
3150 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3151
3152 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3153 return hmR0VmxLeaveRootMode(pHostCpu);
3154}
3155
3156
3157/**
3158 * Does per-VM VT-x initialization.
3159 *
3160 * @returns VBox status code.
3161 * @param pVM The cross context VM structure.
3162 */
3163VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3164{
3165 AssertPtr(pVM);
3166 LogFlowFunc(("pVM=%p\n", pVM));
3167
3168 hmR0VmxStructsInit(pVM);
3169 int rc = hmR0VmxStructsAlloc(pVM);
3170 if (RT_FAILURE(rc))
3171 {
3172        LogRelFunc(("Failed to allocate VMX structures. rc=%Rrc\n", rc));
3173 return rc;
3174 }
3175
3176 /* Setup the crash dump page. */
3177#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3178 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3179 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3180#endif
3181
3182 /*
3183     * Copy out stuff that's for ring-3 and determine the default configuration.
3184 */
3185 pVM->hm.s.ForR3.vmx.u64HostDr6Zeroed = g_fDr6Zeroed;
3186
3187 /* Since we do not emulate RTM, make sure DR6.RTM cannot be cleared by the
3188 guest and cause confusion there. It appears that the DR6.RTM bit can be
3189 cleared even if TSX-NI is disabled (microcode update / system / whatever). */
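    /* fAlwaysInterceptMovDRxCfg: 0 = auto-detect (force the intercept only when DR6.RTM is
       writable on the host), > 0 = always intercept MOV DRx, < 0 = never force the intercept. */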
3190#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3191 if (pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg == 0)
3192 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = g_fDr6Zeroed != X86_DR6_RA1_MASK;
3193 else
3194#endif
3195 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg > 0;
3196 pVM->hm.s.ForR3.vmx.fAlwaysInterceptMovDRx = pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3197
3198 return VINF_SUCCESS;
3199}
3200
3201
3202/**
3203 * Does per-VM VT-x termination.
3204 *
3205 * @returns VBox status code.
3206 * @param pVM The cross context VM structure.
3207 */
3208VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3209{
3210 AssertPtr(pVM);
3211 LogFlowFunc(("pVM=%p\n", pVM));
3212
3213#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3214 if (pVM->hmr0.s.vmx.pbScratch)
3215 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3216#endif
3217 hmR0VmxStructsFree(pVM);
3218 return VINF_SUCCESS;
3219}
3220
3221
3222/**
3223 * Sets up the VM for execution using hardware-assisted VMX.
3224 * This function is only called once per-VM during initialization.
3225 *
3226 * @returns VBox status code.
3227 * @param pVM The cross context VM structure.
3228 */
3229VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3230{
3231 AssertPtr(pVM);
3232 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3233
3234 LogFlowFunc(("pVM=%p\n", pVM));
3235
3236 /*
3237 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3238 * without causing a #GP.
3239 */
3240 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3241 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3242 { /* likely */ }
3243 else
3244 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3245
3246 /*
3247 * Check that nested paging is supported if enabled and copy over the flag to the
3248 * ring-0 only structure.
3249 */
3250 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3251 AssertReturn( !fNestedPaging
3252 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3253 VERR_INCOMPATIBLE_CONFIG);
3254 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3255 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3256
3257 /*
3258 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3259 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3260 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3261 */
3262 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3263 AssertReturn( !fUnrestrictedGuest
3264 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3265 && fNestedPaging),
3266 VERR_INCOMPATIBLE_CONFIG);
3267 if ( !fUnrestrictedGuest
3268 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3269 || !pVM->hm.s.vmx.pRealModeTSS))
3270 {
3271 LogRelFunc(("Invalid real-on-v86 state.\n"));
3272 return VERR_INTERNAL_ERROR;
3273 }
3274 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3275
3276 /* Initialize these always, see hmR3InitFinalizeR0().*/
3277 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3278 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3279
3280 /* Setup the tagged-TLB flush handlers. */
3281 int rc = hmR0VmxSetupTaggedTlb(pVM);
3282 if (RT_FAILURE(rc))
3283 {
3284 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3285 return rc;
3286 }
3287
3288 /* Determine LBR capabilities. */
3289 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3290 if (pVM->hmr0.s.vmx.fLbr)
3291 {
3292 rc = hmR0VmxSetupLbrMsrRange(pVM);
3293 if (RT_FAILURE(rc))
3294 {
3295 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3296 return rc;
3297 }
3298 }
3299
3300#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3301 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3302 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3303 {
3304 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3305 if (RT_SUCCESS(rc))
3306 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3307 else
3308 {
3309 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3310 return rc;
3311 }
3312 }
3313#endif
3314
3315 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3316 {
3317 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3318 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3319
3320 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3321
3322 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3323 if (RT_SUCCESS(rc))
3324 {
3325#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3326 if (pVM->cpum.ro.GuestFeatures.fVmx)
3327 {
3328 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3329 if (RT_SUCCESS(rc))
3330 { /* likely */ }
3331 else
3332 {
3333 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3334 return rc;
3335 }
3336 }
3337#endif
3338 }
3339 else
3340 {
3341 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3342 return rc;
3343 }
3344 }
3345
3346 return VINF_SUCCESS;
3347}
3348
3349
3350/**
3351 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3352 * the VMCS.
3353 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3354 */
3355static uint64_t hmR0VmxExportHostControlRegs(void)
3356{
3357 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3358 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3359 uint64_t uHostCr4 = ASMGetCR4();
3360 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3361 return uHostCr4;
3362}
3363
3364
3365/**
3366 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3367 * the host-state area in the VMCS.
3368 *
3369 * @returns VBox status code.
3370 * @param pVCpu The cross context virtual CPU structure.
3371 * @param uHostCr4 The host CR4 value.
3372 */
3373static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3374{
3375 /*
3376 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3377 * will be messed up. We should -not- save the messed up state without restoring
3378 * the original host-state, see @bugref{7240}.
3379 *
3380     * This can apparently happen (most likely due to FPU changes); deal with it rather than
3381     * asserting. This was observed when booting a Solaris 10u10 32-bit guest.
3382 */
3383 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3384 {
3385 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3386 pVCpu->idCpu));
3387 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3388 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3389 }
3390
3391 /*
3392 * Get all the host info.
3393 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3394 * without also checking the cpuid bit.
3395 */
3396 uint32_t fRestoreHostFlags;
3397#if RT_INLINE_ASM_EXTERNAL
3398 if (uHostCr4 & X86_CR4_FSGSBASE)
3399 {
3400 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3401 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3402 }
3403 else
3404 {
3405 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3406 fRestoreHostFlags = 0;
3407 }
3408 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3409 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3410 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3411 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3412#else
3413 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3414 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3415 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3416 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3417 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3418 if (uHostCr4 & X86_CR4_FSGSBASE)
3419 {
3420 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3421 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3422 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3423 }
3424 else
3425 {
3426 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3427 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3428 fRestoreHostFlags = 0;
3429 }
3430 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3431 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3432 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3433 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3434 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3435#endif
3436
3437 /*
3438     * Determine if the host segment registers are suitable for VT-x. Otherwise use zero selectors
3439     * to satisfy the VM-entry checks, and restore the originals before we get preempted.
3440 *
3441 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3442 */
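    /* OR the four data segment selectors together so a single test tells whether any of them has a
       non-zero RPL or references the LDT (TI bit set). */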
3443 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3444 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3445 {
3446 if (!(uSelAll & X86_SEL_LDT))
3447 {
3448#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3449 do { \
3450 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3451 if ((a_uVmcsVar) & X86_SEL_RPL) \
3452 { \
3453 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3454 (a_uVmcsVar) = 0; \
3455 } \
3456 } while (0)
3457 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3458 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3459 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3460 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3461#undef VMXLOCAL_ADJUST_HOST_SEG
3462 }
3463 else
3464 {
3465#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3466 do { \
3467 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3468 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3469 { \
3470 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3471 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3472 else \
3473 { \
3474 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3475 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3476 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3477 } \
3478 (a_uVmcsVar) = 0; \
3479 } \
3480 } while (0)
3481 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3482 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3483 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3484 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3485#undef VMXLOCAL_ADJUST_HOST_SEG
3486 }
3487 }
3488
3489 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3490 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3491 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3492 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3493 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3494 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3495 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3496 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3497
3498 /*
3499     * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
3500 * them to the maximum limit (0xffff) on every VM-exit.
3501 */
3502 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3503 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3504
3505 /*
3506 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3507 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3508 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3509     * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3510     * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3511 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3512 * at 0xffff on hosts where we are sure it won't cause trouble.
3513 */
3514#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3515 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3516#else
3517 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3518#endif
3519 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3520
3521 /*
3522 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3523 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3524 * RPL should be too in most cases.
3525 */
3526 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3527 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3528 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3529 VERR_VMX_INVALID_HOST_STATE);
3530
3531 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3532 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
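    /* In 64-bit mode the TR descriptor is an expanded 16-byte system descriptor;
       X86DESC64_BASE() reassembles the full 64-bit base needed for the VMCS host TR base field. */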
3533
3534 /*
3535 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3536 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3537 * restoration if the host has something else. Task switching is not supported in 64-bit
3538 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3539 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3540 *
3541 * [1] See Intel spec. 3.5 "System Descriptor Types".
3542 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3543 */
3544 Assert(pDesc->System.u4Type == 11);
3545 if ( pDesc->System.u16LimitLow != 0x67
3546 || pDesc->System.u4LimitHigh)
3547 {
3548 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3549
3550 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3551 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3552 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3553 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3554 {
3555 /* The GDT is read-only but the writable GDT is available. */
3556 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3557 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3558 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3559 AssertRCReturn(rc, rc);
3560 }
3561 }
3562
3563 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3564
3565 /*
3566 * Do all the VMCS updates in one block to assist nested virtualization.
3567 */
3568 int rc;
3569 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3570 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3571 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3572 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3573 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3574 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3575 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3576 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3577 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3578 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3579 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3580 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3581
3582 return VINF_SUCCESS;
3583}
3584
3585
3586/**
3587 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3588 * host-state area of the VMCS.
3589 *
3590 * These MSRs will be automatically restored on the host after every successful
3591 * VM-exit.
3592 *
3593 * @param pVCpu The cross context virtual CPU structure.
3594 *
3595 * @remarks No-long-jump zone!!!
3596 */
3597static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3598{
3599 AssertPtr(pVCpu);
3600
3601 /*
3602 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3603 * rather than swapping them on every VM-entry.
3604 */
3605 hmR0VmxLazySaveHostMsrs(pVCpu);
3606
3607 /*
3608 * Host Sysenter MSRs.
3609 */
3610 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3611 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3612 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3613
3614 /*
3615 * Host EFER MSR.
3616 *
3617 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3618 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3619 */
3620 if (g_fHmVmxSupportsVmcsEfer)
3621 {
3622 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3623 AssertRC(rc);
3624 }
3625
3626 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3627 * vmxHCExportGuestEntryExitCtls(). */
3628}
3629
3630
3631/**
3632 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3633 *
3634 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3635 * these two bits are handled by VM-entry, see vmxHCExportGuestEntryExitCtls().
3636 *
3637 * @returns true if we need to load guest EFER, false otherwise.
3638 * @param pVCpu The cross context virtual CPU structure.
3639 * @param pVmxTransient The VMX-transient structure.
3640 *
3641 * @remarks Requires EFER, CR4.
3642 * @remarks No-long-jump zone!!!
3643 */
3644static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3645{
3646#ifdef HMVMX_ALWAYS_SWAP_EFER
3647 RT_NOREF2(pVCpu, pVmxTransient);
3648 return true;
3649#else
3650 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3651 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3652 uint64_t const u64GuestEfer = pCtx->msrEFER;
3653
3654# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3655 /*
3656 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3657 * the nested-guest.
3658 */
3659 if ( pVmxTransient->fIsNestedGuest
3660 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3661 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3662 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3663 return true;
3664# else
3665 RT_NOREF(pVmxTransient);
3666# endif
3667
3668 /*
3669 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3670 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3671 */
3672 if ( CPUMIsGuestInLongModeEx(pCtx)
3673 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3674 return true;
3675
3676 /*
3677 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3678 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3679 *
3680 * See Intel spec. 4.5 "IA-32e Paging".
3681 * See Intel spec. 4.1.1 "Three Paging Modes".
3682 *
3683 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3684 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3685 */
3686 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3687 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3688 if ( (pCtx->cr4 & X86_CR4_PAE)
3689 && (pCtx->cr0 & X86_CR0_PG))
3690 {
3691 /*
3692 * If nested paging is not used, verify that the guest paging mode matches the
3693 * shadow paging mode which is/will be placed in the VMCS (which is what will
3694 * actually be used while executing the guest and not the CR4 shadow value).
3695 */
3696 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3697 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3698 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3699 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3700 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3701 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3702 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3703 {
3704 /* Verify that the host is NX capable. */
3705 Assert(g_CpumHostFeatures.s.fNoExecute);
3706 return true;
3707 }
3708 }
3709
3710 return false;
3711#endif
3712}
3713
3714
3715/**
3716 * Exports the guest's RSP into the guest-state area in the VMCS.
3717 *
3718 * @param pVCpu The cross context virtual CPU structure.
3719 *
3720 * @remarks No-long-jump zone!!!
3721 */
3722static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3723{
3724 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3725 {
3726 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3727
3728 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3729 AssertRC(rc);
3730
3731 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3732 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3733 }
3734}
3735
3736
3737/**
3738 * Exports the guest hardware-virtualization state.
3739 *
3740 * @returns VBox status code.
3741 * @param pVCpu The cross context virtual CPU structure.
3742 * @param pVmxTransient The VMX-transient structure.
3743 *
3744 * @remarks No-long-jump zone!!!
3745 */
3746static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3747{
3748 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3749 {
3750#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3751 /*
3752 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3753 * VMCS shadowing.
3754 */
3755 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3756 {
3757 /*
3758 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3759 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3760 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3761 *
3762 * We check for VMX root mode here in case the guest executes VMXOFF without
3763 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3764 * not clear the current VMCS pointer.
3765 */
3766 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3767 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3768 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3769 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3770 {
3771 /* Paranoia. */
3772 Assert(!pVmxTransient->fIsNestedGuest);
3773
3774 /*
3775 * For performance reasons, also check if the nested hypervisor's current VMCS
3776 * was newly loaded or modified before copying it to the shadow VMCS.
3777 */
3778 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3779 {
3780 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3781 AssertRCReturn(rc, rc);
3782 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3783 }
3784 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3785 }
3786 else
3787 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3788 }
3789#else
3790 NOREF(pVmxTransient);
3791#endif
3792 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3793 }
3794 return VINF_SUCCESS;
3795}
3796
3797
3798/**
3799 * Exports the guest debug registers into the guest-state area in the VMCS.
3800 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3801 *
3802 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3803 *
3804 * @returns VBox status code.
3805 * @param pVCpu The cross context virtual CPU structure.
3806 * @param pVmxTransient The VMX-transient structure.
3807 *
3808 * @remarks No-long-jump zone!!!
3809 */
3810static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3811{
3812 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3813
3814 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3815 * stepping. */
3816 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3817 if (pVmxTransient->fIsNestedGuest)
3818 {
3819 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3820 AssertRC(rc);
3821
3822 /*
3823 * We don't want to always intercept MOV DRx for nested-guests as it causes
3824 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3825 * Instead, they are strictly only requested when the nested hypervisor intercepts
3826 * them -- handled while merging VMCS controls.
3827 *
3828 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3829 * then the nested-guest debug state should be actively loaded on the host so that
3830         * the nested-guest can read its own debug registers without causing VM-exits.
3831 */
3832 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3833 && !CPUMIsGuestDebugStateActive(pVCpu))
3834 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3835 return VINF_SUCCESS;
3836 }
3837
3838#ifdef VBOX_STRICT
3839 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3840 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3841 {
3842 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3843 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3844 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3845 }
3846#endif
3847
3848 bool fSteppingDB = false;
3849 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3850 if (pVCpu->hm.s.fSingleInstruction)
3851 {
3852 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3853 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3854 {
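            /* The monitor trap flag makes the CPU VM-exit after executing a single guest
               instruction, giving us single-stepping without touching the guest's EFLAGS.TF. */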
3855 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3856 Assert(fSteppingDB == false);
3857 }
3858 else
3859 {
3860 pVCpu->cpum.GstCtx.eflags.u |= X86_EFL_TF;
3861 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3862 pVCpu->hmr0.s.fClearTrapFlag = true;
3863 fSteppingDB = true;
3864 }
3865 }
3866
3867#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3868 bool fInterceptMovDRx = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3869#else
3870 bool fInterceptMovDRx = false;
3871#endif
3872 uint64_t u64GuestDr7;
3873 if ( fSteppingDB
3874 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3875 {
3876 /*
3877 * Use the combined guest and host DRx values found in the hypervisor register set
3878 * because the hypervisor debugger has breakpoints active or someone is single stepping
3879 * on the host side without a monitor trap flag.
3880 *
3881 * Note! DBGF expects a clean DR6 state before executing guest code.
3882 */
3883 if (!CPUMIsHyperDebugStateActive(pVCpu))
3884 {
3885 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3886 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3887 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3888 }
3889
3890 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3891 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3892 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3893 fInterceptMovDRx = true;
3894 }
3895 else
3896 {
3897 /*
3898 * If the guest has enabled debug registers, we need to load them prior to
3899 * executing guest code so they'll trigger at the right time.
3900 */
3901 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3902 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3903 {
3904 if (!CPUMIsGuestDebugStateActive(pVCpu))
3905 {
3906 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3907 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3908 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3909 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3910 }
3911#ifndef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3912 Assert(!fInterceptMovDRx);
3913#endif
3914 }
3915 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3916 {
3917 /*
3918             * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3919 * must intercept #DB in order to maintain a correct DR6 guest value, and
3920 * because we need to intercept it to prevent nested #DBs from hanging the
3921 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3922 */
3923 fInterceptMovDRx = true;
3924 }
3925
3926 /* Update DR7 with the actual guest value. */
3927 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3928 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3929 }
3930
3931 if (fInterceptMovDRx)
3932 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3933 else
3934 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3935
3936 /*
3937 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3938 * monitor-trap flag and update our cache.
3939 */
3940 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3941 {
3942 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3943 AssertRC(rc);
3944 pVmcsInfo->u32ProcCtls = uProcCtls;
3945 }
3946
3947 /*
3948 * Update guest DR7.
3949 */
3950 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3951 AssertRC(rc);
3952
3953 /*
3954 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3955 * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
3956 *
3957 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3958 */
3959 if (fSteppingDB)
3960 {
3961 Assert(pVCpu->hm.s.fSingleInstruction);
3962 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3963
3964 uint32_t fIntrState = 0;
3965 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3966 AssertRC(rc);
3967
3968 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3969 {
3970 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3971 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3972 AssertRC(rc);
3973 }
3974 }
3975
3976 return VINF_SUCCESS;
3977}
3978
3979
3980/**
3981 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3982 * areas.
3983 *
3984 * These MSRs will automatically be loaded to the host CPU on every successful
3985 * VM-entry and stored from the host CPU on every successful VM-exit.
3986 *
3987 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3988 * actual host MSR values are not updated here for performance reasons. See
3989 * hmR0VmxExportHostMsrs().
3990 *
3991 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3992 *
3993 * @returns VBox status code.
3994 * @param pVCpu The cross context virtual CPU structure.
3995 * @param pVmxTransient The VMX-transient structure.
3996 *
3997 * @remarks No-long-jump zone!!!
3998 */
3999static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4000{
4001 AssertPtr(pVCpu);
4002 AssertPtr(pVmxTransient);
4003
4004 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4005 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4006
4007 /*
4008     * MSRs for which we use the auto-load/store MSR area in the VMCS.
4009 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
4010 * nothing to do here. The host MSR values are updated when it's safe in
4011 * hmR0VmxLazySaveHostMsrs().
4012 *
4013     * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
4014 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
4015 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
4016     * for any MSRs that are not part of the lazy MSRs, so we do not need to place
4017 * those MSRs into the auto-load/store MSR area. Nothing to do here.
4018 */
4019 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
4020 {
4021 /* No auto-load/store MSRs currently. */
4022 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
4023 }
4024
4025 /*
4026 * Guest Sysenter MSRs.
4027 */
4028 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
4029 {
4030 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
4031
4032 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
4033 {
4034 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
4035 AssertRC(rc);
4036 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
4037 }
4038
4039 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
4040 {
4041 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
4042 AssertRC(rc);
4043 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
4044 }
4045
4046 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
4047 {
4048 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
4049 AssertRC(rc);
4050 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4051 }
4052 }
4053
4054 /*
4055 * Guest/host EFER MSR.
4056 */
4057 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
4058 {
4059 /* Whether we are using the VMCS to swap the EFER MSR must have been
4060 determined earlier while exporting VM-entry/VM-exit controls. */
4061 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4062 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4063
4064 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4065 {
4066 /*
4067 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4068 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4069 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4070 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4071 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4072 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4073 * during VM-entry.
4074 */
4075 uint64_t uGuestEferMsr = pCtx->msrEFER;
4076 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4077 {
4078 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4079 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4080 else
4081 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4082 }
4083
4084 /*
4085 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4086 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4087 */
4088 if (g_fHmVmxSupportsVmcsEfer)
4089 {
4090 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4091 AssertRC(rc);
4092 }
4093 else
4094 {
4095 /*
4096 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4097 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4098 */
4099 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4100 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4101 AssertRCReturn(rc, rc);
4102 }
4103
4104 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4105 }
4106 else if (!g_fHmVmxSupportsVmcsEfer)
4107 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4108
4109 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4110 }
4111
4112 /*
4113 * Other MSRs.
4114 */
4115 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4116 {
4117 /* Speculation Control (R/W). */
4118 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4119 if (pVM->cpum.ro.GuestFeatures.fIbrs && g_CpumHostFeatures.s.fIbrs)
4120 {
4121 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4122 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4123 AssertRCReturn(rc, rc);
4124 }
4125
4126 /* Last Branch Record. */
4127 if (pVM->hmr0.s.vmx.fLbr)
4128 {
4129 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4130 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4131 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4132 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4133 Assert(cLbrStack <= 32);
4134 for (uint32_t i = 0; i < cLbrStack; i++)
4135 {
4136 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4137 pVmcsInfoShared->au64LbrFromIpMsr[i],
4138 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4139 AssertRCReturn(rc, rc);
4140
4141 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4142 if (idToIpMsrStart != 0)
4143 {
4144 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4145 pVmcsInfoShared->au64LbrToIpMsr[i],
4146 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4147 AssertRCReturn(rc, rc);
4148 }
4149 }
4150
4151 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4152 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4153 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4154 false /* fUpdateHostMsr */);
4155 AssertRCReturn(rc, rc);
4156 }
4157
4158 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4159 }
4160
4161 return VINF_SUCCESS;
4162}
4163
4164
4165/**
4166 * Wrapper for running the guest code in VT-x.
4167 *
4168 * @returns VBox status code, no informational status codes.
4169 * @param pVCpu The cross context virtual CPU structure.
4170 * @param pVmxTransient The VMX-transient structure.
4171 *
4172 * @remarks No-long-jump zone!!!
4173 */
4174DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4175{
4176 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4177 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4178
4179 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4180 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState == VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
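    /* A VMCS in the launched state must be entered using VMRESUME while a cleared VMCS requires
       VMLAUNCH; fResumeVM tells the start-VM routine which instruction to use. */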
4181#ifdef VBOX_WITH_STATISTICS
4182 if (fResumeVM)
4183 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4184 else
4185 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4186#endif
4187 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4188 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4189 return rc;
4190}
4191
4192
4193/**
4194 * Reports world-switch error and dumps some useful debug info.
4195 *
4196 * @param pVCpu The cross context virtual CPU structure.
4197 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4198 * @param pVmxTransient The VMX-transient structure (only
4199 * exitReason updated).
4200 */
4201static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4202{
4203 Assert(pVCpu);
4204 Assert(pVmxTransient);
4205 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4206
4207 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4208 switch (rcVMRun)
4209 {
4210 case VERR_VMX_INVALID_VMXON_PTR:
4211 AssertFailed();
4212 break;
4213 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4214 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4215 {
4216 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4217 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4218 AssertRC(rc);
4219 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4220
4221 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4222 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4223 Cannot do it here as we may have been long preempted. */
4224
4225#ifdef VBOX_STRICT
4226 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4227 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4228 pVmxTransient->uExitReason));
4229 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4230 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4231 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4232 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4233 else
4234 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4235 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4236 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4237
4238 static struct
4239 {
4240 /** Name of the field to log. */
4241 const char *pszName;
4242 /** The VMCS field. */
4243 uint32_t uVmcsField;
4244 /** Whether host support of this field needs to be checked. */
4245 bool fCheckSupport;
4246 } const s_aVmcsFields[] =
4247 {
4248 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4249 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4250 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4251 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4252 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4253 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4254 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4255 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4256 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4257 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4258 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4259 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4260 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4261 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4262 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4263 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4264 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4265 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4266 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4267 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4268 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4269 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4270 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4271 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4272 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4273 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4274 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4275 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4276 /* The order of selector fields below are fixed! */
4277 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4278 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4279 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4280 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4281 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4282 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4283 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4284 /* End of ordered selector fields. */
4285 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4286 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4287 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4288 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4289 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4290 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4291 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4292 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4293 };
4294
4295 RTGDTR HostGdtr;
4296 ASMGetGDTR(&HostGdtr);
4297
4298 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4299 for (uint32_t i = 0; i < cVmcsFields; i++)
4300 {
4301 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4302
4303 bool fSupported;
4304 if (!s_aVmcsFields[i].fCheckSupport)
4305 fSupported = true;
4306 else
4307 {
4308 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4309 switch (uVmcsField)
4310 {
4311 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4312 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4313 case VMX_VMCS32_CTRL_PROC_EXEC2:
4314 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4315 break;
4316 default:
4317 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4318 }
4319 }
4320
4321 if (fSupported)
4322 {
4323 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4324 switch (uWidth)
4325 {
4326 case VMX_VMCSFIELD_WIDTH_16BIT:
4327 {
4328 uint16_t u16Val;
4329 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4330 AssertRC(rc);
4331 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4332
4333 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4334 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4335 {
4336 if (u16Val < HostGdtr.cbGdt)
4337 {
4338 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4339 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4340 "Host FS", "Host GS", "Host TR" };
4341 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4342 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4343 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4344 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4345 }
4346 else
4347 Log4((" Selector value exceeds GDT limit!\n"));
4348 }
4349 break;
4350 }
4351
4352 case VMX_VMCSFIELD_WIDTH_32BIT:
4353 {
4354 uint32_t u32Val;
4355 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4356 AssertRC(rc);
4357 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4358 break;
4359 }
4360
4361 case VMX_VMCSFIELD_WIDTH_64BIT:
4362 case VMX_VMCSFIELD_WIDTH_NATURAL:
4363 {
4364 uint64_t u64Val;
4365 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4366 AssertRC(rc);
4367 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4368 break;
4369 }
4370 }
4371 }
4372 }
4373
4374 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4375 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4376 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4377 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4378 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4379 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4380#endif /* VBOX_STRICT */
4381 break;
4382 }
4383
4384 default:
4385 /* Impossible */
4386 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4387 break;
4388 }
4389}
4390
4391
4392/**
4393 * Sets up the usage of TSC-offsetting and updates the VMCS.
4394 *
4395 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4396 * VMX-preemption timer.
4397 *
4398 * @param pVCpu The cross context virtual CPU structure.
4399 * @param pVmxTransient The VMX-transient structure.
4400 * @param idCurrentCpu The current CPU number.
4401 *
4402 * @remarks No-long-jump zone!!!
4403 */
4404static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4405{
4406 bool fOffsettedTsc;
4407 bool fParavirtTsc;
4408 uint64_t uTscOffset;
4409 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4410
4411 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4412 {
4413        /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4414           every entry slowed down the bs2-test1 CPUID testcase by ~33% (on a 10980xe)). */
4415 uint64_t cTicksToDeadline;
4416 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4417 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4418 {
4419 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4420 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4421 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
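            /* If the deadline has already passed, the unsigned subtraction above wraps to a huge
               value; the signed check below catches that and clamps the wait to zero. */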
4422 if ((int64_t)cTicksToDeadline > 0)
4423 { /* hopefully */ }
4424 else
4425 {
4426 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4427 cTicksToDeadline = 0;
4428 }
4429 }
4430 else
4431 {
4432 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4433 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4434 &pVCpu->hmr0.s.vmx.uTscDeadline,
4435 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4436 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4437 if (cTicksToDeadline >= 128)
4438 { /* hopefully */ }
4439 else
4440 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4441 }
4442
4443 /* Make sure the returned values have sane upper and lower boundaries. */
4444 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4445 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4446        cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
4447 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
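        /* The VMX-preemption timer counts down at the TSC rate divided by 2^cPreemptTimerShift
           (reported by the VMX MISC MSR), so the shift above converts TSC ticks into
           preemption-timer ticks. */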
4448
4449 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4450 * preemption timers here. We probably need to clamp the preemption timer,
4451 * after converting the timer value to the host. */
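        /* The preemption-timer value field in the VMCS is only 32 bits wide, hence the clamping
           below; the small margin under UINT32_MAX is presumably just for safety. */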
4452 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4453 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4454 AssertRC(rc);
4455 }
4456 else
4457 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4458
4459 if (fParavirtTsc)
4460 {
4461        /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
4462           information before every VM-entry, hence we disable this for performance's sake. */
4463#if 0
4464 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4465 AssertRC(rc);
4466#endif
4467 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4468 }
4469
4470 if ( fOffsettedTsc
4471 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4472 {
4473 if (pVmxTransient->fIsNestedGuest)
4474 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4475 hmR0VmxSetTscOffsetVmcs(pVmxTransient->pVmcsInfo, uTscOffset);
4476 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4477 }
4478 else
4479 {
4480 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4481 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4482 }
4483}
4484
4485
4486/**
4487 * Saves the guest state from the VMCS into the guest-CPU context.
4488 *
4489 * @returns VBox status code.
4490 * @param pVCpu The cross context virtual CPU structure.
4491 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4492 */
4493VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4494{
4495 AssertPtr(pVCpu);
4496 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4497 return vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, fWhat);
4498}
4499
4500
4501/**
4502 * Gets VMX VM-exit auxiliary information.
4503 *
4504 * @returns VBox status code.
4505 * @param pVCpu The cross context virtual CPU structure.
4506 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4507 * @param fWhat What to fetch, HMVMX_READ_XXX.
4508 */
4509VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4510{
4511 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4512 if (RT_LIKELY(pVmxTransient))
4513 {
4514 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4515
4516 /* The exit reason is always available. */
4517 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4518
4519
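        /* Note: in strict builds each handled flag is masked out of fWhat below, so the assertion
           at the end of this function catches any unknown/unhandled HMVMX_READ_XXX flags. */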
4520 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4521 {
4522 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4523 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4524#ifdef VBOX_STRICT
4525 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4526#endif
4527 }
4528
4529 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4530 {
4531 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_INFO>(pVCpu, pVmxTransient);
4532 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4533#ifdef VBOX_STRICT
4534 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4535#endif
4536 }
4537
4538 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4539 {
4540 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_ERROR_CODE>(pVCpu, pVmxTransient);
4541 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4542#ifdef VBOX_STRICT
4543 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4544#endif
4545 }
4546
4547 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4548 {
4549 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_LEN>(pVCpu, pVmxTransient);
4550 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4551#ifdef VBOX_STRICT
4552 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4553#endif
4554 }
4555
4556 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4557 {
4558 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_INFO>(pVCpu, pVmxTransient);
4559 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4560#ifdef VBOX_STRICT
4561 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4562#endif
4563 }
4564
4565 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4566 {
4567 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE>(pVCpu, pVmxTransient);
4568 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4569#ifdef VBOX_STRICT
4570 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4571#endif
4572 }
4573
4574 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4575 {
4576 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_INFO>(pVCpu, pVmxTransient);
4577 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4578#ifdef VBOX_STRICT
4579 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4580#endif
4581 }
4582
4583 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4584 {
4585 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_LINEAR_ADDR>(pVCpu, pVmxTransient);
4586 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4587#ifdef VBOX_STRICT
4588 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4589#endif
4590 }
4591
4592 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4593 {
4594 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PHYSICAL_ADDR>(pVCpu, pVmxTransient);
4595 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4596#ifdef VBOX_STRICT
4597 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4598#endif
4599 }
4600
4601 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4602 {
4603#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4604 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PENDING_DBG_XCPTS>(pVCpu, pVmxTransient);
4605 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4606#else
4607 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4608#endif
4609#ifdef VBOX_STRICT
4610 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4611#endif
4612 }
4613
4614 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4615 return VINF_SUCCESS;
4616 }
4617 return VERR_NOT_AVAILABLE;
4618}
4619
4620
4621/**
4622 * Does the necessary state syncing before returning to ring-3 for any reason
4623 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4624 *
4625 * @returns VBox status code.
4626 * @param pVCpu The cross context virtual CPU structure.
4627 * @param fImportState Whether to import the guest state from the VMCS back
4628 * to the guest-CPU context.
4629 *
4630 * @remarks No-long-jmp zone!!!
4631 */
4632static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4633{
4634 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4635 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4636
4637 RTCPUID const idCpu = RTMpCpuId();
4638 Log4Func(("HostCpuId=%u\n", idCpu));
4639
4640 /*
4641 * !!! IMPORTANT !!!
4642 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4643 */
4644
4645 /* Save the guest state if necessary. */
4646 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4647 if (fImportState)
4648 {
4649 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4650 AssertRCReturn(rc, rc);
4651 }
4652
4653 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4654 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4655 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4656
4657 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4658#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
4659 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4660 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4661 || (!CPUMIsHyperDebugStateActive(pVCpu) && !pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx));
4662#else
4663 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4664 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4665 || !CPUMIsHyperDebugStateActive(pVCpu));
4666#endif
4667 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4668 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4669 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4670
4671 /* Restore host-state bits that VT-x only restores partially. */
4672 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4673 {
4674 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4675 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4676 }
4677 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4678
4679 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4680 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4681 {
4682 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4683 if (!fImportState)
4684 {
4685 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4686 AssertRCReturn(rc, rc);
4687 }
4688 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4689 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4690 }
4691 else
4692 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4693
4694 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4695 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4696
4697 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
4698 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
4699 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
4700 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
4701 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
4702 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
4703 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
4704 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
4705 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
4706 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4707
4708 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4709
4710 /** @todo This partially defeats the purpose of having preemption hooks.
4711     * The problem is that deregistering the hooks should be moved to a place that
4712     * lasts until the EMT is about to be destroyed, not done every time we leave HM
4713 * context.
4714 */
4715 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4716 AssertRCReturn(rc, rc);
4717
4718#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4719 /*
4720 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
4721 * clear a shadow VMCS before allowing that VMCS to become active on another
4722 * logical processor. We may or may not be importing guest state which clears
4723 * it, so cover for it here.
4724 *
4725 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
4726 */
4727 if ( pVmcsInfo->pvShadowVmcs
4728 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
4729 {
4730 rc = vmxHCClearShadowVmcs(pVmcsInfo);
4731 AssertRCReturn(rc, rc);
4732 }
4733
4734 /*
4735 * Flag that we need to re-export the host state if we switch to this VMCS before
4736 * executing guest or nested-guest code.
4737 */
4738 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
4739#endif
4740
4741 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
4742 NOREF(idCpu);
4743 return VINF_SUCCESS;
4744}
4745
4746
4747/**
4748 * Leaves the VT-x session.
4749 *
4750 * @returns VBox status code.
4751 * @param pVCpu The cross context virtual CPU structure.
4752 *
4753 * @remarks No-long-jmp zone!!!
4754 */
4755static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
4756{
4757 HM_DISABLE_PREEMPT(pVCpu);
4758 HMVMX_ASSERT_CPU_SAFE(pVCpu);
4759 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4760 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4761
4762 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
4763 and done this from the VMXR0ThreadCtxCallback(). */
4764 if (!pVCpu->hmr0.s.fLeaveDone)
4765 {
4766 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
4767 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
4768 pVCpu->hmr0.s.fLeaveDone = true;
4769 }
4770 Assert(!pVCpu->cpum.GstCtx.fExtrn);
4771
4772 /*
4773 * !!! IMPORTANT !!!
4774 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
4775 */
4776
4777 /* Deregister hook now that we've left HM context before re-enabling preemption. */
4778 /** @todo Deregistering here means we need to VMCLEAR always
4779 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
4780 * for calling VMMR0ThreadCtxHookDisable here! */
4781 VMMR0ThreadCtxHookDisable(pVCpu);
4782
4783 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
4784 int rc = HMR0LeaveCpu(pVCpu);
4785 HM_RESTORE_PREEMPT();
4786 return rc;
4787}
4788
4789
4790/**
4791 * Takes the necessary actions before going back to ring-3.
4792 *
4793 * An action requires us to go back to ring-3. This function does the necessary
4794 * steps before we can safely return to ring-3. This is not the same as longjmps
4795 * to ring-3; this is voluntary and prepares the guest so it may continue
4796 * executing outside HM (recompiler/IEM).
4797 *
4798 * @returns VBox status code.
4799 * @param pVCpu The cross context virtual CPU structure.
4800 * @param rcExit The reason for exiting to ring-3. Can be
4801 * VINF_VMM_UNKNOWN_RING3_CALL.
4802 */
4803static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
4804{
4805 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4806
4807 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4808 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
4809 {
4810 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
4811 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
4812 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4813 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
4814 }
4815
4816    /* Please, no longjumps here (a logging flush must not jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
4817 VMMRZCallRing3Disable(pVCpu);
4818 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
4819
4820 /*
4821 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
4822 * We need to do this only on returns to ring-3 and not for longjmps to ring3.
4823 *
4824 * This is because execution may continue from ring-3 and we would need to inject
4825 * the event from there (hence place it back in TRPM).
4826 */
4827 if (pVCpu->hm.s.Event.fPending)
4828 {
4829 vmxHCPendingEventToTrpmTrap(pVCpu);
4830 Assert(!pVCpu->hm.s.Event.fPending);
4831
4832 /* Clear the events from the VMCS. */
4833 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
4834 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
4835 }
4836#ifdef VBOX_STRICT
4837 /*
4838 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
4839 * fatal), we don't care about verifying duplicate injection of events. Errors like
4840 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
4841 * function so those should and will be checked below.
4842 */
4843 else if (RT_SUCCESS(rcExit))
4844 {
4845 /*
4846 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
4847     * This can be pretty hard to debug otherwise, as interrupts might occasionally get
4848     * injected twice; see @bugref{9180#c42}.
4849 *
4850     * However, if the VM-entry failed, the VM-entry interruption-information field would
4851     * be left unmodified as the event would not have been injected into the guest. In
4852     * such cases, don't assert; we're not going to continue guest execution anyway.
4853 */
4854 uint32_t uExitReason;
4855 uint32_t uEntryIntInfo;
4856 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
4857 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
4858 AssertRC(rc);
4859 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
4860 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
4861 }
4862#endif
4863
4864 /*
4865 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
4866 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
4867 * (e.g. TPR below threshold).
4868 */
4869 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4870 {
4871 Assert(!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
4872 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
4873 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
4874 }
4875
4876    /* If we're emulating an instruction, we shouldn't have any TRPM traps pending,
4877       and if we're injecting an event we should have a TRPM trap pending. */
4878 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4879#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
4880 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4881#endif
4882
4883 /* Save guest state and restore host state bits. */
4884 int rc = hmR0VmxLeaveSession(pVCpu);
4885 AssertRCReturn(rc, rc);
4886 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4887
4888 /* Thread-context hooks are unregistered at this point!!! */
4889 /* Ring-3 callback notifications are unregistered at this point!!! */
4890
4891 /* Sync recompiler state. */
4892 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4893 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
4894 | CPUM_CHANGED_LDTR
4895 | CPUM_CHANGED_GDTR
4896 | CPUM_CHANGED_IDTR
4897 | CPUM_CHANGED_TR
4898 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4899 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
4900 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
4901 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
4902
4903 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
4904
4905    /* Update the exit-to-ring-3 reason. */
4906 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
4907
4908 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
4909 if ( rcExit != VINF_EM_RAW_INTERRUPT
4910 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4911 {
4912 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
4913 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
4914 }
4915
4916 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
4917 VMMRZCallRing3Enable(pVCpu);
4918 return rc;
4919}
4920
4921
4922/**
4923 * VMMRZCallRing3() callback wrapper which saves the guest state before we
4924 * longjump due to a ring-0 assertion.
4925 *
4926 * @returns VBox status code.
4927 * @param pVCpu The cross context virtual CPU structure.
4928 */
4929VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
4930{
4931 /*
4932 * !!! IMPORTANT !!!
4933 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
4934 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
4935 */
4936 VMMR0AssertionRemoveNotification(pVCpu);
4937 VMMRZCallRing3Disable(pVCpu);
4938 HM_DISABLE_PREEMPT(pVCpu);
4939
4940 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4941 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4942 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4943 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4944
4945 /* Restore host-state bits that VT-x only restores partially. */
4946 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4947 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4948 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4949
4950 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4951 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4952 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4953
4954 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4955 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4956 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4957
4958    /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
4959       cleared as part of importing the guest state above). */
4960 hmR0VmxClearVmcs(pVmcsInfo);
4961
4962 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
4963 VMMR0ThreadCtxHookDisable(pVCpu);
4964
4965 /* Leave HM context. This takes care of local init (term). */
4966 HMR0LeaveCpu(pVCpu);
4967 HM_RESTORE_PREEMPT();
4968 return VINF_SUCCESS;
4969}
4970
4971
4972/**
4973 * Enters the VT-x session.
4974 *
4975 * @returns VBox status code.
4976 * @param pVCpu The cross context virtual CPU structure.
4977 */
4978VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
4979{
4980 AssertPtr(pVCpu);
4981 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
4982 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4983
4984 LogFlowFunc(("pVCpu=%p\n", pVCpu));
4985 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
4986 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
4987
4988#ifdef VBOX_STRICT
4989 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
4990 RTCCUINTREG uHostCr4 = ASMGetCR4();
4991 if (!(uHostCr4 & X86_CR4_VMXE))
4992 {
4993 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
4994 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4995 }
4996#endif
4997
4998 /*
4999 * Do the EMT scheduled L1D and MDS flush here if needed.
5000 */
5001 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5002 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5003 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5004 hmR0MdsClear();
5005
5006 /*
5007 * Load the appropriate VMCS as the current and active one.
5008 */
5009 PVMXVMCSINFO pVmcsInfo;
5010 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5011 if (!fInNestedGuestMode)
5012 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5013 else
5014 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5015 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5016 if (RT_SUCCESS(rc))
5017 {
5018 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5019 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5020 pVCpu->hmr0.s.fLeaveDone = false;
5021 Log4Func(("Loaded %s Vmcs. HostCpuId=%u\n", fInNestedGuestMode ? "nested-guest" : "guest", RTMpCpuId()));
5022 }
5023 return rc;
5024}
5025
5026
5027/**
5028 * The thread-context callback.
5029 *
5030 * This is used together with RTThreadCtxHookCreate() on platforms which
5031 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5032 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5033 *
5034 * @param enmEvent The thread-context event.
5035 * @param pVCpu The cross context virtual CPU structure.
5036 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5037 * @thread EMT(pVCpu)
5038 */
5039VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5040{
5041 AssertPtr(pVCpu);
5042 RT_NOREF1(fGlobalInit);
5043
5044 switch (enmEvent)
5045 {
5046 case RTTHREADCTXEVENT_OUT:
5047 {
5048 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5049 VMCPU_ASSERT_EMT(pVCpu);
5050
5051 /* No longjmps (logger flushes, locks) in this fragile context. */
5052 VMMRZCallRing3Disable(pVCpu);
5053 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5054
5055 /* Restore host-state (FPU, debug etc.) */
5056 if (!pVCpu->hmr0.s.fLeaveDone)
5057 {
5058 /*
5059 * Do -not- import the guest-state here as we might already be in the middle of importing
5060 * it, esp. bad if we're holding the PGM lock, see comment at the end of vmxHCImportGuestStateEx().
5061 */
5062 hmR0VmxLeave(pVCpu, false /* fImportState */);
5063 pVCpu->hmr0.s.fLeaveDone = true;
5064 }
5065
5066 /* Leave HM context, takes care of local init (term). */
5067 int rc = HMR0LeaveCpu(pVCpu);
5068 AssertRC(rc);
5069
5070 /* Restore longjmp state. */
5071 VMMRZCallRing3Enable(pVCpu);
5072 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5073 break;
5074 }
5075
5076 case RTTHREADCTXEVENT_IN:
5077 {
5078 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5079 VMCPU_ASSERT_EMT(pVCpu);
5080
5081 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5082 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5083 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5084 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5085 hmR0MdsClear();
5086
5087 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5088 VMMRZCallRing3Disable(pVCpu);
5089 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5090
5091 /* Initialize the bare minimum state required for HM. This takes care of
5092 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5093 int rc = hmR0EnterCpu(pVCpu);
5094 AssertRC(rc);
5095 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5096 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5097
5098 /* Load the active VMCS as the current one. */
5099 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5100 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5101 AssertRC(rc);
5102 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5103 pVCpu->hmr0.s.fLeaveDone = false;
5104
5105 /* Restore longjmp state. */
5106 VMMRZCallRing3Enable(pVCpu);
5107 break;
5108 }
5109
5110 default:
5111 break;
5112 }
5113}
5114
5115
5116/**
5117 * Exports the host state into the VMCS host-state area.
5118 * Sets up the VM-exit MSR-load area.
5119 *
5120 * The CPU state will be loaded from these fields on every successful VM-exit.
5121 *
5122 * @returns VBox status code.
5123 * @param pVCpu The cross context virtual CPU structure.
5124 *
5125 * @remarks No-long-jump zone!!!
5126 */
5127static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5128{
5129 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5130
5131 int rc = VINF_SUCCESS;
5132 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5133 {
5134 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5135
5136 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5137 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5138
5139 hmR0VmxExportHostMsrs(pVCpu);
5140
5141 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5142 }
5143 return rc;
5144}
5145
5146
5147/**
5148 * Saves the host state in the VMCS host-state.
5149 *
5150 * @returns VBox status code.
5151 * @param pVCpu The cross context virtual CPU structure.
5152 *
5153 * @remarks No-long-jump zone!!!
5154 */
5155VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5156{
5157 AssertPtr(pVCpu);
5158 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5159
5160 /*
5161 * Export the host state here while entering HM context.
5162 * When thread-context hooks are used, we might get preempted and have to re-save the host
5163 * state but most of the time we won't be, so do it here before we disable interrupts.
5164 */
5165 return hmR0VmxExportHostState(pVCpu);
5166}
5167
5168
5169/**
5170 * Exports the guest state into the VMCS guest-state area.
5171 *
5172 * This will typically be done before VM-entry when the guest-CPU state and the
5173 * VMCS state may potentially be out of sync.
5174 *
5175 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5176 * VM-entry controls.
5177 * Sets up the appropriate VMX non-root function to execute guest code based on
5178 * the guest CPU mode.
5179 *
5180 * @returns VBox strict status code.
5181 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5182 * without unrestricted guest execution and the VMMDev is not presently
5183 * mapped (e.g. EFI32).
5184 *
5185 * @param pVCpu The cross context virtual CPU structure.
5186 * @param pVmxTransient The VMX-transient structure.
5187 *
5188 * @remarks No-long-jump zone!!!
5189 */
5190static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5191{
5192 AssertPtr(pVCpu);
5193 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5194 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5195
5196 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5197
5198 /*
5199 * Determine real-on-v86 mode.
5200 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5201 */
5202 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5203 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5204 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5205 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5206 else
5207 {
5208 Assert(!pVmxTransient->fIsNestedGuest);
5209 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5210 }
5211
5212 /*
5213 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5214 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5215 */
5216 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5217 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5218
5219 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5220 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5221
5222 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5223 if (rcStrict == VINF_SUCCESS)
5224 { /* likely */ }
5225 else
5226 {
5227 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5228 return rcStrict;
5229 }
5230
5231 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5232 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5233
5234 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5235 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5236
5237 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5238 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5239 vmxHCExportGuestRip(pVCpu);
5240 hmR0VmxExportGuestRsp(pVCpu);
5241 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5242
5243 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5244 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5245
5246 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
5247 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5248 | HM_CHANGED_GUEST_CR2
5249 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5250 | HM_CHANGED_GUEST_X87
5251 | HM_CHANGED_GUEST_SSE_AVX
5252 | HM_CHANGED_GUEST_OTHER_XSAVE
5253 | HM_CHANGED_GUEST_XCRx
5254 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5255 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5256 | HM_CHANGED_GUEST_TSC_AUX
5257 | HM_CHANGED_GUEST_OTHER_MSRS
5258 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5259
5260 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5261 return rc;
5262}
5263
5264
5265/**
5266 * Exports the state shared between the host and guest into the VMCS.
5267 *
5268 * @param pVCpu The cross context virtual CPU structure.
5269 * @param pVmxTransient The VMX-transient structure.
5270 *
5271 * @remarks No-long-jump zone!!!
5272 */
5273static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5274{
5275 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5276 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5277
5278 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5279 {
5280 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5281 AssertRC(rc);
5282 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5283
5284 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5285 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5286 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5287 }
5288
5289 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5290 {
5291 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5292 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5293 }
5294
5295 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5296 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5297}
5298
5299
5300/**
5301 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5302 *
5303 * @returns Strict VBox status code (i.e. informational status codes too).
5304 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5305 * without unrestricted guest execution and the VMMDev is not presently
5306 * mapped (e.g. EFI32).
5307 *
5308 * @param pVCpu The cross context virtual CPU structure.
5309 * @param pVmxTransient The VMX-transient structure.
5310 *
5311 * @remarks No-long-jump zone!!!
5312 */
5313static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5314{
5315 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5316 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5317
5318#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5319 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5320#endif
5321
5322 /*
5323 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5324     * change. First try to export only these without going through all other changed-flag checks.
5325 */
5326 VBOXSTRICTRC rcStrict;
5327 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5328 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5329 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5330
5331    /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often). */
5332 if ( (fCtxChanged & fMinimalMask)
5333 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5334 {
5335 vmxHCExportGuestRip(pVCpu);
5336 hmR0VmxExportGuestRsp(pVCpu);
5337 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5338 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5339 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5340 }
5341 /* If anything else also changed, go through the full export routine and export as required. */
5342 else if (fCtxChanged & fCtxMask)
5343 {
5344 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5345 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5346 { /* likely */}
5347 else
5348 {
5349 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5350 VBOXSTRICTRC_VAL(rcStrict)));
5351 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5352 return rcStrict;
5353 }
5354 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5355 }
5356 /* Nothing changed, nothing to load here. */
5357 else
5358 rcStrict = VINF_SUCCESS;
5359
5360#ifdef VBOX_STRICT
5361 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5362 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5363 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5364#endif
5365 return rcStrict;
5366}
5367
5368
5369/**
5370 * Map the APIC-access page for virtualizing APIC accesses.
5371 *
5372 * This can cause longjmps to ring-3 due to the acquisition of the PGM lock. Hence,
5373 * this is not done as part of exporting guest state; see @bugref{8721}.
5374 *
5375 * @returns VBox status code.
5376 * @param pVCpu The cross context virtual CPU structure.
5377 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5378 */
5379static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5380{
5381 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5382 Assert(GCPhysApicBase);
5383
5384 LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5385
5386 /* Unalias the existing mapping. */
5387 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5388 AssertRCReturn(rc, rc);
5389
5390 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5391 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5392 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5393 AssertRCReturn(rc, rc);
5394
5395 return VINF_SUCCESS;
5396}
5397
5398
5399/**
5400 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5401 * CPU.
5402 *
5403 * @param idCpu The ID for the CPU the function is called on.
5404 * @param pvUser1 Null, not used.
5405 * @param pvUser2 Null, not used.
5406 */
5407static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5408{
5409 RT_NOREF3(idCpu, pvUser1, pvUser2);
5410 VMXDispatchHostNmi();
5411}
5412
5413
5414/**
5415 * Dispatches an NMI on the host CPU that received it.
5416 *
5417 * @returns VBox status code.
5418 * @param pVCpu The cross context virtual CPU structure.
5419 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5420 * executing when receiving the host NMI in VMX non-root
5421 * operation.
5422 */
5423static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5424{
5425 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5426 Assert(idCpu != NIL_RTCPUID);
5427
5428 /*
5429 * We don't want to delay dispatching the NMI any more than we have to. However,
5430 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5431 * after executing guest or nested-guest code for the following reasons:
5432 *
5433     *   - We would need to perform VMREADs with interrupts disabled, which is orders of
5434 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
5435 * supported by the host hypervisor.
5436 *
5437 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5438 * longer period of time just for handling an edge case like host NMIs which do
5439 * not occur nearly as frequently as other VM-exits.
5440 *
5441 * Let's cover the most likely scenario first. Check if we are on the target CPU
5442 * and dispatch the NMI right away. This should be much faster than calling into
5443 * RTMpOnSpecific() machinery.
5444 */
5445 bool fDispatched = false;
5446 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
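    /* With interrupts disabled we cannot be rescheduled to another host CPU between the
       RTMpCpuId() check and dispatching the NMI below. */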
5447 if (idCpu == RTMpCpuId())
5448 {
5449 VMXDispatchHostNmi();
5450 fDispatched = true;
5451 }
5452 ASMSetFlags(fEFlags);
5453 if (fDispatched)
5454 {
5455 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5456 return VINF_SUCCESS;
5457 }
5458
5459 /*
5460 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5461 * there should be no race or recursion even if we are unlucky enough to be preempted
5462 * (to the target CPU) without dispatching the host NMI above.
5463 */
5464 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5465 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5466}
5467
5468
5469#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5470/**
5471 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5472 * nested-guest using hardware-assisted VMX.
5473 *
5474 * @param pVCpu The cross context virtual CPU structure.
5475 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5476 * @param pVmcsInfoGst The guest VMCS info. object.
5477 */
5478static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5479{
5480 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5481 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5482 Assert(pu64MsrBitmap);
5483
5484 /*
5485 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5486 * MSR that is intercepted by the guest is also intercepted while executing the
5487 * nested-guest using hardware-assisted VMX.
5488 *
5489 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5490 * nested-guest VM-exit even if the outer guest is not intercepting some
5491 * MSRs. We cannot assume the caller has initialized the nested-guest
5492 * MSR bitmap in this case.
5493 *
5494 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5495 * each of its VM-entry, hence initializing it once per-VM while setting
5496 * up the nested-guest VMCS is not sufficient.
5497 */
5498 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5499 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5500 {
5501 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5502 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5503 Assert(pu64MsrBitmapNstGst);
5504 Assert(pu64MsrBitmapGst);
5505
5506 /** @todo Detect and use EVEX.POR? */
5507 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
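        /* A set bit in an MSR bitmap means "intercept", so OR-ing the two bitmaps intercepts an
           MSR access if either the guest or the nested-guest wants it intercepted. */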
5508 for (uint32_t i = 0; i < cFrags; i++)
5509 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5510 }
5511 else
5512 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5513}
5514
5515
5516/**
5517 * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
5518 * hardware-assisted VMX execution of the nested-guest.
5519 *
5520 * For a guest, we don't modify these controls once we set up the VMCS and hence
5521 * this function is never called.
5522 *
5523 * For nested-guests, since the nested hypervisor provides these controls on every
5524 * nested-guest VM-entry and could potentially change them every time, we need to
5525 * merge them before every nested-guest VM-entry.
5526 *
5527 * @returns VBox status code.
5528 * @param pVCpu The cross context virtual CPU structure.
5529 */
5530static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5531{
5532 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5533 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5534 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5535
5536 /*
5537 * Merge the controls with the requirements of the guest VMCS.
5538 *
5539 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5540 * VMCS with the features supported by the physical CPU as it's already done by the
5541 * VMLAUNCH/VMRESUME instruction emulation.
5542 *
5543 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5544 * derived from the VMX features supported by the physical CPU.
5545 */
5546
5547 /* Pin-based VM-execution controls. */
5548 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5549
5550 /* Processor-based VM-execution controls. */
5551 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5552 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5553 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5554 | VMX_PROC_CTLS_MOV_DR_EXIT /* hmR0VmxExportSharedDebugState makes
5555 sure guest DRx regs are loaded. */
5556 | VMX_PROC_CTLS_USE_TPR_SHADOW
5557 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5558
5559 /* Secondary processor-based VM-execution controls. */
5560 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5561 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5562 | VMX_PROC_CTLS2_INVPCID
5563 | VMX_PROC_CTLS2_VMCS_SHADOWING
5564 | VMX_PROC_CTLS2_RDTSCP
5565 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5566 | VMX_PROC_CTLS2_APIC_REG_VIRT
5567 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5568 | VMX_PROC_CTLS2_VMFUNC));
5569
5570 /*
5571 * VM-entry controls:
5572     * These controls contain state that depends on the nested-guest state (primarily
5573 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5574 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5575 * properly continue executing the nested-guest if the EFER MSR changes but does not
5576     * cause a nested-guest VM-exit.
5577 *
5578 * VM-exit controls:
5579 * These controls specify the host state on return. We cannot use the controls from
5580     * the nested hypervisor state as-is, as they would contain the guest state rather than
5581 * the host state. Since the host state is subject to change (e.g. preemption, trips
5582 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5583 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5584 *
5585 * VM-entry MSR-load:
5586 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5587 * context by the VMLAUNCH/VMRESUME instruction emulation.
5588 *
5589 * VM-exit MSR-store:
5590 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5591 * back into the VM-exit MSR-store area.
5592 *
5593 * VM-exit MSR-load areas:
5594 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5595 * can entirely ignore what the nested hypervisor wants to load here.
5596 */
5597
5598 /*
5599 * Exception bitmap.
5600 *
5601 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5602 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5603     * code more flexible if intercepting exceptions becomes more dynamic in the future, we do
5604 * it as part of exporting the nested-guest state.
5605 */
5606 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5607
5608 /*
5609 * CR0/CR4 guest/host mask.
5610 *
5611 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5612 * cause VM-exits, so we need to merge them here.
5613 */
5614 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5615 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
5616
5617 /*
5618 * Page-fault error-code mask and match.
5619 *
5620 * Although we require unrestricted guest execution (and thereby nested-paging) for
5621 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5622 * normally intercept #PFs, it might intercept them for debugging purposes.
5623 *
5624 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5625 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5626 */
5627 uint32_t u32XcptPFMask;
5628 uint32_t u32XcptPFMatch;
5629 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5630 {
5631 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5632 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5633 }
5634 else
5635 {
5636 u32XcptPFMask = 0;
5637 u32XcptPFMatch = 0;
5638 }
5639
5640 /*
5641 * Pause-Loop exiting.
5642 */
5643 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5644 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
5645 * this will work... */
5646 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5647 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5648
5649 /*
5650 * Pending debug exceptions.
5651 * Currently just copy whatever the nested-guest provides us.
5652 */
5653 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5654
5655 /*
5656 * I/O Bitmap.
5657 *
5658 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5659 * intercept all I/O port accesses.
5660 */
5661 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5662 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5663
5664 /*
5665 * VMCS shadowing.
5666 *
5667 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5668 * enabled while executing the nested-guest.
5669 */
5670 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5671
5672 /*
5673 * APIC-access page.
5674 */
5675 RTHCPHYS HCPhysApicAccess;
5676 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5677 {
5678 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5679 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5680
5681 void *pvPage;
5682 PGMPAGEMAPLOCK PgLockApicAccess;
5683 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5684 if (RT_SUCCESS(rc))
5685 {
5686 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5687 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
5688
5689 /** @todo Handle proper releasing of page-mapping lock later. */
5690 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
5691 }
5692 else
5693 return rc;
5694 }
5695 else
5696 HCPhysApicAccess = 0;
5697
5698 /*
5699 * Virtual-APIC page and TPR threshold.
5700 */
5701 RTHCPHYS HCPhysVirtApic;
5702 uint32_t u32TprThreshold;
5703 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5704 {
5705 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
5706 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
5707
5708 void *pvPage;
5709 PGMPAGEMAPLOCK PgLockVirtApic;
5710 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
5711 if (RT_SUCCESS(rc))
5712 {
5713 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
5714 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
5715
5716 /** @todo Handle proper releasing of page-mapping lock later. */
5717 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
5718 }
5719 else
5720 return rc;
5721
5722 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
5723 }
5724 else
5725 {
5726 HCPhysVirtApic = 0;
5727 u32TprThreshold = 0;
5728
5729 /*
5730         * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
5731 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
5732 * be taken care of by EPT/shadow paging.
5733 */
5734 if (pVM->hmr0.s.fAllow64BitGuests)
5735 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
5736 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
5737 }
5738
5739 /*
5740 * Validate basic assumptions.
5741 */
5742 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5743 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
5744 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
5745 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
5746
5747 /*
5748 * Commit it to the nested-guest VMCS.
5749 */
5750 int rc = VINF_SUCCESS;
5751 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
5752 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
5753 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
5754 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
5755 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
5756 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
5757 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
5758 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
5759 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
5760 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
5761 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
5762 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
5763 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
5764 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
5765 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
5766 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
5767 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
5768 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
5769 {
5770 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
5771 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
5772 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
5773 }
5774 if (pVmcsInfoNstGst->HCPhysVirtApic != HCPhysVirtApic)
5775 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
5776 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
5777 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5778 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
5779 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
5780 AssertRC(rc);
5781
5782 /*
5783 * Update the nested-guest VMCS cache.
5784 */
5785 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
5786 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
5787 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
5788 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
5789 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
5790 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
5791 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
5792 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
5793 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
5794
5795 /*
5796 * We need to flush the TLB if we are switching the APIC-access page address.
5797 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
5798 */
5799 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5800 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
5801
5802 /*
5803 * MSR bitmap.
5804 *
5805 * The MSR bitmap address has already been initialized while setting up the nested-guest
5806     * VMCS; here we need to merge the MSR bitmaps.
5807 */
5808 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5809 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
5810
5811 return VINF_SUCCESS;
5812}
5813#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
5814
5815
5816/**
5817 * Does the preparations before executing guest code in VT-x.
5818 *
5819 * This may cause longjmps to ring-3 and may even result in rescheduling to the
5820 * recompiler/IEM. We must be cautious about what we do here regarding committing
5821 * guest-state information into the VMCS, assuming we will assuredly execute the
5822 * guest in VT-x mode.
5823 *
5824 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
5825 * the common-state (TRPM/forceflags), we must undo those changes so that the
5826 * recompiler/IEM can (and should) use them when it resumes guest execution.
5827 * Otherwise such operations must be done when we can no longer exit to ring-3.
5828 *
5829 * @returns Strict VBox status code (i.e. informational status codes too).
5830 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
5831 * have been disabled.
5832 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
5833 * pending events).
5834 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
5835 * double-fault into the guest.
5836 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
5837 * dispatched directly.
5838 * @retval VINF_* scheduling changes, we have to go back to ring-3.
5839 *
5840 * @param pVCpu The cross context virtual CPU structure.
5841 * @param pVmxTransient The VMX-transient structure.
5842 * @param fStepping Whether we are single-stepping the guest in the
5843 * hypervisor debugger. Makes us ignore some of the reasons
5844 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
5845 * if event dispatching took place.
5846 */
5847static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
5848{
5849 Assert(VMMRZCallRing3IsEnabled(pVCpu));
5850
5851 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
5852
5853#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
5854 if (pVmxTransient->fIsNestedGuest)
5855 {
5856 RT_NOREF2(pVCpu, fStepping);
5857 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
5858 return VINF_EM_RESCHEDULE_REM;
5859 }
5860#endif
5861
5862 /*
5863 * Check and process force flag actions, some of which might require us to go back to ring-3.
5864 */
5865 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
5866 if (rcStrict == VINF_SUCCESS)
5867 {
5868 /* FFs don't get set all the time. */
5869#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5870 if ( pVmxTransient->fIsNestedGuest
5871 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5872 {
5873 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5874 return VINF_VMX_VMEXIT;
5875 }
5876#endif
5877 }
5878 else
5879 return rcStrict;
5880
5881 /*
5882 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
5883 */
5884 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5885 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
5886 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5887 && PDMHasApic(pVM))
5888 {
5889 /* Get the APIC base MSR from the virtual APIC device. */
5890 uint64_t const uApicBaseMsr = PDMApicGetBaseMsrNoCheck(pVCpu);
5891
5892 /* Map the APIC access page. */
5893 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
5894 AssertRCReturn(rc, rc);
5895
5896 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
5897 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
5898 }
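/*
 * Illustrative sketch (comment only): why masking off the page offset yields the page to
 * map. IA32_APIC_BASE keeps the 4 KB-aligned APIC MMIO base in its upper bits and uses
 * the low bits for flags (bit 8 = BSP, bit 10 = x2APIC enable, bit 11 = xAPIC global
 * enable). The values below are hypothetical but typical:
 *
 *     uint64_t const uApicBase    = UINT64_C(0xfee00900);           // base | global enable | BSP
 *     RTGCPHYS const GCPhysApic   = uApicBase & ~(RTGCPHYS)0xfff;   // 0xfee00000, the page we map
 *     bool     const fApicEnabled = RT_BOOL(uApicBase & RT_BIT_64(11));
 */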
5899
5900#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5901 /*
5902 * Merge guest VMCS controls with the nested-guest VMCS controls.
5903 *
5904 * Even if we have not executed the guest prior to this (e.g. when resuming from a
5905 * saved state), we should be okay with merging controls as we initialize the
5906 * guest VMCS controls as part of the VM setup phase.
5907 */
5908 if ( pVmxTransient->fIsNestedGuest
5909 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
5910 {
5911 int rc = hmR0VmxMergeVmcsNested(pVCpu);
5912 AssertRCReturn(rc, rc);
5913 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
5914 }
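/*
 * The principle of the merge, sketched with hypothetical locals (the real
 * hmR0VmxMergeVmcsNested() handles each control group individually and with more care):
 * for exit-causing bits, take the union so a condition that either the outer or the
 * nested hypervisor wants intercepted still causes a VM-exit, then clamp the result to
 * what the CPU advertises in the VMX capability MSRs:
 *
 *     uint32_t u32XcptBitmapMerged = u32XcptBitmapGst | u32XcptBitmapNstGst;  // trap if either wants it
 *     uint32_t u32PinCtlsMerged    = u32PinCtlsGst    | u32PinCtlsNstGst;
 *     u32PinCtlsMerged |= g_HmMsrs.u.vmx.PinCtls.n.allowed0;    // enforce must-be-one bits
 *     u32PinCtlsMerged &= g_HmMsrs.u.vmx.PinCtls.n.allowed1;    // drop unsupported bits
 */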
5915#endif
5916
5917 /*
5918 * Evaluate events to be injected into the guest.
5919 *
5920 * Events in TRPM can be injected without inspecting the guest state.
5921 * If any new events (interrupts/NMI) are pending currently, we try to set up the
5922 * guest to cause a VM-exit the next time it is ready to receive the event.
5923 */
5924 if (TRPMHasTrap(pVCpu))
5925 vmxHCTrpmTrapToPendingEvent(pVCpu);
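/*
 * The "cause a VM-exit the next time the guest can take it" part relies on the
 * interrupt-window (and NMI-window) execution controls: with interrupt-window exiting
 * enabled the CPU exits as soon as RFLAGS.IF is set and no interrupt shadow is in
 * effect, at which point the pending event can be injected. A simplified sketch with
 * hypothetical flags (the real logic lives in vmxHCEvaluatePendingEvent):
 *
 *     if (fIntPending && !fGuestCanReceiveIntNow)
 *     {
 *         uProcCtls |= VMX_PROC_CTLS_INT_WINDOW_EXIT;
 *         rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
 *     }
 */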
5926
5927 uint32_t fIntrState;
5928#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5929 if (!pVmxTransient->fIsNestedGuest)
5930 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5931 else
5932 rcStrict = vmxHCEvaluatePendingEventNested(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5933
5934 /*
5935 * If evaluating pending events failed (unlikely), or if we were preparing to run
5936 * a nested-guest but performed a nested-guest VM-exit instead, bail out.
5937 */
5938 if (rcStrict != VINF_SUCCESS)
5939 return rcStrict;
5940 if ( pVmxTransient->fIsNestedGuest
5941 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5942 {
5943 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5944 return VINF_VMX_VMEXIT;
5945 }
5946#else
5947 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5948 Assert(rcStrict == VINF_SUCCESS);
5949#endif
5950
5951 /*
5952 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
5953 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
5954 * also result in triple-faulting the VM.
5955 *
5956 * With nested-guests, the above does not apply since unrestricted guest execution is a
5957 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
5958 */
5959 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest, fIntrState, fStepping);
5960 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5961 { /* likely */ }
5962 else
5963 {
5964 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
5965 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
5966 return rcStrict;
5967 }
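/*
 * For context: injection ultimately amounts to writing the VM-entry
 * interruption-information field (plus error code / instruction length where applicable)
 * before VM-entry. A sketch of injecting #GP(0) using the Intel-defined bit layout; the
 * value is what would end up in the VM-entry interruption-information VMCS field:
 *
 *     uint32_t const uEntryIntInfo = 13             // bits 7:0   - vector (#GP)
 *                                  | (3 << 8)       // bits 10:8  - type: hardware exception
 *                                  | RT_BIT_32(11)  // bit 11     - deliver error code
 *                                  | RT_BIT_32(31); // bit 31     - valid
 *     uint32_t const uXcptErrCode  = 0;             // goes into the entry exception error-code field
 */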
5968
5969 /*
5970 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
5971 * import CR3 themselves. We need to handle that here, as even the
5972 * vmxHCInjectPendingEvent() call just above may lazily import guest-CPU state on
5973 * demand, causing the force flag checked below to be set.
5974 */
5975 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
5976 {
5977 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
5978 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
5979 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
5980 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
5981 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5982 }
5983
5984#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5985 /* Paranoia. */
5986 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
5987#endif
5988
5989 /*
5990 * No longjmps to ring-3 from this point on!!!
5991 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
5992 * This also disables flushing of the R0-logger instance (if any).
5993 */
5994 VMMRZCallRing3Disable(pVCpu);
5995
5996 /*
5997 * Export the guest state bits.
5998 *
5999 * We cannot perform longjmps while loading the guest state because we do not preserve the
6000 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
6001 * CPU migration.
6002 *
6003 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
6004 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
6005 */
6006 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6007 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6008 { /* likely */ }
6009 else
6010 {
6011 VMMRZCallRing3Enable(pVCpu);
6012 return rcStrict;
6013 }
6014
6015 /*
6016 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6017 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6018 * preemption disabled for a while. Since this is purely to aid the
6019 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6020 * disable interrupts on NT.
6021 *
6022 * We need to check for force-flags that could've possibly been altered since we last
6023 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6024 * see @bugref{6398}).
6025 *
6026 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6027 * to ring-3 before executing guest code.
6028 */
6029 pVmxTransient->fEFlags = ASMIntDisableFlags();
6030
6031 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6032 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6033 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6034 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6035 {
6036 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6037 {
6038#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6039 /*
6040 * If we are executing a nested-guest, make sure that we intercept subsequent
6041 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6042 * the VM-exit instruction emulation happy.
6043 */
6044 if (pVmxTransient->fIsNestedGuest)
6045 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6046#endif
6047
6048 /*
6049 * We've injected any pending events. This is really the point of no return (to ring-3).
6050 *
6051 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6052 * returns from this function, so do -not- enable them here.
6053 */
6054 pVCpu->hm.s.Event.fPending = false;
6055 return VINF_SUCCESS;
6056 }
6057
6058 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6059 rcStrict = VINF_EM_RAW_INTERRUPT;
6060 }
6061 else
6062 {
6063 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6064 rcStrict = VINF_EM_RAW_TO_R3;
6065 }
6066
6067 ASMSetFlags(pVmxTransient->fEFlags);
6068 VMMRZCallRing3Enable(pVCpu);
6069
6070 return rcStrict;
6071}
6072
6073
6074/**
6075 * Final preparations before executing guest code using hardware-assisted VMX.
6076 *
6077 * We can no longer get preempted to a different host CPU and there are no returns
6078 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6079 * failures); this function is not expected to fail barring unrecoverable hardware
6080 * errors.
6081 *
6082 * @param pVCpu The cross context virtual CPU structure.
6083 * @param pVmxTransient The VMX-transient structure.
6084 *
6085 * @remarks Called with preemption disabled.
6086 * @remarks No-long-jump zone!!!
6087 */
6088static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6089{
6090 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6091 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6092 Assert(!pVCpu->hm.s.Event.fPending);
6093
6094 /*
6095 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6096 */
6097 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6098 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6099
6100 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6101 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6102 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6103 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6104
6105 if (!CPUMIsGuestFPUStateActive(pVCpu))
6106 {
6107 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6108 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6109 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6110 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6111 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6112 }
6113
6114 /*
6115 * Re-export the host state bits as we may've been preempted (only happens when
6116 * thread-context hooks are used or when the VM start function changes) or if
6117 * the host CR0 is modified while loading the guest FPU state above.
6118 *
6119 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6120 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6121 * see @bugref{8432}.
6122 *
6123 * This may also happen when switching to/from a nested-guest VMCS without leaving
6124 * ring-0.
6125 */
6126 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6127 {
6128 hmR0VmxExportHostState(pVCpu);
6129 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6130 }
6131 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6132
6133 /*
6134 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6135 */
6136 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6137 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6138 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6139
6140 /*
6141 * Store status of the shared guest/host debug state at the time of VM-entry.
6142 */
6143 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6144 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6145
6146 /*
6147 * Always cache the TPR shadow if the virtual-APIC page exists, so we avoid repeated
6148 * conditional checks later. The post-run side of our code determines whether it
6149 * needs to sync the virtual APIC TPR with the TPR shadow.
6150 */
6151 if (pVmcsInfo->pbVirtApic)
6152 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
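/*
 * Background, briefly: with "use TPR shadow" the guest's CR8/TPR writes land in the VTPR
 * byte at offset 0x80 (XAPIC_OFF_TPR) of the virtual-APIC page without a VM-exit; the
 * CPU only exits (TPR-below-threshold) when bits 7:4 of the new value drop below the TPR
 * threshold in the VMCS. Caching the byte here lets the post-run code spot silent
 * changes with a single compare, sketched with a hypothetical flag:
 *
 *     uint8_t const bTprBefore = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
 *     // ... VMLAUNCH/VMRESUME ...
 *     bool const fSyncTprToApic = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR] != bTprBefore;
 */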
6153
6154 /*
6155 * Update the host MSRs values in the VM-exit MSR-load area.
6156 */
6157 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6158 {
6159 if (pVmcsInfo->cExitMsrLoad > 0)
6160 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6161 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6162 }
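/*
 * For reference: the VM-exit MSR-load area (like the other auto load/store areas) is an
 * array of 16-byte entries the CPU walks on VM-exit, loading each listed MSR from the
 * stored value. The Intel-defined entry layout, as a sketch (VBox has its own structure
 * for this; the type name below is made up):
 *
 *     typedef struct SKETCHVMXAUTOMSR
 *     {
 *         uint32_t u32Msr;        // MSR index, e.g. MSR_K6_EFER
 *         uint32_t u32Reserved;   // must be zero
 *         uint64_t u64Value;      // value loaded into the MSR on VM-exit
 *     } SKETCHVMXAUTOMSR;
 */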
6163
6164 /*
6165 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6166 * VMX-preemption timer based on the next virtual sync clock deadline.
6167 */
6168 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6169 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6170 {
6171 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6172 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6173 }
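/*
 * Two mechanisms are configured by the call above, sketched with hypothetical locals:
 *
 *  - TSC offsetting: with RDTSC exiting clear and "use TSC offsetting" set, a guest
 *    RDTSC returns host-TSC + offset, so the offset is chosen to make the virtual TSC
 *    line up with where the guest expects it:
 *
 *        int64_t const i64TscOffset = (int64_t)(u64GuestTscNow - ASMReadTSC());
 *
 *  - VMX-preemption timer: counts down at the TSC rate shifted right by the value in
 *    bits 4:0 of IA32_VMX_MISC and forces a VM-exit when it reaches zero:
 *
 *        unsigned const cShift      = (unsigned)(g_HmMsrs.u.vmx.u64Misc & 0x1f);
 *        uint32_t const cTimerTicks = (uint32_t)(cHostTscTicksToDeadline >> cShift);
 */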
6174
6175 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6176 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6177 if (!fIsRdtscIntercepted)
6178 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6179 else
6180 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6181
6182 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6183 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6184 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6185 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6186 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6187 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6188
6189 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6190
6191 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6192 as we're about to start executing the guest. */
6193
6194 /*
6195 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6196 *
6197 * This is done this late as updating the TSC offsetting/preemption timer above
6198 * figures out if we can skip intercepting RDTSCP by calculating the number of
6199 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6200 */
6201 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6202 && !fIsRdtscIntercepted)
6203 {
6204 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6205
6206 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6207 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6208 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6209 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6210 AssertRC(rc);
6211 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6212 pVmxTransient->fRemoveTscAuxMsr = true;
6213 }
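/*
 * Why this matters: RDTSCP returns EDX:EAX = TSC and ECX = IA32_TSC_AUX. Since the guest
 * executes RDTSCP without an exit here, the physical MSR must hold the guest value
 * across the world switch, which the auto-load/store entry added above arranges. What
 * the guest effectively observes (sketch):
 *
 *     uint32_t uTscAux;
 *     uint64_t const uTsc = ASMReadTscWithAux(&uTscAux);   // uTscAux == guest IA32_TSC_AUX
 */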
6214
6215#ifdef VBOX_STRICT
6216 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6217 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6218 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6219 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6220#endif
6221
6222#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6223 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6224 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6225 * see @bugref{9180#c54}. */
6226 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6227 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6228 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6229#endif
6230}
6231
6232
6233/**
6234 * First C routine invoked after running guest code using hardware-assisted VMX.
6235 *
6236 * @param pVCpu The cross context virtual CPU structure.
6237 * @param pVmxTransient The VMX-transient structure.
6238 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6239 *
6240 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6241 *
6242 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6243 * unconditionally when it is safe to do so.
6244 */
6245static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6246{
6247 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6248 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6249 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6250 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6251 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6252 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6253
6254 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6255 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6256 {
6257 uint64_t uGstTsc;
6258 if (!pVmxTransient->fIsNestedGuest)
6259 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6260 else
6261 {
6262 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6263 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6264 }
6265 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6266 }
6267
6268 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6269 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6270 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6271
6272 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6273 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6274#ifdef VBOX_STRICT
6275 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6276#endif
6277 Assert(!ASMIntAreEnabled());
6278 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6279 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6280
6281#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6282 /*
6283 * Clean all the VMCS fields in the transient structure before reading
6284 * anything from the VMCS.
6285 */
6286 pVmxTransient->uExitReason = 0;
6287 pVmxTransient->uExitIntErrorCode = 0;
6288 pVmxTransient->uExitQual = 0;
6289 pVmxTransient->uGuestLinearAddr = 0;
6290 pVmxTransient->uExitIntInfo = 0;
6291 pVmxTransient->cbExitInstr = 0;
6292 pVmxTransient->ExitInstrInfo.u = 0;
6293 pVmxTransient->uEntryIntInfo = 0;
6294 pVmxTransient->uEntryXcptErrorCode = 0;
6295 pVmxTransient->cbEntryInstr = 0;
6296 pVmxTransient->uIdtVectoringInfo = 0;
6297 pVmxTransient->uIdtVectoringErrorCode = 0;
6298#endif
6299
6300 /*
6301 * Save the basic VM-exit reason and check if the VM-entry failed.
6302 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6303 */
6304 uint32_t uExitReason;
6305 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6306 AssertRC(rc);
6307 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6308 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
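/*
 * Bit-level view of what the two macros above extract from the 32-bit exit reason
 * (Intel-defined layout): bits 15:0 hold the basic exit reason, bit 31 is set when the
 * VM-entry itself failed (invalid guest state, MSR loading, machine check):
 *
 *     uint16_t const uBasicReason = (uint16_t)(uExitReason & 0xffff);
 *     bool     const fEntryFailed = RT_BOOL(uExitReason & RT_BIT_32(31));
 */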
6309
6310 /*
6311 * Log the VM-exit before logging anything else as otherwise it might be a
6312 * tad confusing what happens before and after the world-switch.
6313 */
6314 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6315
6316 /*
6317 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6318 * bitmap permissions, if it was added before VM-entry.
6319 */
6320 if (pVmxTransient->fRemoveTscAuxMsr)
6321 {
6322 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6323 pVmxTransient->fRemoveTscAuxMsr = false;
6324 }
6325
6326 /*
6327 * Check if VMLAUNCH/VMRESUME succeeded.
6328 * If this failed, we cause a guru meditation and cease further execution.
6329 */
6330 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6331 {
6332 /*
6333 * Update the VM-exit history array here even if the VM-entry failed due to:
6334 * - Invalid guest state.
6335 * - MSR loading.
6336 * - Machine-check event.
6337 *
6338 * In any of the above cases we will still have a "valid" VM-exit reason
6339 * despite @a fVMEntryFailed being true.
6340 *
6341 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6342 *
6343 * Note! We don't have CS or RIP at this point. Will probably address that later
6344 * by amending the history entry added here.
6345 */
6346 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6347 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6348
6349 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6350 {
6351 VMMRZCallRing3Enable(pVCpu);
6352 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6353
6354#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6355 vmxHCReadAllRoFieldsVmcs(pVCpu, pVmxTransient);
6356#endif
6357
6358 /*
6359 * Always import the guest-interruptibility state as we need it while evaluating and
6360 * injecting events on re-entry. We could in *theory* postpone reading it for
6361 * exits that do not involve instruction emulation, but since most exits are
6362 * for instruction emulation (the exceptions being external interrupts, page faults
6363 * taken for building shadow page tables, EPT violations, and interrupt-window exits),
6364 * this is a reasonable simplification.
6365 *
6366 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6367 * checking for real-mode while exporting the state because all bits that cause
6368 * mode changes wrt CR0 are intercepted.
6369 *
6370 * Note! This mask _must_ match the default a_fDonePostExit value for the
6371 * vmxHCImportGuestState template!
6372 */
6373 /** @todo r=bird: consider dropping the INHIBIT_XXX and fetch the state
6374 * explicitly in the exit handlers and injection function. That way we have
6375 * fewer clusters of vmread spread around the code, because the EM history
6376 * executor won't execute very many non-exiting instructions before stopping. */
6377 rc = vmxHCImportGuestState< CPUMCTX_EXTRN_INHIBIT_INT
6378 | CPUMCTX_EXTRN_INHIBIT_NMI
6379#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6380 | HMVMX_CPUMCTX_EXTRN_ALL
6381#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6382 | CPUMCTX_EXTRN_RFLAGS
6383#endif
6384 , 0 /*a_fDoneLocal*/, 0 /*a_fDonePostExit*/>(pVCpu, pVmcsInfo, __FUNCTION__);
6385 AssertRC(rc);
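/*
 * The CPUMCTX_EXTRN_XXX convention in one line: a bit set in the guest context's fExtrn
 * means that piece of state still lives only in the VMCS and must be imported before it
 * is used, e.g.:
 *
 *     if (pVCpu->cpum.GstCtx.fExtrn & CPUMCTX_EXTRN_RFLAGS)   // RFLAGS not yet in CPUMCTX
 *         vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
 */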
6386
6387 /*
6388 * Sync the TPR shadow with our APIC state.
6389 */
6390 if ( !pVmxTransient->fIsNestedGuest
6391 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6392 {
6393 Assert(pVmcsInfo->pbVirtApic);
6394 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6395 {
6396 rc = PDMApicSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6397 AssertRC(rc);
6398 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6399 }
6400 }
6401
6402 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6403 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6404 || pVmxTransient->fWasHyperDebugStateActive == false);
6405 return;
6406 }
6407 }
6408#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6409 else if (pVmxTransient->fIsNestedGuest)
6410 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6411#endif
6412 else
6413 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6414
6415 VMMRZCallRing3Enable(pVCpu);
6416}
6417
6418
6419/**
6420 * Runs the guest code using hardware-assisted VMX the normal way.
6421 *
6422 * @returns VBox status code.
6423 * @param pVCpu The cross context virtual CPU structure.
6424 * @param pcLoops Pointer to the number of executed loops.
6425 */
6426static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6427{
6428 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6429 Assert(pcLoops);
6430 Assert(*pcLoops <= cMaxResumeLoops);
6431 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6432
6433#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6434 /*
6435 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6436 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6437 * guest VMCS while entering the VMX ring-0 session.
6438 */
6439 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6440 {
6441 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6442 if (RT_SUCCESS(rc))
6443 { /* likely */ }
6444 else
6445 {
6446 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6447 return rc;
6448 }
6449 }
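/*
 * At the instruction level the switch boils down to making the other VMCS current with
 * VMPTRLD (a VMCS is VMCLEARed before its first use on a CPU, after which the next entry
 * must be a VMLAUNCH rather than a VMRESUME). Roughly, and not what the helper above
 * literally does:
 *
 *     // vmclear [HCPhysVmcs]   - once, before the first VMPTRLD on this CPU
 *     // vmptrld [HCPhysVmcs]   - selects it as the current VMCS
 *     bool const fUseVmResume = pVmcsInfo->fVmcsState == VMX_V_VMCS_LAUNCH_STATE_LAUNCHED;
 */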
6450#endif
6451
6452 VMXTRANSIENT VmxTransient;
6453 RT_ZERO(VmxTransient);
6454 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6455 Assert(!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
6456
6457 /* Paranoia. */
6458 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6459
6460 VBOXSTRICTRC rcStrict;
6461 for (;;)
6462 {
6463 Assert(!HMR0SuspendPending());
6464 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6465 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6466
6467 /*
6468 * Preparatory work for running guest code; this may force us to
6469 * return to ring-3.
6470 *
6471 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6472 */
6473 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6474 if (rcStrict != VINF_SUCCESS)
6475 break;
6476
6477 /* Interrupts are disabled at this point! */
6478 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6479 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6480 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6481 /* Interrupts are re-enabled at this point! */
6482
6483 /*
6484 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6485 */
6486 if (RT_SUCCESS(rcRun))
6487 { /* very likely */ }
6488 else
6489 {
6490 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6491 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6492 return rcRun;
6493 }
6494
6495 /*
6496 * Profile the VM-exit.
6497 */
6498 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6499 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6500 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6501 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6502 HMVMX_START_EXIT_DISPATCH_PROF();
6503
6504 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6505
6506 /*
6507 * Handle the VM-exit.
6508 */
6509#ifdef HMVMX_USE_FUNCTION_TABLE
6510 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6511#else
6512 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6513#endif
6514 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6515 if (rcStrict == VINF_SUCCESS)
6516 {
6517 if (++(*pcLoops) <= cMaxResumeLoops)
6518 continue;
6519 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6520 rcStrict = VINF_EM_RAW_INTERRUPT;
6521 }
6522 break;
6523 }
6524
6525 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6526 return rcStrict;
6527}
6528
6529
6530#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6531/**
6532 * Runs the nested-guest code using hardware-assisted VMX.
6533 *
6534 * @returns VBox status code.
6535 * @param pVCpu The cross context virtual CPU structure.
6536 * @param pcLoops Pointer to the number of executed loops.
6537 *
6538 * @sa hmR0VmxRunGuestCodeNormal.
6539 */
6540static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6541{
6542 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6543 Assert(pcLoops);
6544 Assert(*pcLoops <= cMaxResumeLoops);
6545 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6546
6547 /*
6548 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6549 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6550 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6551 */
6552 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6553 {
6554 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6555 if (RT_SUCCESS(rc))
6556 { /* likely */ }
6557 else
6558 {
6559 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6560 return rc;
6561 }
6562 }
6563
6564 VMXTRANSIENT VmxTransient;
6565 RT_ZERO(VmxTransient);
6566 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6567 VmxTransient.fIsNestedGuest = true;
6568 Assert(pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs);
6569
6570 /* Paranoia. */
6571 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6572
6573 /* Set up the pointer so PGM/IEM can query VM-exit auxiliary info on demand in ring-0. */
6574 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6575
6576 VBOXSTRICTRC rcStrict;
6577 for (;;)
6578 {
6579 Assert(!HMR0SuspendPending());
6580 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6581 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6582
6583 /*
6584 * Preparatory work for running nested-guest code; this may force us to
6585 * return to ring-3.
6586 *
6587 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6588 */
6589 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6590 if (rcStrict != VINF_SUCCESS)
6591 break;
6592
6593 /* Interrupts are disabled at this point! */
6594 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6595 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6596 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6597 /* Interrupts are re-enabled at this point! */
6598
6599 /*
6600 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6601 */
6602 if (RT_SUCCESS(rcRun))
6603 { /* very likely */ }
6604 else
6605 {
6606 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6607 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6608 rcStrict = rcRun;
6609 break;
6610 }
6611
6612 /*
6613 * Profile the VM-exit.
6614 */
6615 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6616 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6617 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6618 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6619 HMVMX_START_EXIT_DISPATCH_PROF();
6620
6621 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6622
6623 /*
6624 * Handle the VM-exit.
6625 */
6626 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6627 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6628 if (rcStrict == VINF_SUCCESS)
6629 {
6630 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6631 {
6632 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6633 rcStrict = VINF_VMX_VMEXIT;
6634 }
6635 else
6636 {
6637 if (++(*pcLoops) <= cMaxResumeLoops)
6638 continue;
6639 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6640 rcStrict = VINF_EM_RAW_INTERRUPT;
6641 }
6642 }
6643 else
6644 Assert(rcStrict != VINF_VMX_VMEXIT);
6645 break;
6646 }
6647
6648 /* Ensure VM-exit auxiliary info. is no longer available. */
6649 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6650
6651 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6652 return rcStrict;
6653}
6654#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6655
6656
6657/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6658 * probes.
6659 *
6660 * The following few functions and associated structure contain the bloat
6661 * necessary for providing detailed debug events and dtrace probes as well as
6662 * reliable host side single stepping. This works on the principle of
6663 * "subclassing" the normal execution loop and workers. We replace the loop
6664 * method completely and override selected helpers to add necessary adjustments
6665 * to their core operation.
6666 *
6667 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6668 * any performance for debug and analysis features.
6669 *
6670 * @{
6671 */
6672
6673/**
6674 * Single steps guest code using hardware-assisted VMX.
6675 *
6676 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
6677 * but single-stepping through the hypervisor debugger.
6678 *
6679 * @returns Strict VBox status code (i.e. informational status codes too).
6680 * @param pVCpu The cross context virtual CPU structure.
6681 * @param pcLoops Pointer to the number of executed loops.
6682 *
6683 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6684 */
6685static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6686{
6687 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6688 Assert(pcLoops);
6689 Assert(*pcLoops <= cMaxResumeLoops);
6690
6691 VMXTRANSIENT VmxTransient;
6692 RT_ZERO(VmxTransient);
6693 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6694
6695 /* Set HMCPU indicators. */
6696 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
6697 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
6698 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6699 pVCpu->hmr0.s.fUsingDebugLoop = true;
6700
6701 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
6702 VMXRUNDBGSTATE DbgState;
6703 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
6704 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6705
6706 /*
6707 * The loop.
6708 */
6709 VBOXSTRICTRC rcStrict;
6710 for (;;)
6711 {
6712 Assert(!HMR0SuspendPending());
6713 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6714 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6715 bool fStepping = pVCpu->hm.s.fSingleInstruction;
6716
6717 /* Set up VM-execution controls the next two can respond to. */
6718 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6719
6720 /*
6721 * Preparatory work for running guest code; this may force us to
6722 * return to ring-3.
6723 *
6724 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6725 */
6726 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
6727 if (rcStrict != VINF_SUCCESS)
6728 break;
6729
6730 /* Interrupts are disabled at this point! */
6731 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6732
6733 /* Override any obnoxious code in the above two calls. */
6734 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6735
6736 /*
6737 * Finally execute the guest.
6738 */
6739 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6740
6741 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6742 /* Interrupts are re-enabled at this point! */
6743
6744 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
6745 if (RT_SUCCESS(rcRun))
6746 { /* very likely */ }
6747 else
6748 {
6749 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6750 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6751 return rcRun;
6752 }
6753
6754 /* Profile the VM-exit. */
6755 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6756 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
6757 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6758 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6759 HMVMX_START_EXIT_DISPATCH_PROF();
6760
6761 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6762
6763 /*
6764 * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
6765 */
6766 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
6767 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6768 if (rcStrict != VINF_SUCCESS)
6769 break;
6770 if (++(*pcLoops) > cMaxResumeLoops)
6771 {
6772 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6773 rcStrict = VINF_EM_RAW_INTERRUPT;
6774 break;
6775 }
6776
6777 /*
6778 * Stepping: Did the RIP change, if so, consider it a single step.
6779 * Otherwise, make sure one of the TFs gets set.
6780 */
6781 if (fStepping)
6782 {
6783 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
6784 AssertRC(rc);
6785 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
6786 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
6787 {
6788 rcStrict = VINF_EM_DBG_STEPPED;
6789 break;
6790 }
6791 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
6792 }
6793
6794 /*
6795 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
6796 */
6797 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
6798 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6799
6800 /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
6801 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
6802 Assert(rcStrict == VINF_SUCCESS);
6803 }
6804
6805 /*
6806 * Clear the X86_EFL_TF if necessary.
6807 */
6808 if (pVCpu->hmr0.s.fClearTrapFlag)
6809 {
6810 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
6811 AssertRC(rc);
6812 pVCpu->hmr0.s.fClearTrapFlag = false;
6813 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
6814 }
6815 /** @todo there seem to be issues with the resume flag when the monitor trap
6816 * flag is pending without being used. Seen early in BIOS init when
6817 * accessing APIC page in protected mode. */
6818
6819/** @todo we need to do vmxHCRunDebugStateRevert here too, in case we broke
6820 * out of the above loop. */
6821
6822 /* Restore HMCPU indicators. */
6823 pVCpu->hmr0.s.fUsingDebugLoop = false;
6824 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6825 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
6826
6827 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6828 return rcStrict;
6829}
6830
6831/** @} */
6832
6833
6834/**
6835 * Checks if any expensive dtrace probes are enabled and we should go to the
6836 * debug loop.
6837 *
6838 * @returns true if we should use debug loop, false if not.
6839 */
6840static bool hmR0VmxAnyExpensiveProbesEnabled(void)
6841{
6842 /* It's probably faster to OR the raw 32-bit counter variables together.
6843 Since the variables are in an array and the probes are next to one
6844 another (more or less), we have good locality. So, better to read
6845 eight or nine cache lines every time and have only one conditional, than
6846 128+ conditionals, right? */
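/*
 * I.e. the pattern relied on below, with hypothetical flags:
 *
 *     uint32_t const fAny          = fProbe0 | fProbe1 | fProbe2;  // branch-free accumulation...
 *     bool     const fUseDebugLoop = fAny != 0;                    // ...and a single test, instead of
 *                                                                  //    fProbe0 || fProbe1 || fProbe2 || ...
 */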
6847 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
6848 | VBOXVMM_XCPT_DE_ENABLED_RAW()
6849 | VBOXVMM_XCPT_DB_ENABLED_RAW()
6850 | VBOXVMM_XCPT_BP_ENABLED_RAW()
6851 | VBOXVMM_XCPT_OF_ENABLED_RAW()
6852 | VBOXVMM_XCPT_BR_ENABLED_RAW()
6853 | VBOXVMM_XCPT_UD_ENABLED_RAW()
6854 | VBOXVMM_XCPT_NM_ENABLED_RAW()
6855 | VBOXVMM_XCPT_DF_ENABLED_RAW()
6856 | VBOXVMM_XCPT_TS_ENABLED_RAW()
6857 | VBOXVMM_XCPT_NP_ENABLED_RAW()
6858 | VBOXVMM_XCPT_SS_ENABLED_RAW()
6859 | VBOXVMM_XCPT_GP_ENABLED_RAW()
6860 | VBOXVMM_XCPT_PF_ENABLED_RAW()
6861 | VBOXVMM_XCPT_MF_ENABLED_RAW()
6862 | VBOXVMM_XCPT_AC_ENABLED_RAW()
6863 | VBOXVMM_XCPT_XF_ENABLED_RAW()
6864 | VBOXVMM_XCPT_VE_ENABLED_RAW()
6865 | VBOXVMM_XCPT_SX_ENABLED_RAW()
6866 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
6867 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
6868 ) != 0
6869 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
6870 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
6871 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
6872 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
6873 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
6874 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
6875 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
6876 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
6877 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
6878 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
6879 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
6880 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
6881 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
6882 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
6883 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
6884 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
6885 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
6886 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
6887 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
6888 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
6889 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
6890 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
6891 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
6892 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
6893 | VBOXVMM_INSTR_STR_ENABLED_RAW()
6894 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
6895 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
6896 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
6897 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
6898 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
6899 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
6900 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
6901 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
6902 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
6903 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
6904 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
6905 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
6906 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
6907 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
6908 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
6909 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
6910 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
6911 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
6912 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
6913 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
6914 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
6915 ) != 0
6916 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
6917 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
6918 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
6919 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
6920 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
6921 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
6922 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
6923 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
6924 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
6925 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
6926 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
6927 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
6928 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
6929 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
6930 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
6931 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
6932 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
6933 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
6934 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
6935 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
6936 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
6937 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
6938 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
6939 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
6940 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
6941 | VBOXVMM_EXIT_STR_ENABLED_RAW()
6942 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
6943 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
6944 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
6945 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
6946 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
6947 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
6948 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
6949 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
6950 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
6951 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
6952 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
6953 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
6954 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
6955 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
6956 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
6957 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
6958 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
6959 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
6960 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
6961 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
6962 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
6963 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
6964 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
6965 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
6966 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
6967 ) != 0;
6968}
6969
6970
6971/**
6972 * Runs the guest using hardware-assisted VMX.
6973 *
6974 * @returns Strict VBox status code (i.e. informational status codes too).
6975 * @param pVCpu The cross context virtual CPU structure.
6976 */
6977VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
6978{
6979 AssertPtr(pVCpu);
6980 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
6981 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6982 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
6983 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
6984
6985 VBOXSTRICTRC rcStrict;
6986 uint32_t cLoops = 0;
6987 for (;;)
6988 {
6989#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6990 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
6991#else
6992 NOREF(pCtx);
6993 bool const fInNestedGuestMode = false;
6994#endif
6995 if (!fInNestedGuestMode)
6996 {
6997 if ( !pVCpu->hm.s.fUseDebugLoop
6998 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
6999 && !DBGFIsStepping(pVCpu)
7000 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledSwBreakpoints)
7001 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
7002 else
7003 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
7004 }
7005#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7006 else
7007 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
7008
7009 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7010 {
7011 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7012 continue;
7013 }
7014 if (rcStrict == VINF_VMX_VMEXIT)
7015 {
7016 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7017 continue;
7018 }
7019#endif
7020 break;
7021 }
7022
7023 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7024 switch (rcLoop)
7025 {
7026 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7027 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7028 }
7029
7030 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7031 if (RT_FAILURE(rc2))
7032 {
7033 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7034 rcStrict = rc2;
7035 }
7036 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7037 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7038 return rcStrict;
7039}
7040