VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@ 100524

Last change on this file since 100524 was 99739, checked in by vboxsync, 20 months ago

*: doxygen corrections (mostly about removing @returns from functions returning void).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 281.4 KB
Line 
1/* $Id: HMVMXR0.cpp 99739 2023-05-11 01:01:08Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2023 Oracle and/or its affiliates.
8 *
9 * This file is part of VirtualBox base platform packages, as
10 * available from https://www.virtualbox.org.
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation, in version 3 of the
15 * License.
16 *
17 * This program is distributed in the hope that it will be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 * General Public License for more details.
21 *
22 * You should have received a copy of the GNU General Public License
23 * along with this program; if not, see <https://www.gnu.org/licenses>.
24 *
25 * SPDX-License-Identifier: GPL-3.0-only
26 */
27
28
29/*********************************************************************************************************************************
30* Header Files *
31*********************************************************************************************************************************/
32#define LOG_GROUP LOG_GROUP_HM
33#define VMCPU_INCL_CPUM_GST_CTX
34#include <iprt/x86.h>
35#include <iprt/asm-amd64-x86.h>
36#include <iprt/thread.h>
37#include <iprt/mem.h>
38#include <iprt/mp.h>
39
40#include <VBox/vmm/pdmapi.h>
41#include <VBox/vmm/dbgf.h>
42#include <VBox/vmm/iem.h>
43#include <VBox/vmm/iom.h>
44#include <VBox/vmm/tm.h>
45#include <VBox/vmm/em.h>
46#include <VBox/vmm/gcm.h>
47#include <VBox/vmm/gim.h>
48#include <VBox/vmm/apic.h>
49#include "HMInternal.h"
50#include <VBox/vmm/vmcc.h>
51#include <VBox/vmm/hmvmxinline.h>
52#include "HMVMXR0.h"
53#include "VMXInternal.h"
54#include "dtrace/VBoxVMM.h"
55
56
57/*********************************************************************************************************************************
58* Defined Constants And Macros *
59*********************************************************************************************************************************/
/*
 * Developer-specific debug switches: all of the HMVMX_ALWAYS_* macros below are
 * defined only when DEBUG_ramshankar is defined.
 */
#ifdef DEBUG_ramshankar
# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
# define HMVMX_ALWAYS_CLEAN_TRANSIENT
# define HMVMX_ALWAYS_CHECK_GUEST_STATE
# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
# define HMVMX_ALWAYS_TRAP_PF
# define HMVMX_ALWAYS_FLUSH_TLB
# define HMVMX_ALWAYS_SWAP_EFER
#endif

/** Enables the fAlwaysInterceptMovDRx related code (unconditionally defined to 1 here). */
#define VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX 1
75
76
77/*********************************************************************************************************************************
78* Structures and Typedefs *
79*********************************************************************************************************************************/
/**
 * VMX page allocation information.
 *
 * One entry describes a single page that hmR0VmxPagesAllocZ may allocate and
 * where the resulting addresses are to be stored.
 */
typedef struct
{
    uint32_t    fValid;     /**< Whether to allocate this page (e.g, based on a CPU feature). */
    uint32_t    uPadding0;  /**< Padding to ensure an array of these structs is aligned to a multiple of 8. */
    PRTHCPHYS   pHCPhys;    /**< Where to store the host-physical address of the allocation. */
    PRTR0PTR    ppVirt;     /**< Where to store the host-virtual address of the allocation. */
} VMXPAGEALLOCINFO;
/** Pointer to VMX page-allocation info. */
typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
/** Pointer to a const VMX page-allocation info. */
typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
/* Compile-time check that the structure size is a multiple of 8 bytes. */
AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
95
96
97/*********************************************************************************************************************************
98* Internal Functions *
99*********************************************************************************************************************************/
/* Forward declarations of file-local functions that are defined outside this part of the file. */
static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
static int  hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
102
103
104/*********************************************************************************************************************************
105* Global Variables *
106*********************************************************************************************************************************/
/** The DR6 value after writing zero to the register.
 * Set by VMXR0GlobalInit(); holds 0 until then. */
static uint64_t g_fDr6Zeroed = 0;
110
111
112/**
113 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
114 * @returns @c true if it's part of LBR stack, @c false otherwise.
115 *
116 * @param pVM The cross context VM structure.
117 * @param idMsr The MSR.
118 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
119 * Optional, can be NULL.
120 *
121 * @remarks Must only be called when LBR is enabled.
122 */
123DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
124{
125 Assert(pVM->hmr0.s.vmx.fLbr);
126 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
127 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
128 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
129 if (idxMsr < cLbrStack)
130 {
131 if (pidxMsr)
132 *pidxMsr = idxMsr;
133 return true;
134 }
135 return false;
136}
137
138
139/**
140 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
141 * @returns @c true if it's part of LBR stack, @c false otherwise.
142 *
143 * @param pVM The cross context VM structure.
144 * @param idMsr The MSR.
145 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
146 * Optional, can be NULL.
147 *
148 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
149 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
150 */
151DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
152{
153 Assert(pVM->hmr0.s.vmx.fLbr);
154 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
155 {
156 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
157 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
158 if (idxMsr < cLbrStack)
159 {
160 if (pidxMsr)
161 *pidxMsr = idxMsr;
162 return true;
163 }
164 }
165 return false;
166}
167
168
169/**
170 * Gets the active (in use) VMCS info. object for the specified VCPU.
171 *
172 * This is either the guest or nested-guest VMCS info. and need not necessarily
173 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
174 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
175 * current VMCS while returning to ring-3. However, the VMCS info. object for that
176 * VMCS would still be active and returned here so that we could dump the VMCS
177 * fields to ring-3 for diagnostics. This function is thus only used to
178 * distinguish between the nested-guest or guest VMCS.
179 *
180 * @returns The active VMCS information.
181 * @param pVCpu The cross context virtual CPU structure.
182 *
183 * @thread EMT.
184 * @remarks This function may be called with preemption or interrupts disabled!
185 */
186DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
187{
188 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
189 return &pVCpu->hmr0.s.vmx.VmcsInfo;
190 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
191}
192
193
194/**
195 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
196 * area.
197 *
198 * @returns @c true if it's different, @c false otherwise.
199 * @param pVmcsInfo The VMCS info. object.
200 */
201DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
202{
203 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
204 && pVmcsInfo->pvGuestMsrStore);
205}
206
207
208/**
209 * Sets the given Processor-based VM-execution controls.
210 *
211 * @param pVmxTransient The VMX-transient structure.
212 * @param uProcCtls The Processor-based VM-execution controls to set.
213 */
214static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
215{
216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
217 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
218 {
219 pVmcsInfo->u32ProcCtls |= uProcCtls;
220 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
221 AssertRC(rc);
222 }
223}
224
225
226/**
227 * Removes the given Processor-based VM-execution controls.
228 *
229 * @param pVCpu The cross context virtual CPU structure.
230 * @param pVmxTransient The VMX-transient structure.
231 * @param uProcCtls The Processor-based VM-execution controls to remove.
232 *
233 * @remarks When executing a nested-guest, this will not remove any of the specified
234 * controls if the nested hypervisor has set any one of them.
235 */
236static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
237{
238 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
239 if (pVmcsInfo->u32ProcCtls & uProcCtls)
240 {
241#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
242 if ( !pVmxTransient->fIsNestedGuest
243 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
244#else
245 NOREF(pVCpu);
246 if (!pVmxTransient->fIsNestedGuest)
247#endif
248 {
249 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
250 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
251 AssertRC(rc);
252 }
253 }
254}
255
256
257/**
258 * Sets the TSC offset for the current VMCS.
259 *
260 * @param uTscOffset The TSC offset to set.
261 * @param pVmcsInfo The VMCS info. object.
262 */
263static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
264{
265 if (pVmcsInfo->u64TscOffset != uTscOffset)
266 {
267 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
268 AssertRC(rc);
269 pVmcsInfo->u64TscOffset = uTscOffset;
270 }
271}
272
273
274/**
275 * Loads the VMCS specified by the VMCS info. object.
276 *
277 * @returns VBox status code.
278 * @param pVmcsInfo The VMCS info. object.
279 *
280 * @remarks Can be called with interrupts disabled.
281 */
282static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
283{
284 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
285 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
286
287 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
288 if (RT_SUCCESS(rc))
289 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
290 return rc;
291}
292
293
294/**
295 * Clears the VMCS specified by the VMCS info. object.
296 *
297 * @returns VBox status code.
298 * @param pVmcsInfo The VMCS info. object.
299 *
300 * @remarks Can be called with interrupts disabled.
301 */
302static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
303{
304 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
305 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
306
307 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
308 if (RT_SUCCESS(rc))
309 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
310 return rc;
311}
312
313
314/**
315 * Checks whether the MSR belongs to the set of guest MSRs that we restore
316 * lazily while leaving VT-x.
317 *
318 * @returns true if it does, false otherwise.
319 * @param pVCpu The cross context virtual CPU structure.
320 * @param idMsr The MSR to check.
321 */
322static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
323{
324 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
325 {
326 switch (idMsr)
327 {
328 case MSR_K8_LSTAR:
329 case MSR_K6_STAR:
330 case MSR_K8_SF_MASK:
331 case MSR_K8_KERNEL_GS_BASE:
332 return true;
333 }
334 }
335 return false;
336}
337
338
339/**
340 * Loads a set of guests MSRs to allow read/passthru to the guest.
341 *
342 * The name of this function is slightly confusing. This function does NOT
343 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
344 * common prefix for functions dealing with "lazy restoration" of the shared
345 * MSRs.
346 *
347 * @param pVCpu The cross context virtual CPU structure.
348 *
349 * @remarks No-long-jump zone!!!
350 */
351static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
352{
353 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
354 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
355
356 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
357 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
358 {
359 /*
360 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
361 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
362 * we can skip a few MSR writes.
363 *
364 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
365 * guest MSR values in the guest-CPU context might be different to what's currently
366 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
367 * CPU, see @bugref{8728}.
368 */
369 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
370 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
371 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
372 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
373 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
374 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
375 {
376#ifdef VBOX_STRICT
377 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
378 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
379 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
380 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
381#endif
382 }
383 else
384 {
385 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
386 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
387 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
388 /* The system call flag mask register isn't as benign and accepting of all
389 values as the above, so mask it to avoid #GP'ing on corrupted input. */
390 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
391 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
392 }
393 }
394 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
395}
396
397
398/**
399 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
400 *
401 * @returns @c true if found, @c false otherwise.
402 * @param pVmcsInfo The VMCS info. object.
403 * @param idMsr The MSR to find.
404 */
405static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
406{
407 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
408 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
409 Assert(pMsrs);
410 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
411 for (uint32_t i = 0; i < cMsrs; i++)
412 {
413 if (pMsrs[i].u32Msr == idMsr)
414 return true;
415 }
416 return false;
417}
418
419
420/**
421 * Performs lazy restoration of the set of host MSRs if they were previously
422 * loaded with guest MSR values.
423 *
424 * @param pVCpu The cross context virtual CPU structure.
425 *
426 * @remarks No-long-jump zone!!!
427 * @remarks The guest MSRs should have been saved back into the guest-CPU
428 * context by vmxHCImportGuestState()!!!
429 */
430static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
431{
432 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
433 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
434
435 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
436 {
437 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
438 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
439 {
440 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
441 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
442 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
443 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
444 }
445 }
446 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
447}
448
449
450/**
451 * Sets pfnStartVm to the best suited variant.
452 *
453 * This must be called whenever anything changes relative to the hmR0VmXStartVm
454 * variant selection:
455 * - pVCpu->hm.s.fLoadSaveGuestXcr0
456 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
457 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
458 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
459 * - Perhaps: CPUMCTX.fXStateMask (windows only)
460 *
461 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
462 * cannot be changed at runtime.
463 */
464static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
465{
466 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
467 {
468 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
469 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
470 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
471 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
472 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
473 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
474 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
475 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
476 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
477 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
478 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
479 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
480 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
481 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
482 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
483 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
484 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
485 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
486 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
487 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
488 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
489 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
490 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
491 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
492 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
493 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
494 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
495 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
496 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
497 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
498 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
499 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
500 };
501 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
502 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
503 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
504 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
505 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
506 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
507 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
508 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
509}
510
511
512/**
513 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
514 * stack.
515 *
516 * @returns Strict VBox status code (i.e. informational status codes too).
517 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
518 * @param pVCpu The cross context virtual CPU structure.
519 * @param uValue The value to push to the guest stack.
520 */
521static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
522{
523 /*
524 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
525 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
526 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
527 */
528 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
529 if (pCtx->sp == 1)
530 return VINF_EM_RESET;
531 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
532 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
533 AssertRC(rc);
534 return rc;
535}
536
537
538/**
539 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
540 * unreferenced local parameters in the template code...
541 */
542DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
543{
544 RT_NOREF(pVCpu);
545 return VMXWriteVmcs16(uFieldEnc, u16Val);
546}
547
548
549/**
550 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
551 * unreferenced local parameters in the template code...
552 */
553DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
554{
555 RT_NOREF(pVCpu);
556 return VMXWriteVmcs32(uFieldEnc, u32Val);
557}
558
559
560/**
561 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
562 * unreferenced local parameters in the template code...
563 */
564DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
565{
566 RT_NOREF(pVCpu);
567 return VMXWriteVmcs64(uFieldEnc, u64Val);
568}
569
570
571/**
572 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
573 * unreferenced local parameters in the template code...
574 */
575DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
576{
577 RT_NOREF(pVCpu);
578 return VMXReadVmcs16(uFieldEnc, pu16Val);
579}
580
581
582/**
583 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
584 * unreferenced local parameters in the template code...
585 */
586DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
587{
588 RT_NOREF(pVCpu);
589 return VMXReadVmcs32(uFieldEnc, pu32Val);
590}
591
592
593/**
594 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
595 * unreferenced local parameters in the template code...
596 */
597DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
598{
599 RT_NOREF(pVCpu);
600 return VMXReadVmcs64(uFieldEnc, pu64Val);
601}
602
603
/*
 * Instantiate the code we share with the NEM darwin backend.
 *
 * The macros below are defined before including the template and most of them
 * are undefined again right after it.
 */
/* Both the VMX state and the VMX statistics accessors resolve to the same hm.s member. */
#define VCPU_2_VMXSTATE(a_pVCpu)            (a_pVCpu)->hm.s
#define VCPU_2_VMXSTATS(a_pVCpu)            (a_pVCpu)->hm.s

/* VM-level feature predicates, all reading the ring-0 (hmr0) state. */
#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
#define VM_IS_VMX_NESTED_PAGING(a_pVM)      (a_pVM)->hmr0.s.fNestedPaging
#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
#define VM_IS_VMX_LBR(a_pVM)                (a_pVM)->hmr0.s.vmx.fLbr

/* VMCS writers; the natural-width (NW) variant maps onto the 64-bit writer here. */
#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))

/* VMCS readers; the natural-width (NW) variant maps onto the 64-bit reader here. */
#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))

#include "../VMMAll/VMXAllTemplate.cpp.h"

#undef VMX_VMCS_WRITE_16
#undef VMX_VMCS_WRITE_32
#undef VMX_VMCS_WRITE_64
#undef VMX_VMCS_WRITE_NW

#undef VMX_VMCS_READ_16
#undef VMX_VMCS_READ_32
#undef VMX_VMCS_READ_64
#undef VMX_VMCS_READ_NW

/* NOTE(review): VM_IS_VMX_LBR is defined above but is not undefined here - confirm whether that is intentional. */
#undef VM_IS_VMX_PREEMPT_TIMER_USED
#undef VM_IS_VMX_NESTED_PAGING
#undef VM_IS_VMX_UNRESTRICTED_GUEST
#undef VCPU_2_VMXSTATS
#undef VCPU_2_VMXSTATE
642
643
644/**
645 * Updates the VM's last error record.
646 *
647 * If there was a VMX instruction error, reads the error data from the VMCS and
648 * updates VCPU's last error record as well.
649 *
650 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
651 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
652 * VERR_VMX_INVALID_VMCS_FIELD.
653 * @param rc The error code.
654 */
655static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
656{
657 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
658 || rc == VERR_VMX_UNABLE_TO_START_VM)
659 {
660 AssertPtrReturnVoid(pVCpu);
661 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
662 }
663 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
664}
665
666
667/**
668 * Enters VMX root mode operation on the current CPU.
669 *
670 * @returns VBox status code.
671 * @param pHostCpu The HM physical-CPU structure.
672 * @param pVM The cross context VM structure. Can be
673 * NULL, after a resume.
674 * @param HCPhysCpuPage Physical address of the VMXON region.
675 * @param pvCpuPage Pointer to the VMXON region.
676 */
677static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
678{
679 Assert(pHostCpu);
680 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
681 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
682 Assert(pvCpuPage);
683 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
684
685 if (pVM)
686 {
687 /* Write the VMCS revision identifier to the VMXON region. */
688 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
689 }
690
691 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
692 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
693
694 /* Enable the VMX bit in CR4 if necessary. */
695 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
696
697 /* Record whether VMXE was already prior to us enabling it above. */
698 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
699
700 /* Enter VMX root mode. */
701 int rc = VMXEnable(HCPhysCpuPage);
702 if (RT_FAILURE(rc))
703 {
704 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
705 if (!pHostCpu->fVmxeAlreadyEnabled)
706 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
707
708 if (pVM)
709 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
710 }
711
712 /* Restore interrupts. */
713 ASMSetFlags(fEFlags);
714 return rc;
715}
716
717
718/**
719 * Exits VMX root mode operation on the current CPU.
720 *
721 * @returns VBox status code.
722 * @param pHostCpu The HM physical-CPU structure.
723 */
724static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
725{
726 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
727
728 /* Paranoid: Disable interrupts as, in theory, interrupts handlers might mess with CR4. */
729 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
730
731 /* If we're for some reason not in VMX root mode, then don't leave it. */
732 RTCCUINTREG const uHostCr4 = ASMGetCR4();
733
734 int rc;
735 if (uHostCr4 & X86_CR4_VMXE)
736 {
737 /* Exit VMX root mode and clear the VMX bit in CR4. */
738 VMXDisable();
739
740 /* Clear CR4.VMXE only if it was clear prior to use setting it. */
741 if (!pHostCpu->fVmxeAlreadyEnabled)
742 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
743
744 rc = VINF_SUCCESS;
745 }
746 else
747 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
748
749 /* Restore interrupts. */
750 ASMSetFlags(fEFlags);
751 return rc;
752}
753
754
755/**
756 * Allocates pages specified as specified by an array of VMX page allocation info
757 * objects.
758 *
759 * The pages contents are zero'd after allocation.
760 *
761 * @returns VBox status code.
762 * @param phMemObj Where to return the handle to the allocation.
763 * @param paAllocInfo The pointer to the first element of the VMX
764 * page-allocation info object array.
765 * @param cEntries The number of elements in the @a paAllocInfo array.
766 */
767static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
768{
769 *phMemObj = NIL_RTR0MEMOBJ;
770
771 /* Figure out how many pages to allocate. */
772 uint32_t cPages = 0;
773 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
774 cPages += !!paAllocInfo[iPage].fValid;
775
776 /* Allocate the pages. */
777 if (cPages)
778 {
779 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
780 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
781 if (RT_FAILURE(rc))
782 return rc;
783
784 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
785 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
786 RT_BZERO(pvFirstPage, cbPages);
787
788 uint32_t iPage = 0;
789 for (uint32_t i = 0; i < cEntries; i++)
790 if (paAllocInfo[i].fValid)
791 {
792 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
793 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
794 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
795 AssertPtr(pvPage);
796
797 Assert(paAllocInfo[iPage].pHCPhys);
798 Assert(paAllocInfo[iPage].ppVirt);
799 *paAllocInfo[iPage].pHCPhys = HCPhysPage;
800 *paAllocInfo[iPage].ppVirt = pvPage;
801
802 /* Move to next page. */
803 ++iPage;
804 }
805
806 /* Make sure all valid (requested) pages have been assigned. */
807 Assert(iPage == cPages);
808 }
809 return VINF_SUCCESS;
810}
811
812
813/**
814 * Frees pages allocated using hmR0VmxPagesAllocZ.
815 *
816 * @param phMemObj Pointer to the memory object handle. Will be set to
817 * NIL.
818 */
819DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
820{
821 /* We can cleanup wholesale since it's all one allocation. */
822 if (*phMemObj != NIL_RTR0MEMOBJ)
823 {
824 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
825 *phMemObj = NIL_RTR0MEMOBJ;
826 }
827}
828
829
830/**
831 * Initializes a VMCS info. object.
832 *
833 * @param pVmcsInfo The VMCS info. object.
834 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
835 */
836static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
837{
838 RT_ZERO(*pVmcsInfo);
839 RT_ZERO(*pVmcsInfoShared);
840
841 pVmcsInfo->pShared = pVmcsInfoShared;
842 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
843 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
844 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
845 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
846 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
847 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
848 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
849 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
850 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
851 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
852 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
853 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
854}
855
856
857/**
858 * Frees the VT-x structures for a VMCS info. object.
859 *
860 * @param pVmcsInfo The VMCS info. object.
861 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
862 */
863static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
864{
865 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
866 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
867}
868
869
870/**
871 * Allocates the VT-x structures for a VMCS info. object.
872 *
873 * @returns VBox status code.
874 * @param pVCpu The cross context virtual CPU structure.
875 * @param pVmcsInfo The VMCS info. object.
876 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
877 *
878 * @remarks The caller is expected to take care of any and all allocation failures.
879 * This function will not perform any cleanup for failures half-way
880 * through.
881 */
882static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
883{
884 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
885
886 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
887 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
888 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
889 VMXPAGEALLOCINFO aAllocInfo[] =
890 {
891 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
892 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
893 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
894 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
895 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
896 };
897
898 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
899 if (RT_FAILURE(rc))
900 return rc;
901
902 /*
903 * We use the same page for VM-entry MSR-load and VM-exit MSR store areas.
904 * Because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
905 */
906 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
907 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
908 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
909 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
910
911 /*
912 * Get the virtual-APIC page rather than allocating them again.
913 */
914 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
915 {
916 if (!fIsNstGstVmcs)
917 {
918 if (PDMHasApic(pVM))
919 {
920 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
921 if (RT_FAILURE(rc))
922 return rc;
923 Assert(pVmcsInfo->pbVirtApic);
924 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
925 }
926 }
927 else
928 {
929 /* These are setup later while marging the nested-guest VMCS. */
930 Assert(pVmcsInfo->pbVirtApic == NULL);
931 Assert(pVmcsInfo->HCPhysVirtApic == NIL_RTHCPHYS);
932 }
933 }
934
935 return VINF_SUCCESS;
936}
937
938
939/**
940 * Free all VT-x structures for the VM.
941 *
942 * @param pVM The cross context VM structure.
943 */
944static void hmR0VmxStructsFree(PVMCC pVM)
945{
946 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
947#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
948 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
949 {
950 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
951 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
952 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
953 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
954 }
955#endif
956
957 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
958 {
959 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
960 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
961#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
962 if (pVM->cpum.ro.GuestFeatures.fVmx)
963 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
964#endif
965 }
966}
967
968
969/**
970 * Allocate all VT-x structures for the VM.
971 *
972 * @returns IPRT status code.
973 * @param pVM The cross context VM structure.
974 *
975 * @remarks This functions will cleanup on memory allocation failures.
976 */
977static int hmR0VmxStructsAlloc(PVMCC pVM)
978{
979 /*
980 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
981 * The VMCS size cannot be more than 4096 bytes.
982 *
983 * See Intel spec. Appendix A.1 "Basic VMX Information".
984 */
985 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
986 if (cbVmcs <= X86_PAGE_4K_SIZE)
987 { /* likely */ }
988 else
989 {
990 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
991 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
992 }
993
994 /*
995 * Allocate per-VM VT-x structures.
996 */
997 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
998 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
999 VMXPAGEALLOCINFO aAllocInfo[] =
1000 {
1001 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
1002 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
1003 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
1004#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1005 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
1006#endif
1007 };
1008
1009 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
1010 if (RT_SUCCESS(rc))
1011 {
1012#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1013 /* Allocate the shadow VMCS-fields array. */
1014 if (fUseVmcsShadowing)
1015 {
1016 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
1017 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
1018 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1019 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1020 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1021 rc = VERR_NO_MEMORY;
1022 }
1023#endif
1024
1025 /*
1026 * Allocate per-VCPU VT-x structures.
1027 */
1028 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1029 {
1030 /* Allocate the guest VMCS structures. */
1031 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1032 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1033
1034#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1035 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1036 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1037 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1038#endif
1039 }
1040 if (RT_SUCCESS(rc))
1041 return VINF_SUCCESS;
1042 }
1043 hmR0VmxStructsFree(pVM);
1044 return rc;
1045}
1046
1047
1048/**
1049 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1050 *
1051 * @param pVM The cross context VM structure.
1052 */
1053static void hmR0VmxStructsInit(PVMCC pVM)
1054{
1055 /* Paranoia. */
1056 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1057#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1058 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1059#endif
1060
1061 /*
1062 * Initialize members up-front so we can cleanup en masse on allocation failures.
1063 */
1064#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1065 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1066#endif
1067 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1068 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1069 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1070 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1071 {
1072 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1073 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1074 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1075 }
1076}
1077
1078#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1079/**
1080 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1081 *
1082 * @returns @c true if the MSR is intercepted, @c false otherwise.
1083 * @param pbMsrBitmap The MSR bitmap.
1084 * @param offMsr The MSR byte offset.
1085 * @param iBit The bit offset from the byte offset.
1086 */
1087DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1088{
1089 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1090 return ASMBitTest(pbMsrBitmap, (offMsr << 3) + iBit);
1091}
1092#endif
1093
1094/**
1095 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1096 *
1097 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1098 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1099 * VMX execution of the nested-guest, only if nested-guest is also not intercepting
1100 * the read/write access of this MSR.
1101 *
1102 * @param pVCpu The cross context virtual CPU structure.
1103 * @param pVmcsInfo The VMCS info. object.
1104 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1105 * @param idMsr The MSR value.
1106 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1107 * include both a read -and- a write permission!
1108 *
1109 * @sa CPUMGetVmxMsrPermission.
1110 * @remarks Can be called with interrupts disabled.
1111 */
1112static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1113{
1114 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1115 Assert(pbMsrBitmap);
1116 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1117
1118 /*
1119 * MSR-bitmap Layout:
1120 * Byte index MSR range Interpreted as
1121 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1122 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1123 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1124 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1125 *
1126 * A bit corresponding to an MSR within the above range causes a VM-exit
1127 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls out of
1128 * the MSR range, it always cause a VM-exit.
1129 *
1130 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1131 */
1132 uint16_t const offBitmapRead = 0;
1133 uint16_t const offBitmapWrite = 0x800;
1134 uint16_t offMsr;
1135 int32_t iBit;
1136 if (idMsr <= UINT32_C(0x00001fff))
1137 {
1138 offMsr = 0;
1139 iBit = idMsr;
1140 }
1141 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1142 {
1143 offMsr = 0x400;
1144 iBit = idMsr - UINT32_C(0xc0000000);
1145 }
1146 else
1147 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1148
1149 /*
1150 * Set the MSR read permission.
1151 */
1152 uint16_t const offMsrRead = offBitmapRead + offMsr;
1153 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1154 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1155 {
1156#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1157 bool const fClear = !fIsNstGstVmcs ? true
1158 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1159#else
1160 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1161 bool const fClear = true;
1162#endif
1163 if (fClear)
1164 ASMBitClear(pbMsrBitmap, (offMsrRead << 3) + iBit);
1165 }
1166 else
1167 ASMBitSet(pbMsrBitmap, (offMsrRead << 3) + iBit);
1168
1169 /*
1170 * Set the MSR write permission.
1171 */
1172 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1173 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1174 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1175 {
1176#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1177 bool const fClear = !fIsNstGstVmcs ? true
1178 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1179#else
1180 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1181 bool const fClear = true;
1182#endif
1183 if (fClear)
1184 ASMBitClear(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1185 }
1186 else
1187 ASMBitSet(pbMsrBitmap, (offMsrWrite << 3) + iBit);
1188}
1189
1190
1191/**
1192 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1193 * area.
1194 *
1195 * @returns VBox status code.
1196 * @param pVCpu The cross context virtual CPU structure.
1197 * @param pVmcsInfo The VMCS info. object.
1198 * @param cMsrs The number of MSRs.
1199 */
1200static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1201{
1202 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1203 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1204 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1205 {
1206 /* Commit the MSR counts to the VMCS and update the cache. */
1207 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1208 {
1209 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1210 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1211 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1212 pVmcsInfo->cEntryMsrLoad = cMsrs;
1213 pVmcsInfo->cExitMsrStore = cMsrs;
1214 pVmcsInfo->cExitMsrLoad = cMsrs;
1215 }
1216 return VINF_SUCCESS;
1217 }
1218
1219 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1220 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1221 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1222}
1223
1224
1225/**
1226 * Adds a new (or updates the value of an existing) guest/host MSR
1227 * pair to be swapped during the world-switch as part of the
1228 * auto-load/store MSR area in the VMCS.
1229 *
1230 * @returns VBox status code.
1231 * @param pVCpu The cross context virtual CPU structure.
1232 * @param pVmxTransient The VMX-transient structure.
1233 * @param idMsr The MSR.
1234 * @param uGuestMsrValue Value of the guest MSR.
1235 * @param fSetReadWrite Whether to set the guest read/write access of this
1236 * MSR (thus not causing a VM-exit).
1237 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1238 * necessary.
1239 */
1240static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1241 bool fSetReadWrite, bool fUpdateHostMsr)
1242{
1243 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1244 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1245 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1246 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1247 uint32_t i;
1248
1249 /* Paranoia. */
1250 Assert(pGuestMsrLoad);
1251
1252#ifndef DEBUG_bird
1253 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1254#endif
1255
1256 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1257 for (i = 0; i < cMsrs; i++)
1258 {
1259 if (pGuestMsrLoad[i].u32Msr == idMsr)
1260 break;
1261 }
1262
1263 bool fAdded = false;
1264 if (i == cMsrs)
1265 {
1266 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1267 ++cMsrs;
1268 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1269 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1270
1271 /* Set the guest to read/write this MSR without causing VM-exits. */
1272 if ( fSetReadWrite
1273 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1274 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1275
1276 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1277 fAdded = true;
1278 }
1279
1280 /* Update the MSR value for the newly added or already existing MSR. */
1281 pGuestMsrLoad[i].u32Msr = idMsr;
1282 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1283
1284 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1285 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1286 {
1287 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1288 pGuestMsrStore[i].u32Msr = idMsr;
1289 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1290 }
1291
1292 /* Update the corresponding slot in the host MSR area. */
1293 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1294 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1295 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1296 pHostMsr[i].u32Msr = idMsr;
1297
1298 /*
1299 * Only if the caller requests to update the host MSR value AND we've newly added the
1300 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1301 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1302 *
1303 * We do this for performance reasons since reading MSRs may be quite expensive.
1304 */
1305 if (fAdded)
1306 {
1307 if (fUpdateHostMsr)
1308 {
1309 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1310 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1311 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1312 }
1313 else
1314 {
1315 /* Someone else can do the work. */
1316 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1317 }
1318 }
1319 return VINF_SUCCESS;
1320}
1321
1322
1323/**
1324 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1325 * auto-load/store MSR area in the VMCS.
1326 *
1327 * @returns VBox status code.
1328 * @param pVCpu The cross context virtual CPU structure.
1329 * @param pVmxTransient The VMX-transient structure.
1330 * @param idMsr The MSR.
1331 */
1332static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1333{
1334 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1335 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1336 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1337 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1338
1339#ifndef DEBUG_bird
1340 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1341#endif
1342
1343 for (uint32_t i = 0; i < cMsrs; i++)
1344 {
1345 /* Find the MSR. */
1346 if (pGuestMsrLoad[i].u32Msr == idMsr)
1347 {
1348 /*
1349 * If it's the last MSR, we only need to reduce the MSR count.
1350 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1351 */
1352 if (i < cMsrs - 1)
1353 {
1354 /* Remove it from the VM-entry MSR-load area. */
1355 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1356 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1357
1358 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1359 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1360 {
1361 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1362 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1363 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1364 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1365 }
1366
1367 /* Remove it from the VM-exit MSR-load area. */
1368 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1369 Assert(pHostMsr[i].u32Msr == idMsr);
1370 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1371 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1372 }
1373
1374 /* Reduce the count to reflect the removed MSR and bail. */
1375 --cMsrs;
1376 break;
1377 }
1378 }
1379
1380 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1381 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1382 {
1383 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1384 AssertRCReturn(rc, rc);
1385
1386 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1387 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1388 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1389
1390 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1391 return VINF_SUCCESS;
1392 }
1393
1394 return VERR_NOT_FOUND;
1395}
1396
1397
1398/**
1399 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1400 *
1401 * @param pVCpu The cross context virtual CPU structure.
1402 * @param pVmcsInfo The VMCS info. object.
1403 *
1404 * @remarks No-long-jump zone!!!
1405 */
1406static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1407{
1408 RT_NOREF(pVCpu);
1409 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1410
1411 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1412 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1413 Assert(pHostMsrLoad);
1414 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1415 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1416 for (uint32_t i = 0; i < cMsrs; i++)
1417 {
1418 /*
1419 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1420 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1421 */
1422 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1423 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1424 else
1425 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1426 }
1427}
1428
1429
1430/**
1431 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1432 * perform lazy restoration of the host MSRs while leaving VT-x.
1433 *
1434 * @param pVCpu The cross context virtual CPU structure.
1435 *
1436 * @remarks No-long-jump zone!!!
1437 */
1438static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1439{
1440 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1441
1442 /*
1443 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1444 */
1445 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1446 {
1447 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1448 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1449 {
1450 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1451 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1452 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1453 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1454 }
1455 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1456 }
1457}
1458
1459
1460#ifdef VBOX_STRICT
1461
1462/**
1463 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1464 *
1465 * @param pVmcsInfo The VMCS info. object.
1466 */
1467static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1468{
1469 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1470
1471 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1472 {
1473 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1474 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1475 uint64_t uVmcsEferMsrVmcs;
1476 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1477 AssertRC(rc);
1478
1479 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1480 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1481 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1482 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1483 }
1484}
1485
1486
1487/**
1488 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1489 * VMCS are correct.
1490 *
1491 * @param pVCpu The cross context virtual CPU structure.
1492 * @param pVmcsInfo The VMCS info. object.
1493 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1494 */
1495static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1496{
1497 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1498
1499 /* Read the various MSR-area counts from the VMCS. */
1500 uint32_t cEntryLoadMsrs;
1501 uint32_t cExitStoreMsrs;
1502 uint32_t cExitLoadMsrs;
1503 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1504 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1505 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1506
1507 /* Verify all the MSR counts are the same. */
1508 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1509 Assert(cExitStoreMsrs == cExitLoadMsrs);
1510 uint32_t const cMsrs = cExitLoadMsrs;
1511
1512 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1513 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1514
1515 /* Verify the MSR counts are within the allocated page size. */
1516 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1517
1518 /* Verify the relevant contents of the MSR areas match. */
1519 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1520 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1521 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1522 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1523 for (uint32_t i = 0; i < cMsrs; i++)
1524 {
1525 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1526 if (fSeparateExitMsrStorePage)
1527 {
1528 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1529 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1530 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1531 }
1532
1533 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1534 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1535 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1536
1537 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1538 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1539 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1540 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1541
1542 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1543 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1544 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1545 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1546
1547 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1548 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1549 {
1550 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1551 if (fIsEferMsr)
1552 {
1553 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1554 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1555 }
1556 else
1557 {
1558 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1559 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1560 if ( pVM->hmr0.s.vmx.fLbr
1561 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1562 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1563 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1564 {
1565 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1566 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1567 pGuestMsrLoad->u32Msr, cMsrs));
1568 }
1569 else if (!fIsNstGstVmcs)
1570 {
1571 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1572 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1573 }
1574 else
1575 {
1576 /*
1577 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1578 * execute a nested-guest with MSR passthrough.
1579 *
1580 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1581 * allow passthrough too.
1582 */
1583 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1584 Assert(pvMsrBitmapNstGst);
1585 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1586 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1587 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1588 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1589 }
1590 }
1591 }
1592
1593 /* Move to the next MSR. */
1594 pHostMsrLoad++;
1595 pGuestMsrLoad++;
1596 pGuestMsrStore++;
1597 }
1598}
1599
1600#endif /* VBOX_STRICT */
1601
1602/**
1603 * Flushes the TLB using EPT.
1604 *
1605 * @param pVCpu The cross context virtual CPU structure of the calling
1606 * EMT. Can be NULL depending on @a enmTlbFlush.
1607 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1608 * enmTlbFlush.
1609 * @param enmTlbFlush Type of flush.
1610 *
1611 * @remarks Caller is responsible for making sure this function is called only
1612 * when NestedPaging is supported and providing @a enmTlbFlush that is
1613 * supported by the CPU.
1614 * @remarks Can be called with interrupts disabled.
1615 */
1616static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1617{
1618 uint64_t au64Descriptor[2];
1619 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1620 au64Descriptor[0] = 0;
1621 else
1622 {
1623 Assert(pVCpu);
1624 Assert(pVmcsInfo);
1625 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1626 }
1627 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1628
1629 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1630 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1631
1632 if ( RT_SUCCESS(rc)
1633 && pVCpu)
1634 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1635}
1636
1637
1638/**
1639 * Flushes the TLB using VPID.
1640 *
1641 * @param pVCpu The cross context virtual CPU structure of the calling
1642 * EMT. Can be NULL depending on @a enmTlbFlush.
1643 * @param enmTlbFlush Type of flush.
1644 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1645 * on @a enmTlbFlush).
1646 *
1647 * @remarks Can be called with interrupts disabled.
1648 */
1649static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1650{
1651 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1652
1653 uint64_t au64Descriptor[2];
1654 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1655 {
1656 au64Descriptor[0] = 0;
1657 au64Descriptor[1] = 0;
1658 }
1659 else
1660 {
1661 AssertPtr(pVCpu);
1662 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1663 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1664 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1665 au64Descriptor[1] = GCPtr;
1666 }
1667
1668 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1669 AssertMsg(rc == VINF_SUCCESS,
1670 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1671
1672 if ( RT_SUCCESS(rc)
1673 && pVCpu)
1674 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1675 NOREF(rc);
1676}
1677
1678
1679/**
1680 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1681 * otherwise there is nothing really to invalidate.
1682 *
1683 * @returns VBox status code.
1684 * @param pVCpu The cross context virtual CPU structure.
1685 * @param GCVirt Guest virtual address of the page to invalidate.
1686 */
1687VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1688{
1689 AssertPtr(pVCpu);
1690 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1691
1692 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1693 {
1694 /*
1695 * We must invalidate the guest TLB entry in either case, we cannot ignore it even for
1696 * the EPT case. See @bugref{6043} and @bugref{6177}.
1697 *
1698 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1699 * as this function maybe called in a loop with individual addresses.
1700 */
1701 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1702 if (pVM->hmr0.s.vmx.fVpid)
1703 {
1704 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1705 {
1706 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1707 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1708 }
1709 else
1710 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1711 }
1712 else if (pVM->hmr0.s.fNestedPaging)
1713 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1714 }
1715
1716 return VINF_SUCCESS;
1717}
1718
1719
1720/**
1721 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1722 * case where neither EPT nor VPID is supported by the CPU.
1723 *
1724 * @param pHostCpu The HM physical-CPU structure.
1725 * @param pVCpu The cross context virtual CPU structure.
1726 *
1727 * @remarks Called with interrupts disabled.
1728 */
1729static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1730{
1731 AssertPtr(pVCpu);
1732 AssertPtr(pHostCpu);
1733
1734 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1735
1736 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1737 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1738 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1739 pVCpu->hmr0.s.fForceTLBFlush = false;
1740 return;
1741}
1742
1743
1744/**
1745 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1746 *
1747 * @param pHostCpu The HM physical-CPU structure.
1748 * @param pVCpu The cross context virtual CPU structure.
1749 * @param pVmcsInfo The VMCS info. object.
1750 *
1751 * @remarks All references to "ASID" in this function pertains to "VPID" in Intel's
1752 * nomenclature. The reason is, to avoid confusion in compare statements
1753 * since the host-CPU copies are named "ASID".
1754 *
1755 * @remarks Called with interrupts disabled.
1756 */
1757static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1758{
1759#ifdef VBOX_WITH_STATISTICS
1760 bool fTlbFlushed = false;
1761# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1762# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1763 if (!fTlbFlushed) \
1764 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1765 } while (0)
1766#else
1767# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1768# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1769#endif
1770
1771 AssertPtr(pVCpu);
1772 AssertPtr(pHostCpu);
1773 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1774
1775 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1776 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1777 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1778 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1779
1780 /*
1781 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1782 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1783 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1784 * cannot reuse the current ASID anymore.
1785 */
1786 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1787 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1788 {
1789 ++pHostCpu->uCurrentAsid;
1790 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1791 {
1792 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1793 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1794 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1795 }
1796
1797 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1798 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1799 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1800
1801 /*
1802 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1803 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1804 */
1805 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1806 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1807 HMVMX_SET_TAGGED_TLB_FLUSHED();
1808 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1809 }
1810 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1811 {
1812 /*
1813 * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU
1814 * creates guest-physical (ie. only EPT-tagged) mappings while traversing the EPT
1815 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1816 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1817 * mappings, see @bugref{6568}.
1818 *
1819 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1820 */
1821 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1822 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1823 HMVMX_SET_TAGGED_TLB_FLUSHED();
1824 }
1825 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1826 {
1827 /*
1828 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1829 * address which requires flushing the TLB of EPT cached structures.
1830 *
1831 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1832 */
1833 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1834 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1835 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1836 HMVMX_SET_TAGGED_TLB_FLUSHED();
1837 }
1838
1839
1840 pVCpu->hmr0.s.fForceTLBFlush = false;
1841 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1842
1843 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1844 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1845 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1846 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1847 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1848 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1849 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1850 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1851 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1852
1853 /* Update VMCS with the VPID. */
1854 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1855 AssertRC(rc);
1856
1857#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1858}
1859
1860
1861/**
1862 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1863 *
1864 * @param pHostCpu The HM physical-CPU structure.
1865 * @param pVCpu The cross context virtual CPU structure.
1866 * @param pVmcsInfo The VMCS info. object.
1867 *
1868 * @remarks Called with interrupts disabled.
1869 */
1870static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1871{
1872 AssertPtr(pVCpu);
1873 AssertPtr(pHostCpu);
1874 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1875 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1876 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1877
1878 /*
1879 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1880 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1881 */
1882 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1883 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1884 {
1885 pVCpu->hmr0.s.fForceTLBFlush = true;
1886 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1887 }
1888
1889 /* Check for explicit TLB flushes. */
1890 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1891 {
1892 pVCpu->hmr0.s.fForceTLBFlush = true;
1893 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1894 }
1895
1896 /* Check for TLB flushes while switching to/from a nested-guest. */
1897 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1898 {
1899 pVCpu->hmr0.s.fForceTLBFlush = true;
1900 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1901 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1902 }
1903
1904 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1905 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1906
1907 if (pVCpu->hmr0.s.fForceTLBFlush)
1908 {
1909 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1910 pVCpu->hmr0.s.fForceTLBFlush = false;
1911 }
1912}
1913
1914
1915/**
1916 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1917 *
1918 * @param pHostCpu The HM physical-CPU structure.
1919 * @param pVCpu The cross context virtual CPU structure.
1920 *
1921 * @remarks Called with interrupts disabled.
1922 */
1923static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1924{
1925 AssertPtr(pVCpu);
1926 AssertPtr(pHostCpu);
1927 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1928 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1929 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1930
1931 /*
1932 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1933 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1934 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1935 * cannot reuse the current ASID anymore.
1936 */
1937 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1938 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1939 {
1940 pVCpu->hmr0.s.fForceTLBFlush = true;
1941 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1942 }
1943
1944 /* Check for explicit TLB flushes. */
1945 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1946 {
1947 /*
1948 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1949 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1950 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1951 * include fExplicitFlush's too) - an obscure corner case.
1952 */
1953 pVCpu->hmr0.s.fForceTLBFlush = true;
1954 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1955 }
1956
1957 /* Check for TLB flushes while switching to/from a nested-guest. */
1958 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1959 {
1960 pVCpu->hmr0.s.fForceTLBFlush = true;
1961 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1962 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1963 }
1964
1965 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1966 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1967 if (pVCpu->hmr0.s.fForceTLBFlush)
1968 {
1969 ++pHostCpu->uCurrentAsid;
1970 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1971 {
1972 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1973 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1974 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1975 }
1976
1977 pVCpu->hmr0.s.fForceTLBFlush = false;
1978 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1979 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1980 if (pHostCpu->fFlushAsidBeforeUse)
1981 {
1982 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1983 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1984 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1985 {
1986 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1987 pHostCpu->fFlushAsidBeforeUse = false;
1988 }
1989 else
1990 {
1991 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1992 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1993 }
1994 }
1995 }
1996
1997 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1998 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1999 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
2000 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
2001 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
2002 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
2003 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
2004
2005 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
2006 AssertRC(rc);
2007}
2008
2009
2010/**
2011 * Flushes the guest TLB entry based on CPU capabilities.
2012 *
2013 * @param pHostCpu The HM physical-CPU structure.
2014 * @param pVCpu The cross context virtual CPU structure.
2015 * @param pVmcsInfo The VMCS info. object.
2016 *
2017 * @remarks Called with interrupts disabled.
2018 */
2019static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2020{
2021#ifdef HMVMX_ALWAYS_FLUSH_TLB
2022 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2023#endif
2024 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2025 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2026 {
2027 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2028 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2029 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2030 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2031 default:
2032 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2033 break;
2034 }
2035 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2036}
2037
2038
2039/**
2040 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2041 * TLB entries from the host TLB before VM-entry.
2042 *
2043 * @returns VBox status code.
2044 * @param pVM The cross context VM structure.
2045 */
2046static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2047{
2048 /*
2049 * Determine optimal flush type for nested paging.
2050 * We cannot ignore EPT if no suitable flush-types is supported by the CPU as we've already setup
2051 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2052 */
2053 if (pVM->hmr0.s.fNestedPaging)
2054 {
2055 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2056 {
2057 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2058 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2059 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2060 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2061 else
2062 {
2063 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2064 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2065 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2066 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2067 }
2068
2069 /* Make sure the write-back cacheable memory type for EPT is supported. */
2070 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2071 {
2072 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2073 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2074 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2075 }
2076
2077 /* EPT requires a page-walk length of 4. */
2078 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2079 {
2080 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2081 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2082 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2083 }
2084 }
2085 else
2086 {
2087 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2088 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2089 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2090 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2091 }
2092 }
2093
2094 /*
2095 * Determine optimal flush type for VPID.
2096 */
2097 if (pVM->hmr0.s.vmx.fVpid)
2098 {
2099 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2100 {
2101 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2102 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2103 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2104 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2105 else
2106 {
2107 /* Neither SINGLE nor ALL-context flush types for VPID is supported by the CPU. Ignore VPID capability. */
2108 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2109 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2110 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2111 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2112 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2113 pVM->hmr0.s.vmx.fVpid = false;
2114 }
2115 }
2116 else
2117 {
2118 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2119 Log4Func(("VPID supported without INVEPT support. Ignoring VPID.\n"));
2120 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2121 pVM->hmr0.s.vmx.fVpid = false;
2122 }
2123 }
2124
2125 /*
2126 * Setup the handler for flushing tagged-TLBs.
2127 */
2128 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2129 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2130 else if (pVM->hmr0.s.fNestedPaging)
2131 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2132 else if (pVM->hmr0.s.vmx.fVpid)
2133 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2134 else
2135 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2136
2137
2138 /*
2139 * Copy out the result to ring-3.
2140 */
2141 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2142 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2143 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2144 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2145 return VINF_SUCCESS;
2146}
2147
2148
2149/**
2150 * Sets up the LBR MSR ranges based on the host CPU.
2151 *
2152 * @returns VBox status code.
2153 * @param pVM The cross context VM structure.
2154 *
2155 * @sa nemR3DarwinSetupLbrMsrRange
2156 */
2157static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2158{
2159 Assert(pVM->hmr0.s.vmx.fLbr);
2160 uint32_t idLbrFromIpMsrFirst;
2161 uint32_t idLbrFromIpMsrLast;
2162 uint32_t idLbrToIpMsrFirst;
2163 uint32_t idLbrToIpMsrLast;
2164 uint32_t idLbrTosMsr;
2165
2166 /*
2167 * Determine the LBR MSRs supported for this host CPU family and model.
2168 *
2169 * See Intel spec. 17.4.8 "LBR Stack".
2170 * See Intel "Model-Specific Registers" spec.
2171 */
2172 uint32_t const uFamilyModel = (g_CpumHostFeatures.s.uFamily << 8)
2173 | g_CpumHostFeatures.s.uModel;
2174 switch (uFamilyModel)
2175 {
2176 case 0x0f01: case 0x0f02:
2177 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2178 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2179 idLbrToIpMsrFirst = 0x0;
2180 idLbrToIpMsrLast = 0x0;
2181 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2182 break;
2183
2184 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2185 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2186 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2187 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2188 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2189 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2190 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2191 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2192 break;
2193
2194 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2195 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2196 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2197 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2198 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2199 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2200 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2201 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2202 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2203 break;
2204
2205 case 0x0617: case 0x061d: case 0x060f:
2206 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2207 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2208 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2209 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2210 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2211 break;
2212
2213 /* Atom and related microarchitectures we don't care about:
2214 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2215 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2216 case 0x0636: */
2217 /* All other CPUs: */
2218 default:
2219 {
2220 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2221 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2222 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2223 }
2224 }
2225
2226 /*
2227 * Validate.
2228 */
2229 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2230 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2231 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2232 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2233 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2234 {
2235 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2236 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2237 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2238 }
2239 NOREF(pVCpu0);
2240
2241 /*
2242 * Update the LBR info. to the VM struct. for use later.
2243 */
2244 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2245
2246 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2247 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2248
2249 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2250 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2251 return VINF_SUCCESS;
2252}
2253
2254#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2255
2256/**
2257 * Sets up the shadow VMCS fields arrays.
2258 *
2259 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2260 * executing the guest.
2261 *
2262 * @returns VBox status code.
2263 * @param pVM The cross context VM structure.
2264 */
2265static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2266{
2267 /*
2268 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2269 * when the host does not support it.
2270 */
2271 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2272 if ( !fGstVmwriteAll
2273 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2274 { /* likely. */ }
2275 else
2276 {
2277 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2278 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2279 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2280 }
2281
2282 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2283 uint32_t cRwFields = 0;
2284 uint32_t cRoFields = 0;
2285 for (uint32_t i = 0; i < cVmcsFields; i++)
2286 {
2287 VMXVMCSFIELD VmcsField;
2288 VmcsField.u = g_aVmcsFields[i];
2289
2290 /*
2291 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2292 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2293 * in the shadow VMCS fields array as they would be redundant.
2294 *
2295 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2296 * we must not include it in the shadow VMCS fields array. Guests attempting to
2297 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2298 * the required behavior.
2299 */
2300 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2301 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2302 {
2303 /*
2304 * Read-only fields are placed in a separate array so that while syncing shadow
2305 * VMCS fields later (which is more performance critical) we can avoid branches.
2306 *
2307 * However, if the guest can write to all fields (including read-only fields),
2308 * we treat it a as read/write field. Otherwise, writing to these fields would
2309 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2310 */
2311 if ( fGstVmwriteAll
2312 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2313 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2314 else
2315 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2316 }
2317 }
2318
2319 /* Update the counts. */
2320 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2321 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2322 return VINF_SUCCESS;
2323}
2324
2325
2326/**
2327 * Sets up the VMREAD and VMWRITE bitmaps.
2328 *
2329 * @param pVM The cross context VM structure.
2330 */
2331static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2332{
2333 /*
2334 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2335 */
2336 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2337 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2338 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2339 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2340 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2341
2342 /*
2343 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2344 * VMREAD and VMWRITE bitmaps.
2345 */
2346 {
2347 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2348 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2349 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2350 {
2351 uint32_t const uVmcsField = paShadowVmcsFields[i];
2352 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2353 Assert(uVmcsField >> 3 < cbBitmap);
2354 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2355 ASMBitClear(pbVmwriteBitmap, uVmcsField & 0x7fff);
2356 }
2357 }
2358
2359 /*
2360 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2361 * if the host supports VMWRITE to all supported VMCS fields.
2362 */
2363 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2364 {
2365 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2366 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2367 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2368 {
2369 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2370 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2371 Assert(uVmcsField >> 3 < cbBitmap);
2372 ASMBitClear(pbVmreadBitmap, uVmcsField & 0x7fff);
2373 }
2374 }
2375}
2376
2377#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2378
2379/**
2380 * Sets up the virtual-APIC page address for the VMCS.
2381 *
2382 * @param pVmcsInfo The VMCS info. object.
2383 */
2384DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2385{
2386 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2387 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2388 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2389 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2390 AssertRC(rc);
2391}
2392
2393
2394/**
2395 * Sets up the MSR-bitmap address for the VMCS.
2396 *
2397 * @param pVmcsInfo The VMCS info. object.
2398 */
2399DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2400{
2401 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2402 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2403 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2404 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2405 AssertRC(rc);
2406}
2407
2408
2409/**
2410 * Sets up the APIC-access page address for the VMCS.
2411 *
2412 * @param pVCpu The cross context virtual CPU structure.
2413 */
2414DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2415{
2416 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2417 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2418 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2419 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2420 AssertRC(rc);
2421}
2422
2423#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2424
2425/**
2426 * Sets up the VMREAD bitmap address for the VMCS.
2427 *
2428 * @param pVCpu The cross context virtual CPU structure.
2429 */
2430DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2431{
2432 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2433 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2434 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2435 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2436 AssertRC(rc);
2437}
2438
2439
2440/**
2441 * Sets up the VMWRITE bitmap address for the VMCS.
2442 *
2443 * @param pVCpu The cross context virtual CPU structure.
2444 */
2445DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2446{
2447 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2448 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2449 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2450 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2451 AssertRC(rc);
2452}
2453
2454#endif
2455
2456/**
2457 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2458 * in the VMCS.
2459 *
2460 * @returns VBox status code.
2461 * @param pVmcsInfo The VMCS info. object.
2462 */
2463DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2464{
2465 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2466 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2467 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2468
2469 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2470 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2471 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2472
2473 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2474 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2475 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2476
2477 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2478 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2479 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2480 return VINF_SUCCESS;
2481}
2482
2483
2484/**
2485 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2486 *
2487 * @param pVCpu The cross context virtual CPU structure.
2488 * @param pVmcsInfo The VMCS info. object.
2489 */
2490static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2491{
2492 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2493
2494 /*
2495 * By default, ensure guest attempts to access any MSR cause VM-exits.
2496 * This shall later be relaxed for specific MSRs as necessary.
2497 *
2498 * Note: For nested-guests, the entire bitmap will be merged prior to
2499 * executing the nested-guest using hardware-assisted VMX and hence there
2500 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2501 */
2502 Assert(pVmcsInfo->pvMsrBitmap);
2503 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
2504
2505 /*
2506 * The guest can access the following MSRs (read, write) without causing
2507 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2508 */
2509 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2510 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2511 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2512 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2513 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2514 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2515
2516 /*
2517 * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and has no state
2518 * associated with then. We never need to intercept access (writes need to be
2519 * executed without causing a VM-exit, reads will #GP fault anyway).
2520 *
2521 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2522 * read/write them. We swap the guest/host MSR value using the
2523 * auto-load/store MSR area.
2524 */
2525 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2526 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2527 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2528 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2529 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2530 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2531
2532 /*
2533 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2534 * required for 64-bit guests.
2535 */
2536 if (pVM->hmr0.s.fAllow64BitGuests)
2537 {
2538 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2539 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2540 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2541 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2542 }
2543
2544 /*
2545 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2546 */
2547#ifdef VBOX_STRICT
2548 Assert(pVmcsInfo->pvMsrBitmap);
2549 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2550 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2551#endif
2552}
2553
2554
2555/**
2556 * Sets up pin-based VM-execution controls in the VMCS.
2557 *
2558 * @returns VBox status code.
2559 * @param pVCpu The cross context virtual CPU structure.
2560 * @param pVmcsInfo The VMCS info. object.
2561 */
2562static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2563{
2564 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2565 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2566 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2567
2568 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2569 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2570
2571 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2572 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2573
2574 /* Enable the VMX-preemption timer. */
2575 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2576 {
2577 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2578 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2579 }
2580
2581#if 0
2582 /* Enable posted-interrupt processing. */
2583 if (pVM->hm.s.fPostedIntrs)
2584 {
2585 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2586 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2587 fVal |= VMX_PIN_CTLS_POSTED_INT;
2588 }
2589#endif
2590
2591 if ((fVal & fZap) != fVal)
2592 {
2593 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2594 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2595 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2596 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2597 }
2598
2599 /* Commit it to the VMCS and update our cache. */
2600 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2601 AssertRC(rc);
2602 pVmcsInfo->u32PinCtls = fVal;
2603
2604 return VINF_SUCCESS;
2605}
2606
2607
2608/**
2609 * Sets up secondary processor-based VM-execution controls in the VMCS.
2610 *
2611 * @returns VBox status code.
2612 * @param pVCpu The cross context virtual CPU structure.
2613 * @param pVmcsInfo The VMCS info. object.
2614 */
2615static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2616{
2617 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2618 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2619 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2620
2621 /* WBINVD causes a VM-exit. */
2622 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2623 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2624
2625 /* Enable EPT (aka nested-paging). */
2626 if (pVM->hmr0.s.fNestedPaging)
2627 fVal |= VMX_PROC_CTLS2_EPT;
2628
2629 /* Enable the INVPCID instruction if we expose it to the guest and is supported
2630 by the hardware. Without this, guest executing INVPCID would cause a #UD. */
2631 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2632 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2633 fVal |= VMX_PROC_CTLS2_INVPCID;
2634
2635 /* Enable VPID. */
2636 if (pVM->hmr0.s.vmx.fVpid)
2637 fVal |= VMX_PROC_CTLS2_VPID;
2638
2639 /* Enable unrestricted guest execution. */
2640 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2641 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2642
2643#if 0
2644 if (pVM->hm.s.fVirtApicRegs)
2645 {
2646 /* Enable APIC-register virtualization. */
2647 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2648 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2649
2650 /* Enable virtual-interrupt delivery. */
2651 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2652 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2653 }
2654#endif
2655
2656 /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
2657 where the TPR shadow resides. */
2658 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2659 * done dynamically. */
2660 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2661 {
2662 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2663 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2664 }
2665
2666 /* Enable the RDTSCP instruction if we expose it to the guest and is supported
2667 by the hardware. Without this, guest executing RDTSCP would cause a #UD. */
2668 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2669 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2670 fVal |= VMX_PROC_CTLS2_RDTSCP;
2671
2672 /* Enable Pause-Loop exiting. */
2673 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2674 && pVM->hm.s.vmx.cPleGapTicks
2675 && pVM->hm.s.vmx.cPleWindowTicks)
2676 {
2677 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2678
2679 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2680 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2681 }
2682
2683 if ((fVal & fZap) != fVal)
2684 {
2685 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2686 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2687 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2688 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2689 }
2690
2691 /* Commit it to the VMCS and update our cache. */
2692 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2693 AssertRC(rc);
2694 pVmcsInfo->u32ProcCtls2 = fVal;
2695
2696 return VINF_SUCCESS;
2697}
2698
2699
2700/**
2701 * Sets up processor-based VM-execution controls in the VMCS.
2702 *
2703 * @returns VBox status code.
2704 * @param pVCpu The cross context virtual CPU structure.
2705 * @param pVmcsInfo The VMCS info. object.
2706 */
2707static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2708{
2709 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2710 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2711 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2712
2713 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2714 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2715 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2716 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2717 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2718 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2719 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2720
2721 /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later, check if it's not -always- needed to be set or clear. */
2722 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2723 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2724 {
2725 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2726 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2727 }
2728
2729 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2730 if (!pVM->hmr0.s.fNestedPaging)
2731 {
2732 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2733 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2734 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2735 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2736 }
2737
2738 /* Use TPR shadowing if supported by the CPU. */
2739 if ( PDMHasApic(pVM)
2740 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2741 {
2742 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2743 /* CR8 writes cause a VM-exit based on TPR threshold. */
2744 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2745 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2746 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2747 }
2748 else
2749 {
2750 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2751 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2752 if (pVM->hmr0.s.fAllow64BitGuests)
2753 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2754 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2755 }
2756
2757 /* Use MSR-bitmaps if supported by the CPU. */
2758 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2759 {
2760 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2761 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2762 }
2763
2764 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2765 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2766 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2767
2768 if ((fVal & fZap) != fVal)
2769 {
2770 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2771 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2772 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2773 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2774 }
2775
2776 /* Commit it to the VMCS and update our cache. */
2777 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2778 AssertRC(rc);
2779 pVmcsInfo->u32ProcCtls = fVal;
2780
2781 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2782 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2783 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2784
2785 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2786 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2787 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2788
2789 /* Sanity check, should not really happen. */
2790 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2791 { /* likely */ }
2792 else
2793 {
2794 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2795 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2796 }
2797
2798 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2799 return VINF_SUCCESS;
2800}
2801
2802
2803/**
2804 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2805 * Processor-based VM-execution) control fields in the VMCS.
2806 *
2807 * @returns VBox status code.
2808 * @param pVCpu The cross context virtual CPU structure.
2809 * @param pVmcsInfo The VMCS info. object.
2810 */
2811static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2812{
2813#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2814 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2815 {
2816 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2817 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2818 }
2819#endif
2820
2821 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2822 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2823 AssertRC(rc);
2824
2825 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2826 if (RT_SUCCESS(rc))
2827 {
2828 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2829 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2830
2831 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2832 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2833
2834 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2835 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2836
2837 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2838 {
2839 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2840 AssertRC(rc);
2841 }
2842 return VINF_SUCCESS;
2843 }
2844 else
2845 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2846 return rc;
2847}
2848
2849
2850/**
2851 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2852 *
2853 * We shall setup those exception intercepts that don't change during the
2854 * lifetime of the VM here. The rest are done dynamically while loading the
2855 * guest state.
2856 *
2857 * @param pVCpu The cross context virtual CPU structure.
2858 * @param pVmcsInfo The VMCS info. object.
2859 */
2860static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2861{
2862 /*
2863 * The following exceptions are always intercepted:
2864 *
2865 * #AC - To prevent the guest from hanging the CPU and for dealing with
2866 * split-lock detecting host configs.
2867 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2868 * recursive #DBs can cause a CPU hang.
2869 * #PF - To sync our shadow page tables when nested-paging is not used.
2870 */
2871 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2872 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2873 | RT_BIT(X86_XCPT_DB)
2874 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
2875
2876 /* Commit it to the VMCS. */
2877 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2878 AssertRC(rc);
2879
2880 /* Update our cache of the exception bitmap. */
2881 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2882}
2883
2884
2885#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2886/**
2887 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2888 *
2889 * @returns VBox status code.
2890 * @param pVmcsInfo The VMCS info. object.
2891 */
2892static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2893{
2894 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2895 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2896 AssertRC(rc);
2897
2898 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2899 if (RT_SUCCESS(rc))
2900 {
2901 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2902 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2903
2904 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2905 Assert(!pVmcsInfo->u64Cr0Mask);
2906 Assert(!pVmcsInfo->u64Cr4Mask);
2907 return VINF_SUCCESS;
2908 }
2909 LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
2910 return rc;
2911}
2912#endif
2913
2914
2915/**
2916 * Selector FNHMSVMVMRUN implementation.
2917 */
2918static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2919{
2920 hmR0VmxUpdateStartVmFunction(pVCpu);
2921 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2922}
2923
2924
2925/**
2926 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2927 * VMX.
2928 *
2929 * @returns VBox status code.
2930 * @param pVCpu The cross context virtual CPU structure.
2931 * @param pVmcsInfo The VMCS info. object.
2932 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2933 */
2934static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2935{
2936 Assert(pVmcsInfo->pvVmcs);
2937 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2938
2939 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2940 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2941 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2942
2943 LogFlowFunc(("\n"));
2944
2945 /*
2946 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2947 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2948 */
2949 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2950 if (RT_SUCCESS(rc))
2951 {
2952 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2953 if (RT_SUCCESS(rc))
2954 {
2955 /*
2956 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2957 * The host is always 64-bit since we no longer support 32-bit hosts.
2958 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2959 */
2960 if (!fIsNstGstVmcs)
2961 {
2962 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2963 if (RT_SUCCESS(rc))
2964 {
2965 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2966 if (RT_SUCCESS(rc))
2967 {
2968 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2969 if (RT_SUCCESS(rc))
2970 {
2971 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2972#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2973 /*
2974 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2975 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2976 * making it fit for use when VMCS shadowing is later enabled.
2977 */
2978 if (pVmcsInfo->pvShadowVmcs)
2979 {
2980 VMXVMCSREVID VmcsRevId;
2981 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2982 VmcsRevId.n.fIsShadowVmcs = 1;
2983 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2984 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2985 if (RT_SUCCESS(rc))
2986 { /* likely */ }
2987 else
2988 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2989 }
2990#endif
2991 }
2992 else
2993 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2994 }
2995 else
2996 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2997 }
2998 else
2999 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
3000 }
3001 else
3002 {
3003#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3004 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
3005 if (RT_SUCCESS(rc))
3006 { /* likely */ }
3007 else
3008 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
3009#else
3010 AssertFailed();
3011#endif
3012 }
3013 }
3014 else
3015 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", rc, pszVmcs));
3016 }
3017 else
3018 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", rc, pszVmcs));
3019
3020 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3021 if (RT_SUCCESS(rc))
3022 {
3023 rc = hmR0VmxClearVmcs(pVmcsInfo);
3024 if (RT_SUCCESS(rc))
3025 { /* likely */ }
3026 else
3027 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
3028 }
3029
3030 /*
3031 * Update the last-error record both for failures and success, so we
3032 * can propagate the status code back to ring-3 for diagnostics.
3033 */
3034 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3035 NOREF(pszVmcs);
3036 return rc;
3037}
3038
3039
3040/**
3041 * Does global VT-x initialization (called during module initialization).
3042 *
3043 * @returns VBox status code.
3044 */
3045VMMR0DECL(int) VMXR0GlobalInit(void)
3046{
3047#ifdef HMVMX_USE_FUNCTION_TABLE
3048 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3049# ifdef VBOX_STRICT
3050 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3051 Assert(g_aVMExitHandlers[i].pfn);
3052# endif
3053#endif
3054
3055 /*
3056 * For detecting whether DR6.RTM is writable or not (done in VMXR0InitVM).
3057 */
3058 RTTHREADPREEMPTSTATE Preempt = RTTHREADPREEMPTSTATE_INITIALIZER;
3059 RTThreadPreemptDisable(&Preempt);
3060 RTCCUINTXREG const fSavedDr6 = ASMGetDR6();
3061 ASMSetDR6(0);
3062 RTCCUINTXREG const fZeroDr6 = ASMGetDR6();
3063 ASMSetDR6(fSavedDr6);
3064 RTThreadPreemptRestore(&Preempt);
3065
3066 g_fDr6Zeroed = fZeroDr6;
3067
3068 return VINF_SUCCESS;
3069}
3070
3071
3072/**
3073 * Does global VT-x termination (called during module termination).
3074 */
3075VMMR0DECL(void) VMXR0GlobalTerm()
3076{
3077 /* Nothing to do currently. */
3078}
3079
3080
3081/**
3082 * Sets up and activates VT-x on the current CPU.
3083 *
3084 * @returns VBox status code.
3085 * @param pHostCpu The HM physical-CPU structure.
3086 * @param pVM The cross context VM structure. Can be
3087 * NULL after a host resume operation.
3088 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3089 * fEnabledByHost is @c true).
3090 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3091 * @a fEnabledByHost is @c true).
3092 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3093 * enable VT-x on the host.
3094 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3095 */
3096VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3097 PCSUPHWVIRTMSRS pHwvirtMsrs)
3098{
3099 AssertPtr(pHostCpu);
3100 AssertPtr(pHwvirtMsrs);
3101 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3102
3103 /* Enable VT-x if it's not already enabled by the host. */
3104 if (!fEnabledByHost)
3105 {
3106 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3107 if (RT_FAILURE(rc))
3108 return rc;
3109 }
3110
3111 /*
3112 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor have been
3113 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3114 * invalidated when flushing by VPID.
3115 */
3116 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3117 {
3118 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3119 pHostCpu->fFlushAsidBeforeUse = false;
3120 }
3121 else
3122 pHostCpu->fFlushAsidBeforeUse = true;
3123
3124 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3125 ++pHostCpu->cTlbFlushes;
3126
3127 return VINF_SUCCESS;
3128}
3129
3130
3131/**
3132 * Deactivates VT-x on the current CPU.
3133 *
3134 * @returns VBox status code.
3135 * @param pHostCpu The HM physical-CPU structure.
3136 * @param pvCpuPage Pointer to the VMXON region.
3137 * @param HCPhysCpuPage Physical address of the VMXON region.
3138 *
3139 * @remarks This function should never be called when SUPR0EnableVTx() or
3140 * similar was used to enable VT-x on the host.
3141 */
3142VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3143{
3144 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3145
3146 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3147 return hmR0VmxLeaveRootMode(pHostCpu);
3148}
3149
3150
3151/**
3152 * Does per-VM VT-x initialization.
3153 *
3154 * @returns VBox status code.
3155 * @param pVM The cross context VM structure.
3156 */
3157VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3158{
3159 AssertPtr(pVM);
3160 LogFlowFunc(("pVM=%p\n", pVM));
3161
3162 hmR0VmxStructsInit(pVM);
3163 int rc = hmR0VmxStructsAlloc(pVM);
3164 if (RT_FAILURE(rc))
3165 {
3166 LogRelFunc(("Failed to allocated VMX structures. rc=%Rrc\n", rc));
3167 return rc;
3168 }
3169
3170 /* Setup the crash dump page. */
3171#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3172 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3173 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3174#endif
3175
3176 /*
3177 * Copy out stuff that's for ring-3 and determin default configuration.
3178 */
3179 pVM->hm.s.ForR3.vmx.u64HostDr6Zeroed = g_fDr6Zeroed;
3180
3181 /* Since we do not emulate RTM, make sure DR6.RTM cannot be cleared by the
3182 guest and cause confusion there. It appears that the DR6.RTM bit can be
3183 cleared even if TSX-NI is disabled (microcode update / system / whatever). */
3184#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3185 if (pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg == 0)
3186 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = g_fDr6Zeroed != X86_DR6_RA1_MASK;
3187 else
3188#endif
3189 pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx = pVM->hm.s.vmx.fAlwaysInterceptMovDRxCfg > 0;
3190 pVM->hm.s.ForR3.vmx.fAlwaysInterceptMovDRx = pVM->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3191
3192 return VINF_SUCCESS;
3193}
3194
3195
3196/**
3197 * Does per-VM VT-x termination.
3198 *
3199 * @returns VBox status code.
3200 * @param pVM The cross context VM structure.
3201 */
3202VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3203{
3204 AssertPtr(pVM);
3205 LogFlowFunc(("pVM=%p\n", pVM));
3206
3207#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3208 if (pVM->hmr0.s.vmx.pbScratch)
3209 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3210#endif
3211 hmR0VmxStructsFree(pVM);
3212 return VINF_SUCCESS;
3213}
3214
3215
3216/**
3217 * Sets up the VM for execution using hardware-assisted VMX.
3218 * This function is only called once per-VM during initialization.
3219 *
3220 * @returns VBox status code.
3221 * @param pVM The cross context VM structure.
3222 */
3223VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3224{
3225 AssertPtr(pVM);
3226 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3227
3228 LogFlowFunc(("pVM=%p\n", pVM));
3229
3230 /*
3231 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3232 * without causing a #GP.
3233 */
3234 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3235 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3236 { /* likely */ }
3237 else
3238 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3239
3240 /*
3241 * Check that nested paging is supported if enabled and copy over the flag to the
3242 * ring-0 only structure.
3243 */
3244 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3245 AssertReturn( !fNestedPaging
3246 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3247 VERR_INCOMPATIBLE_CONFIG);
3248 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3249 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3250
3251 /*
3252 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3253 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3254 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3255 */
3256 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3257 AssertReturn( !fUnrestrictedGuest
3258 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3259 && fNestedPaging),
3260 VERR_INCOMPATIBLE_CONFIG);
3261 if ( !fUnrestrictedGuest
3262 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3263 || !pVM->hm.s.vmx.pRealModeTSS))
3264 {
3265 LogRelFunc(("Invalid real-on-v86 state.\n"));
3266 return VERR_INTERNAL_ERROR;
3267 }
3268 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3269
3270 /* Initialize these always, see hmR3InitFinalizeR0().*/
3271 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3272 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3273
3274 /* Setup the tagged-TLB flush handlers. */
3275 int rc = hmR0VmxSetupTaggedTlb(pVM);
3276 if (RT_FAILURE(rc))
3277 {
3278 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3279 return rc;
3280 }
3281
3282 /* Determine LBR capabilities. */
3283 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3284 if (pVM->hmr0.s.vmx.fLbr)
3285 {
3286 rc = hmR0VmxSetupLbrMsrRange(pVM);
3287 if (RT_FAILURE(rc))
3288 {
3289 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3290 return rc;
3291 }
3292 }
3293
3294#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3295 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3296 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3297 {
3298 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3299 if (RT_SUCCESS(rc))
3300 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3301 else
3302 {
3303 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3304 return rc;
3305 }
3306 }
3307#endif
3308
3309 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3310 {
3311 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3312 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3313
3314 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3315
3316 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3317 if (RT_SUCCESS(rc))
3318 {
3319#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3320 if (pVM->cpum.ro.GuestFeatures.fVmx)
3321 {
3322 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3323 if (RT_SUCCESS(rc))
3324 { /* likely */ }
3325 else
3326 {
3327 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3328 return rc;
3329 }
3330 }
3331#endif
3332 }
3333 else
3334 {
3335 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3336 return rc;
3337 }
3338 }
3339
3340 return VINF_SUCCESS;
3341}
3342
3343
3344/**
3345 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3346 * the VMCS.
3347 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3348 */
3349static uint64_t hmR0VmxExportHostControlRegs(void)
3350{
3351 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3352 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3353 uint64_t uHostCr4 = ASMGetCR4();
3354 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3355 return uHostCr4;
3356}
3357
3358
3359/**
3360 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3361 * the host-state area in the VMCS.
3362 *
3363 * @returns VBox status code.
3364 * @param pVCpu The cross context virtual CPU structure.
3365 * @param uHostCr4 The host CR4 value.
3366 */
3367static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3368{
3369 /*
3370 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3371 * will be messed up. We should -not- save the messed up state without restoring
3372 * the original host-state, see @bugref{7240}.
3373 *
3374 * This apparently can happen (most likely the FPU changes), deal with it rather than
3375 * asserting. Was observed booting Solaris 10u10 32-bit guest.
3376 */
3377 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3378 {
3379 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3380 pVCpu->idCpu));
3381 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3382 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3383 }
3384
3385 /*
3386 * Get all the host info.
3387 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3388 * without also checking the cpuid bit.
3389 */
3390 uint32_t fRestoreHostFlags;
3391#if RT_INLINE_ASM_EXTERNAL
3392 if (uHostCr4 & X86_CR4_FSGSBASE)
3393 {
3394 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3395 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3396 }
3397 else
3398 {
3399 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3400 fRestoreHostFlags = 0;
3401 }
3402 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3403 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3404 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3405 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3406#else
3407 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3408 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3409 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3410 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3411 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3412 if (uHostCr4 & X86_CR4_FSGSBASE)
3413 {
3414 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3415 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3416 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3417 }
3418 else
3419 {
3420 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3421 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3422 fRestoreHostFlags = 0;
3423 }
3424 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3425 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3426 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3427 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3428 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3429#endif
3430
3431 /*
3432 * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to
3433 * gain VM-entry and restore them before we get preempted.
3434 *
3435 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3436 */
3437 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3438 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3439 {
3440 if (!(uSelAll & X86_SEL_LDT))
3441 {
3442#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3443 do { \
3444 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3445 if ((a_uVmcsVar) & X86_SEL_RPL) \
3446 { \
3447 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3448 (a_uVmcsVar) = 0; \
3449 } \
3450 } while (0)
3451 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3452 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3453 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3454 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3455#undef VMXLOCAL_ADJUST_HOST_SEG
3456 }
3457 else
3458 {
3459#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3460 do { \
3461 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3462 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3463 { \
3464 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3465 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3466 else \
3467 { \
3468 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3469 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3470 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3471 } \
3472 (a_uVmcsVar) = 0; \
3473 } \
3474 } while (0)
3475 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3476 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3477 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3478 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3479#undef VMXLOCAL_ADJUST_HOST_SEG
3480 }
3481 }
3482
3483 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3484 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3485 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3486 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3487 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3488 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3489 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3490 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3491
3492 /*
3493 * Determine if we need to manually need to restore the GDTR and IDTR limits as VT-x zaps
3494 * them to the maximum limit (0xffff) on every VM-exit.
3495 */
3496 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3497 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3498
3499 /*
3500 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3501 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3502 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3503 * However, several hosts either insists on 0xfff being the limit (Windows Patch Guard) or
3504 * uses the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3505 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3506 * at 0xffff on hosts where we are sure it won't cause trouble.
3507 */
3508#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3509 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3510#else
3511 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3512#endif
3513 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3514
3515 /*
3516 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3517 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3518 * RPL should be too in most cases.
3519 */
3520 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3521 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3522 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3523 VERR_VMX_INVALID_HOST_STATE);
3524
3525 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3526 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
3527
3528 /*
3529 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3530 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3531 * restoration if the host has something else. Task switching is not supported in 64-bit
3532 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3533 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3534 *
3535 * [1] See Intel spec. 3.5 "System Descriptor Types".
3536 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3537 */
3538 Assert(pDesc->System.u4Type == 11);
3539 if ( pDesc->System.u16LimitLow != 0x67
3540 || pDesc->System.u4LimitHigh)
3541 {
3542 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3543
3544 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3545 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3546 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3547 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3548 {
3549 /* The GDT is read-only but the writable GDT is available. */
3550 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3551 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3552 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3553 AssertRCReturn(rc, rc);
3554 }
3555 }
3556
3557 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3558
3559 /*
3560 * Do all the VMCS updates in one block to assist nested virtualization.
3561 */
3562 int rc;
3563 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3564 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3565 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3566 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3567 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3568 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3569 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3570 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3571 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3572 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3573 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3574 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3575
3576 return VINF_SUCCESS;
3577}
3578
3579
3580/**
3581 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3582 * host-state area of the VMCS.
3583 *
3584 * These MSRs will be automatically restored on the host after every successful
3585 * VM-exit.
3586 *
3587 * @param pVCpu The cross context virtual CPU structure.
3588 *
3589 * @remarks No-long-jump zone!!!
3590 */
3591static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3592{
3593 AssertPtr(pVCpu);
3594
3595 /*
3596 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3597 * rather than swapping them on every VM-entry.
3598 */
3599 hmR0VmxLazySaveHostMsrs(pVCpu);
3600
3601 /*
3602 * Host Sysenter MSRs.
3603 */
3604 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3605 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3606 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3607
3608 /*
3609 * Host EFER MSR.
3610 *
3611 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3612 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3613 */
3614 if (g_fHmVmxSupportsVmcsEfer)
3615 {
3616 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3617 AssertRC(rc);
3618 }
3619
3620 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3621 * hmR0VmxExportGuestEntryExitCtls(). */
3622}
3623
3624
3625/**
3626 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3627 *
3628 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3629 * these two bits are handled by VM-entry, see hmR0VMxExportGuestEntryExitCtls().
3630 *
3631 * @returns true if we need to load guest EFER, false otherwise.
3632 * @param pVCpu The cross context virtual CPU structure.
3633 * @param pVmxTransient The VMX-transient structure.
3634 *
3635 * @remarks Requires EFER, CR4.
3636 * @remarks No-long-jump zone!!!
3637 */
3638static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3639{
3640#ifdef HMVMX_ALWAYS_SWAP_EFER
3641 RT_NOREF2(pVCpu, pVmxTransient);
3642 return true;
3643#else
3644 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3645 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3646 uint64_t const u64GuestEfer = pCtx->msrEFER;
3647
3648# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3649 /*
3650 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3651 * the nested-guest.
3652 */
3653 if ( pVmxTransient->fIsNestedGuest
3654 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3655 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3656 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3657 return true;
3658# else
3659 RT_NOREF(pVmxTransient);
3660#endif
3661
3662 /*
3663 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3664 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3665 */
3666 if ( CPUMIsGuestInLongModeEx(pCtx)
3667 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3668 return true;
3669
3670 /*
3671 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3672 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3673 *
3674 * See Intel spec. 4.5 "IA-32e Paging".
3675 * See Intel spec. 4.1.1 "Three Paging Modes".
3676 *
3677 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3678 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3679 */
3680 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3681 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3682 if ( (pCtx->cr4 & X86_CR4_PAE)
3683 && (pCtx->cr0 & X86_CR0_PG))
3684 {
3685 /*
3686 * If nested paging is not used, verify that the guest paging mode matches the
3687 * shadow paging mode which is/will be placed in the VMCS (which is what will
3688 * actually be used while executing the guest and not the CR4 shadow value).
3689 */
3690 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3691 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3692 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3693 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3694 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3695 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3696 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3697 {
3698 /* Verify that the host is NX capable. */
3699 Assert(g_CpumHostFeatures.s.fNoExecute);
3700 return true;
3701 }
3702 }
3703
3704 return false;
3705#endif
3706}
3707
3708
3709/**
3710 * Exports the guest's RSP into the guest-state area in the VMCS.
3711 *
3712 * @param pVCpu The cross context virtual CPU structure.
3713 *
3714 * @remarks No-long-jump zone!!!
3715 */
3716static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3717{
3718 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3719 {
3720 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3721
3722 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3723 AssertRC(rc);
3724
3725 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3726 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3727 }
3728}
3729
3730
3731/**
3732 * Exports the guest hardware-virtualization state.
3733 *
3734 * @returns VBox status code.
3735 * @param pVCpu The cross context virtual CPU structure.
3736 * @param pVmxTransient The VMX-transient structure.
3737 *
3738 * @remarks No-long-jump zone!!!
3739 */
3740static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3741{
3742 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3743 {
3744#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3745 /*
3746 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3747 * VMCS shadowing.
3748 */
3749 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3750 {
3751 /*
3752 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3753 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3754 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3755 *
3756 * We check for VMX root mode here in case the guest executes VMXOFF without
3757 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3758 * not clear the current VMCS pointer.
3759 */
3760 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3761 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3762 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3763 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3764 {
3765 /* Paranoia. */
3766 Assert(!pVmxTransient->fIsNestedGuest);
3767
3768 /*
3769 * For performance reasons, also check if the nested hypervisor's current VMCS
3770 * was newly loaded or modified before copying it to the shadow VMCS.
3771 */
3772 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3773 {
3774 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3775 AssertRCReturn(rc, rc);
3776 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3777 }
3778 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3779 }
3780 else
3781 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3782 }
3783#else
3784 NOREF(pVmxTransient);
3785#endif
3786 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3787 }
3788 return VINF_SUCCESS;
3789}
3790
3791
3792/**
3793 * Exports the guest debug registers into the guest-state area in the VMCS.
3794 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3795 *
3796 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3797 *
3798 * @returns VBox status code.
3799 * @param pVCpu The cross context virtual CPU structure.
3800 * @param pVmxTransient The VMX-transient structure.
3801 *
3802 * @remarks No-long-jump zone!!!
3803 */
3804static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3805{
3806 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3807
3808 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3809 * stepping. */
3810 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3811 if (pVmxTransient->fIsNestedGuest)
3812 {
3813 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3814 AssertRC(rc);
3815
3816 /*
3817 * We don't want to always intercept MOV DRx for nested-guests as it causes
3818 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3819 * Instead, they are strictly only requested when the nested hypervisor intercepts
3820 * them -- handled while merging VMCS controls.
3821 *
3822 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3823 * then the nested-guest debug state should be actively loaded on the host so that
3824 * nested-guest reads its own debug registers without causing VM-exits.
3825 */
3826 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3827 && !CPUMIsGuestDebugStateActive(pVCpu))
3828 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3829 return VINF_SUCCESS;
3830 }
3831
3832#ifdef VBOX_STRICT
3833 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3834 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3835 {
3836 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3837 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3838 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3839 }
3840#endif
3841
3842 bool fSteppingDB = false;
3843 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3844 if (pVCpu->hm.s.fSingleInstruction)
3845 {
3846 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3847 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3848 {
3849 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3850 Assert(fSteppingDB == false);
3851 }
3852 else
3853 {
3854 pVCpu->cpum.GstCtx.eflags.u |= X86_EFL_TF;
3855 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3856 pVCpu->hmr0.s.fClearTrapFlag = true;
3857 fSteppingDB = true;
3858 }
3859 }
3860
3861#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3862 bool fInterceptMovDRx = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx;
3863#else
3864 bool fInterceptMovDRx = false;
3865#endif
3866 uint64_t u64GuestDr7;
3867 if ( fSteppingDB
3868 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3869 {
3870 /*
3871 * Use the combined guest and host DRx values found in the hypervisor register set
3872 * because the hypervisor debugger has breakpoints active or someone is single stepping
3873 * on the host side without a monitor trap flag.
3874 *
3875 * Note! DBGF expects a clean DR6 state before executing guest code.
3876 */
3877 if (!CPUMIsHyperDebugStateActive(pVCpu))
3878 {
3879 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3880 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3881 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3882 }
3883
3884 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3885 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3886 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3887 fInterceptMovDRx = true;
3888 }
3889 else
3890 {
3891 /*
3892 * If the guest has enabled debug registers, we need to load them prior to
3893 * executing guest code so they'll trigger at the right time.
3894 */
3895 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3896 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3897 {
3898 if (!CPUMIsGuestDebugStateActive(pVCpu))
3899 {
3900 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3901 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3902 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3903 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3904 }
3905#ifndef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
3906 Assert(!fInterceptMovDRx);
3907#endif
3908 }
3909 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3910 {
3911 /*
3912 * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we
3913 * must intercept #DB in order to maintain a correct DR6 guest value, and
3914 * because we need to intercept it to prevent nested #DBs from hanging the
3915 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3916 */
3917 fInterceptMovDRx = true;
3918 }
3919
3920 /* Update DR7 with the actual guest value. */
3921 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3922 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3923 }
3924
3925 if (fInterceptMovDRx)
3926 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3927 else
3928 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3929
3930 /*
3931 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3932 * monitor-trap flag and update our cache.
3933 */
3934 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3935 {
3936 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3937 AssertRC(rc);
3938 pVmcsInfo->u32ProcCtls = uProcCtls;
3939 }
3940
3941 /*
3942 * Update guest DR7.
3943 */
3944 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3945 AssertRC(rc);
3946
3947 /*
3948 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3949 * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
3950 *
3951 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3952 */
3953 if (fSteppingDB)
3954 {
3955 Assert(pVCpu->hm.s.fSingleInstruction);
3956 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3957
3958 uint32_t fIntrState = 0;
3959 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3960 AssertRC(rc);
3961
3962 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3963 {
3964 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3965 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3966 AssertRC(rc);
3967 }
3968 }
3969
3970 return VINF_SUCCESS;
3971}
3972
3973
3974/**
3975 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3976 * areas.
3977 *
3978 * These MSRs will automatically be loaded to the host CPU on every successful
3979 * VM-entry and stored from the host CPU on every successful VM-exit.
3980 *
3981 * We creates/updates MSR slots for the host MSRs in the VM-exit MSR-load area. The
3982 * actual host MSR values are not- updated here for performance reasons. See
3983 * hmR0VmxExportHostMsrs().
3984 *
3985 * We also exports the guest sysenter MSRs into the guest-state area in the VMCS.
3986 *
3987 * @returns VBox status code.
3988 * @param pVCpu The cross context virtual CPU structure.
3989 * @param pVmxTransient The VMX-transient structure.
3990 *
3991 * @remarks No-long-jump zone!!!
3992 */
3993static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3994{
3995 AssertPtr(pVCpu);
3996 AssertPtr(pVmxTransient);
3997
3998 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3999 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4000
4001 /*
4002 * MSRs that we use the auto-load/store MSR area in the VMCS.
4003 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
4004 * nothing to do here. The host MSR values are updated when it's safe in
4005 * hmR0VmxLazySaveHostMsrs().
4006 *
4007 * For nested-guests, the guests MSRs from the VM-entry MSR-load area are already
4008 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
4009 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
4010 * for any MSR that are not part of the lazy MSRs so we do not need to place
4011 * those MSRs into the auto-load/store MSR area. Nothing to do here.
4012 */
4013 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
4014 {
4015 /* No auto-load/store MSRs currently. */
4016 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
4017 }
4018
4019 /*
4020 * Guest Sysenter MSRs.
4021 */
4022 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
4023 {
4024 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
4025
4026 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
4027 {
4028 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
4029 AssertRC(rc);
4030 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
4031 }
4032
4033 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
4034 {
4035 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
4036 AssertRC(rc);
4037 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
4038 }
4039
4040 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
4041 {
4042 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
4043 AssertRC(rc);
4044 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4045 }
4046 }
4047
4048 /*
4049 * Guest/host EFER MSR.
4050 */
4051 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
4052 {
4053 /* Whether we are using the VMCS to swap the EFER MSR must have been
4054 determined earlier while exporting VM-entry/VM-exit controls. */
4055 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4056 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4057
4058 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4059 {
4060 /*
4061 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4062 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4063 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4064 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4065 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4066 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4067 * during VM-entry.
4068 */
4069 uint64_t uGuestEferMsr = pCtx->msrEFER;
4070 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4071 {
4072 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4073 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4074 else
4075 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4076 }
4077
4078 /*
4079 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4080 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4081 */
4082 if (g_fHmVmxSupportsVmcsEfer)
4083 {
4084 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4085 AssertRC(rc);
4086 }
4087 else
4088 {
4089 /*
4090 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4091 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4092 */
4093 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4094 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4095 AssertRCReturn(rc, rc);
4096 }
4097
4098 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4099 }
4100 else if (!g_fHmVmxSupportsVmcsEfer)
4101 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4102
4103 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4104 }
4105
4106 /*
4107 * Other MSRs.
4108 */
4109 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4110 {
4111 /* Speculation Control (R/W). */
4112 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4113 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4114 {
4115 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4116 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4117 AssertRCReturn(rc, rc);
4118 }
4119
4120 /* Last Branch Record. */
4121 if (pVM->hmr0.s.vmx.fLbr)
4122 {
4123 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4124 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4125 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4126 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4127 Assert(cLbrStack <= 32);
4128 for (uint32_t i = 0; i < cLbrStack; i++)
4129 {
4130 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4131 pVmcsInfoShared->au64LbrFromIpMsr[i],
4132 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4133 AssertRCReturn(rc, rc);
4134
4135 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4136 if (idToIpMsrStart != 0)
4137 {
4138 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4139 pVmcsInfoShared->au64LbrToIpMsr[i],
4140 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4141 AssertRCReturn(rc, rc);
4142 }
4143 }
4144
4145 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4146 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4147 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4148 false /* fUpdateHostMsr */);
4149 AssertRCReturn(rc, rc);
4150 }
4151
4152 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4153 }
4154
4155 return VINF_SUCCESS;
4156}
4157
4158
4159/**
4160 * Wrapper for running the guest code in VT-x.
4161 *
4162 * @returns VBox status code, no informational status codes.
4163 * @param pVCpu The cross context virtual CPU structure.
4164 * @param pVmxTransient The VMX-transient structure.
4165 *
4166 * @remarks No-long-jump zone!!!
4167 */
4168DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4169{
4170 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4171 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4172
4173 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4174 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
4175#ifdef VBOX_WITH_STATISTICS
4176 if (fResumeVM)
4177 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4178 else
4179 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4180#endif
4181 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4182 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4183 return rc;
4184}
4185
4186
4187/**
4188 * Reports world-switch error and dumps some useful debug info.
4189 *
4190 * @param pVCpu The cross context virtual CPU structure.
4191 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4192 * @param pVmxTransient The VMX-transient structure (only
4193 * exitReason updated).
4194 */
4195static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4196{
4197 Assert(pVCpu);
4198 Assert(pVmxTransient);
4199 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4200
4201 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4202 switch (rcVMRun)
4203 {
4204 case VERR_VMX_INVALID_VMXON_PTR:
4205 AssertFailed();
4206 break;
4207 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4208 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4209 {
4210 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4211 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4212 AssertRC(rc);
4213 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4214
4215 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4216 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4217 Cannot do it here as we may have been long preempted. */
4218
4219#ifdef VBOX_STRICT
4220 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4221 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4222 pVmxTransient->uExitReason));
4223 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4224 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4225 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4226 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4227 else
4228 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4229 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4230 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4231
4232 static struct
4233 {
4234 /** Name of the field to log. */
4235 const char *pszName;
4236 /** The VMCS field. */
4237 uint32_t uVmcsField;
4238 /** Whether host support of this field needs to be checked. */
4239 bool fCheckSupport;
4240 } const s_aVmcsFields[] =
4241 {
4242 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4243 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4244 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4245 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4246 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4247 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4248 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4249 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4250 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4251 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4252 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4253 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4254 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4255 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4256 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4257 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4258 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4259 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4260 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4261 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4262 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4263 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4264 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4265 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4266 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4267 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4268 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4269 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4270 /* The order of selector fields below are fixed! */
4271 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4272 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4273 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4274 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4275 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4276 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4277 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4278 /* End of ordered selector fields. */
4279 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4280 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4281 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4282 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4283 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4284 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4285 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4286 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4287 };
4288
4289 RTGDTR HostGdtr;
4290 ASMGetGDTR(&HostGdtr);
4291
4292 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4293 for (uint32_t i = 0; i < cVmcsFields; i++)
4294 {
4295 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4296
4297 bool fSupported;
4298 if (!s_aVmcsFields[i].fCheckSupport)
4299 fSupported = true;
4300 else
4301 {
4302 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4303 switch (uVmcsField)
4304 {
4305 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4306 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4307 case VMX_VMCS32_CTRL_PROC_EXEC2:
4308 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4309 break;
4310 default:
4311 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4312 }
4313 }
4314
4315 if (fSupported)
4316 {
4317 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4318 switch (uWidth)
4319 {
4320 case VMX_VMCSFIELD_WIDTH_16BIT:
4321 {
4322 uint16_t u16Val;
4323 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4324 AssertRC(rc);
4325 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4326
4327 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4328 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4329 {
4330 if (u16Val < HostGdtr.cbGdt)
4331 {
4332 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4333 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4334 "Host FS", "Host GS", "Host TR" };
4335 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4336 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4337 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4338 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4339 }
4340 else
4341 Log4((" Selector value exceeds GDT limit!\n"));
4342 }
4343 break;
4344 }
4345
4346 case VMX_VMCSFIELD_WIDTH_32BIT:
4347 {
4348 uint32_t u32Val;
4349 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4350 AssertRC(rc);
4351 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4352 break;
4353 }
4354
4355 case VMX_VMCSFIELD_WIDTH_64BIT:
4356 case VMX_VMCSFIELD_WIDTH_NATURAL:
4357 {
4358 uint64_t u64Val;
4359 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4360 AssertRC(rc);
4361 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4362 break;
4363 }
4364 }
4365 }
4366 }
4367
4368 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4369 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4370 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4371 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4372 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4373 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4374#endif /* VBOX_STRICT */
4375 break;
4376 }
4377
4378 default:
4379 /* Impossible */
4380 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4381 break;
4382 }
4383}
4384
4385
4386/**
4387 * Sets up the usage of TSC-offsetting and updates the VMCS.
4388 *
4389 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4390 * VMX-preemption timer.
4391 *
4392 * @param pVCpu The cross context virtual CPU structure.
4393 * @param pVmxTransient The VMX-transient structure.
4394 * @param idCurrentCpu The current CPU number.
4395 *
4396 * @remarks No-long-jump zone!!!
4397 */
4398static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4399{
4400 bool fOffsettedTsc;
4401 bool fParavirtTsc;
4402 uint64_t uTscOffset;
4403 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4404 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4405
4406 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4407 {
4408 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4409 every entry slowed down the bs2-test1 CPUID testcase by ~33% (on an 10980xe). */
4410 uint64_t cTicksToDeadline;
4411 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4412 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4413 {
4414 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4415 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4416 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4417 if ((int64_t)cTicksToDeadline > 0)
4418 { /* hopefully */ }
4419 else
4420 {
4421 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4422 cTicksToDeadline = 0;
4423 }
4424 }
4425 else
4426 {
4427 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4428 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4429 &pVCpu->hmr0.s.vmx.uTscDeadline,
4430 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4431 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4432 if (cTicksToDeadline >= 128)
4433 { /* hopefully */ }
4434 else
4435 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4436 }
4437
4438 /* Make sure the returned values have sane upper and lower boundaries. */
4439 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4440 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4441 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32678); /* 1/32768th of a second, ~30us. */
4442 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
4443
4444 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4445 * preemption timers here. We probably need to clamp the preemption timer,
4446 * after converting the timer value to the host. */
4447 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4448 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4449 AssertRC(rc);
4450 }
4451 else
4452 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4453
4454 if (fParavirtTsc)
4455 {
4456 /* Currently neither Hyper-V nor KVM need to update their paravirt. TSC
4457 information before every VM-entry, hence disable it for performance sake. */
4458#if 0
4459 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4460 AssertRC(rc);
4461#endif
4462 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4463 }
4464
4465 if ( fOffsettedTsc
4466 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4467 {
4468 if (pVmxTransient->fIsNestedGuest)
4469 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4470 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
4471 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4472 }
4473 else
4474 {
4475 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4476 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4477 }
4478}
4479
4480
4481/**
4482 * Saves the guest state from the VMCS into the guest-CPU context.
4483 *
4484 * @returns VBox status code.
4485 * @param pVCpu The cross context virtual CPU structure.
4486 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4487 */
4488VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4489{
4490 AssertPtr(pVCpu);
4491 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4492 return vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, fWhat);
4493}
4494
4495
4496/**
4497 * Gets VMX VM-exit auxiliary information.
4498 *
4499 * @returns VBox status code.
4500 * @param pVCpu The cross context virtual CPU structure.
4501 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4502 * @param fWhat What to fetch, HMVMX_READ_XXX.
4503 */
4504VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4505{
4506 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4507 if (RT_LIKELY(pVmxTransient))
4508 {
4509 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4510
4511 /* The exit reason is always available. */
4512 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4513
4514
4515 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4516 {
4517 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_QUALIFICATION>(pVCpu, pVmxTransient);
4518 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4519#ifdef VBOX_STRICT
4520 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4521#endif
4522 }
4523
4524 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4525 {
4526 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_INFO>(pVCpu, pVmxTransient);
4527 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4528#ifdef VBOX_STRICT
4529 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4530#endif
4531 }
4532
4533 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4534 {
4535 vmxHCReadToTransientSlow<HMVMX_READ_IDT_VECTORING_ERROR_CODE>(pVCpu, pVmxTransient);
4536 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4537#ifdef VBOX_STRICT
4538 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4539#endif
4540 }
4541
4542 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4543 {
4544 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_LEN>(pVCpu, pVmxTransient);
4545 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4546#ifdef VBOX_STRICT
4547 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4548#endif
4549 }
4550
4551 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4552 {
4553 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_INFO>(pVCpu, pVmxTransient);
4554 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4555#ifdef VBOX_STRICT
4556 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4557#endif
4558 }
4559
4560 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4561 {
4562 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE>(pVCpu, pVmxTransient);
4563 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4564#ifdef VBOX_STRICT
4565 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4566#endif
4567 }
4568
4569 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4570 {
4571 vmxHCReadToTransientSlow<HMVMX_READ_EXIT_INSTR_INFO>(pVCpu, pVmxTransient);
4572 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4573#ifdef VBOX_STRICT
4574 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4575#endif
4576 }
4577
4578 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4579 {
4580 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_LINEAR_ADDR>(pVCpu, pVmxTransient);
4581 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4582#ifdef VBOX_STRICT
4583 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4584#endif
4585 }
4586
4587 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4588 {
4589 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PHYSICAL_ADDR>(pVCpu, pVmxTransient);
4590 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4591#ifdef VBOX_STRICT
4592 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4593#endif
4594 }
4595
4596 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4597 {
4598#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4599 vmxHCReadToTransientSlow<HMVMX_READ_GUEST_PENDING_DBG_XCPTS>(pVCpu, pVmxTransient);
4600 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4601#else
4602 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4603#endif
4604#ifdef VBOX_STRICT
4605 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4606#endif
4607 }
4608
4609 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4610 return VINF_SUCCESS;
4611 }
4612 return VERR_NOT_AVAILABLE;
4613}
4614
4615
4616/**
4617 * Does the necessary state syncing before returning to ring-3 for any reason
4618 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4619 *
4620 * @returns VBox status code.
4621 * @param pVCpu The cross context virtual CPU structure.
4622 * @param fImportState Whether to import the guest state from the VMCS back
4623 * to the guest-CPU context.
4624 *
4625 * @remarks No-long-jmp zone!!!
4626 */
4627static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4628{
4629 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4630 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4631
4632 RTCPUID const idCpu = RTMpCpuId();
4633 Log4Func(("HostCpuId=%u\n", idCpu));
4634
4635 /*
4636 * !!! IMPORTANT !!!
4637 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4638 */
4639
4640 /* Save the guest state if necessary. */
4641 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4642 if (fImportState)
4643 {
4644 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4645 AssertRCReturn(rc, rc);
4646 }
4647
4648 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4649 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4650 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4651
4652 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4653#ifdef VMX_WITH_MAYBE_ALWAYS_INTERCEPT_MOV_DRX
4654 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4655 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4656 || (!CPUMIsHyperDebugStateActive(pVCpu) && !pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fAlwaysInterceptMovDRx));
4657#else
4658 Assert( (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
4659 || pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs
4660 || !CPUMIsHyperDebugStateActive(pVCpu));
4661#endif
4662 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4663 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4664 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4665
4666 /* Restore host-state bits that VT-x only restores partially. */
4667 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4668 {
4669 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4670 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4671 }
4672 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4673
4674 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4675 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4676 {
4677 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4678 if (!fImportState)
4679 {
4680 int rc = vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4681 AssertRCReturn(rc, rc);
4682 }
4683 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4684 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4685 }
4686 else
4687 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4688
4689 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4690 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4691
4692 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
4693 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
4694 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
4695 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
4696 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
4697 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
4698 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
4699 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
4700 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
4701 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4702
4703 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4704
4705 /** @todo This partially defeats the purpose of having preemption hooks.
4706 * The problem is, deregistering the hooks should be moved to a place that
4707 * lasts until the EMT is about to be destroyed not everytime while leaving HM
4708 * context.
4709 */
4710 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4711 AssertRCReturn(rc, rc);
4712
4713#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4714 /*
4715 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
4716 * clear a shadow VMCS before allowing that VMCS to become active on another
4717 * logical processor. We may or may not be importing guest state which clears
4718 * it, so cover for it here.
4719 *
4720 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
4721 */
4722 if ( pVmcsInfo->pvShadowVmcs
4723 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
4724 {
4725 rc = vmxHCClearShadowVmcs(pVmcsInfo);
4726 AssertRCReturn(rc, rc);
4727 }
4728
4729 /*
4730 * Flag that we need to re-export the host state if we switch to this VMCS before
4731 * executing guest or nested-guest code.
4732 */
4733 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
4734#endif
4735
4736 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
4737 NOREF(idCpu);
4738 return VINF_SUCCESS;
4739}
4740
4741
4742/**
4743 * Leaves the VT-x session.
4744 *
4745 * @returns VBox status code.
4746 * @param pVCpu The cross context virtual CPU structure.
4747 *
4748 * @remarks No-long-jmp zone!!!
4749 */
4750static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
4751{
4752 HM_DISABLE_PREEMPT(pVCpu);
4753 HMVMX_ASSERT_CPU_SAFE(pVCpu);
4754 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4755 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4756
4757 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
4758 and done this from the VMXR0ThreadCtxCallback(). */
4759 if (!pVCpu->hmr0.s.fLeaveDone)
4760 {
4761 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
4762 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
4763 pVCpu->hmr0.s.fLeaveDone = true;
4764 }
4765 Assert(!pVCpu->cpum.GstCtx.fExtrn);
4766
4767 /*
4768 * !!! IMPORTANT !!!
4769 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
4770 */
4771
4772 /* Deregister hook now that we've left HM context before re-enabling preemption. */
4773 /** @todo Deregistering here means we need to VMCLEAR always
4774 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
4775 * for calling VMMR0ThreadCtxHookDisable here! */
4776 VMMR0ThreadCtxHookDisable(pVCpu);
4777
4778 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
4779 int rc = HMR0LeaveCpu(pVCpu);
4780 HM_RESTORE_PREEMPT();
4781 return rc;
4782}
4783
4784
4785/**
4786 * Take necessary actions before going back to ring-3.
4787 *
4788 * An action requires us to go back to ring-3. This function does the necessary
4789 * steps before we can safely return to ring-3. This is not the same as longjmps
4790 * to ring-3, this is voluntary and prepares the guest so it may continue
4791 * executing outside HM (recompiler/IEM).
4792 *
4793 * @returns VBox status code.
4794 * @param pVCpu The cross context virtual CPU structure.
4795 * @param rcExit The reason for exiting to ring-3. Can be
4796 * VINF_VMM_UNKNOWN_RING3_CALL.
4797 */
4798static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
4799{
4800 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4801
4802 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4803 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
4804 {
4805 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
4806 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
4807 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4808 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
4809 }
4810
4811 /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
4812 VMMRZCallRing3Disable(pVCpu);
4813 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
4814
4815 /*
4816 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
4817 * We need to do this only on returns to ring-3 and not for longjmps to ring3.
4818 *
4819 * This is because execution may continue from ring-3 and we would need to inject
4820 * the event from there (hence place it back in TRPM).
4821 */
4822 if (pVCpu->hm.s.Event.fPending)
4823 {
4824 vmxHCPendingEventToTrpmTrap(pVCpu);
4825 Assert(!pVCpu->hm.s.Event.fPending);
4826
4827 /* Clear the events from the VMCS. */
4828 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
4829 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
4830 }
4831#ifdef VBOX_STRICT
4832 /*
4833 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
4834 * fatal), we don't care about verifying duplicate injection of events. Errors like
4835 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
4836 * function so those should and will be checked below.
4837 */
4838 else if (RT_SUCCESS(rcExit))
4839 {
4840 /*
4841 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
4842 * This can be pretty hard to debug otherwise, interrupts might get injected twice
4843 * occasionally, see @bugref{9180#c42}.
4844 *
4845 * However, if the VM-entry failed, any VM entry-interruption info. field would
4846 * be left unmodified as the event would not have been injected to the guest. In
4847 * such cases, don't assert, we're not going to continue guest execution anyway.
4848 */
4849 uint32_t uExitReason;
4850 uint32_t uEntryIntInfo;
4851 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
4852 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
4853 AssertRC(rc);
4854 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
4855 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
4856 }
4857#endif
4858
4859 /*
4860 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
4861 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
4862 * (e.g. TPR below threshold).
4863 */
4864 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4865 {
4866 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
4867 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
4868 }
4869
4870 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
4871 and if we're injecting an event we should have a TRPM trap pending. */
4872 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4873#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
4874 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
4875#endif
4876
4877 /* Save guest state and restore host state bits. */
4878 int rc = hmR0VmxLeaveSession(pVCpu);
4879 AssertRCReturn(rc, rc);
4880 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4881
4882 /* Thread-context hooks are unregistered at this point!!! */
4883 /* Ring-3 callback notifications are unregistered at this point!!! */
4884
4885 /* Sync recompiler state. */
4886 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4887 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
4888 | CPUM_CHANGED_LDTR
4889 | CPUM_CHANGED_GDTR
4890 | CPUM_CHANGED_IDTR
4891 | CPUM_CHANGED_TR
4892 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4893 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
4894 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
4895 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
4896
4897 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
4898
4899 /* Update the exit-to-ring 3 reason. */
4900 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
4901
4902 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
4903 if ( rcExit != VINF_EM_RAW_INTERRUPT
4904 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
4905 {
4906 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
4907 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
4908 }
4909
4910 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
4911 VMMRZCallRing3Enable(pVCpu);
4912 return rc;
4913}
4914
4915
4916/**
4917 * VMMRZCallRing3() callback wrapper which saves the guest state before we
4918 * longjump due to a ring-0 assertion.
4919 *
4920 * @returns VBox status code.
4921 * @param pVCpu The cross context virtual CPU structure.
4922 */
4923VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
4924{
4925 /*
4926 * !!! IMPORTANT !!!
4927 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
4928 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
4929 */
4930 VMMR0AssertionRemoveNotification(pVCpu);
4931 VMMRZCallRing3Disable(pVCpu);
4932 HM_DISABLE_PREEMPT(pVCpu);
4933
4934 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4935 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4936 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4937 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4938
4939 /* Restore host-state bits that VT-x only restores partially. */
4940 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4941 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4942 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4943
4944 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4945 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4946 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4947
4948 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4949 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4950 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4951
4952 /* Clear the current VMCS data back to memory (shadow VMCS if any would have been
4953 cleared as part of importing the guest state above. */
4954 hmR0VmxClearVmcs(pVmcsInfo);
4955
4956 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
4957 VMMR0ThreadCtxHookDisable(pVCpu);
4958
4959 /* Leave HM context. This takes care of local init (term). */
4960 HMR0LeaveCpu(pVCpu);
4961 HM_RESTORE_PREEMPT();
4962 return VINF_SUCCESS;
4963}
4964
4965
4966/**
4967 * Enters the VT-x session.
4968 *
4969 * @returns VBox status code.
4970 * @param pVCpu The cross context virtual CPU structure.
4971 */
4972VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
4973{
4974 AssertPtr(pVCpu);
4975 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
4976 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4977
4978 LogFlowFunc(("pVCpu=%p\n", pVCpu));
4979 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
4980 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
4981
4982#ifdef VBOX_STRICT
4983 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
4984 RTCCUINTREG uHostCr4 = ASMGetCR4();
4985 if (!(uHostCr4 & X86_CR4_VMXE))
4986 {
4987 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
4988 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4989 }
4990#endif
4991
4992 /*
4993 * Do the EMT scheduled L1D and MDS flush here if needed.
4994 */
4995 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
4996 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
4997 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
4998 hmR0MdsClear();
4999
5000 /*
5001 * Load the appropriate VMCS as the current and active one.
5002 */
5003 PVMXVMCSINFO pVmcsInfo;
5004 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5005 if (!fInNestedGuestMode)
5006 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5007 else
5008 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5009 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5010 if (RT_SUCCESS(rc))
5011 {
5012 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5013 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5014 pVCpu->hmr0.s.fLeaveDone = false;
5015 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5016 }
5017 return rc;
5018}
5019
5020
5021/**
5022 * The thread-context callback.
5023 *
5024 * This is used together with RTThreadCtxHookCreate() on platforms which
5025 * supports it, and directly from VMMR0EmtPrepareForBlocking() and
5026 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5027 *
5028 * @param enmEvent The thread-context event.
5029 * @param pVCpu The cross context virtual CPU structure.
5030 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5031 * @thread EMT(pVCpu)
5032 */
5033VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5034{
5035 AssertPtr(pVCpu);
5036 RT_NOREF1(fGlobalInit);
5037
5038 switch (enmEvent)
5039 {
5040 case RTTHREADCTXEVENT_OUT:
5041 {
5042 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5043 VMCPU_ASSERT_EMT(pVCpu);
5044
5045 /* No longjmps (logger flushes, locks) in this fragile context. */
5046 VMMRZCallRing3Disable(pVCpu);
5047 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5048
5049 /* Restore host-state (FPU, debug etc.) */
5050 if (!pVCpu->hmr0.s.fLeaveDone)
5051 {
5052 /*
5053 * Do -not- import the guest-state here as we might already be in the middle of importing
5054 * it, esp. bad if we're holding the PGM lock, see comment at the end of vmxHCImportGuestStateEx().
5055 */
5056 hmR0VmxLeave(pVCpu, false /* fImportState */);
5057 pVCpu->hmr0.s.fLeaveDone = true;
5058 }
5059
5060 /* Leave HM context, takes care of local init (term). */
5061 int rc = HMR0LeaveCpu(pVCpu);
5062 AssertRC(rc);
5063
5064 /* Restore longjmp state. */
5065 VMMRZCallRing3Enable(pVCpu);
5066 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5067 break;
5068 }
5069
5070 case RTTHREADCTXEVENT_IN:
5071 {
5072 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5073 VMCPU_ASSERT_EMT(pVCpu);
5074
5075 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5076 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5077 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5078 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5079 hmR0MdsClear();
5080
5081 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5082 VMMRZCallRing3Disable(pVCpu);
5083 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5084
5085 /* Initialize the bare minimum state required for HM. This takes care of
5086 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5087 int rc = hmR0EnterCpu(pVCpu);
5088 AssertRC(rc);
5089 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5090 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5091
5092 /* Load the active VMCS as the current one. */
5093 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5094 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5095 AssertRC(rc);
5096 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5097 pVCpu->hmr0.s.fLeaveDone = false;
5098
5099 /* Restore longjmp state. */
5100 VMMRZCallRing3Enable(pVCpu);
5101 break;
5102 }
5103
5104 default:
5105 break;
5106 }
5107}
5108
5109
5110/**
5111 * Exports the host state into the VMCS host-state area.
5112 * Sets up the VM-exit MSR-load area.
5113 *
5114 * The CPU state will be loaded from these fields on every successful VM-exit.
5115 *
5116 * @returns VBox status code.
5117 * @param pVCpu The cross context virtual CPU structure.
5118 *
5119 * @remarks No-long-jump zone!!!
5120 */
5121static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5122{
5123 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5124
5125 int rc = VINF_SUCCESS;
5126 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5127 {
5128 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5129
5130 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5131 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5132
5133 hmR0VmxExportHostMsrs(pVCpu);
5134
5135 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5136 }
5137 return rc;
5138}
5139
5140
5141/**
5142 * Saves the host state in the VMCS host-state.
5143 *
5144 * @returns VBox status code.
5145 * @param pVCpu The cross context virtual CPU structure.
5146 *
5147 * @remarks No-long-jump zone!!!
5148 */
5149VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5150{
5151 AssertPtr(pVCpu);
5152 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5153
5154 /*
5155 * Export the host state here while entering HM context.
5156 * When thread-context hooks are used, we might get preempted and have to re-save the host
5157 * state but most of the time we won't be, so do it here before we disable interrupts.
5158 */
5159 return hmR0VmxExportHostState(pVCpu);
5160}
5161
5162
5163/**
5164 * Exports the guest state into the VMCS guest-state area.
5165 *
5166 * The will typically be done before VM-entry when the guest-CPU state and the
5167 * VMCS state may potentially be out of sync.
5168 *
5169 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5170 * VM-entry controls.
5171 * Sets up the appropriate VMX non-root function to execute guest code based on
5172 * the guest CPU mode.
5173 *
5174 * @returns VBox strict status code.
5175 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5176 * without unrestricted guest execution and the VMMDev is not presently
5177 * mapped (e.g. EFI32).
5178 *
5179 * @param pVCpu The cross context virtual CPU structure.
5180 * @param pVmxTransient The VMX-transient structure.
5181 *
5182 * @remarks No-long-jump zone!!!
5183 */
5184static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5185{
5186 AssertPtr(pVCpu);
5187 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5188 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5189
5190 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5191
5192 /*
5193 * Determine real-on-v86 mode.
5194 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5195 */
5196 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5197 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5198 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5199 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5200 else
5201 {
5202 Assert(!pVmxTransient->fIsNestedGuest);
5203 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5204 }
5205
5206 /*
5207 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5208 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5209 */
5210 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5211 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5212
5213 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5214 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5215
5216 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5217 if (rcStrict == VINF_SUCCESS)
5218 { /* likely */ }
5219 else
5220 {
5221 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5222 return rcStrict;
5223 }
5224
5225 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5226 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5227
5228 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5229 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5230
5231 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5232 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5233 vmxHCExportGuestRip(pVCpu);
5234 hmR0VmxExportGuestRsp(pVCpu);
5235 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5236
5237 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5238 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5239
5240 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
5241 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5242 | HM_CHANGED_GUEST_CR2
5243 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5244 | HM_CHANGED_GUEST_X87
5245 | HM_CHANGED_GUEST_SSE_AVX
5246 | HM_CHANGED_GUEST_OTHER_XSAVE
5247 | HM_CHANGED_GUEST_XCRx
5248 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5249 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5250 | HM_CHANGED_GUEST_TSC_AUX
5251 | HM_CHANGED_GUEST_OTHER_MSRS
5252 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5253
5254 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5255 return rc;
5256}
5257
5258
5259/**
5260 * Exports the state shared between the host and guest into the VMCS.
5261 *
5262 * @param pVCpu The cross context virtual CPU structure.
5263 * @param pVmxTransient The VMX-transient structure.
5264 *
5265 * @remarks No-long-jump zone!!!
5266 */
5267static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5268{
5269 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5270 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5271
5272 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5273 {
5274 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5275 AssertRC(rc);
5276 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5277
5278 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5279 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5280 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5281 }
5282
5283 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5284 {
5285 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5286 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5287 }
5288
5289 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5290 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5291}
5292
5293
5294/**
5295 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5296 *
5297 * @returns Strict VBox status code (i.e. informational status codes too).
5298 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5299 * without unrestricted guest execution and the VMMDev is not presently
5300 * mapped (e.g. EFI32).
5301 *
5302 * @param pVCpu The cross context virtual CPU structure.
5303 * @param pVmxTransient The VMX-transient structure.
5304 *
5305 * @remarks No-long-jump zone!!!
5306 */
5307static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5308{
5309 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5310 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5311
5312#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5313 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5314#endif
5315
5316 /*
5317 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5318 * changes. First try to export only these without going through all other changed-flag checks.
5319 */
5320 VBOXSTRICTRC rcStrict;
5321 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5322 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5323 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5324
5325 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
5326 if ( (fCtxChanged & fMinimalMask)
5327 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5328 {
5329 vmxHCExportGuestRip(pVCpu);
5330 hmR0VmxExportGuestRsp(pVCpu);
5331 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5332 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5333 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5334 }
5335 /* If anything else also changed, go through the full export routine and export as required. */
5336 else if (fCtxChanged & fCtxMask)
5337 {
5338 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5339 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5340 { /* likely */}
5341 else
5342 {
5343 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5344 VBOXSTRICTRC_VAL(rcStrict)));
5345 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5346 return rcStrict;
5347 }
5348 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5349 }
5350 /* Nothing changed, nothing to load here. */
5351 else
5352 rcStrict = VINF_SUCCESS;
5353
5354#ifdef VBOX_STRICT
5355 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5356 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5357 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5358#endif
5359 return rcStrict;
5360}
5361
5362
5363/**
5364 * Map the APIC-access page for virtualizing APIC accesses.
5365 *
5366 * This can cause a longjumps to R3 due to the acquisition of the PGM lock. Hence,
5367 * this not done as part of exporting guest state, see @bugref{8721}.
5368 *
5369 * @returns VBox status code.
5370 * @param pVCpu The cross context virtual CPU structure.
5371 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5372 */
5373static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5374{
5375 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5376 Assert(GCPhysApicBase);
5377
5378 LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5379
5380 /* Unalias the existing mapping. */
5381 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5382 AssertRCReturn(rc, rc);
5383
5384 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5385 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5386 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5387 AssertRCReturn(rc, rc);
5388
5389 return VINF_SUCCESS;
5390}
5391
5392
5393/**
5394 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5395 * CPU.
5396 *
5397 * @param idCpu The ID for the CPU the function is called on.
5398 * @param pvUser1 Null, not used.
5399 * @param pvUser2 Null, not used.
5400 */
5401static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5402{
5403 RT_NOREF3(idCpu, pvUser1, pvUser2);
5404 VMXDispatchHostNmi();
5405}
5406
5407
5408/**
5409 * Dispatching an NMI on the host CPU that received it.
5410 *
5411 * @returns VBox status code.
5412 * @param pVCpu The cross context virtual CPU structure.
5413 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5414 * executing when receiving the host NMI in VMX non-root
5415 * operation.
5416 */
5417static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5418{
5419 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5420 Assert(idCpu != NIL_RTCPUID);
5421
5422 /*
5423 * We don't want to delay dispatching the NMI any more than we have to. However,
5424 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5425 * after executing guest or nested-guest code for the following reasons:
5426 *
5427 * - We would need to perform VMREADs with interrupts disabled and is orders of
5428 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
5429 * supported by the host hypervisor.
5430 *
5431 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5432 * longer period of time just for handling an edge case like host NMIs which do
5433 * not occur nearly as frequently as other VM-exits.
5434 *
5435 * Let's cover the most likely scenario first. Check if we are on the target CPU
5436 * and dispatch the NMI right away. This should be much faster than calling into
5437 * RTMpOnSpecific() machinery.
5438 */
5439 bool fDispatched = false;
5440 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5441 if (idCpu == RTMpCpuId())
5442 {
5443 VMXDispatchHostNmi();
5444 fDispatched = true;
5445 }
5446 ASMSetFlags(fEFlags);
5447 if (fDispatched)
5448 {
5449 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5450 return VINF_SUCCESS;
5451 }
5452
5453 /*
5454 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5455 * there should be no race or recursion even if we are unlucky enough to be preempted
5456 * (to the target CPU) without dispatching the host NMI above.
5457 */
5458 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5459 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5460}
5461
5462
5463#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5464/**
5465 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5466 * nested-guest using hardware-assisted VMX.
5467 *
5468 * @param pVCpu The cross context virtual CPU structure.
5469 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5470 * @param pVmcsInfoGst The guest VMCS info. object.
5471 */
5472static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5473{
5474 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5475 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5476 Assert(pu64MsrBitmap);
5477
5478 /*
5479 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5480 * MSR that is intercepted by the guest is also intercepted while executing the
5481 * nested-guest using hardware-assisted VMX.
5482 *
5483 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5484 * nested-guest VM-exit even if the outer guest is not intercepting some
5485 * MSRs. We cannot assume the caller has initialized the nested-guest
5486 * MSR bitmap in this case.
5487 *
5488 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5489 * each of its VM-entry, hence initializing it once per-VM while setting
5490 * up the nested-guest VMCS is not sufficient.
5491 */
5492 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5493 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5494 {
5495 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5496 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5497 Assert(pu64MsrBitmapNstGst);
5498 Assert(pu64MsrBitmapGst);
5499
5500 /** @todo Detect and use EVEX.POR? */
5501 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5502 for (uint32_t i = 0; i < cFrags; i++)
5503 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5504 }
5505 else
5506 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5507}
5508
5509
5510/**
5511 * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
5512 * hardware-assisted VMX execution of the nested-guest.
5513 *
5514 * For a guest, we don't modify these controls once we set up the VMCS and hence
5515 * this function is never called.
5516 *
5517 * For nested-guests since the nested hypervisor provides these controls on every
5518 * nested-guest VM-entry and could potentially change them everytime we need to
5519 * merge them before every nested-guest VM-entry.
5520 *
5521 * @returns VBox status code.
5522 * @param pVCpu The cross context virtual CPU structure.
5523 */
5524static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5525{
5526 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5527 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5528 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5529
5530 /*
5531 * Merge the controls with the requirements of the guest VMCS.
5532 *
5533 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5534 * VMCS with the features supported by the physical CPU as it's already done by the
5535 * VMLAUNCH/VMRESUME instruction emulation.
5536 *
5537 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5538 * derived from the VMX features supported by the physical CPU.
5539 */
5540
5541 /* Pin-based VM-execution controls. */
5542 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5543
5544 /* Processor-based VM-execution controls. */
5545 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5546 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5547 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5548 | VMX_PROC_CTLS_MOV_DR_EXIT /* hmR0VmxExportSharedDebugState makes
5549 sure guest DRx regs are loaded. */
5550 | VMX_PROC_CTLS_USE_TPR_SHADOW
5551 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5552
5553 /* Secondary processor-based VM-execution controls. */
5554 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5555 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5556 | VMX_PROC_CTLS2_INVPCID
5557 | VMX_PROC_CTLS2_VMCS_SHADOWING
5558 | VMX_PROC_CTLS2_RDTSCP
5559 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5560 | VMX_PROC_CTLS2_APIC_REG_VIRT
5561 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5562 | VMX_PROC_CTLS2_VMFUNC));
5563
5564 /*
5565 * VM-entry controls:
5566 * These controls contains state that depends on the nested-guest state (primarily
5567 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5568 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5569 * properly continue executing the nested-guest if the EFER MSR changes but does not
5570 * cause a nested-guest VM-exits.
5571 *
5572 * VM-exit controls:
5573 * These controls specify the host state on return. We cannot use the controls from
5574 * the nested hypervisor state as is as it would contain the guest state rather than
5575 * the host state. Since the host state is subject to change (e.g. preemption, trips
5576 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5577 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5578 *
5579 * VM-entry MSR-load:
5580 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5581 * context by the VMLAUNCH/VMRESUME instruction emulation.
5582 *
5583 * VM-exit MSR-store:
5584 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5585 * back into the VM-exit MSR-store area.
5586 *
5587 * VM-exit MSR-load areas:
5588 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5589 * can entirely ignore what the nested hypervisor wants to load here.
5590 */
5591
5592 /*
5593 * Exception bitmap.
5594 *
5595 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5596 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5597 * code more flexible if intercepting exceptions become more dynamic in the future we do
5598 * it as part of exporting the nested-guest state.
5599 */
5600 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5601
5602 /*
5603 * CR0/CR4 guest/host mask.
5604 *
5605 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5606 * cause VM-exits, so we need to merge them here.
5607 */
5608 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5609 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
5610
5611 /*
5612 * Page-fault error-code mask and match.
5613 *
5614 * Although we require unrestricted guest execution (and thereby nested-paging) for
5615 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5616 * normally intercept #PFs, it might intercept them for debugging purposes.
5617 *
5618 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5619 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5620 */
5621 uint32_t u32XcptPFMask;
5622 uint32_t u32XcptPFMatch;
5623 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5624 {
5625 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5626 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5627 }
5628 else
5629 {
5630 u32XcptPFMask = 0;
5631 u32XcptPFMatch = 0;
5632 }
5633
5634 /*
5635 * Pause-Loop exiting.
5636 */
5637 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5638 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
5639 * this will work... */
5640 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5641 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5642
5643 /*
5644 * Pending debug exceptions.
5645 * Currently just copy whatever the nested-guest provides us.
5646 */
5647 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5648
5649 /*
5650 * I/O Bitmap.
5651 *
5652 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5653 * intercept all I/O port accesses.
5654 */
5655 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5656 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5657
5658 /*
5659 * VMCS shadowing.
5660 *
5661 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5662 * enabled while executing the nested-guest.
5663 */
5664 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5665
5666 /*
5667 * APIC-access page.
5668 */
5669 RTHCPHYS HCPhysApicAccess;
5670 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5671 {
5672 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5673 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5674
5675 void *pvPage;
5676 PGMPAGEMAPLOCK PgLockApicAccess;
5677 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5678 if (RT_SUCCESS(rc))
5679 {
5680 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5681 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
5682
5683 /** @todo Handle proper releasing of page-mapping lock later. */
5684 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
5685 }
5686 else
5687 return rc;
5688 }
5689 else
5690 HCPhysApicAccess = 0;
5691
5692 /*
5693 * Virtual-APIC page and TPR threshold.
5694 */
5695 RTHCPHYS HCPhysVirtApic;
5696 uint32_t u32TprThreshold;
5697 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5698 {
5699 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
5700 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
5701
5702 void *pvPage;
5703 PGMPAGEMAPLOCK PgLockVirtApic;
5704 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
5705 if (RT_SUCCESS(rc))
5706 {
5707 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
5708 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
5709
5710 /** @todo Handle proper releasing of page-mapping lock later. */
5711 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
5712 }
5713 else
5714 return rc;
5715
5716 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
5717 }
5718 else
5719 {
5720 HCPhysVirtApic = 0;
5721 u32TprThreshold = 0;
5722
5723 /*
5724 * We must make sure CR8 reads/write must cause VM-exits when TPR shadowing is not
5725 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
5726 * be taken care of by EPT/shadow paging.
5727 */
5728 if (pVM->hmr0.s.fAllow64BitGuests)
5729 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
5730 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
5731 }
5732
5733 /*
5734 * Validate basic assumptions.
5735 */
5736 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5737 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
5738 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
5739 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
5740
5741 /*
5742 * Commit it to the nested-guest VMCS.
5743 */
5744 int rc = VINF_SUCCESS;
5745 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
5746 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
5747 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
5748 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
5749 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
5750 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
5751 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
5752 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
5753 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
5754 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
5755 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
5756 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
5757 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
5758 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
5759 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
5760 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
5761 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
5762 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
5763 {
5764 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
5765 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
5766 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
5767 }
5768 if (pVmcsInfoNstGst->HCPhysVirtApic != HCPhysVirtApic)
5769 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
5770 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
5771 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5772 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
5773 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
5774 AssertRC(rc);
5775
5776 /*
5777 * Update the nested-guest VMCS cache.
5778 */
5779 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
5780 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
5781 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
5782 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
5783 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
5784 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
5785 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
5786 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
5787 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
5788
5789 /*
5790 * We need to flush the TLB if we are switching the APIC-access page address.
5791 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
5792 */
5793 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5794 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
5795
5796 /*
5797 * MSR bitmap.
5798 *
5799 * The MSR bitmap address has already been initialized while setting up the nested-guest
5800 * VMCS, here we need to merge the MSR bitmaps.
5801 */
5802 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5803 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
5804
5805 return VINF_SUCCESS;
5806}
5807#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
5808
5809
5810/**
5811 * Does the preparations before executing guest code in VT-x.
5812 *
5813 * This may cause longjmps to ring-3 and may even result in rescheduling to the
5814 * recompiler/IEM. We must be cautious what we do here regarding committing
5815 * guest-state information into the VMCS assuming we assuredly execute the
5816 * guest in VT-x mode.
5817 *
5818 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
5819 * the common-state (TRPM/forceflags), we must undo those changes so that the
5820 * recompiler/IEM can (and should) use them when it resumes guest execution.
5821 * Otherwise such operations must be done when we can no longer exit to ring-3.
5822 *
5823 * @returns Strict VBox status code (i.e. informational status codes too).
5824 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
5825 * have been disabled.
5826 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
5827 * pending events).
5828 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
5829 * double-fault into the guest.
5830 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
5831 * dispatched directly.
 * @retval VINF_EM_RESCHEDULE_REM for nested-guests when built with
 * VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM.
 * @retval VINF_EM_RAW_INTERRUPT if RTThreadPreemptIsPending() reports pending
 * preemption after interrupts have been disabled.
 * @retval VINF_EM_RAW_TO_R3 if the final force-flag check finds flags that
 * require going back to ring-3.
5832 * @retval VINF_* scheduling changes, we have to go back to ring-3.
5833 *
5834 * @param pVCpu The cross context virtual CPU structure.
5835 * @param pVmxTransient The VMX-transient structure.
5836 * @param fStepping Whether we are single-stepping the guest in the
5837 * hypervisor debugger. Makes us ignore some of the reasons
5838 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
5839 * if event dispatching took place.
 *
 * @remarks On VINF_SUCCESS both interrupts and ring-3 calls (longjmps) are left
 * disabled for the caller; on every other return both have been restored.
5840 */
5841static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
5842{
5843 Assert(VMMRZCallRing3IsEnabled(pVCpu));
5844
5845 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
5846
5847#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
5848 if (pVmxTransient->fIsNestedGuest)
5849 {
5850 RT_NOREF2(pVCpu, fStepping);
5851 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
5852 return VINF_EM_RESCHEDULE_REM;
5853 }
5854#endif
5855
5856 /*
5857 * Check and process force flag actions, some of which might require us to go back to ring-3.
5858 */
5859 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
5860 if (rcStrict == VINF_SUCCESS)
5861 {
5862 /* FFs don't get set all the time, so explicitly re-check the VMX non-root mode state for nested-guests. */
5863#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5864 if ( pVmxTransient->fIsNestedGuest
5865 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5866 {
5867 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5868 return VINF_VMX_VMEXIT;
5869 }
5870#endif
5871 }
5872 else
5873 return rcStrict;
5874
5875 /*
5876 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
 *
 * This is only done while the cached APIC base MSR (u64GstMsrApicBase) is still zero;
 * the cache is filled in once the APIC-access page has been mapped.
5877 */
5878 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5879 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
5880 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5881 && PDMHasApic(pVM))
5882 {
5883 /* Get the APIC base MSR from the virtual APIC device. */
5884 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
5885
5886 /* Map the APIC access page. */
5887 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
5888 AssertRCReturn(rc, rc);
5889
5890 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
5891 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
5892 }
5893
5894#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5895 /*
5896 * Merge guest VMCS controls with the nested-guest VMCS controls.
5897 *
5898 * Even if we have not executed the guest prior to this (e.g. when resuming from a
5899 * saved state), we should be okay with merging controls as we initialize the
5900 * guest VMCS controls as part of VM setup phase.
 *
 * The fMergedNstGstCtls flag makes sure the merge is not repeated while it stays set.
5901 */
5902 if ( pVmxTransient->fIsNestedGuest
5903 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
5904 {
5905 int rc = hmR0VmxMergeVmcsNested(pVCpu);
5906 AssertRCReturn(rc, rc);
5907 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
5908 }
5909#endif
5910
5911 /*
5912 * Evaluate events to be injected into the guest.
5913 *
5914 * Events in TRPM can be injected without inspecting the guest state.
5915 * If any new events (interrupts/NMI) are pending currently, we try to set up the
5916 * guest to cause a VM-exit the next time they are ready to receive the event.
5917 */
5918 if (TRPMHasTrap(pVCpu))
5919 vmxHCTrpmTrapToPendingEvent(pVCpu);
5920
5921 uint32_t fIntrState;
5922#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5923 if (!pVmxTransient->fIsNestedGuest)
5924 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5925 else
5926 rcStrict = vmxHCEvaluatePendingEventNested(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5927
5928 /*
5929 * While evaluating pending events if something failed (unlikely) or if we were
5930 * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
5931 */
5932 if (rcStrict != VINF_SUCCESS)
5933 return rcStrict;
5934 if ( pVmxTransient->fIsNestedGuest
5935 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5936 {
5937 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
5938 return VINF_VMX_VMEXIT;
5939 }
5940#else
5941 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, &fIntrState);
5942 Assert(rcStrict == VINF_SUCCESS);
5943#endif
5944
5945 /*
5946 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
5947 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
5948 * also result in triple-faulting the VM.
5949 *
5950 * With nested-guests, the above does not apply since unrestricted guest execution is a
5951 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
5952 */
5953 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest, fIntrState, fStepping);
5954 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5955 { /* likely */ }
5956 else
5957 {
5958 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
5959 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
5960 return rcStrict;
5961 }
5962
5963 /*
5964 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
5965 * import CR3 themselves. We will need to update them here, as even as late as the above
5966 * vmxHCInjectPendingEvent() call may lazily import guest-CPU state on demand causing
5967 * the below force flags to be set.
5968 */
5969 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
5970 {
5971 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
5972 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
5973 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
5974 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
5975 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5976 }
5977
5978#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5979 /* Paranoia. */
5980 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
5981#endif
5982
5983 /*
5984 * No longjmps to ring-3 from this point on!!!
5985 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
5986 * This also disables flushing of the R0-logger instance (if any).
5987 */
5988 VMMRZCallRing3Disable(pVCpu);
5989
5990 /*
5991 * Export the guest state bits.
5992 *
5993 * We cannot perform longjmps while loading the guest state because we do not preserve the
5994 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
5995 * CPU migration.
5996 *
5997 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
5998 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
5999 */
6000 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6001 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6002 { /* likely */ }
6003 else
6004 {
6005 VMMRZCallRing3Enable(pVCpu); /* Undo the VMMRZCallRing3Disable() above before bailing out. */
6006 return rcStrict;
6007 }
6008
6009 /*
6010 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6011 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6012 * preemption disabled for a while. Since this is purely to aid the
6013 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6014 * disable interrupt on NT.
6015 *
6016 * We need to check for force-flags that could've possibly been altered since we last
6017 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6018 * see @bugref{6398}).
6019 *
6020 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6021 * to ring-3 before executing guest code.
 *
 * The previous flags are saved in pVmxTransient->fEFlags so they can be restored later
 * (by the bail-out path at the end of this function or by the post-run code).
6022 */
6023 pVmxTransient->fEFlags = ASMIntDisableFlags();
6024
6025 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6026 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6027 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6028 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6029 {
6030 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6031 {
6032#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6033 /*
6034 * If we are executing a nested-guest make sure that we should intercept subsequent
6035 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6036 * the VM-exit instruction emulation happy.
6037 */
6038 if (pVmxTransient->fIsNestedGuest)
6039 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6040#endif
6041
6042 /*
6043 * We've injected any pending events. This is really the point of no return (to ring-3).
6044 *
6045 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6046 * returns from this function, so do -not- enable them here.
6047 */
6048 pVCpu->hm.s.Event.fPending = false;
6049 return VINF_SUCCESS;
6050 }
6051
6052 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6053 rcStrict = VINF_EM_RAW_INTERRUPT;
6054 }
6055 else
6056 {
6057 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6058 rcStrict = VINF_EM_RAW_TO_R3;
6059 }
6060
 /* Not running the guest after all: restore the flags saved by ASMIntDisableFlags() and re-enable ring-3 calls. */
6061 ASMSetFlags(pVmxTransient->fEFlags);
6062 VMMRZCallRing3Enable(pVCpu);
6063
6064 return rcStrict;
6065}
6066
6067
6068/**
6069 * Final preparations before executing guest code using hardware-assisted VMX.
6070 *
6071 * We can no longer get preempted to a different host CPU and there are no returns
6072 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6073 * failures), this function is not intended to fail sans unrecoverable hardware
6074 * errors.
6075 *
6076 * @param pVCpu The cross context virtual CPU structure.
6077 * @param pVmxTransient The VMX-transient structure.
6078 *
6079 * @remarks Called with preemption disabled.
6080 * @remarks No-long-jump zone!!!
 * @remarks Expects that no event is pending injection (asserted on entry) and
 * switches the VCPU state from VMCPUSTATE_STARTED_HM to
 * VMCPUSTATE_STARTED_EXEC.
6081 */
6082static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6083{
6084 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6085 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6086 Assert(!pVCpu->hm.s.Event.fPending);
6087
6088 /*
6089 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6090 */
6091 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6092 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6093
6094 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6095 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6096 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6097 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6098
 /*
 * Load the guest FPU state if it isn't active yet. When loading it modified the host CR0,
 * the host context is flagged as changed so that it gets re-exported below.
 */
6099 if (!CPUMIsGuestFPUStateActive(pVCpu))
6100 {
6101 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6102 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6103 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6104 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6105 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6106 }
6107
6108 /*
6109 * Re-export the host state bits as we may've been preempted (only happens when
6110 * thread-context hooks are used or when the VM start function changes) or if
6111 * the host CR0 is modified while loading the guest FPU state above.
6112 *
6113 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6114 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6115 * see @bugref{8432}.
6116 *
6117 * This may also happen when switching to/from a nested-guest VMCS without leaving
6118 * ring-0.
6119 */
6120 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6121 {
6122 hmR0VmxExportHostState(pVCpu);
6123 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6124 }
6125 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6126
6127 /*
6128 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6129 */
6130 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6131 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6132 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged)); /* Nothing may be left to export at this point. */
6133
6134 /*
6135 * Store status of the shared guest/host debug state at the time of VM-entry.
6136 */
6137 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6138 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6139
6140 /*
6141 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6142 * more than one conditional check. The post-run side of our code shall determine
6143 * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
6144 */
6145 if (pVmcsInfo->pbVirtApic)
6146 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6147
6148 /*
6149 * Update the host MSRs values in the VM-exit MSR-load area.
 * This is skipped as long as fUpdatedHostAutoMsrs remains set.
6150 */
6151 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6152 {
6153 if (pVmcsInfo->cExitMsrLoad > 0)
6154 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6155 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6156 }
6157
6158 /*
6159 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6160 * VMX-preemption timer based on the next virtual sync clock deadline.
 *
 * Redone whenever the transient flag is clear or we are on a different host CPU
 * than the last one recorded in idLastCpu.
6161 */
6162 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6163 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6164 {
6165 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6166 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6167 }
6168
6169 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6170 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6171 if (!fIsRdtscIntercepted)
6172 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6173 else
6174 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6175
6176 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6177 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6178 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6179 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6180 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6181 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6182
6183 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6184
6185 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6186 as we're about to start executing the guest. */
6187
6188 /*
6189 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6190 *
6191 * This is done this late as updating the TSC offsetting/preemption timer above
6192 * figures out if we can skip intercepting RDTSCP by calculating the number of
6193 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6194 */
6195 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6196 && !fIsRdtscIntercepted)
6197 {
6198 vmxHCImportGuestStateEx(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6199
6200 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6201 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6202 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6203 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6204 AssertRC(rc);
6205 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6206 pVmxTransient->fRemoveTscAuxMsr = true; /* Tells hmR0VmxPostRunGuest() to remove the MSR again after the VM-exit. */
6207 }
6208
6209#ifdef VBOX_STRICT
 /* Strict builds only: sanity check the auto-load/store MSRs, the host EFER MSR and the cached VMCS controls. */
6210 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6211 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6212 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6213 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6214#endif
6215
6216#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6217 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6218 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6219 * see @bugref{9180#c54}. */
6220 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6221 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6222 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6223#endif
6224}
6225
6226
6227/**
6228 * First C routine invoked after running guest code using hardware-assisted VMX.
6229 *
 * Resets the per-exit transient state, updates TM with the guest TSC, reads the
 * VM-exit reason and, when the VM-entry succeeded, imports the
 * guest-interruptibility state and syncs the TPR shadow with the APIC.
 *
6230 * @param pVCpu The cross context virtual CPU structure.
6231 * @param pVmxTransient The VMX-transient structure.
6232 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6233 *
6234 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6235 *
6236 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6237 * unconditionally when it is safe to do so.
6238 */
6239static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6240{
6241 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6242 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6243 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6244 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6245 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6246 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6247
 /*
 * When RDTSC isn't intercepted, tell TM which TSC value the guest last could have seen:
 * the TSC at VM-exit plus the VMCS TSC offset. For nested-guests the result is passed
 * through CPUMRemoveNestedGuestTscOffset() first.
 */
6248 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6249 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6250 {
6251 uint64_t uGstTsc;
6252 if (!pVmxTransient->fIsNestedGuest)
6253 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6254 else
6255 {
6256 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6257 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6258 }
6259 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6260 }
6261
6262 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6263 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6264 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6265
6266 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6267 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6268#ifdef VBOX_STRICT
6269 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6270#endif
6271 Assert(!ASMIntAreEnabled());
6272 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6273 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6274
6275#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6276 /*
6277 * Clean all the VMCS fields in the transient structure before reading
6278 * anything from the VMCS.
6279 */
6280 pVmxTransient->uExitReason = 0;
6281 pVmxTransient->uExitIntErrorCode = 0;
6282 pVmxTransient->uExitQual = 0;
6283 pVmxTransient->uGuestLinearAddr = 0;
6284 pVmxTransient->uExitIntInfo = 0;
6285 pVmxTransient->cbExitInstr = 0;
6286 pVmxTransient->ExitInstrInfo.u = 0;
6287 pVmxTransient->uEntryIntInfo = 0;
6288 pVmxTransient->uEntryXcptErrorCode = 0;
6289 pVmxTransient->cbEntryInstr = 0;
6290 pVmxTransient->uIdtVectoringInfo = 0;
6291 pVmxTransient->uIdtVectoringErrorCode = 0;
6292#endif
6293
6294 /*
6295 * Save the basic VM-exit reason and check if the VM-entry failed.
6296 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6297 */
6298 uint32_t uExitReason;
6299 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6300 AssertRC(rc);
6301 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6302 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
6303
6304 /*
6305 * Log the VM-exit before logging anything else as otherwise it might be a
6306 * tad confusing what happens before and after the world-switch.
6307 */
6308 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6309
6310 /*
6311 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6312 * bitmap permissions, if it was added before VM-entry.
6313 */
6314 if (pVmxTransient->fRemoveTscAuxMsr)
6315 {
6316 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6317 pVmxTransient->fRemoveTscAuxMsr = false;
6318 }
6319
6320 /*
6321 * Check if VMLAUNCH/VMRESUME succeeded.
6322 * If this failed, we cause a guru meditation and cease further execution.
6323 */
6324 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6325 {
6326 /*
6327 * Update the VM-exit history array here even if the VM-entry failed due to:
6328 * - Invalid guest state.
6329 * - MSR loading.
6330 * - Machine-check event.
6331 *
6332 * In any of the above cases we will still have a "valid" VM-exit reason
6333 * despite @a fVMEntryFailed being true.
6334 *
6335 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6336 *
6337 * Note! We don't have CS or RIP at this point. Will probably address that later
6338 * by amending the history entry added here.
6339 */
6340 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6341 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6342
6343 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6344 {
6345 VMMRZCallRing3Enable(pVCpu);
6346 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6347
6348#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6349 vmxHCReadAllRoFieldsVmcs(pVCpu, pVmxTransient);
6350#endif
6351
6352 /*
6353 * Always import the guest-interruptibility state as we need it while evaluating
6354 * injecting events on re-entry. We could in *theory* postpone reading it for
6355 * exits that do not involve instruction emulation, but since most exits are
6356 * for instruction emulation (exceptions being external interrupts, shadow
6357 * paging building page faults and EPT violations, and interrupt window stuff)
6358 * this is a reasonable simplification.
6359 *
6360 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6361 * checking for real-mode while exporting the state because all bits that cause
6362 * mode changes wrt CR0 are intercepted.
6363 *
6364 * Note! This mask _must_ match the default value for the default a_fDonePostExit
6365 * value for the vmxHCImportGuestState template!
6366 */
6367 /** @todo r=bird: consider dropping the INHIBIT_XXX and fetch the state
6368 * explicitly in the exit handlers and injection function. That way we have
6369 * fewer clusters of vmread spread around the code, because the EM history
6370 * executor won't execute very many non-exiting instructions before stopping. */
6371 rc = vmxHCImportGuestState< CPUMCTX_EXTRN_INHIBIT_INT
6372 | CPUMCTX_EXTRN_INHIBIT_NMI
6373#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6374 | HMVMX_CPUMCTX_EXTRN_ALL
6375#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6376 | CPUMCTX_EXTRN_RFLAGS
6377#endif
6378 , 0 /*a_fDoneLocal*/, 0 /*a_fDonePostExit*/>(pVCpu, pVmcsInfo, __FUNCTION__);
6379 AssertRC(rc);
6380
6381 /*
6382 * Sync the TPR shadow with our APIC state.
 *
 * Only done for the (non-nested) guest and only when the TPR in the virtual-APIC
 * page differs from the value cached in u8GuestTpr before VM-entry.
6383 */
6384 if ( !pVmxTransient->fIsNestedGuest
6385 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6386 {
6387 Assert(pVmcsInfo->pbVirtApic);
6388 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6389 {
6390 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6391 AssertRC(rc);
6392 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6393 }
6394 }
6395
6396 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6397 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6398 || pVmxTransient->fWasHyperDebugStateActive == false);
6399 return;
6400 }
6401 }
6402#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6403 else if (pVmxTransient->fIsNestedGuest)
6404 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6405#endif
6406 else
6407 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6408
 /* VMLAUNCH/VMRESUME error or failed VM-entry: longjmps are re-enabled on this path too. */
6409 VMMRZCallRing3Enable(pVCpu);
6410}
6411
6412
6413/**
6414 * Runs the guest code using hardware-assisted VMX the normal way.
6415 *
 * Loops over pre-run preparation, the world switch, post-run processing and
 * VM-exit handling until something other than VINF_SUCCESS is returned or the
 * maximum number of resume loops is exceeded.
 *
6416 * @returns Strict VBox status code.
 * @retval VINF_EM_RAW_INTERRUPT if a VM-exit was handled successfully but the
 * loop count has exceeded the VM's cMaxResumeLoops.
6417 * @param pVCpu The cross context virtual CPU structure.
6418 * @param pcLoops Pointer to the number of executed loops. Incremented after
 * every VM-exit that was handled with VINF_SUCCESS.
6419 */
6420static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6421{
6422 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6423 Assert(pcLoops);
6424 Assert(*pcLoops <= cMaxResumeLoops);
6425 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6426
6427#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6428 /*
6429 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6430 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6431 * guest VMCS while entering the VMX ring-0 session.
6432 */
6433 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6434 {
6435 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6436 if (RT_SUCCESS(rc))
6437 { /* likely */ }
6438 else
6439 {
6440 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6441 return rc;
6442 }
6443 }
6444#endif
6445
 /* The transient structure starts out zeroed, so fIsNestedGuest is false for this runner. */
6446 VMXTRANSIENT VmxTransient;
6447 RT_ZERO(VmxTransient);
6448 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6449
6450 /* Paranoia. */
6451 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6452
6453 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6454 for (;;)
6455 {
6456 Assert(!HMR0SuspendPending());
6457 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6458 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6459
6460 /*
6461 * Preparatory work for running guest code, this may force us to
6462 * return to ring-3.
6463 *
6464 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6465 */
6466 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6467 if (rcStrict != VINF_SUCCESS)
6468 break;
6469
6470 /* Interrupts are disabled at this point! */
6471 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6472 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6473 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6474 /* Interrupts are re-enabled at this point! */
6475
6476 /*
6477 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6478 */
6479 if (RT_SUCCESS(rcRun))
6480 { /* very likely */ }
6481 else
6482 {
6483 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6484 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6485 return rcRun;
6486 }
6487
6488 /*
6489 * Profile the VM-exit.
6490 */
6491 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6492 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6493 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6494 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6495 HMVMX_START_EXIT_DISPATCH_PROF();
6496
6497 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6498
6499 /*
6500 * Handle the VM-exit.
6501 */
6502#ifdef HMVMX_USE_FUNCTION_TABLE
6503 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6504#else
6505 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6506#endif
6507 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6508 if (rcStrict == VINF_SUCCESS)
6509 {
 /* Keep resuming the guest until the loop budget is used up, then report VINF_EM_RAW_INTERRUPT. */
6510 if (++(*pcLoops) <= cMaxResumeLoops)
6511 continue;
6512 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6513 rcStrict = VINF_EM_RAW_INTERRUPT;
6514 }
6515 break;
6516 }
6517
6518 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6519 return rcStrict;
6520}
6521
6522
6523#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6524/**
6525 * Runs the nested-guest code using hardware-assisted VMX.
6526 *
6527 * @returns VBox status code.
6528 * @param pVCpu The cross context virtual CPU structure.
6529 * @param pcLoops Pointer to the number of executed loops.
6530 *
6531 * @sa hmR0VmxRunGuestCodeNormal.
6532 */
6533static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6534{
6535 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6536 Assert(pcLoops);
6537 Assert(*pcLoops <= cMaxResumeLoops);
6538 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6539
6540 /*
6541 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6542 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6543 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6544 */
6545 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6546 {
6547 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6548 if (RT_SUCCESS(rc))
6549 { /* likely */ }
6550 else
6551 {
6552 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6553 return rc;
6554 }
6555 }
6556
6557 VMXTRANSIENT VmxTransient;
6558 RT_ZERO(VmxTransient);
6559 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6560 VmxTransient.fIsNestedGuest = true;
6561
6562 /* Paranoia. */
6563 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6564
6565 /* Setup pointer so PGM/IEM can query VM-exit auxiliary info on demand in ring-0. */
6566 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6567
6568 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6569 for (;;)
6570 {
6571 Assert(!HMR0SuspendPending());
6572 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6573 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6574
6575 /*
6576 * Preparatory work for running guest code, this may force us to
6577 * return to ring-3.
6578 *
6579 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6580 */
6581 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6582 if (rcStrict != VINF_SUCCESS)
6583 break;
6584
6585 /* Interrupts are disabled at this point! */
6586 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6587 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6588 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6589 /* Interrupts are re-enabled at this point! */
6590
6591 /*
6592 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6593 */
6594 if (RT_SUCCESS(rcRun))
6595 { /* very likely */ }
6596 else
6597 {
6598 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6599 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6600 rcStrict = rcRun;
6601 break;
6602 }
6603
6604 /*
6605 * Profile the VM-exit.
6606 */
6607 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6608 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6609 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6610 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6611 HMVMX_START_EXIT_DISPATCH_PROF();
6612
6613 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6614
6615 /*
6616 * Handle the VM-exit.
6617 */
6618 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6619 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6620 if (rcStrict == VINF_SUCCESS)
6621 {
6622 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6623 {
6624 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6625 rcStrict = VINF_VMX_VMEXIT;
6626 }
6627 else
6628 {
6629 if (++(*pcLoops) <= cMaxResumeLoops)
6630 continue;
6631 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6632 rcStrict = VINF_EM_RAW_INTERRUPT;
6633 }
6634 }
6635 else
6636 Assert(rcStrict != VINF_VMX_VMEXIT);
6637 break;
6638 }
6639
6640 /* Ensure VM-exit auxiliary info. is no longer available. */
6641 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6642
6643 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6644 return rcStrict;
6645}
6646#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6647
6648
/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
 *        probes.
 *
 * The following few functions and the associated structure contain the bloat
 * necessary for providing detailed debug events and dtrace probes as well as
 * reliable host side single stepping.  This works on the principle of
 * "subclassing" the normal execution loop and workers.  We replace the loop
 * method completely and override selected helpers to add necessary adjustments
 * to their core operation.
 *
 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
 * any performance for debug and analysis features.
 *
 * @{
 */
6664
6665/**
6666 * Single steps guest code using hardware-assisted VMX.
6667 *
6668 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
6669 * but single-stepping through the hypervisor debugger.
6670 *
6671 * @returns Strict VBox status code (i.e. informational status codes too).
6672 * @param pVCpu The cross context virtual CPU structure.
6673 * @param pcLoops Pointer to the number of executed loops.
6674 *
6675 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6676 */
6677static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6678{
6679 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6680 Assert(pcLoops);
6681 Assert(*pcLoops <= cMaxResumeLoops);
6682
6683 VMXTRANSIENT VmxTransient;
6684 RT_ZERO(VmxTransient);
6685 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6686
6687 /* Set HMCPU indicators. */
6688 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
6689 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
6690 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6691 pVCpu->hmr0.s.fUsingDebugLoop = true;
6692
6693 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
6694 VMXRUNDBGSTATE DbgState;
6695 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
6696 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6697
6698 /*
6699 * The loop.
6700 */
6701 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6702 for (;;)
6703 {
6704 Assert(!HMR0SuspendPending());
6705 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6706 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6707 bool fStepping = pVCpu->hm.s.fSingleInstruction;
6708
6709 /* Set up VM-execution controls the next two can respond to. */
6710 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6711
6712 /*
6713 * Preparatory work for running guest code, this may force us to
6714 * return to ring-3.
6715 *
6716 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6717 */
6718 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
6719 if (rcStrict != VINF_SUCCESS)
6720 break;
6721
6722 /* Interrupts are disabled at this point! */
6723 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6724
6725 /* Override any obnoxious code in the above two calls. */
6726 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
6727
6728 /*
6729 * Finally execute the guest.
6730 */
6731 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6732
6733 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6734 /* Interrupts are re-enabled at this point! */
6735
6736 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
6737 if (RT_SUCCESS(rcRun))
6738 { /* very likely */ }
6739 else
6740 {
6741 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6742 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6743 return rcRun;
6744 }
6745
6746 /* Profile the VM-exit. */
6747 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6748 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
6749 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6750 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6751 HMVMX_START_EXIT_DISPATCH_PROF();
6752
6753 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6754
6755 /*
6756 * Handle the VM-exit - we quit earlier on certain VM-exits, see hmR0VmxHandleExitDebug().
6757 */
6758 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
6759 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6760 if (rcStrict != VINF_SUCCESS)
6761 break;
6762 if (++(*pcLoops) > cMaxResumeLoops)
6763 {
6764 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6765 rcStrict = VINF_EM_RAW_INTERRUPT;
6766 break;
6767 }
6768
6769 /*
6770 * Stepping: Did the RIP change, if so, consider it a single step.
6771 * Otherwise, make sure one of the TFs gets set.
6772 */
6773 if (fStepping)
6774 {
6775 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
6776 AssertRC(rc);
6777 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
6778 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
6779 {
6780 rcStrict = VINF_EM_DBG_STEPPED;
6781 break;
6782 }
6783 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
6784 }
6785
6786 /*
6787 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
6788 */
6789 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
6790 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
6791
6792 /* Restore all controls applied by hmR0VmxPreRunGuestDebugStateApply above. */
6793 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
6794 Assert(rcStrict == VINF_SUCCESS);
6795 }
6796
6797 /*
6798 * Clear the X86_EFL_TF if necessary.
6799 */
6800 if (pVCpu->hmr0.s.fClearTrapFlag)
6801 {
6802 int rc = vmxHCImportGuestStateEx(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
6803 AssertRC(rc);
6804 pVCpu->hmr0.s.fClearTrapFlag = false;
6805 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
6806 }
6807 /** @todo there seems to be issues with the resume flag when the monitor trap
6808 * flag is pending without being used. Seen early in bios init when
6809 * accessing APIC page in protected mode. */
6810
6811/** @todo we need to do hmR0VmxRunDebugStateRevert here too, in case we broke
6812 * out of the above loop. */
6813
6814 /* Restore HMCPU indicators. */
6815 pVCpu->hmr0.s.fUsingDebugLoop = false;
6816 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
6817 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
6818
6819 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6820 return rcStrict;
6821}
6822
6823/** @} */
6824
6825
6826/**
6827 * Checks if any expensive dtrace probes are enabled and we should go to the
6828 * debug loop.
6829 *
6830 * @returns true if we should use debug loop, false if not.
6831 */
6832static bool hmR0VmxAnyExpensiveProbesEnabled(void)
6833{
6834 /* It's probably faster to OR the raw 32-bit counter variables together.
6835 Since the variables are in an array and the probes are next to one
6836 another (more or less), we have good locality. So, better read
6837 eight-nine cache lines ever time and only have one conditional, than
6838 128+ conditionals, right? */
6839 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
6840 | VBOXVMM_XCPT_DE_ENABLED_RAW()
6841 | VBOXVMM_XCPT_DB_ENABLED_RAW()
6842 | VBOXVMM_XCPT_BP_ENABLED_RAW()
6843 | VBOXVMM_XCPT_OF_ENABLED_RAW()
6844 | VBOXVMM_XCPT_BR_ENABLED_RAW()
6845 | VBOXVMM_XCPT_UD_ENABLED_RAW()
6846 | VBOXVMM_XCPT_NM_ENABLED_RAW()
6847 | VBOXVMM_XCPT_DF_ENABLED_RAW()
6848 | VBOXVMM_XCPT_TS_ENABLED_RAW()
6849 | VBOXVMM_XCPT_NP_ENABLED_RAW()
6850 | VBOXVMM_XCPT_SS_ENABLED_RAW()
6851 | VBOXVMM_XCPT_GP_ENABLED_RAW()
6852 | VBOXVMM_XCPT_PF_ENABLED_RAW()
6853 | VBOXVMM_XCPT_MF_ENABLED_RAW()
6854 | VBOXVMM_XCPT_AC_ENABLED_RAW()
6855 | VBOXVMM_XCPT_XF_ENABLED_RAW()
6856 | VBOXVMM_XCPT_VE_ENABLED_RAW()
6857 | VBOXVMM_XCPT_SX_ENABLED_RAW()
6858 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
6859 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
6860 ) != 0
6861 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
6862 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
6863 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
6864 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
6865 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
6866 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
6867 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
6868 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
6869 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
6870 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
6871 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
6872 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
6873 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
6874 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
6875 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
6876 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
6877 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
6878 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
6879 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
6880 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
6881 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
6882 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
6883 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
6884 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
6885 | VBOXVMM_INSTR_STR_ENABLED_RAW()
6886 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
6887 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
6888 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
6889 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
6890 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
6891 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
6892 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
6893 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
6894 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
6895 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
6896 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
6897 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
6898 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
6899 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
6900 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
6901 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
6902 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
6903 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
6904 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
6905 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
6906 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
6907 ) != 0
6908 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
6909 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
6910 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
6911 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
6912 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
6913 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
6914 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
6915 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
6916 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
6917 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
6918 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
6919 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
6920 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
6921 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
6922 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
6923 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
6924 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
6925 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
6926 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
6927 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
6928 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
6929 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
6930 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
6931 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
6932 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
6933 | VBOXVMM_EXIT_STR_ENABLED_RAW()
6934 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
6935 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
6936 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
6937 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
6938 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
6939 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
6940 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
6941 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
6942 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
6943 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
6944 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
6945 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
6946 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
6947 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
6948 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
6949 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
6950 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
6951 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
6952 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
6953 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
6954 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
6955 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
6956 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
6957 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
6958 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
6959 ) != 0;
6960}
6961
6962
6963/**
6964 * Runs the guest using hardware-assisted VMX.
6965 *
6966 * @returns Strict VBox status code (i.e. informational status codes too).
6967 * @param pVCpu The cross context virtual CPU structure.
6968 */
6969VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
6970{
6971 AssertPtr(pVCpu);
6972 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
6973 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6974 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
6975 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
6976
6977 VBOXSTRICTRC rcStrict;
6978 uint32_t cLoops = 0;
6979 for (;;)
6980 {
6981#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6982 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
6983#else
6984 NOREF(pCtx);
6985 bool const fInNestedGuestMode = false;
6986#endif
6987 if (!fInNestedGuestMode)
6988 {
6989 if ( !pVCpu->hm.s.fUseDebugLoop
6990 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
6991 && !DBGFIsStepping(pVCpu)
6992 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
6993 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
6994 else
6995 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
6996 }
6997#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6998 else
6999 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
7000
7001 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7002 {
7003 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7004 continue;
7005 }
7006 if (rcStrict == VINF_VMX_VMEXIT)
7007 {
7008 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7009 continue;
7010 }
7011#endif
7012 break;
7013 }
7014
7015 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7016 switch (rcLoop)
7017 {
7018 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7019 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7020 }
7021
7022 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7023 if (RT_FAILURE(rc2))
7024 {
7025 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7026 rcStrict = rc2;
7027 }
7028 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7029 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7030 return rcStrict;
7031}
7032
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette