VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp @ 94882

Last change on this file was r94882, checked in by vboxsync, 3 years ago:

VMM: First stab at Guest Compatibility Manager, fixing up things like division overflows caused by fast CPUs (see bugref:9735).

1/* $Id: HMVMXR0.cpp 94882 2022-05-06 06:33:54Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#define VMCPU_INCL_CPUM_GST_CTX
24#include <iprt/x86.h>
25#include <iprt/asm-amd64-x86.h>
26#include <iprt/thread.h>
27#include <iprt/mem.h>
28#include <iprt/mp.h>
29
30#include <VBox/vmm/pdmapi.h>
31#include <VBox/vmm/dbgf.h>
32#include <VBox/vmm/iem.h>
33#include <VBox/vmm/iom.h>
34#include <VBox/vmm/tm.h>
35#include <VBox/vmm/em.h>
36#include <VBox/vmm/gcm.h>
37#include <VBox/vmm/gim.h>
38#include <VBox/vmm/apic.h>
39#include "HMInternal.h"
40#include <VBox/vmm/vmcc.h>
41#include <VBox/vmm/hmvmxinline.h>
42#include "HMVMXR0.h"
43#include "VMXInternal.h"
44#include "dtrace/VBoxVMM.h"
45
46
47/*********************************************************************************************************************************
48* Defined Constants And Macros *
49*********************************************************************************************************************************/
50#ifdef DEBUG_ramshankar
51# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
52# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
53# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
54# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
55# define HMVMX_ALWAYS_CLEAN_TRANSIENT
56# define HMVMX_ALWAYS_CHECK_GUEST_STATE
57# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
58# define HMVMX_ALWAYS_TRAP_PF
59# define HMVMX_ALWAYS_FLUSH_TLB
60# define HMVMX_ALWAYS_SWAP_EFER
61#endif
62
63
64/*********************************************************************************************************************************
65* Structures and Typedefs *
66*********************************************************************************************************************************/
67/**
68 * VMX page allocation information.
69 */
70typedef struct
71{
72 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
73 uint32_t uPadding0; /**< Padding to ensure the array of these structs is aligned to a multiple of 8. */
74 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
75 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
76} VMXPAGEALLOCINFO;
77/** Pointer to VMX page-allocation info. */
78typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
79/** Pointer to a const VMX page-allocation info. */
80typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
81AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
82
83
84/*********************************************************************************************************************************
85* Internal Functions *
86*********************************************************************************************************************************/
87static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
88static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
89
90
91/**
92 * Checks if the given MSR is part of the last-branch-from-IP MSR stack.
93 * @returns @c true if it's part of LBR stack, @c false otherwise.
94 *
95 * @param pVM The cross context VM structure.
96 * @param idMsr The MSR.
97 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
98 * Optional, can be NULL.
99 *
100 * @remarks Must only be called when LBR is enabled.
101 */
102DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
103{
104 Assert(pVM->hmr0.s.vmx.fLbr);
105 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
106 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
107 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
108 if (idxMsr < cLbrStack)
109 {
110 if (pidxMsr)
111 *pidxMsr = idxMsr;
112 return true;
113 }
114 return false;
115}
116
117
118/**
119 * Checks if the given MSR is part of the last-branch-to-IP MSR stack.
120 * @returns @c true if it's part of LBR stack, @c false otherwise.
121 *
122 * @param pVM The cross context VM structure.
123 * @param idMsr The MSR.
124 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
125 * Optional, can be NULL.
126 *
127 * @remarks Must only be called when LBR is enabled and when last-branch-to-IP MSRs
128 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
129 */
130DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
131{
132 Assert(pVM->hmr0.s.vmx.fLbr);
133 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
134 {
135 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
136 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
137 if (idxMsr < cLbrStack)
138 {
139 if (pidxMsr)
140 *pidxMsr = idxMsr;
141 return true;
142 }
143 }
144 return false;
145}
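/*
 * Illustrative example (not part of the original source): how the range checks in the
 * two helpers above resolve an MSR to an LBR stack index. The concrete MSR numbers are
 * an assumption for a CPU whose last-branch-from-IP stack spans MSRs 0x680..0x68f
 * (i.e. idLbrFromIpMsrFirst=0x680, idLbrFromIpMsrLast=0x68f):
 *
 *   cLbrStack = 0x68f - 0x680 + 1 = 16
 *   idMsr = 0x683 -> idxMsr = 3    (< 16)  -> part of the stack, returns true.
 *   idMsr = 0x6c3 -> idxMsr = 0x43 (>= 16) -> not part of this stack, returns false.
 */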
146
147
148/**
149 * Gets the active (in use) VMCS info. object for the specified VCPU.
150 *
151 * This is either the guest or nested-guest VMCS info. and need not necessarily
152 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
153 * if the VM-entry failed due to an invalid guest state, we may have "cleared" the
154 * current VMCS while returning to ring-3. However, the VMCS info. object for that
155 * VMCS would still be active and returned here so that we could dump the VMCS
156 * fields to ring-3 for diagnostics. This function is thus only used to
157 * distinguish between the nested-guest and guest VMCS.
158 *
159 * @returns The active VMCS information.
160 * @param pVCpu The cross context virtual CPU structure.
161 *
162 * @thread EMT.
163 * @remarks This function may be called with preemption or interrupts disabled!
164 */
165DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
166{
167 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
168 return &pVCpu->hmr0.s.vmx.VmcsInfo;
169 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
170}
171
172
173/**
174 * Returns whether the VM-exit MSR-store area differs from the VM-entry MSR-load
175 * area.
176 *
177 * @returns @c true if it's different, @c false otherwise.
178 * @param pVmcsInfo The VMCS info. object.
179 */
180DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
181{
182 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
183 && pVmcsInfo->pvGuestMsrStore);
184}
185
186
187/**
188 * Sets the given Processor-based VM-execution controls.
189 *
190 * @param pVmxTransient The VMX-transient structure.
191 * @param uProcCtls The Processor-based VM-execution controls to set.
192 */
193static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
194{
195 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
196 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
197 {
198 pVmcsInfo->u32ProcCtls |= uProcCtls;
199 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
200 AssertRC(rc);
201 }
202}
203
204
205/**
206 * Removes the given Processor-based VM-execution controls.
207 *
208 * @param pVCpu The cross context virtual CPU structure.
209 * @param pVmxTransient The VMX-transient structure.
210 * @param uProcCtls The Processor-based VM-execution controls to remove.
211 *
212 * @remarks When executing a nested-guest, this will not remove any of the specified
213 * controls if the nested hypervisor has set any one of them.
214 */
215static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
216{
217 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
218 if (pVmcsInfo->u32ProcCtls & uProcCtls)
219 {
220#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
221 if ( !pVmxTransient->fIsNestedGuest
222 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
223#else
224 NOREF(pVCpu);
225 if (!pVmxTransient->fIsNestedGuest)
226#endif
227 {
228 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
229 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
230 AssertRC(rc);
231 }
232 }
233}
234
235
236/**
237 * Sets the TSC offset for the current VMCS.
238 *
239 * @param pVmcsInfo The VMCS info. object.
240 * @param uTscOffset The TSC offset to set.
241 */
242static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
243{
244 if (pVmcsInfo->u64TscOffset != uTscOffset)
245 {
246 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
247 AssertRC(rc);
248 pVmcsInfo->u64TscOffset = uTscOffset;
249 }
250}
251
252
253/**
254 * Loads the VMCS specified by the VMCS info. object.
255 *
256 * @returns VBox status code.
257 * @param pVmcsInfo The VMCS info. object.
258 *
259 * @remarks Can be called with interrupts disabled.
260 */
261static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
262{
263 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
264 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
265
266 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
267 if (RT_SUCCESS(rc))
268 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
269 return rc;
270}
271
272
273/**
274 * Clears the VMCS specified by the VMCS info. object.
275 *
276 * @returns VBox status code.
277 * @param pVmcsInfo The VMCS info. object.
278 *
279 * @remarks Can be called with interrupts disabled.
280 */
281static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
282{
283 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
284 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
285
286 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
287 if (RT_SUCCESS(rc))
288 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
289 return rc;
290}
291
292
293/**
294 * Checks whether the MSR belongs to the set of guest MSRs that are loaded
295 * lazily and whose host values are restored lazily while leaving VT-x.
296 *
297 * @returns true if it does, false otherwise.
298 * @param pVCpu The cross context virtual CPU structure.
299 * @param idMsr The MSR to check.
300 */
301static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
302{
303 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
304 {
305 switch (idMsr)
306 {
307 case MSR_K8_LSTAR:
308 case MSR_K6_STAR:
309 case MSR_K8_SF_MASK:
310 case MSR_K8_KERNEL_GS_BASE:
311 return true;
312 }
313 }
314 return false;
315}
316
317
318/**
319 * Loads a set of guest MSRs to allow read/write passthru access to the guest.
320 *
321 * The name of this function is slightly confusing. This function does NOT
322 * postpone loading, but loads the MSRs right now. "hmR0VmxLazy" is simply a
323 * common prefix for functions dealing with "lazy restoration" of the shared
324 * MSRs.
325 *
326 * @param pVCpu The cross context virtual CPU structure.
327 *
328 * @remarks No-long-jump zone!!!
329 */
330static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
331{
332 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
333 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
334
335 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
336 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
337 {
338 /*
339 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
340 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
341 * we can skip a few MSR writes.
342 *
343 * Otherwise, it implies either 1. they're not loaded (and differ from the values on
344 * the CPU), or 2. they're loaded but the guest MSR values in the guest-CPU context
345 * might differ from what's currently loaded in the CPU. In either case, we need to
346 * write the new guest MSR values to the CPU, see @bugref{8728}.
347 */
348 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
349 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
350 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
351 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
352 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
353 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
354 {
355#ifdef VBOX_STRICT
356 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
357 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
358 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
359 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
360#endif
361 }
362 else
363 {
364 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
365 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
366 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
367 /* The system call flag mask register isn't as benign and accepting of all
368 values as the above, so mask it to avoid #GP'ing on corrupted input. */
369 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
370 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
371 }
372 }
373 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
374}
375
376
377/**
378 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
379 *
380 * @returns @c true if found, @c false otherwise.
381 * @param pVmcsInfo The VMCS info. object.
382 * @param idMsr The MSR to find.
383 */
384static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
385{
386 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
387 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
388 Assert(pMsrs);
389 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
390 for (uint32_t i = 0; i < cMsrs; i++)
391 {
392 if (pMsrs[i].u32Msr == idMsr)
393 return true;
394 }
395 return false;
396}
397
398
399/**
400 * Performs lazy restoration of the set of host MSRs if they were previously
401 * loaded with guest MSR values.
402 *
403 * @param pVCpu The cross context virtual CPU structure.
404 *
405 * @remarks No-long-jump zone!!!
406 * @remarks The guest MSRs should have been saved back into the guest-CPU
407 * context by hmR0VmxImportGuestState()!!!
408 */
409static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
410{
411 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
412 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
413
414 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
415 {
416 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
417 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
418 {
419 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
420 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
421 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
422 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
423 }
424 }
425 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
426}
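/*
 * Illustrative note (not part of the original source): the lazy-MSR flags used above
 * form a small lifecycle together with hmR0VmxLazySaveHostMsrs() further down. A
 * simplified sketch of the order in which they are toggled:
 *
 *   hmR0VmxLazySaveHostMsrs()    - caches the host values, sets VMX_LAZY_MSRS_SAVED_HOST.
 *   hmR0VmxLazyLoadGuestMsrs()   - writes the guest values to the CPU if needed,
 *                                  sets VMX_LAZY_MSRS_LOADED_GUEST.
 *   hmR0VmxLazyRestoreHostMsrs() - restores the cached host values and clears both flags.
 */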
427
428
429/**
430 * Sets pfnStartVm to the best suited variant.
431 *
432 * This must be called whenever anything changes relative to the hmR0VmxStartVm
433 * variant selection:
434 * - pVCpu->hm.s.fLoadSaveGuestXcr0
435 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
436 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
437 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
438 * - Perhaps: CPUMCTX.fXStateMask (windows only)
439 *
440 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
441 * can be changed at runtime.
442 */
443static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
444{
445 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
446 {
447 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
448 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
449 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
450 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
451 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
452 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
453 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
454 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
455 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
456 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
457 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
458 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
459 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
460 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
461 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
462 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
463 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
464 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
465 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
466 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
467 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
468 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
469 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
470 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
471 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
472 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
473 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
474 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
475 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
476 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
477 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
478 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
479 };
480 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
481 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
482 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
483 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
484 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
485 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
486 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
487 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
488}
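/*
 * Illustrative sketch (not part of the original source): how the selector bits above
 * compose the index into s_aHmR0VmxStartVmFunctions. The flag combination below is an
 * assumed example, not a configuration taken from the code.
 */
#if 0 /* example only */
static uintptr_t hmR0VmxExampleStartVmIndex(void)
{
    /* Assume XCR0 load/save and an IBPB on VM-entry are wanted, nothing else: */
    uintptr_t const idx = 1  /* fLoadSaveGuestXcr0 */
                        | 2  /* HM_WSF_IBPB_ENTRY */
                        | 0  /* HM_WSF_L1D_ENTRY not set */
                        | 0  /* HM_WSF_MDS_ENTRY not set */
                        | 0; /* HM_WSF_IBPB_EXIT not set */
    /* idx == 3, i.e. hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit. */
    return idx;
}
#endif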
489
490
491/**
492 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
493 * stack.
494 *
495 * @returns Strict VBox status code (i.e. informational status codes too).
496 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
497 * @param pVCpu The cross context virtual CPU structure.
498 * @param uValue The value to push to the guest stack.
499 */
500static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
501{
502 /*
503 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
504 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
505 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
506 */
507 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
508 if (pCtx->sp == 1)
509 return VINF_EM_RESET;
510 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
511 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
512 AssertRC(rc);
513 return rc;
514}
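/*
 * Illustrative example (not part of the original source): behaviour of the push helper
 * above for a few assumed SP values, with ss.u64Base = 0x20000:
 *
 *   sp = 0xfffe -> new sp = 0xfffc, the 16-bit value is written at 0x2fffc.
 *   sp = 0x0000 -> new sp = 0xfffe (segment wraparound, expected behaviour).
 *   sp = 0x0001 -> VINF_EM_RESET is returned (treated as a triple fault).
 */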
515
516
517/**
518 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
519 * unreferenced local parameters in the template code...
520 */
521DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
522{
523 RT_NOREF(pVCpu);
524 return VMXWriteVmcs16(uFieldEnc, u16Val);
525}
526
527
528/**
529 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
530 * unreferenced local parameters in the template code...
531 */
532DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
533{
534 RT_NOREF(pVCpu);
535 return VMXWriteVmcs32(uFieldEnc, u32Val);
536}
537
538
539/**
540 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
541 * unreferenced local parameters in the template code...
542 */
543DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
544{
545 RT_NOREF(pVCpu);
546 return VMXWriteVmcs64(uFieldEnc, u64Val);
547}
548
549
550/**
551 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
552 * unreferenced local parameters in the template code...
553 */
554DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
555{
556 RT_NOREF(pVCpu);
557 return VMXReadVmcs16(uFieldEnc, pu16Val);
558}
559
560
561/**
562 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
563 * unreferenced local parameters in the template code...
564 */
565DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
566{
567 RT_NOREF(pVCpu);
568 return VMXReadVmcs32(uFieldEnc, pu32Val);
569}
570
571
572/**
573 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
574 * unreferenced local parameters in the template code...
575 */
576DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
577{
578 RT_NOREF(pVCpu);
579 return VMXReadVmcs64(uFieldEnc, pu64Val);
580}
581
582
583/*
584 * Instantiate the code we share with the NEM darwin backend.
585 */
586#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
587#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
588
589#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
590#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
591#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
592#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
593
594#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
595#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
596#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
597#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
598
599#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
600#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
601#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
602#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
603
604#include "../VMMAll/VMXAllTemplate.cpp.h"
605
606#undef VMX_VMCS_WRITE_16
607#undef VMX_VMCS_WRITE_32
608#undef VMX_VMCS_WRITE_64
609#undef VMX_VMCS_WRITE_NW
610
611#undef VMX_VMCS_READ_16
612#undef VMX_VMCS_READ_32
613#undef VMX_VMCS_READ_64
614#undef VMX_VMCS_READ_NW
615
616#undef VM_IS_VMX_PREEMPT_TIMER_USED
617#undef VM_IS_VMX_NESTED_PAGING
618#undef VM_IS_VMX_UNRESTRICTED_GUEST
619#undef VCPU_2_VMXSTATS
620#undef VCPU_2_VMXSTATE
621
622
623/**
624 * Updates the VM's last error record.
625 *
626 * If there was a VMX instruction error, reads the error data from the VMCS and
627 * updates VCPU's last error record as well.
628 *
629 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
630 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
631 * VERR_VMX_INVALID_VMCS_FIELD.
632 * @param rc The error code.
633 */
634static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
635{
636 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
637 || rc == VERR_VMX_UNABLE_TO_START_VM)
638 {
639 AssertPtrReturnVoid(pVCpu);
640 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
641 }
642 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
643}
644
645
646/**
647 * Enters VMX root mode operation on the current CPU.
648 *
649 * @returns VBox status code.
650 * @param pHostCpu The HM physical-CPU structure.
651 * @param pVM The cross context VM structure. Can be
652 * NULL, after a resume.
653 * @param HCPhysCpuPage Physical address of the VMXON region.
654 * @param pvCpuPage Pointer to the VMXON region.
655 */
656static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
657{
658 Assert(pHostCpu);
659 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
660 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
661 Assert(pvCpuPage);
662 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
663
664 if (pVM)
665 {
666 /* Write the VMCS revision identifier to the VMXON region. */
667 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
668 }
669
670 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
671 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
672
673 /* Enable the VMX bit in CR4 if necessary. */
674 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
675
676 /* Record whether VMXE was already enabled prior to us enabling it above. */
677 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
678
679 /* Enter VMX root mode. */
680 int rc = VMXEnable(HCPhysCpuPage);
681 if (RT_FAILURE(rc))
682 {
683 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
684 if (!pHostCpu->fVmxeAlreadyEnabled)
685 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
686
687 if (pVM)
688 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
689 }
690
691 /* Restore interrupts. */
692 ASMSetFlags(fEFlags);
693 return rc;
694}
695
696
697/**
698 * Exits VMX root mode operation on the current CPU.
699 *
700 * @returns VBox status code.
701 * @param pHostCpu The HM physical-CPU structure.
702 */
703static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
704{
705 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
706
707 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
708 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
709
710 /* If we're for some reason not in VMX root mode, then don't leave it. */
711 RTCCUINTREG const uHostCr4 = ASMGetCR4();
712
713 int rc;
714 if (uHostCr4 & X86_CR4_VMXE)
715 {
716 /* Exit VMX root mode and clear the VMX bit in CR4. */
717 VMXDisable();
718
719 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
720 if (!pHostCpu->fVmxeAlreadyEnabled)
721 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
722
723 rc = VINF_SUCCESS;
724 }
725 else
726 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
727
728 /* Restore interrupts. */
729 ASMSetFlags(fEFlags);
730 return rc;
731}
732
733
734/**
735 * Allocates pages as specified by an array of VMX page-allocation info
736 * objects.
737 *
738 * The pages' contents are zeroed after allocation.
739 *
740 * @returns VBox status code.
741 * @param phMemObj Where to return the handle to the allocation.
742 * @param paAllocInfo The pointer to the first element of the VMX
743 * page-allocation info object array.
744 * @param cEntries The number of elements in the @a paAllocInfo array.
745 */
746static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
747{
748 *phMemObj = NIL_RTR0MEMOBJ;
749
750 /* Figure out how many pages to allocate. */
751 uint32_t cPages = 0;
752 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
753 cPages += !!paAllocInfo[iPage].fValid;
754
755 /* Allocate the pages. */
756 if (cPages)
757 {
758 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
759 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
760 if (RT_FAILURE(rc))
761 return rc;
762
763 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
764 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
765 RT_BZERO(pvFirstPage, cbPages);
766
767 uint32_t iPage = 0;
768 for (uint32_t i = 0; i < cEntries; i++)
769 if (paAllocInfo[i].fValid)
770 {
771 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
772 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
773 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
774 AssertPtr(pvPage);
775
776 Assert(paAllocInfo[i].pHCPhys); /* Index with 'i' (the info entry); 'iPage' only counts allocated pages. */
777 Assert(paAllocInfo[i].ppVirt);
778 *paAllocInfo[i].pHCPhys = HCPhysPage;
779 *paAllocInfo[i].ppVirt = pvPage;
780
781 /* Move to next page. */
782 ++iPage;
783 }
784
785 /* Make sure all valid (requested) pages have been assigned. */
786 Assert(iPage == cPages);
787 }
788 return VINF_SUCCESS;
789}
790
791
792/**
793 * Frees pages allocated using hmR0VmxPagesAllocZ.
794 *
795 * @param phMemObj Pointer to the memory object handle. Will be set to
796 * NIL.
797 */
798DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
799{
800 /* We can cleanup wholesale since it's all one allocation. */
801 if (*phMemObj != NIL_RTR0MEMOBJ)
802 {
803 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
804 *phMemObj = NIL_RTR0MEMOBJ;
805 }
806}
807
808
809/**
810 * Initializes a VMCS info. object.
811 *
812 * @param pVmcsInfo The VMCS info. object.
813 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
814 */
815static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
816{
817 RT_ZERO(*pVmcsInfo);
818 RT_ZERO(*pVmcsInfoShared);
819
820 pVmcsInfo->pShared = pVmcsInfoShared;
821 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
822 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
823 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
824 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
825 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
826 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
827 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
828 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
829 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
830 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
831 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
832 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
833}
834
835
836/**
837 * Frees the VT-x structures for a VMCS info. object.
838 *
839 * @param pVmcsInfo The VMCS info. object.
840 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
841 */
842static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
843{
844 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
845 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
846}
847
848
849/**
850 * Allocates the VT-x structures for a VMCS info. object.
851 *
852 * @returns VBox status code.
853 * @param pVCpu The cross context virtual CPU structure.
854 * @param pVmcsInfo The VMCS info. object.
855 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
856 *
857 * @remarks The caller is expected to take care of any and all allocation failures.
858 * This function will not perform any cleanup for failures half-way
859 * through.
860 */
861static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
862{
863 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
864
865 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
866 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
867 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
868 VMXPAGEALLOCINFO aAllocInfo[] =
869 {
870 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
871 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
872 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
873 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
874 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
875 };
876
877 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
878 if (RT_FAILURE(rc))
879 return rc;
880
881 /*
882 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas
883 * because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
884 */
885 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
886 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
887 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
888 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
889
890 /*
891 * Get the virtual-APIC page rather than allocating it again.
892 */
893 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
894 {
895 if (!fIsNstGstVmcs)
896 {
897 if (PDMHasApic(pVM))
898 {
899 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
900 if (RT_FAILURE(rc))
901 return rc;
902 Assert(pVmcsInfo->pbVirtApic);
903 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
904 }
905 }
906 else
907 {
908 pVmcsInfo->pbVirtApic = &pVCpu->cpum.GstCtx.hwvirt.vmx.abVirtApicPage[0];
909 pVmcsInfo->HCPhysVirtApic = GVMMR0ConvertGVMPtr2HCPhys(pVM, pVmcsInfo->pbVirtApic);
910 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
911 }
912 }
913
914 return VINF_SUCCESS;
915}
916
917
918/**
919 * Free all VT-x structures for the VM.
920 *
921 * @returns IPRT status code.
922 * @param pVM The cross context VM structure.
923 */
924static void hmR0VmxStructsFree(PVMCC pVM)
925{
926 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
927#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
928 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
929 {
930 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
931 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
932 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
933 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
934 }
935#endif
936
937 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
938 {
939 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
940 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
941#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
942 if (pVM->cpum.ro.GuestFeatures.fVmx)
943 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
944#endif
945 }
946}
947
948
949/**
950 * Allocates all VT-x structures for the VM.
951 *
952 * @returns IPRT status code.
953 * @param pVM The cross context VM structure.
954 *
955 * @remarks This function will clean up on memory allocation failures.
956 */
957static int hmR0VmxStructsAlloc(PVMCC pVM)
958{
959 /*
960 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
961 * The VMCS size cannot be more than 4096 bytes.
962 *
963 * See Intel spec. Appendix A.1 "Basic VMX Information".
964 */
965 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
966 if (cbVmcs <= X86_PAGE_4K_SIZE)
967 { /* likely */ }
968 else
969 {
970 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
971 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
972 }
973
974 /*
975 * Allocate per-VM VT-x structures.
976 */
977 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
978 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
979 VMXPAGEALLOCINFO aAllocInfo[] =
980 {
981 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
982 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
983 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
984#ifdef VBOX_WITH_CRASHDUMP_MAGIC
985 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
986#endif
987 };
988
989 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
990 if (RT_SUCCESS(rc))
991 {
992#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
993 /* Allocate the shadow VMCS-fields array. */
994 if (fUseVmcsShadowing)
995 {
996 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
997 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
998 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
999 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1000 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1001 rc = VERR_NO_MEMORY;
1002 }
1003#endif
1004
1005 /*
1006 * Allocate per-VCPU VT-x structures.
1007 */
1008 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1009 {
1010 /* Allocate the guest VMCS structures. */
1011 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1012 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1013
1014#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1015 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1016 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1017 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1018#endif
1019 }
1020 if (RT_SUCCESS(rc))
1021 return VINF_SUCCESS;
1022 }
1023 hmR0VmxStructsFree(pVM);
1024 return rc;
1025}
1026
1027
1028/**
1029 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1030 *
1031 * @param pVM The cross context VM structure.
1032 */
1033static void hmR0VmxStructsInit(PVMCC pVM)
1034{
1035 /* Paranoia. */
1036 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1037#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1038 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1039#endif
1040
1041 /*
1042 * Initialize members up-front so we can cleanup en masse on allocation failures.
1043 */
1044#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1045 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1046#endif
1047 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1048 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1049 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1050 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1051 {
1052 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1053 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1054 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1055 }
1056}
1057
1058#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1059/**
1060 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1061 *
1062 * @returns @c true if the MSR is intercepted, @c false otherwise.
1063 * @param pbMsrBitmap The MSR bitmap.
1064 * @param offMsr The MSR byte offset.
1065 * @param iBit The bit offset from the byte offset.
1066 */
1067DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1068{
1069 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1070 return ASMBitTest(pbMsrBitmap + offMsr, iBit);
1071}
1072#endif
1073
1074/**
1075 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1076 *
1077 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1078 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1079 * VMX execution of the nested-guest, but only if the nested-guest is also not
1080 * intercepting the read/write access of this MSR.
1081 *
1082 * @param pVCpu The cross context virtual CPU structure.
1083 * @param pVmcsInfo The VMCS info. object.
1084 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1085 * @param idMsr The MSR value.
1086 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1087 * include both a read -and- a write permission!
1088 *
1089 * @sa CPUMGetVmxMsrPermission.
1090 * @remarks Can be called with interrupts disabled.
1091 */
1092static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1093{
1094 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1095 Assert(pbMsrBitmap);
1096 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1097
1098 /*
1099 * MSR-bitmap Layout:
1100 * Byte index MSR range Interpreted as
1101 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1102 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1103 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1104 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1105 *
1106 * A bit corresponding to an MSR within the above range causes a VM-exit
1107 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1108 * the MSR range, it always causes a VM-exit.
1109 *
1110 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1111 */
1112 uint16_t const offBitmapRead = 0;
1113 uint16_t const offBitmapWrite = 0x800;
1114 uint16_t offMsr;
1115 int32_t iBit;
1116 if (idMsr <= UINT32_C(0x00001fff))
1117 {
1118 offMsr = 0;
1119 iBit = idMsr;
1120 }
1121 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1122 {
1123 offMsr = 0x400;
1124 iBit = idMsr - UINT32_C(0xc0000000);
1125 }
1126 else
1127 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1128
1129 /*
1130 * Set the MSR read permission.
1131 */
1132 uint16_t const offMsrRead = offBitmapRead + offMsr;
1133 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1134 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1135 {
1136#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1137 bool const fClear = !fIsNstGstVmcs ? true
1138 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1139#else
1140 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1141 bool const fClear = true;
1142#endif
1143 if (fClear)
1144 ASMBitClear(pbMsrBitmap + offMsrRead, iBit);
1145 }
1146 else
1147 ASMBitSet(pbMsrBitmap + offMsrRead, iBit);
1148
1149 /*
1150 * Set the MSR write permission.
1151 */
1152 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1153 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1154 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1155 {
1156#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1157 bool const fClear = !fIsNstGstVmcs ? true
1158 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1159#else
1160 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1161 bool const fClear = true;
1162#endif
1163 if (fClear)
1164 ASMBitClear(pbMsrBitmap + offMsrWrite, iBit);
1165 }
1166 else
1167 ASMBitSet(pbMsrBitmap + offMsrWrite, iBit);
1168}
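/*
 * Illustrative example (not part of the original source): where a specific MSR lands in
 * the 4 KB bitmap according to the layout documented above, using MSR_K8_LSTAR
 * (0xc0000082) as the example:
 *
 *   high MSR range  -> offMsr = 0x400, iBit = 0xc0000082 - 0xc0000000 = 0x82
 *   read intercept  -> byte 0x000 + 0x400 + (0x82 >> 3) = 0x410, bit 2
 *   write intercept -> byte 0x800 + 0x400 + (0x82 >> 3) = 0xc10, bit 2
 */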
1169
1170
1171/**
1172 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1173 * area.
1174 *
1175 * @returns VBox status code.
1176 * @param pVCpu The cross context virtual CPU structure.
1177 * @param pVmcsInfo The VMCS info. object.
1178 * @param cMsrs The number of MSRs.
1179 */
1180static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1181{
1182 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1183 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1184 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1185 {
1186 /* Commit the MSR counts to the VMCS and update the cache. */
1187 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1188 {
1189 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1190 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1191 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1192 pVmcsInfo->cEntryMsrLoad = cMsrs;
1193 pVmcsInfo->cExitMsrStore = cMsrs;
1194 pVmcsInfo->cExitMsrLoad = cMsrs;
1195 }
1196 return VINF_SUCCESS;
1197 }
1198
1199 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1200 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1201 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1202}
1203
1204
1205/**
1206 * Adds a new (or updates the value of an existing) guest/host MSR
1207 * pair to be swapped during the world-switch as part of the
1208 * auto-load/store MSR area in the VMCS.
1209 *
1210 * @returns VBox status code.
1211 * @param pVCpu The cross context virtual CPU structure.
1212 * @param pVmxTransient The VMX-transient structure.
1213 * @param idMsr The MSR.
1214 * @param uGuestMsrValue Value of the guest MSR.
1215 * @param fSetReadWrite Whether to set the guest read/write access of this
1216 * MSR (thus not causing a VM-exit).
1217 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1218 * necessary.
1219 */
1220static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1221 bool fSetReadWrite, bool fUpdateHostMsr)
1222{
1223 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1224 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1225 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1226 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1227 uint32_t i;
1228
1229 /* Paranoia. */
1230 Assert(pGuestMsrLoad);
1231
1232#ifndef DEBUG_bird
1233 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1234#endif
1235
1236 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1237 for (i = 0; i < cMsrs; i++)
1238 {
1239 if (pGuestMsrLoad[i].u32Msr == idMsr)
1240 break;
1241 }
1242
1243 bool fAdded = false;
1244 if (i == cMsrs)
1245 {
1246 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1247 ++cMsrs;
1248 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1249 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1250
1251 /* Set the guest to read/write this MSR without causing VM-exits. */
1252 if ( fSetReadWrite
1253 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1254 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1255
1256 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1257 fAdded = true;
1258 }
1259
1260 /* Update the MSR value for the newly added or already existing MSR. */
1261 pGuestMsrLoad[i].u32Msr = idMsr;
1262 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1263
1264 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1265 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1266 {
1267 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1268 pGuestMsrStore[i].u32Msr = idMsr;
1269 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1270 }
1271
1272 /* Update the corresponding slot in the host MSR area. */
1273 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1274 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1275 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1276 pHostMsr[i].u32Msr = idMsr;
1277
1278 /*
1279 * Only if the caller requests to update the host MSR value AND we've newly added the
1280 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1281 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1282 *
1283 * We do this for performance reasons since reading MSRs may be quite expensive.
1284 */
1285 if (fAdded)
1286 {
1287 if (fUpdateHostMsr)
1288 {
1289 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1290 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1291 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1292 }
1293 else
1294 {
1295 /* Someone else can do the work. */
1296 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1297 }
1298 }
1299 return VINF_SUCCESS;
1300}
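/*
 * Minimal usage sketch (not part of the original source; the call site and context are
 * assumed): asking the CPU to swap the guest/host EFER values via the auto-load/store
 * area, deferring the host-value refresh to hmR0VmxUpdateAutoLoadHostMsrs().
 */
#if 0 /* example only */
    int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, pVCpu->cpum.GstCtx.msrEFER,
                                        false /* fSetReadWrite */, false /* fUpdateHostMsr */);
    AssertRCReturn(rc, rc);
#endif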
1301
1302
1303/**
1304 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1305 * auto-load/store MSR area in the VMCS.
1306 *
1307 * @returns VBox status code.
1308 * @param pVCpu The cross context virtual CPU structure.
1309 * @param pVmxTransient The VMX-transient structure.
1310 * @param idMsr The MSR.
1311 */
1312static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1313{
1314 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1315 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1316 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1317 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1318
1319#ifndef DEBUG_bird
1320 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1321#endif
1322
1323 for (uint32_t i = 0; i < cMsrs; i++)
1324 {
1325 /* Find the MSR. */
1326 if (pGuestMsrLoad[i].u32Msr == idMsr)
1327 {
1328 /*
1329 * If it's the last MSR, we only need to reduce the MSR count.
1330 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1331 */
1332 if (i < cMsrs - 1)
1333 {
1334 /* Remove it from the VM-entry MSR-load area. */
1335 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1336 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1337
1338 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1339 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1340 {
1341 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1342 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1343 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1344 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1345 }
1346
1347 /* Remove it from the VM-exit MSR-load area. */
1348 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1349 Assert(pHostMsr[i].u32Msr == idMsr);
1350 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1351 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1352 }
1353
1354 /* Reduce the count to reflect the removed MSR and bail. */
1355 --cMsrs;
1356 break;
1357 }
1358 }
1359
1360 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1361 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1362 {
1363 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1364 AssertRCReturn(rc, rc);
1365
1366 /* We're no longer swapping MSRs during the world-switch; intercept guest reads/writes to them. */
1367 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1368 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1369
1370 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1371 return VINF_SUCCESS;
1372 }
1373
1374 return VERR_NOT_FOUND;
1375}
1376
1377
1378/**
1379 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1380 *
1381 * @param pVCpu The cross context virtual CPU structure.
1382 * @param pVmcsInfo The VMCS info. object.
1383 *
1384 * @remarks No-long-jump zone!!!
1385 */
1386static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1387{
1388 RT_NOREF(pVCpu);
1389 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1390
1391 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1392 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1393 Assert(pHostMsrLoad);
1394 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1395 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1396 for (uint32_t i = 0; i < cMsrs; i++)
1397 {
1398 /*
1399 * Performance hack for the host EFER MSR. We use the cached value rather than re-reading it.
1400 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1401 */
1402 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1403 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1404 else
1405 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1406 }
1407}
1408
1409
1410/**
1411 * Saves a set of host MSRs to allow read/write passthru access to the guest, so
1412 * that lazy restoration of the host MSRs can be performed when leaving VT-x.
1413 *
1414 * @param pVCpu The cross context virtual CPU structure.
1415 *
1416 * @remarks No-long-jump zone!!!
1417 */
1418static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1419{
1420 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1421
1422 /*
1423 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1424 */
1425 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1426 {
1427 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1428 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1429 {
1430 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1431 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1432 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1433 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1434 }
1435 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1436 }
1437}
1438
1439
1440#ifdef VBOX_STRICT
1441
1442/**
1443 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1444 *
1445 * @param pVmcsInfo The VMCS info. object.
1446 */
1447static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1448{
1449 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1450
1451 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1452 {
1453 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1454 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1455 uint64_t uVmcsEferMsrVmcs;
1456 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1457 AssertRC(rc);
1458
1459 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1460 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1461 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1462 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1463 }
1464}
1465
1466
1467/**
1468 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1469 * VMCS are correct.
1470 *
1471 * @param pVCpu The cross context virtual CPU structure.
1472 * @param pVmcsInfo The VMCS info. object.
1473 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1474 */
1475static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1476{
1477 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1478
1479 /* Read the various MSR-area counts from the VMCS. */
1480 uint32_t cEntryLoadMsrs;
1481 uint32_t cExitStoreMsrs;
1482 uint32_t cExitLoadMsrs;
1483 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1484 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1485 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1486
1487 /* Verify all the MSR counts are the same. */
1488 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1489 Assert(cExitStoreMsrs == cExitLoadMsrs);
1490 uint32_t const cMsrs = cExitLoadMsrs;
1491
1492 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1493 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1494
1495 /* Verify the MSR counts are within the allocated page size. */
1496 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1497
1498 /* Verify the relevant contents of the MSR areas match. */
1499 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1500 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1501 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1502 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1503 for (uint32_t i = 0; i < cMsrs; i++)
1504 {
1505 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1506 if (fSeparateExitMsrStorePage)
1507 {
1508 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1509 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1510 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1511 }
1512
1513 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1514 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1515 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1516
1517 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1518 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1519 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1520 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1521
1522 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1523 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1524 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1525 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1526
1527 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1528 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1529 {
1530 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1531 if (fIsEferMsr)
1532 {
1533 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1534 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1535 }
1536 else
1537 {
1538 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1539 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1540 if ( pVM->hmr0.s.vmx.fLbr
1541 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1542 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1543 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1544 {
1545 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1546 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1547 pGuestMsrLoad->u32Msr, cMsrs));
1548 }
1549 else if (!fIsNstGstVmcs)
1550 {
1551 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1552 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1553 }
1554 else
1555 {
1556 /*
1557 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1558 * execute a nested-guest with MSR passthrough.
1559 *
1560 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1561 * allow passthrough too.
1562 */
1563 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1564 Assert(pvMsrBitmapNstGst);
1565 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1566 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1567 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1568 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1569 }
1570 }
1571 }
1572
1573 /* Move to the next MSR. */
1574 pHostMsrLoad++;
1575 pGuestMsrLoad++;
1576 pGuestMsrStore++;
1577 }
1578}
1579
1580#endif /* VBOX_STRICT */
1581
1582/**
1583 * Flushes the TLB using EPT.
1584 *
1585 * @returns VBox status code.
1586 * @param pVCpu The cross context virtual CPU structure of the calling
1587 * EMT. Can be NULL depending on @a enmTlbFlush.
1588 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1589 * enmTlbFlush.
1590 * @param enmTlbFlush Type of flush.
1591 *
1592 * @remarks Caller is responsible for making sure this function is called only
1593 * when NestedPaging is supported and providing @a enmTlbFlush that is
1594 * supported by the CPU.
1595 * @remarks Can be called with interrupts disabled.
1596 */
1597static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1598{
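 /* The INVEPT descriptor is 128 bits: bits 63:0 hold the EPT pointer (not used for an
    all-contexts flush) and bits 127:64 must be zero. */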
1599 uint64_t au64Descriptor[2];
1600 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1601 au64Descriptor[0] = 0;
1602 else
1603 {
1604 Assert(pVCpu);
1605 Assert(pVmcsInfo);
1606 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1607 }
1608 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1609
1610 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1611 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1612
1613 if ( RT_SUCCESS(rc)
1614 && pVCpu)
1615 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1616}
1617
1618
1619/**
1620 * Flushes the TLB using VPID.
1621 *
1622 * @returns VBox status code.
1623 * @param pVCpu The cross context virtual CPU structure of the calling
1624 * EMT. Can be NULL depending on @a enmTlbFlush.
1625 * @param enmTlbFlush Type of flush.
1626 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1627 * on @a enmTlbFlush).
1628 *
1629 * @remarks Can be called with interrupts disabled.
1630 */
1631static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1632{
1633 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1634
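 /* The INVVPID descriptor is 128 bits: bits 15:0 hold the VPID, bits 63:16 must be zero and
    bits 127:64 hold the linear address for individual-address flushes. */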
1635 uint64_t au64Descriptor[2];
1636 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1637 {
1638 au64Descriptor[0] = 0;
1639 au64Descriptor[1] = 0;
1640 }
1641 else
1642 {
1643 AssertPtr(pVCpu);
1644 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1645 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1646 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1647 au64Descriptor[1] = GCPtr;
1648 }
1649
1650 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1651 AssertMsg(rc == VINF_SUCCESS,
1652 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1653
1654 if ( RT_SUCCESS(rc)
1655 && pVCpu)
1656 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1657 NOREF(rc);
1658}
1659
1660
1661/**
1662 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1663 * otherwise there is nothing really to invalidate.
1664 *
1665 * @returns VBox status code.
1666 * @param pVCpu The cross context virtual CPU structure.
1667 * @param GCVirt Guest virtual address of the page to invalidate.
1668 */
1669VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1670{
1671 AssertPtr(pVCpu);
1672 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1673
1674 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1675 {
1676 /*
1677 * We must invalidate the guest TLB entry in either case, we cannot ignore it even for
1678 * the EPT case. See @bugref{6043} and @bugref{6177}.
1679 *
1680 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1681 * as this function may be called in a loop with individual addresses.
1682 */
1683 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1684 if (pVM->hmr0.s.vmx.fVpid)
1685 {
1686 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1687 {
1688 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1689 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1690 }
1691 else
1692 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1693 }
1694 else if (pVM->hmr0.s.fNestedPaging)
1695 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1696 }
1697
1698 return VINF_SUCCESS;
1699}
1700
1701
1702/**
1703 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1704 * case where neither EPT nor VPID is supported by the CPU.
1705 *
1706 * @param pHostCpu The HM physical-CPU structure.
1707 * @param pVCpu The cross context virtual CPU structure.
1708 *
1709 * @remarks Called with interrupts disabled.
1710 */
1711static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1712{
1713 AssertPtr(pVCpu);
1714 AssertPtr(pHostCpu);
1715
1716 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1717
1718 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1719 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1720 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1721 pVCpu->hmr0.s.fForceTLBFlush = false;
1722 return;
1723}
1724
1725
1726/**
1727 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1728 *
1729 * @param pHostCpu The HM physical-CPU structure.
1730 * @param pVCpu The cross context virtual CPU structure.
1731 * @param pVmcsInfo The VMCS info. object.
1732 *
1733 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1734 * nomenclature. The reason is to avoid confusion in compare statements,
1735 * since the host-CPU copies are named "ASID".
1736 *
1737 * @remarks Called with interrupts disabled.
1738 */
1739static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1740{
1741#ifdef VBOX_WITH_STATISTICS
1742 bool fTlbFlushed = false;
1743# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1744# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1745 if (!fTlbFlushed) \
1746 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1747 } while (0)
1748#else
1749# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1750# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1751#endif
1752
1753 AssertPtr(pVCpu);
1754 AssertPtr(pHostCpu);
1755 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1756
1757 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1758 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1759 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1760 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1761
1762 /*
1763 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1764 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1765 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1766 * cannot reuse the current ASID anymore.
1767 */
1768 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1769 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1770 {
1771 ++pHostCpu->uCurrentAsid;
1772 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1773 {
1774 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1775 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1776 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1777 }
1778
1779 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1780 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1781 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1782
1783 /*
1784 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1785 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1786 */
1787 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1788 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1789 HMVMX_SET_TAGGED_TLB_FLUSHED();
1790 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1791 }
1792 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1793 {
1794 /*
1795 * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU
1796 * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
1797 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1798 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1799 * mappings, see @bugref{6568}.
1800 *
1801 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1802 */
1803 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1804 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1805 HMVMX_SET_TAGGED_TLB_FLUSHED();
1806 }
1807 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1808 {
1809 /*
1810 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1811 * address which requires flushing the TLB of EPT cached structures.
1812 *
1813 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1814 */
1815 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1816 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1817 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1818 HMVMX_SET_TAGGED_TLB_FLUSHED();
1819 }
1820
1821
1822 pVCpu->hmr0.s.fForceTLBFlush = false;
1823 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1824
1825 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1826 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1827 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1828 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1829 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1830 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1831 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1832 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1833 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1834
1835 /* Update VMCS with the VPID. */
1836 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1837 AssertRC(rc);
1838
1839#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1840}
1841
1842
1843/**
1844 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1845 *
1846 * @param pHostCpu The HM physical-CPU structure.
1847 * @param pVCpu The cross context virtual CPU structure.
1848 * @param pVmcsInfo The VMCS info. object.
1849 *
1850 * @remarks Called with interrupts disabled.
1851 */
1852static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1853{
1854 AssertPtr(pVCpu);
1855 AssertPtr(pHostCpu);
1856 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1857 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1858 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1859
1860 /*
1861 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1862 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1863 */
1864 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1865 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1866 {
1867 pVCpu->hmr0.s.fForceTLBFlush = true;
1868 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1869 }
1870
1871 /* Check for explicit TLB flushes. */
1872 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1873 {
1874 pVCpu->hmr0.s.fForceTLBFlush = true;
1875 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1876 }
1877
1878 /* Check for TLB flushes while switching to/from a nested-guest. */
1879 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1880 {
1881 pVCpu->hmr0.s.fForceTLBFlush = true;
1882 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1883 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1884 }
1885
1886 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1887 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1888
1889 if (pVCpu->hmr0.s.fForceTLBFlush)
1890 {
1891 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1892 pVCpu->hmr0.s.fForceTLBFlush = false;
1893 }
1894}
1895
1896
1897/**
1898 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1899 *
1900 * @param pHostCpu The HM physical-CPU structure.
1901 * @param pVCpu The cross context virtual CPU structure.
1902 *
1903 * @remarks Called with interrupts disabled.
1904 */
1905static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1906{
1907 AssertPtr(pVCpu);
1908 AssertPtr(pHostCpu);
1909 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1910 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1911 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1912
1913 /*
1914 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1915 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1916 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1917 * cannot reuse the current ASID anymore.
1918 */
1919 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1920 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1921 {
1922 pVCpu->hmr0.s.fForceTLBFlush = true;
1923 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1924 }
1925
1926 /* Check for explicit TLB flushes. */
1927 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1928 {
1929 /*
1930 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1931 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1932 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1933 * include fExplicitFlush's too) - an obscure corner case.
1934 */
1935 pVCpu->hmr0.s.fForceTLBFlush = true;
1936 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1937 }
1938
1939 /* Check for TLB flushes while switching to/from a nested-guest. */
1940 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1941 {
1942 pVCpu->hmr0.s.fForceTLBFlush = true;
1943 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1944 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1945 }
1946
1947 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1948 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1949 if (pVCpu->hmr0.s.fForceTLBFlush)
1950 {
1951 ++pHostCpu->uCurrentAsid;
1952 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1953 {
1954 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1955 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1956 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1957 }
1958
1959 pVCpu->hmr0.s.fForceTLBFlush = false;
1960 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1961 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1962 if (pHostCpu->fFlushAsidBeforeUse)
1963 {
1964 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1965 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1966 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1967 {
1968 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1969 pHostCpu->fFlushAsidBeforeUse = false;
1970 }
1971 else
1972 {
1973 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1974 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1975 }
1976 }
1977 }
1978
1979 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1980 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1981 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1982 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1983 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1984 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1985 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1986
1987 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1988 AssertRC(rc);
1989}
1990
1991
1992/**
1993 * Flushes the guest TLB entry based on CPU capabilities.
1994 *
1995 * @param pHostCpu The HM physical-CPU structure.
1996 * @param pVCpu The cross context virtual CPU structure.
1997 * @param pVmcsInfo The VMCS info. object.
1998 *
1999 * @remarks Called with interrupts disabled.
2000 */
2001static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2002{
2003#ifdef HMVMX_ALWAYS_FLUSH_TLB
2004 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2005#endif
2006 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2007 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2008 {
2009 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2010 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2011 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2012 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2013 default:
2014 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2015 break;
2016 }
2017 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2018}
2019
2020
2021/**
2022 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2023 * TLB entries from the host TLB before VM-entry.
2024 *
2025 * @returns VBox status code.
2026 * @param pVM The cross context VM structure.
2027 */
2028static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2029{
2030 /*
2031 * Determine optimal flush type for nested paging.
2032 * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2033 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2034 */
2035 if (pVM->hmr0.s.fNestedPaging)
2036 {
2037 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2038 {
2039 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2040 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2041 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2042 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2043 else
2044 {
2045 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2046 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2047 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2048 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2049 }
2050
2051 /* Make sure the write-back cacheable memory type for EPT is supported. */
2052 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2053 {
2054 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2055 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2056 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2057 }
2058
2059 /* EPT requires a page-walk length of 4. */
2060 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2061 {
2062 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2063 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2064 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2065 }
2066 }
2067 else
2068 {
2069 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2070 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2071 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2072 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2073 }
2074 }
2075
2076 /*
2077 * Determine optimal flush type for VPID.
2078 */
2079 if (pVM->hmr0.s.vmx.fVpid)
2080 {
2081 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2082 {
2083 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2084 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2085 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2086 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2087 else
2088 {
2089 /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore VPID capability. */
2090 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2091 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2092 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2093 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2094 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2095 pVM->hmr0.s.vmx.fVpid = false;
2096 }
2097 }
2098 else
2099 {
2100 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2101 Log4Func(("VPID supported without INVEPT support. Ignoring VPID.\n"));
2102 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2103 pVM->hmr0.s.vmx.fVpid = false;
2104 }
2105 }
2106
2107 /*
2108 * Setup the handler for flushing tagged-TLBs.
2109 */
2110 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2111 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2112 else if (pVM->hmr0.s.fNestedPaging)
2113 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2114 else if (pVM->hmr0.s.vmx.fVpid)
2115 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2116 else
2117 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2118
2119
2120 /*
2121 * Copy out the result to ring-3.
2122 */
2123 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2124 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2125 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2126 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2127 return VINF_SUCCESS;
2128}
2129
2130
2131/**
2132 * Sets up the LBR MSR ranges based on the host CPU.
2133 *
2134 * @returns VBox status code.
2135 * @param pVM The cross context VM structure.
2136 *
2137 * @sa nemR3DarwinSetupLbrMsrRange
2138 */
2139static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2140{
2141 Assert(pVM->hmr0.s.vmx.fLbr);
2142 uint32_t idLbrFromIpMsrFirst;
2143 uint32_t idLbrFromIpMsrLast;
2144 uint32_t idLbrToIpMsrFirst;
2145 uint32_t idLbrToIpMsrLast;
2146 uint32_t idLbrTosMsr;
2147
2148 /*
2149 * Determine the LBR MSRs supported for this host CPU family and model.
2150 *
2151 * See Intel spec. 17.4.8 "LBR Stack".
2152 * See Intel "Model-Specific Registers" spec.
2153 */
2154 uint32_t const uFamilyModel = (pVM->cpum.ro.HostFeatures.uFamily << 8)
2155 | pVM->cpum.ro.HostFeatures.uModel;
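 /* E.g. a host reporting family 0x6 and model 0x8e yields uFamilyModel = 0x068e. */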
2156 switch (uFamilyModel)
2157 {
2158 case 0x0f01: case 0x0f02:
2159 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2160 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2161 idLbrToIpMsrFirst = 0x0;
2162 idLbrToIpMsrLast = 0x0;
2163 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2164 break;
2165
2166 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2167 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2168 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2169 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2170 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2171 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2172 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2173 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2174 break;
2175
2176 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2177 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2178 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2179 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2180 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2181 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2182 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2183 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2184 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2185 break;
2186
2187 case 0x0617: case 0x061d: case 0x060f:
2188 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2189 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2190 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2191 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2192 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2193 break;
2194
2195 /* Atom and related microarchitectures we don't care about:
2196 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2197 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2198 case 0x0636: */
2199 /* All other CPUs: */
2200 default:
2201 {
2202 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2203 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2204 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2205 }
2206 }
2207
2208 /*
2209 * Validate.
2210 */
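 /* The LBR stack depth is derived from the FROM-IP MSR range; the TO-IP range, when present,
    has the same depth. */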
2211 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2212 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2213 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2214 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2215 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2216 {
2217 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2218 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2219 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2220 }
2221 NOREF(pVCpu0);
2222
2223 /*
2224 * Update the LBR info. to the VM struct. for use later.
2225 */
2226 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2227
2228 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2229 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2230
2231 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2232 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2233 return VINF_SUCCESS;
2234}
2235
2236
2237#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2238/**
2239 * Sets up the shadow VMCS fields arrays.
2240 *
2241 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2242 * executing the guest.
2243 *
2244 * @returns VBox status code.
2245 * @param pVM The cross context VM structure.
2246 */
2247static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2248{
2249 /*
2250 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2251 * when the host does not support it.
2252 */
2253 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2254 if ( !fGstVmwriteAll
2255 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2256 { /* likely. */ }
2257 else
2258 {
2259 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2260 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2261 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2262 }
2263
2264 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2265 uint32_t cRwFields = 0;
2266 uint32_t cRoFields = 0;
2267 for (uint32_t i = 0; i < cVmcsFields; i++)
2268 {
2269 VMXVMCSFIELD VmcsField;
2270 VmcsField.u = g_aVmcsFields[i];
2271
2272 /*
2273 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2274 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2275 * in the shadow VMCS fields array as they would be redundant.
2276 *
2277 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2278 * we must not include it in the shadow VMCS fields array. Guests attempting to
2279 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2280 * the required behavior.
2281 */
2282 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2283 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2284 {
2285 /*
2286 * Read-only fields are placed in a separate array so that while syncing shadow
2287 * VMCS fields later (which is more performance critical) we can avoid branches.
2288 *
2289 * However, if the guest can write to all fields (including read-only fields),
2290 * we treat it as a read/write field. Otherwise, writing to these fields would
2291 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2292 */
2293 if ( fGstVmwriteAll
2294 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2295 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2296 else
2297 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2298 }
2299 }
2300
2301 /* Update the counts. */
2302 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2303 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2304 return VINF_SUCCESS;
2305}
2306
2307
2308/**
2309 * Sets up the VMREAD and VMWRITE bitmaps.
2310 *
2311 * @param pVM The cross context VM structure.
2312 */
2313static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2314{
2315 /*
2316 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2317 */
2318 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2319 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2320 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2321 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2322 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2323
2324 /*
2325 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2326 * VMREAD and VMWRITE bitmaps.
2327 */
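 /* A set bit (indexed by the VMCS-field encoding) forces a VM-exit on the corresponding
    VMREAD/VMWRITE; a clear bit lets the access be satisfied by the shadow VMCS instead. */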
2328 {
2329 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2330 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2331 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2332 {
2333 uint32_t const uVmcsField = paShadowVmcsFields[i];
2334 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2335 Assert(uVmcsField >> 3 < cbBitmap);
2336 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2337 ASMBitClear(pbVmwriteBitmap + (uVmcsField >> 3), uVmcsField & 7);
2338 }
2339 }
2340
2341 /*
2342 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2343 * if the host supports VMWRITE to all supported VMCS fields.
2344 */
2345 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2346 {
2347 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2348 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2349 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2350 {
2351 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2352 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2353 Assert(uVmcsField >> 3 < cbBitmap);
2354 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2355 }
2356 }
2357}
2358#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2359
2360
2361/**
2362 * Sets up the virtual-APIC page address for the VMCS.
2363 *
2364 * @param pVmcsInfo The VMCS info. object.
2365 */
2366DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2367{
2368 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2369 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2370 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2371 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2372 AssertRC(rc);
2373}
2374
2375
2376/**
2377 * Sets up the MSR-bitmap address for the VMCS.
2378 *
2379 * @param pVmcsInfo The VMCS info. object.
2380 */
2381DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2382{
2383 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2384 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2385 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2386 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2387 AssertRC(rc);
2388}
2389
2390
2391/**
2392 * Sets up the APIC-access page address for the VMCS.
2393 *
2394 * @param pVCpu The cross context virtual CPU structure.
2395 */
2396DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2397{
2398 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2399 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2400 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2401 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2402 AssertRC(rc);
2403}
2404
2405#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2406
2407/**
2408 * Sets up the VMREAD bitmap address for the VMCS.
2409 *
2410 * @param pVCpu The cross context virtual CPU structure.
2411 */
2412DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2413{
2414 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2415 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2416 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2417 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2418 AssertRC(rc);
2419}
2420
2421
2422/**
2423 * Sets up the VMWRITE bitmap address for the VMCS.
2424 *
2425 * @param pVCpu The cross context virtual CPU structure.
2426 */
2427DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2428{
2429 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2430 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2431 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2432 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2433 AssertRC(rc);
2434}
2435
2436#endif
2437
2438/**
2439 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2440 * in the VMCS.
2441 *
2442 * @returns VBox status code.
2443 * @param pVmcsInfo The VMCS info. object.
2444 */
2445DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2446{
2447 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2448 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2449 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2450
2451 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2452 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2453 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2454
2455 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2456 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2457 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2458
2459 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2460 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2461 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2462 return VINF_SUCCESS;
2463}
2464
2465
2466/**
2467 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2468 *
2469 * @param pVCpu The cross context virtual CPU structure.
2470 * @param pVmcsInfo The VMCS info. object.
2471 */
2472static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2473{
2474 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2475
2476 /*
2477 * By default, ensure guest attempts to access any MSR cause VM-exits.
2478 * This shall later be relaxed for specific MSRs as necessary.
2479 *
2480 * Note: For nested-guests, the entire bitmap will be merged prior to
2481 * executing the nested-guest using hardware-assisted VMX and hence there
2482 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2483 */
2484 Assert(pVmcsInfo->pvMsrBitmap);
2485 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
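 /* The MSR bitmap is one 4K page split into four 1KB regions: read-low (MSRs 0..0x1fff),
    read-high (0xc0000000..0xc0001fff), write-low and write-high, one bit per MSR in each. */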
2486
2487 /*
2488 * The guest can access the following MSRs (read, write) without causing
2489 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2490 */
2491 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2492 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2493 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2494 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2495 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2496 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2497
2498 /*
2499 * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2500 * associated with them. We never need to intercept access (writes need to be
2501 * executed without causing a VM-exit, reads will #GP fault anyway).
2502 *
2503 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2504 * read/write them. We swap the guest/host MSR value using the
2505 * auto-load/store MSR area.
2506 */
2507 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2508 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2509 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2510 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2511 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2512 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2513
2514 /*
2515 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2516 * required for 64-bit guests.
2517 */
2518 if (pVM->hmr0.s.fAllow64BitGuests)
2519 {
2520 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2521 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2522 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2523 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2524 }
2525
2526 /*
2527 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2528 */
2529#ifdef VBOX_STRICT
2530 Assert(pVmcsInfo->pvMsrBitmap);
2531 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2532 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2533#endif
2534}
2535
2536
2537/**
2538 * Sets up pin-based VM-execution controls in the VMCS.
2539 *
2540 * @returns VBox status code.
2541 * @param pVCpu The cross context virtual CPU structure.
2542 * @param pVmcsInfo The VMCS info. object.
2543 */
2544static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2545{
2546 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2547 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2548 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
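 /* A control bit may be set only if allowed1 permits it and must stay set if allowed0 demands it;
    fVal therefore starts at allowed0 and the (fVal & fZap) != fVal check below catches attempts
    to set bits the CPU does not support. */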
2549
2550 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2551 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2552
2553 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2554 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2555
2556 /* Enable the VMX-preemption timer. */
2557 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2558 {
2559 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2560 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2561 }
2562
2563#if 0
2564 /* Enable posted-interrupt processing. */
2565 if (pVM->hm.s.fPostedIntrs)
2566 {
2567 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2568 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2569 fVal |= VMX_PIN_CTLS_POSTED_INT;
2570 }
2571#endif
2572
2573 if ((fVal & fZap) != fVal)
2574 {
2575 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2576 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2577 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2578 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2579 }
2580
2581 /* Commit it to the VMCS and update our cache. */
2582 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2583 AssertRC(rc);
2584 pVmcsInfo->u32PinCtls = fVal;
2585
2586 return VINF_SUCCESS;
2587}
2588
2589
2590/**
2591 * Sets up secondary processor-based VM-execution controls in the VMCS.
2592 *
2593 * @returns VBox status code.
2594 * @param pVCpu The cross context virtual CPU structure.
2595 * @param pVmcsInfo The VMCS info. object.
2596 */
2597static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2598{
2599 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2600 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2601 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2602
2603 /* WBINVD causes a VM-exit. */
2604 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2605 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2606
2607 /* Enable EPT (aka nested-paging). */
2608 if (pVM->hmr0.s.fNestedPaging)
2609 fVal |= VMX_PROC_CTLS2_EPT;
2610
2611 /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2612 by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2613 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2614 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2615 fVal |= VMX_PROC_CTLS2_INVPCID;
2616
2617 /* Enable VPID. */
2618 if (pVM->hmr0.s.vmx.fVpid)
2619 fVal |= VMX_PROC_CTLS2_VPID;
2620
2621 /* Enable unrestricted guest execution. */
2622 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2623 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2624
2625#if 0
2626 if (pVM->hm.s.fVirtApicRegs)
2627 {
2628 /* Enable APIC-register virtualization. */
2629 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2630 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2631
2632 /* Enable virtual-interrupt delivery. */
2633 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2634 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2635 }
2636#endif
2637
2638 /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
2639 where the TPR shadow resides. */
2640 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2641 * done dynamically. */
2642 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2643 {
2644 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2645 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2646 }
2647
2648 /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2649 by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2650 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2651 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2652 fVal |= VMX_PROC_CTLS2_RDTSCP;
2653
2654 /* Enable Pause-Loop exiting. */
2655 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2656 && pVM->hm.s.vmx.cPleGapTicks
2657 && pVM->hm.s.vmx.cPleWindowTicks)
2658 {
2659 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2660
2661 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2662 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2663 }
2664
2665 if ((fVal & fZap) != fVal)
2666 {
2667 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2668 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2669 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2670 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2671 }
2672
2673 /* Commit it to the VMCS and update our cache. */
2674 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2675 AssertRC(rc);
2676 pVmcsInfo->u32ProcCtls2 = fVal;
2677
2678 return VINF_SUCCESS;
2679}
2680
2681
2682/**
2683 * Sets up processor-based VM-execution controls in the VMCS.
2684 *
2685 * @returns VBox status code.
2686 * @param pVCpu The cross context virtual CPU structure.
2687 * @param pVmcsInfo The VMCS info. object.
2688 */
2689static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2690{
2691 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2692 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2693 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2694
2695 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2696 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2697 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2698 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2699 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2700 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2701 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2702
2703 /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not -always- required to be set or cleared. */
2704 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2705 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2706 {
2707 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2708 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2709 }
2710
2711 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2712 if (!pVM->hmr0.s.fNestedPaging)
2713 {
2714 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2715 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2716 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2717 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2718 }
2719
2720 /* Use TPR shadowing if supported by the CPU. */
2721 if ( PDMHasApic(pVM)
2722 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2723 {
2724 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2725 /* CR8 writes cause a VM-exit based on TPR threshold. */
2726 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2727 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2728 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2729 }
2730 else
2731 {
2732 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2733 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2734 if (pVM->hmr0.s.fAllow64BitGuests)
2735 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2736 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2737 }
2738
2739 /* Use MSR-bitmaps if supported by the CPU. */
2740 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2741 {
2742 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2743 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2744 }
2745
2746 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2747 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2748 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2749
2750 if ((fVal & fZap) != fVal)
2751 {
2752 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2753 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2754 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2755 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2756 }
2757
2758 /* Commit it to the VMCS and update our cache. */
2759 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2760 AssertRC(rc);
2761 pVmcsInfo->u32ProcCtls = fVal;
2762
2763 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2764 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2765 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2766
2767 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2768 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2769 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2770
2771 /* Sanity check, should not really happen. */
2772 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2773 { /* likely */ }
2774 else
2775 {
2776 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2777 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2778 }
2779
2780 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2781 return VINF_SUCCESS;
2782}
2783
2784
2785/**
2786 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2787 * Processor-based VM-execution) control fields in the VMCS.
2788 *
2789 * @returns VBox status code.
2790 * @param pVCpu The cross context virtual CPU structure.
2791 * @param pVmcsInfo The VMCS info. object.
2792 */
2793static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2794{
2795#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2796 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2797 {
2798 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2799 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2800 }
2801#endif
2802
2803 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2804 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2805 AssertRC(rc);
2806
2807 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2808 if (RT_SUCCESS(rc))
2809 {
2810 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2811 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2812
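 /* Bits set in these guest/host masks are host-owned: guest reads of such bits return the
    read shadow and guest attempts to modify them cause a VM-exit. */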
2813 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2814 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2815
2816 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2817 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2818
2819 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2820 {
2821 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2822 AssertRC(rc);
2823 }
2824 return VINF_SUCCESS;
2825 }
2826 else
2827 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2828 return rc;
2829}
2830
2831
2832/**
2833 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2834 *
2835 * We shall setup those exception intercepts that don't change during the
2836 * lifetime of the VM here. The rest are done dynamically while loading the
2837 * guest state.
2838 *
2839 * @param pVCpu The cross context virtual CPU structure.
2840 * @param pVmcsInfo The VMCS info. object.
2841 */
2842static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2843{
2844 /*
2845 * The following exceptions are always intercepted:
2846 *
2847 * #AC - To prevent the guest from hanging the CPU and for dealing with
2848 * split-lock detecting host configs.
2849 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2850 * recursive #DBs can cause a CPU hang.
2851 * #PF - To sync our shadow page tables when nested-paging is not used.
2852 */
2853 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2854 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2855 | RT_BIT(X86_XCPT_DB)
2856 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
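 /* Bit n of the exception bitmap corresponds to vector n; a set bit makes that exception cause
    a VM-exit (with #PF additionally filtered by the page-fault error-code mask/match fields). */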
2857
2858 /* Commit it to the VMCS. */
2859 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2860 AssertRC(rc);
2861
2862 /* Update our cache of the exception bitmap. */
2863 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2864}
2865
2866
2867#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2868/**
2869 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2870 *
2871 * @returns VBox status code.
2872 * @param pVmcsInfo The VMCS info. object.
2873 */
2874static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2875{
2876 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2877 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2878 AssertRC(rc);
2879
2880 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2881 if (RT_SUCCESS(rc))
2882 {
2883 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2884 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2885
2886 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2887 Assert(!pVmcsInfo->u64Cr0Mask);
2888 Assert(!pVmcsInfo->u64Cr4Mask);
2889 return VINF_SUCCESS;
2890 }
2891 LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
2892 return rc;
2893}
2894#endif
2895
2896
2897/**
2898 * Selector FNHMVMXSTARTVM implementation.
2899 */
2900static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2901{
2902 hmR0VmxUpdateStartVmFunction(pVCpu);
2903 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2904}
2905
2906
2907/**
2908 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2909 * VMX.
2910 *
2911 * @returns VBox status code.
2912 * @param pVCpu The cross context virtual CPU structure.
2913 * @param pVmcsInfo The VMCS info. object.
2914 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2915 */
2916static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2917{
2918 Assert(pVmcsInfo->pvVmcs);
2919 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2920
2921 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2922 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2923 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2924
2925 LogFlowFunc(("\n"));
2926
2927 /*
2928 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2929 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2930 */
2931 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2932 if (RT_SUCCESS(rc))
2933 {
2934 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2935 if (RT_SUCCESS(rc))
2936 {
2937 /*
2938 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2939 * The host is always 64-bit since we no longer support 32-bit hosts.
2940 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2941 */
2942 if (!fIsNstGstVmcs)
2943 {
2944 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2945 if (RT_SUCCESS(rc))
2946 {
2947 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2948 if (RT_SUCCESS(rc))
2949 {
2950 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2951 if (RT_SUCCESS(rc))
2952 {
2953 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2954#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2955 /*
2956 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2957 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2958 * making it fit for use when VMCS shadowing is later enabled.
2959 */
2960 if (pVmcsInfo->pvShadowVmcs)
2961 {
2962 VMXVMCSREVID VmcsRevId;
2963 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2964 VmcsRevId.n.fIsShadowVmcs = 1;
2965 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2966 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2967 if (RT_SUCCESS(rc))
2968 { /* likely */ }
2969 else
2970 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2971 }
2972#endif
2973 }
2974 else
2975 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2976 }
2977 else
2978 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2979 }
2980 else
2981 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
2982 }
2983 else
2984 {
2985#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2986 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
2987 if (RT_SUCCESS(rc))
2988 { /* likely */ }
2989 else
2990 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
2991#else
2992 AssertFailed();
2993#endif
2994 }
2995 }
2996 else
2997 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", rc, pszVmcs));
2998 }
2999 else
3000 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", rc, pszVmcs));
3001
3002 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3003 if (RT_SUCCESS(rc))
3004 {
3005 rc = hmR0VmxClearVmcs(pVmcsInfo);
3006 if (RT_SUCCESS(rc))
3007 { /* likely */ }
3008 else
3009 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
3010 }
3011
3012 /*
3013 * Update the last-error record both for failures and success, so we
3014 * can propagate the status code back to ring-3 for diagnostics.
3015 */
3016 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3017 NOREF(pszVmcs);
3018 return rc;
3019}
3020
3021
3022/**
3023 * Does global VT-x initialization (called during module initialization).
3024 *
3025 * @returns VBox status code.
3026 */
3027VMMR0DECL(int) VMXR0GlobalInit(void)
3028{
3029#ifdef HMVMX_USE_FUNCTION_TABLE
3030 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3031# ifdef VBOX_STRICT
3032 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3033 Assert(g_aVMExitHandlers[i].pfn);
3034# endif
3035#endif
3036 return VINF_SUCCESS;
3037}
3038
3039
3040/**
3041 * Does global VT-x termination (called during module termination).
3042 */
3043VMMR0DECL(void) VMXR0GlobalTerm()
3044{
3045 /* Nothing to do currently. */
3046}
3047
3048
3049/**
3050 * Sets up and activates VT-x on the current CPU.
3051 *
3052 * @returns VBox status code.
3053 * @param pHostCpu The HM physical-CPU structure.
3054 * @param pVM The cross context VM structure. Can be
3055 * NULL after a host resume operation.
3056 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3057 * fEnabledByHost is @c true).
3058 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3059 * @a fEnabledByHost is @c true).
3060 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3061 * enable VT-x on the host.
3062 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3063 */
3064VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3065 PCSUPHWVIRTMSRS pHwvirtMsrs)
3066{
3067 AssertPtr(pHostCpu);
3068 AssertPtr(pHwvirtMsrs);
3069 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3070
3071 /* Enable VT-x if it's not already enabled by the host. */
3072 if (!fEnabledByHost)
3073 {
3074 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3075 if (RT_FAILURE(rc))
3076 return rc;
3077 }
3078
3079 /*
3080 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3081 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3082 * invalidated when flushing by VPID.
3083 */
3084 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3085 {
3086 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3087 pHostCpu->fFlushAsidBeforeUse = false;
3088 }
3089 else
3090 pHostCpu->fFlushAsidBeforeUse = true;
3091
3092 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3093 ++pHostCpu->cTlbFlushes;
3094
3095 return VINF_SUCCESS;
3096}
3097
3098
3099/**
3100 * Deactivates VT-x on the current CPU.
3101 *
3102 * @returns VBox status code.
3103 * @param pHostCpu The HM physical-CPU structure.
3104 * @param pvCpuPage Pointer to the VMXON region.
3105 * @param HCPhysCpuPage Physical address of the VMXON region.
3106 *
3107 * @remarks This function should never be called when SUPR0EnableVTx() or
3108 * similar was used to enable VT-x on the host.
3109 */
3110VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3111{
3112 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3113
3114 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3115 return hmR0VmxLeaveRootMode(pHostCpu);
3116}
3117
3118
3119/**
3120 * Does per-VM VT-x initialization.
3121 *
3122 * @returns VBox status code.
3123 * @param pVM The cross context VM structure.
3124 */
3125VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3126{
3127 AssertPtr(pVM);
3128 LogFlowFunc(("pVM=%p\n", pVM));
3129
3130 hmR0VmxStructsInit(pVM);
3131 int rc = hmR0VmxStructsAlloc(pVM);
3132 if (RT_FAILURE(rc))
3133 {
3134 LogRelFunc(("Failed to allocated VMX structures. rc=%Rrc\n", rc));
3135 return rc;
3136 }
3137
3138 /* Setup the crash dump page. */
3139#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3140 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3141 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3142#endif
3143 return VINF_SUCCESS;
3144}
3145
3146
3147/**
3148 * Does per-VM VT-x termination.
3149 *
3150 * @returns VBox status code.
3151 * @param pVM The cross context VM structure.
3152 */
3153VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3154{
3155 AssertPtr(pVM);
3156 LogFlowFunc(("pVM=%p\n", pVM));
3157
3158#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3159 if (pVM->hmr0.s.vmx.pbScratch)
3160 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3161#endif
3162 hmR0VmxStructsFree(pVM);
3163 return VINF_SUCCESS;
3164}
3165
3166
3167/**
3168 * Sets up the VM for execution using hardware-assisted VMX.
3169 * This function is only called once per-VM during initialization.
3170 *
3171 * @returns VBox status code.
3172 * @param pVM The cross context VM structure.
3173 */
3174VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3175{
3176 AssertPtr(pVM);
3177 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3178
3179 LogFlowFunc(("pVM=%p\n", pVM));
3180
3181 /*
3182 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3183 * without causing a #GP.
3184 */
3185 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3186 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3187 { /* likely */ }
3188 else
3189 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3190
3191 /*
3192 * Check that nested paging is supported if it is enabled, and copy the flag over to the
3193 * ring-0-only structure.
3194 */
3195 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3196 AssertReturn( !fNestedPaging
3197 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3198 VERR_INCOMPATIBLE_CONFIG);
3199 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3200 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3201
3202 /*
3203 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3204 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3205 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3206 */
3207 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3208 AssertReturn( !fUnrestrictedGuest
3209 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3210 && fNestedPaging),
3211 VERR_INCOMPATIBLE_CONFIG);
3212 if ( !fUnrestrictedGuest
3213 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3214 || !pVM->hm.s.vmx.pRealModeTSS))
3215 {
3216 LogRelFunc(("Invalid real-on-v86 state.\n"));
3217 return VERR_INTERNAL_ERROR;
3218 }
3219 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3220
3221 /* Initialize these always, see hmR3InitFinalizeR0().*/
3222 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3223 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3224
3225 /* Setup the tagged-TLB flush handlers. */
3226 int rc = hmR0VmxSetupTaggedTlb(pVM);
3227 if (RT_FAILURE(rc))
3228 {
3229 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3230 return rc;
3231 }
3232
3233 /* Determine LBR capabilities. */
3234 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3235 if (pVM->hmr0.s.vmx.fLbr)
3236 {
3237 rc = hmR0VmxSetupLbrMsrRange(pVM);
3238 if (RT_FAILURE(rc))
3239 {
3240 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3241 return rc;
3242 }
3243 }
3244
3245#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3246 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3247 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3248 {
3249 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3250 if (RT_SUCCESS(rc))
3251 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3252 else
3253 {
3254 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3255 return rc;
3256 }
3257 }
3258#endif
3259
3260 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3261 {
3262 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3263 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3264
3265 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3266
3267 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3268 if (RT_SUCCESS(rc))
3269 {
3270#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3271 if (pVM->cpum.ro.GuestFeatures.fVmx)
3272 {
3273 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3274 if (RT_SUCCESS(rc))
3275 { /* likely */ }
3276 else
3277 {
3278 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3279 return rc;
3280 }
3281 }
3282#endif
3283 }
3284 else
3285 {
3286 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3287 return rc;
3288 }
3289 }
3290
3291 return VINF_SUCCESS;
3292}
3293
3294
3295/**
3296 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3297 * the VMCS.
3298 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3299 */
3300static uint64_t hmR0VmxExportHostControlRegs(void)
3301{
3302 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3303 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3304 uint64_t uHostCr4 = ASMGetCR4();
3305 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3306 return uHostCr4;
3307}
3308
3309
3310/**
3311 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3312 * the host-state area in the VMCS.
3313 *
3314 * @returns VBox status code.
3315 * @param pVCpu The cross context virtual CPU structure.
3316 * @param uHostCr4 The host CR4 value.
3317 */
3318static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3319{
3320 /*
3321 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3322 * will be messed up. We should -not- save the messed up state without restoring
3323 * the original host-state, see @bugref{7240}.
3324 *
3325 * This apparently can happen (most likely the FPU changes), deal with it rather than
3326 * asserting. Was observed booting Solaris 10u10 32-bit guest.
3327 */
3328 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3329 {
3330 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3331 pVCpu->idCpu));
3332 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3333 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3334 }
3335
3336 /*
3337 * Get all the host info.
3338 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3339 * without also checking the cpuid bit.
3340 */
3341 uint32_t fRestoreHostFlags;
3342#if RT_INLINE_ASM_EXTERNAL
3343 if (uHostCr4 & X86_CR4_FSGSBASE)
3344 {
3345 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3346 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3347 }
3348 else
3349 {
3350 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3351 fRestoreHostFlags = 0;
3352 }
3353 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3354 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3355 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3356 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3357#else
3358 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3359 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3360 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3361 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3362 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3363 if (uHostCr4 & X86_CR4_FSGSBASE)
3364 {
3365 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3366 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3367 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3368 }
3369 else
3370 {
3371 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3372 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3373 fRestoreHostFlags = 0;
3374 }
3375 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3376 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3377 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3378 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3379 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3380#endif
3381
3382 /*
3383 * Determine if the host segment registers are suitable for VT-x. Otherwise write zero to
3384 * the VMCS so that VM-entry succeeds, and restore the real values before we get preempted.
3385 *
3386 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3387 */
3388 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3389 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3390 {
3391 if (!(uSelAll & X86_SEL_LDT))
3392 {
3393#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3394 do { \
3395 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3396 if ((a_uVmcsVar) & X86_SEL_RPL) \
3397 { \
3398 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3399 (a_uVmcsVar) = 0; \
3400 } \
3401 } while (0)
3402 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3403 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3404 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3405 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3406#undef VMXLOCAL_ADJUST_HOST_SEG
3407 }
3408 else
3409 {
3410#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3411 do { \
3412 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3413 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3414 { \
3415 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3416 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3417 else \
3418 { \
3419 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3420 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3421 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3422 } \
3423 (a_uVmcsVar) = 0; \
3424 } \
3425 } while (0)
3426 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3427 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3428 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3429 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3430#undef VMXLOCAL_ADJUST_HOST_SEG
3431 }
3432 }
3433
3434 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3435 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3436 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3437 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3438 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3439 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3440 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3441 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3442
3443 /*
3444 * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
3445 * them to the maximum limit (0xffff) on every VM-exit.
3446 */
3447 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3448 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3449
3450 /*
3451 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3452 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3453 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3454 * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3455 * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3456 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3457 * at 0xffff on hosts where we are sure it won't cause trouble.
3458 */
3459#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3460 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3461#else
3462 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3463#endif
3464 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3465
3466 /*
3467 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3468 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3469 * RPL should be too in most cases.
3470 */
3471 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3472 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3473 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3474 VERR_VMX_INVALID_HOST_STATE);
3475
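    /* The selector index (uSelTR & X86_SEL_MASK) is a byte offset into the GDT; in long
       mode the TSS descriptor found there is a 16-byte system descriptor, and
       X86DESC64_BASE() below reassembles the 64-bit base from its scattered base fields. */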
3476 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3477 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
3478
3479 /*
3480 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3481 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3482 * restoration if the host has something else. Task switching is not supported in 64-bit
3483 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3484 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3485 *
3486 * [1] See Intel spec. 3.5 "System Descriptor Types".
3487 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3488 */
3489 Assert(pDesc->System.u4Type == 11);
3490 if ( pDesc->System.u16LimitLow != 0x67
3491 || pDesc->System.u4LimitHigh)
3492 {
3493 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3494
3495 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3496 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3497 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3498 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3499 {
3500 /* The GDT is read-only but the writable GDT is available. */
3501 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3502 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3503 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3504 AssertRCReturn(rc, rc);
3505 }
3506 }
3507
3508 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3509
3510 /*
3511 * Do all the VMCS updates in one block to assist nested virtualization.
3512 */
3513 int rc;
3514 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3515 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3516 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3517 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3518 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3519 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3520 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3521 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3522 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3523 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3524 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3525 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3526
3527 return VINF_SUCCESS;
3528}
3529
3530
3531/**
3532 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3533 * host-state area of the VMCS.
3534 *
3535 * These MSRs will be automatically restored on the host after every successful
3536 * VM-exit.
3537 *
3538 * @param pVCpu The cross context virtual CPU structure.
3539 *
3540 * @remarks No-long-jump zone!!!
3541 */
3542static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3543{
3544 AssertPtr(pVCpu);
3545
3546 /*
3547 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3548 * rather than swapping them on every VM-entry.
3549 */
3550 hmR0VmxLazySaveHostMsrs(pVCpu);
3551
3552 /*
3553 * Host Sysenter MSRs.
3554 */
3555 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3556 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3557 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3558
3559 /*
3560 * Host EFER MSR.
3561 *
3562 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3563 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3564 */
3565 if (g_fHmVmxSupportsVmcsEfer)
3566 {
3567 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3568 AssertRC(rc);
3569 }
3570
3571 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3572 * hmR0VmxExportGuestEntryExitCtls(). */
3573}
3574
3575
3576/**
3577 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3578 *
3579 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3580 * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
3581 *
3582 * @returns true if we need to load guest EFER, false otherwise.
3583 * @param pVCpu The cross context virtual CPU structure.
3584 * @param pVmxTransient The VMX-transient structure.
3585 *
3586 * @remarks Requires EFER, CR4.
3587 * @remarks No-long-jump zone!!!
3588 */
3589static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3590{
3591#ifdef HMVMX_ALWAYS_SWAP_EFER
3592 RT_NOREF2(pVCpu, pVmxTransient);
3593 return true;
3594#else
3595 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3596 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3597 uint64_t const u64GuestEfer = pCtx->msrEFER;
3598
3599# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3600 /*
3601 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3602 * the nested-guest.
3603 */
3604 if ( pVmxTransient->fIsNestedGuest
3605 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3606 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3607 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3608 return true;
3609# else
3610 RT_NOREF(pVmxTransient);
3611# endif
3612
3613 /*
3614 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3615 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3616 */
3617 if ( CPUMIsGuestInLongModeEx(pCtx)
3618 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3619 return true;
3620
3621 /*
3622 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3623 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3624 *
3625 * See Intel spec. 4.5 "IA-32e Paging".
3626 * See Intel spec. 4.1.1 "Three Paging Modes".
3627 *
3628 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3629 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3630 */
3631 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3632 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3633 if ( (pCtx->cr4 & X86_CR4_PAE)
3634 && (pCtx->cr0 & X86_CR0_PG))
3635 {
3636 /*
3637 * If nested paging is not used, verify that the guest paging mode matches the
3638 * shadow paging mode which is/will be placed in the VMCS (which is what will
3639 * actually be used while executing the guest and not the CR4 shadow value).
3640 */
3641 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3642 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3643 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3644 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3645 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3646 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3647 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3648 {
3649 /* Verify that the host is NX capable. */
3650 Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute);
3651 return true;
3652 }
3653 }
3654
3655 return false;
3656#endif
3657}
3658
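/*
 * A concrete example of the above (illustrative only): a 64-bit guest whose
 * EFER.SCE and EFER.NXE both match the host values needs no EFER swap, so the
 * cheaper path of leaving the host EFER in place is taken.  If that guest runs
 * with a different EFER.NXE setting than the host while paging with PAE, the
 * NXE check above triggers and the MSR is swapped (either via the dedicated
 * VMCS EFER controls or the auto-load/store area, see hmR0VmxExportGuestMsrs).
 */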
3659
3660/**
3661 * Exports the guest's RSP into the guest-state area in the VMCS.
3662 *
3663 * @param pVCpu The cross context virtual CPU structure.
3664 *
3665 * @remarks No-long-jump zone!!!
3666 */
3667static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3668{
3669 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3670 {
3671 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3672
3673 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3674 AssertRC(rc);
3675
3676 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3677 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3678 }
3679}
3680
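/*
 * Note: the pattern above (test a HM_CHANGED_XXX bit in fCtxChanged, write the
 * corresponding VMCS field, then atomically clear the bit) is what the export
 * helpers in this file follow; it limits VMCS writes to fields that actually
 * went stale since the last VM-entry.
 */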
3681
3682/**
3683 * Exports the guest hardware-virtualization state.
3684 *
3685 * @returns VBox status code.
3686 * @param pVCpu The cross context virtual CPU structure.
3687 * @param pVmxTransient The VMX-transient structure.
3688 *
3689 * @remarks No-long-jump zone!!!
3690 */
3691static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3692{
3693 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3694 {
3695#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3696 /*
3697 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3698 * VMCS shadowing.
3699 */
3700 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3701 {
3702 /*
3703 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3704 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3705 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3706 *
3707 * We check for VMX root mode here in case the guest executes VMXOFF without
3708 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3709 * not clear the current VMCS pointer.
3710 */
3711 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3712 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3713 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3714 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3715 {
3716 /* Paranoia. */
3717 Assert(!pVmxTransient->fIsNestedGuest);
3718
3719 /*
3720 * For performance reasons, also check if the nested hypervisor's current VMCS
3721 * was newly loaded or modified before copying it to the shadow VMCS.
3722 */
3723 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3724 {
3725 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3726 AssertRCReturn(rc, rc);
3727 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3728 }
3729 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3730 }
3731 else
3732 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3733 }
3734#else
3735 NOREF(pVmxTransient);
3736#endif
3737 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3738 }
3739 return VINF_SUCCESS;
3740}
3741
3742
3743/**
3744 * Exports the guest debug registers into the guest-state area in the VMCS.
3745 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3746 *
3747 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3748 *
3749 * @returns VBox status code.
3750 * @param pVCpu The cross context virtual CPU structure.
3751 * @param pVmxTransient The VMX-transient structure.
3752 *
3753 * @remarks No-long-jump zone!!!
3754 */
3755static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3756{
3757 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3758
3759 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3760 * stepping. */
3761 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3762 if (pVmxTransient->fIsNestedGuest)
3763 {
3764 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3765 AssertRC(rc);
3766
3767 /*
3768 * We don't want to always intercept MOV DRx for nested-guests as it causes
3769 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3770 * Instead, they are strictly only requested when the nested hypervisor intercepts
3771 * them -- handled while merging VMCS controls.
3772 *
3773 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3774 * then the nested-guest debug state should be actively loaded on the host so that
3775 * the nested-guest reads its own debug registers without causing VM-exits.
3776 */
3777 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3778 && !CPUMIsGuestDebugStateActive(pVCpu))
3779 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3780 return VINF_SUCCESS;
3781 }
3782
3783#ifdef VBOX_STRICT
3784 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3785 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3786 {
3787 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3788 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3789 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3790 }
3791#endif
3792
3793 bool fSteppingDB = false;
3794 bool fInterceptMovDRx = false;
3795 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3796 if (pVCpu->hm.s.fSingleInstruction)
3797 {
3798 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3799 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3800 {
3801 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3802 Assert(fSteppingDB == false);
3803 }
3804 else
3805 {
3806 pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
3807 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3808 pVCpu->hmr0.s.fClearTrapFlag = true;
3809 fSteppingDB = true;
3810 }
3811 }
3812
3813 uint64_t u64GuestDr7;
3814 if ( fSteppingDB
3815 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3816 {
3817 /*
3818 * Use the combined guest and host DRx values found in the hypervisor register set
3819 * because the hypervisor debugger has breakpoints active or someone is single stepping
3820 * on the host side without a monitor trap flag.
3821 *
3822 * Note! DBGF expects a clean DR6 state before executing guest code.
3823 */
3824 if (!CPUMIsHyperDebugStateActive(pVCpu))
3825 {
3826 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3827 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3828 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3829 }
3830
3831 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3832 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3833 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3834 fInterceptMovDRx = true;
3835 }
3836 else
3837 {
3838 /*
3839 * If the guest has enabled debug registers, we need to load them prior to
3840 * executing guest code so they'll trigger at the right time.
3841 */
3842 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3843 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3844 {
3845 if (!CPUMIsGuestDebugStateActive(pVCpu))
3846 {
3847 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3848 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3849 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3850 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3851 }
3852 Assert(!fInterceptMovDRx);
3853 }
3854 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3855 {
3856 /*
3857 * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3858 * must intercept #DB in order to maintain a correct DR6 guest value, and
3859 * because we need to intercept it to prevent nested #DBs from hanging the
3860 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3861 */
3862 fInterceptMovDRx = true;
3863 }
3864
3865 /* Update DR7 with the actual guest value. */
3866 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3867 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3868 }
3869
3870 if (fInterceptMovDRx)
3871 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3872 else
3873 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3874
3875 /*
3876 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3877 * monitor-trap flag and update our cache.
3878 */
3879 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3880 {
3881 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3882 AssertRC(rc);
3883 pVmcsInfo->u32ProcCtls = uProcCtls;
3884 }
3885
3886 /*
3887 * Update guest DR7.
3888 */
3889 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3890 AssertRC(rc);
3891
3892 /*
3893 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3894 * we need to clear any interrupt inhibition, as otherwise it causes a VM-entry failure.
3895 *
3896 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3897 */
3898 if (fSteppingDB)
3899 {
3900 Assert(pVCpu->hm.s.fSingleInstruction);
3901 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3902
3903 uint32_t fIntrState = 0;
3904 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3905 AssertRC(rc);
3906
3907 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3908 {
3909 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3910 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3911 AssertRC(rc);
3912 }
3913 }
3914
3915 return VINF_SUCCESS;
3916}
3917
3918
3919/**
3920 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3921 * areas.
3922 *
3923 * These MSRs will automatically be loaded to the host CPU on every successful
3924 * VM-entry and stored from the host CPU on every successful VM-exit.
3925 *
3926 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3927 * actual host MSR values are not updated here for performance reasons. See
3928 * hmR0VmxExportHostMsrs().
3929 *
3930 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3931 *
3932 * @returns VBox status code.
3933 * @param pVCpu The cross context virtual CPU structure.
3934 * @param pVmxTransient The VMX-transient structure.
3935 *
3936 * @remarks No-long-jump zone!!!
3937 */
3938static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3939{
3940 AssertPtr(pVCpu);
3941 AssertPtr(pVmxTransient);
3942
3943 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3944 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3945
3946 /*
3947 * MSRs for which we use the auto-load/store MSR area in the VMCS.
3948 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
3949 * nothing to do here. The host MSR values are updated when it's safe in
3950 * hmR0VmxLazySaveHostMsrs().
3951 *
3952 * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
3953 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
3954 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
3955 * for any MSRs that are not part of the lazy MSRs, so we do not need to place
3956 * those MSRs into the auto-load/store MSR area. Nothing to do here.
3957 */
3958 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
3959 {
3960 /* No auto-load/store MSRs currently. */
3961 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
3962 }
3963
3964 /*
3965 * Guest Sysenter MSRs.
3966 */
3967 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
3968 {
3969 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
3970
3971 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
3972 {
3973 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
3974 AssertRC(rc);
3975 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
3976 }
3977
3978 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
3979 {
3980 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
3981 AssertRC(rc);
3982 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
3983 }
3984
3985 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
3986 {
3987 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
3988 AssertRC(rc);
3989 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
3990 }
3991 }
3992
3993 /*
3994 * Guest/host EFER MSR.
3995 */
3996 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
3997 {
3998 /* Whether we are using the VMCS to swap the EFER MSR must have been
3999 determined earlier while exporting VM-entry/VM-exit controls. */
4000 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4001 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4002
4003 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4004 {
4005 /*
4006 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4007 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4008 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4009 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4010 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4011 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4012 * during VM-entry.
4013 */
4014 uint64_t uGuestEferMsr = pCtx->msrEFER;
4015 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4016 {
4017 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4018 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4019 else
4020 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4021 }
4022
4023 /*
4024 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4025 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4026 */
4027 if (g_fHmVmxSupportsVmcsEfer)
4028 {
4029 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4030 AssertRC(rc);
4031 }
4032 else
4033 {
4034 /*
4035 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4036 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4037 */
4038 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4039 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4040 AssertRCReturn(rc, rc);
4041 }
4042
4043 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4044 }
4045 else if (!g_fHmVmxSupportsVmcsEfer)
4046 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4047
4048 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4049 }
4050
4051 /*
4052 * Other MSRs.
4053 */
4054 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4055 {
4056 /* Speculation Control (R/W). */
4057 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4058 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4059 {
4060 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4061 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4062 AssertRCReturn(rc, rc);
4063 }
4064
4065 /* Last Branch Record. */
4066 if (pVM->hmr0.s.vmx.fLbr)
4067 {
4068 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4069 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4070 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4071 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4072 Assert(cLbrStack <= 32);
4073 for (uint32_t i = 0; i < cLbrStack; i++)
4074 {
4075 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4076 pVmcsInfoShared->au64LbrFromIpMsr[i],
4077 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4078 AssertRCReturn(rc, rc);
4079
4080 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4081 if (idToIpMsrStart != 0)
4082 {
4083 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4084 pVmcsInfoShared->au64LbrToIpMsr[i],
4085 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4086 AssertRCReturn(rc, rc);
4087 }
4088 }
4089
4090 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4091 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4092 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4093 false /* fUpdateHostMsr */);
4094 AssertRCReturn(rc, rc);
4095 }
4096
4097 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4098 }
4099
4100 return VINF_SUCCESS;
4101}
4102
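/*
 * Rough sizing note for the LBR path above: with the (asserted) maximum stack
 * depth of 32 records, the auto-load/store area receives up to 32 from-IP MSRs,
 * 32 to-IP MSRs and the top-of-stack MSR, i.e. up to 65 additional slots per
 * VM-entry when LBR virtualization is enabled.
 */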
4103
4104/**
4105 * Wrapper for running the guest code in VT-x.
4106 *
4107 * @returns VBox status code, no informational status codes.
4108 * @param pVCpu The cross context virtual CPU structure.
4109 * @param pVmxTransient The VMX-transient structure.
4110 *
4111 * @remarks No-long-jump zone!!!
4112 */
4113DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4114{
4115 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4116 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4117
4118 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4119 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
4120#ifdef VBOX_WITH_STATISTICS
4121 if (fResumeVM)
4122 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4123 else
4124 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4125#endif
4126 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4127 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4128 return rc;
4129}
4130
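/*
 * Note: fResumeVM above mirrors the VMX launch-state rules: a VMCS still in the
 * "clear" state must be entered with VMLAUNCH, while one already in the
 * "launched" state must use VMRESUME; the assembly worker picks the instruction
 * based on the fResume argument it is given.
 */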
4131
4132/**
4133 * Reports world-switch error and dumps some useful debug info.
4134 *
4135 * @param pVCpu The cross context virtual CPU structure.
4136 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4137 * @param pVmxTransient The VMX-transient structure (only
4138 * exitReason updated).
4139 */
4140static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4141{
4142 Assert(pVCpu);
4143 Assert(pVmxTransient);
4144 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4145
4146 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4147 switch (rcVMRun)
4148 {
4149 case VERR_VMX_INVALID_VMXON_PTR:
4150 AssertFailed();
4151 break;
4152 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4153 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4154 {
4155 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4156 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4157 AssertRC(rc);
4158 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4159
4160 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4161 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4162 Cannot do it here as we may have been long preempted. */
4163
4164#ifdef VBOX_STRICT
4165 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4166 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4167 pVmxTransient->uExitReason));
4168 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4169 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4170 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4171 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4172 else
4173 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4174 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4175 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4176
4177 static struct
4178 {
4179 /** Name of the field to log. */
4180 const char *pszName;
4181 /** The VMCS field. */
4182 uint32_t uVmcsField;
4183 /** Whether host support of this field needs to be checked. */
4184 bool fCheckSupport;
4185 } const s_aVmcsFields[] =
4186 {
4187 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4188 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4189 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4190 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4191 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4192 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4193 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4194 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4195 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4196 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4197 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4198 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4199 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4200 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4201 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4202 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4203 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4204 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4205 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4206 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4207 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4208 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4209 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4210 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4211 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4212 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4213 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4214 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4215 /* The order of selector fields below are fixed! */
4216 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4217 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4218 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4219 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4220 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4221 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4222 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4223 /* End of ordered selector fields. */
4224 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4225 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4226 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4227 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4228 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4229 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4230 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4231 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4232 };
4233
4234 RTGDTR HostGdtr;
4235 ASMGetGDTR(&HostGdtr);
4236
4237 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4238 for (uint32_t i = 0; i < cVmcsFields; i++)
4239 {
4240 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4241
4242 bool fSupported;
4243 if (!s_aVmcsFields[i].fCheckSupport)
4244 fSupported = true;
4245 else
4246 {
4247 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4248 switch (uVmcsField)
4249 {
4250 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4251 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4252 case VMX_VMCS32_CTRL_PROC_EXEC2:
4253 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4254 break;
4255 default:
4256 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4257 }
4258 }
4259
4260 if (fSupported)
4261 {
4262 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4263 switch (uWidth)
4264 {
4265 case VMX_VMCSFIELD_WIDTH_16BIT:
4266 {
4267 uint16_t u16Val;
4268 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4269 AssertRC(rc);
4270 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4271
4272 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4273 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4274 {
4275 if (u16Val < HostGdtr.cbGdt)
4276 {
4277 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4278 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4279 "Host FS", "Host GS", "Host TR" };
4280 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4281 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4282 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4283 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4284 }
4285 else
4286 Log4((" Selector value exceeds GDT limit!\n"));
4287 }
4288 break;
4289 }
4290
4291 case VMX_VMCSFIELD_WIDTH_32BIT:
4292 {
4293 uint32_t u32Val;
4294 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4295 AssertRC(rc);
4296 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4297 break;
4298 }
4299
4300 case VMX_VMCSFIELD_WIDTH_64BIT:
4301 case VMX_VMCSFIELD_WIDTH_NATURAL:
4302 {
4303 uint64_t u64Val;
4304 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4305 AssertRC(rc);
4306 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4307 break;
4308 }
4309 }
4310 }
4311 }
4312
4313 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4314 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4315 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4316 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4317 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4318 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4319#endif /* VBOX_STRICT */
4320 break;
4321 }
4322
4323 default:
4324 /* Impossible */
4325 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4326 break;
4327 }
4328}
4329
4330
4331/**
4332 * Sets up the usage of TSC-offsetting and updates the VMCS.
4333 *
4334 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4335 * VMX-preemption timer.
4336 *
4337 * @returns VBox status code.
4338 * @param pVCpu The cross context virtual CPU structure.
4339 * @param pVmxTransient The VMX-transient structure.
4340 * @param idCurrentCpu The current CPU number.
4341 *
4342 * @remarks No-long-jump zone!!!
4343 */
4344static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4345{
4346 bool fOffsettedTsc;
4347 bool fParavirtTsc;
4348 uint64_t uTscOffset;
4349 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4350 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4351
4352 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4353 {
4354 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive: calling it on
4355 every entry slowed down the bs2-test1 CPUID testcase by ~33% (on a 10980xe). */
4356 uint64_t cTicksToDeadline;
4357 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4358 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4359 {
4360 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4361 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4362 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4363 if ((int64_t)cTicksToDeadline > 0)
4364 { /* hopefully */ }
4365 else
4366 {
4367 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4368 cTicksToDeadline = 0;
4369 }
4370 }
4371 else
4372 {
4373 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4374 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4375 &pVCpu->hmr0.s.vmx.uTscDeadline,
4376 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4377 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4378 if (cTicksToDeadline >= 128)
4379 { /* hopefully */ }
4380 else
4381 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4382 }
4383
4384 /* Make sure the returned values have sane upper and lower boundaries. */
4385 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4386 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4387 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
4388 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
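        /* Worked example of the clamping above (illustrative numbers): on a 3 GHz host the
           upper bound is 3e9 / 64 = ~46.9 million TSC ticks (15.625 ms) and the lower bound
           is 3e9 / 32768 = ~91553 ticks (~30.5 us); the result is then converted from TSC
           ticks to VMX-preemption-timer ticks by shifting right by cPreemptTimerShift (the
           TSC-to-timer ratio the CPU reports in MSR_IA32_VMX_MISC). */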
4389
4390 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4391 * preemption timers here. We probably need to clamp the preemption timer,
4392 * after converting the timer value to the host. */
4393 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4394 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4395 AssertRC(rc);
4396 }
4397 else
4398 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4399
4400 if (fParavirtTsc)
4401 {
4402 /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
4403 information before every VM-entry, hence disable it for performance's sake. */
4404#if 0
4405 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4406 AssertRC(rc);
4407#endif
4408 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4409 }
4410
4411 if ( fOffsettedTsc
4412 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4413 {
4414 if (pVmxTransient->fIsNestedGuest)
4415 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4416 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
4417 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4418 }
4419 else
4420 {
4421 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4422 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4423 }
4424}
4425
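/*
 * Editor's note (illustrative arithmetic, not normative): with the clamping above, on a
 * hypothetical 3.0 GHz host the deadline is capped at 3.0e9 / 64 = 46,875,000 TSC ticks
 * (15.625 ms) and raised to at least 3.0e9 / 32768 ~= 91,553 ticks (~30.5 us). The result
 * is then scaled down by 2^cPreemptTimerShift before being written to
 * VMX_VMCS32_PREEMPT_TIMER_VALUE, since the VMX-preemption timer counts down at the TSC
 * rate divided by that power of two.
 */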
4426
4427/**
4428 * Worker for VMXR0ImportStateOnDemand.
4429 *
4430 * @returns VBox status code.
4431 * @param pVCpu The cross context virtual CPU structure.
4432 * @param pVmcsInfo The VMCS info. object.
4433 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4434 */
4435static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat)
4436{
4437 int rc = VINF_SUCCESS;
4438 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4439 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4440 uint32_t u32Val;
4441
4442 /*
4443     * Note! This is a hack to work around a mysterious BSOD observed with release builds
4444 * on Windows 10 64-bit hosts. Profile and debug builds are not affected and
4445 * neither are other host platforms.
4446 *
4447     * Committing this temporarily as it prevents the BSOD.
4448 *
4449 * Update: This is very likely a compiler optimization bug, see @bugref{9180}.
4450 */
4451#ifdef RT_OS_WINDOWS
4452 if (pVM == 0 || pVM == (void *)(uintptr_t)-1)
4453 return VERR_HM_IPE_1;
4454#endif
4455
4456 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
4457
4458 /*
4459 * We disable interrupts to make the updating of the state and in particular
4460     * the fExtrn modification atomic with respect to preemption hooks.
4461 */
4462 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
4463
4464 fWhat &= pCtx->fExtrn;
4465 if (fWhat)
4466 {
4467 do
4468 {
4469 if (fWhat & CPUMCTX_EXTRN_RIP)
4470 vmxHCImportGuestRip(pVCpu);
4471
4472 if (fWhat & CPUMCTX_EXTRN_RFLAGS)
4473 vmxHCImportGuestRFlags(pVCpu, pVmcsInfo);
4474
4475 if (fWhat & (CPUMCTX_EXTRN_INHIBIT_INT | CPUMCTX_EXTRN_INHIBIT_NMI))
4476 vmxHCImportGuestIntrState(pVCpu, pVmcsInfo);
4477
4478 if (fWhat & CPUMCTX_EXTRN_RSP)
4479 {
4480 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RSP, &pCtx->rsp);
4481 AssertRC(rc);
4482 }
4483
4484 if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
4485 {
4486 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4487 bool const fRealOnV86Active = pVmcsInfoShared->RealMode.fRealOnV86Active;
4488 if (fWhat & CPUMCTX_EXTRN_CS)
4489 {
4490 vmxHCImportGuestSegReg(pVCpu, X86_SREG_CS);
4491 vmxHCImportGuestRip(pVCpu);
4492 if (fRealOnV86Active)
4493 pCtx->cs.Attr.u = pVmcsInfoShared->RealMode.AttrCS.u;
4494 EMHistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true /* fFlattened */);
4495 }
4496 if (fWhat & CPUMCTX_EXTRN_SS)
4497 {
4498 vmxHCImportGuestSegReg(pVCpu, X86_SREG_SS);
4499 if (fRealOnV86Active)
4500 pCtx->ss.Attr.u = pVmcsInfoShared->RealMode.AttrSS.u;
4501 }
4502 if (fWhat & CPUMCTX_EXTRN_DS)
4503 {
4504 vmxHCImportGuestSegReg(pVCpu, X86_SREG_DS);
4505 if (fRealOnV86Active)
4506 pCtx->ds.Attr.u = pVmcsInfoShared->RealMode.AttrDS.u;
4507 }
4508 if (fWhat & CPUMCTX_EXTRN_ES)
4509 {
4510 vmxHCImportGuestSegReg(pVCpu, X86_SREG_ES);
4511 if (fRealOnV86Active)
4512 pCtx->es.Attr.u = pVmcsInfoShared->RealMode.AttrES.u;
4513 }
4514 if (fWhat & CPUMCTX_EXTRN_FS)
4515 {
4516 vmxHCImportGuestSegReg(pVCpu, X86_SREG_FS);
4517 if (fRealOnV86Active)
4518 pCtx->fs.Attr.u = pVmcsInfoShared->RealMode.AttrFS.u;
4519 }
4520 if (fWhat & CPUMCTX_EXTRN_GS)
4521 {
4522 vmxHCImportGuestSegReg(pVCpu, X86_SREG_GS);
4523 if (fRealOnV86Active)
4524 pCtx->gs.Attr.u = pVmcsInfoShared->RealMode.AttrGS.u;
4525 }
4526 }
4527
4528 if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
4529 {
4530 if (fWhat & CPUMCTX_EXTRN_LDTR)
4531 vmxHCImportGuestLdtr(pVCpu);
4532
4533 if (fWhat & CPUMCTX_EXTRN_GDTR)
4534 {
4535 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &pCtx->gdtr.pGdt); AssertRC(rc);
4536 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRC(rc);
4537 pCtx->gdtr.cbGdt = u32Val;
4538 }
4539
4540 /* Guest IDTR. */
4541 if (fWhat & CPUMCTX_EXTRN_IDTR)
4542 {
4543 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &pCtx->idtr.pIdt); AssertRC(rc);
4544 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRC(rc);
4545 pCtx->idtr.cbIdt = u32Val;
4546 }
4547
4548 /* Guest TR. */
4549 if (fWhat & CPUMCTX_EXTRN_TR)
4550 {
4551                /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR;
4552                   we don't need to import that one. */
4553 if (!pVmcsInfo->pShared->RealMode.fRealOnV86Active)
4554 vmxHCImportGuestTr(pVCpu);
4555 }
4556 }
4557
4558 if (fWhat & CPUMCTX_EXTRN_DR7)
4559 {
4560 if (!pVCpu->hmr0.s.fUsingHyperDR7)
4561 {
4562 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_DR7, &pCtx->dr[7]);
4563 AssertRC(rc);
4564 }
4565 }
4566
4567 if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
4568 {
4569 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); AssertRC(rc);
4570 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); AssertRC(rc);
4571 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRC(rc);
4572 pCtx->SysEnter.cs = u32Val;
4573 }
4574
4575 if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
4576 {
4577 if ( pVM->hmr0.s.fAllow64BitGuests
4578 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4579 pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
4580 }
4581
4582 if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
4583 {
4584 if ( pVM->hmr0.s.fAllow64BitGuests
4585 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4586 {
4587 pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
4588 pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
4589 pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
4590 }
4591 }
4592
4593 if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
4594 {
4595 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4596 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
4597 uint32_t const cMsrs = pVmcsInfo->cExitMsrStore;
4598 Assert(pMsrs);
4599 Assert(cMsrs <= VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
4600 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
4601 for (uint32_t i = 0; i < cMsrs; i++)
4602 {
4603 uint32_t const idMsr = pMsrs[i].u32Msr;
4604 switch (idMsr)
4605 {
4606 case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsrs[i].u64Value); break;
4607 case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsrs[i].u64Value); break;
4608 case MSR_K6_EFER: /* Can't be changed without causing a VM-exit */ break;
4609 default:
4610 {
4611 uint32_t idxLbrMsr;
4612 if (pVM->hmr0.s.vmx.fLbr)
4613 {
4614 if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbrMsr))
4615 {
4616 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrFromIpMsr));
4617 pVmcsInfoShared->au64LbrFromIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4618 break;
4619 }
4620 if (hmR0VmxIsLbrBranchToMsr(pVM, idMsr, &idxLbrMsr))
4621 {
4622                                Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrToIpMsr));
4623 pVmcsInfoShared->au64LbrToIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4624 break;
4625 }
4626 if (idMsr == pVM->hmr0.s.vmx.idLbrTosMsr)
4627 {
4628 pVmcsInfoShared->u64LbrTosMsr = pMsrs[i].u64Value;
4629 break;
4630 }
4631 /* Fallthru (no break) */
4632 }
4633 pCtx->fExtrn = 0;
4634 pVCpu->hm.s.u32HMError = pMsrs->u32Msr;
4635 ASMSetFlags(fEFlags);
4636 AssertMsgFailed(("Unexpected MSR in auto-load/store area. idMsr=%#RX32 cMsrs=%u\n", idMsr, cMsrs));
4637 return VERR_HM_UNEXPECTED_LD_ST_MSR;
4638 }
4639 }
4640 }
4641 }
4642
4643 if (fWhat & CPUMCTX_EXTRN_CR_MASK)
4644 {
4645 if (fWhat & CPUMCTX_EXTRN_CR0)
4646 {
4647 uint64_t u64Cr0;
4648 uint64_t u64Shadow;
4649 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Cr0); AssertRC(rc);
4650 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Shadow); AssertRC(rc);
4651#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4652 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4653 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4654#else
4655 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4656 {
4657 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4658 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4659 }
4660 else
4661 {
4662 /*
4663 * We've merged the guest and nested-guest's CR0 guest/host mask while executing
4664 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4665 * re-construct CR0. See @bugref{9180#c95} for details.
4666 */
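                    /*
                     * Editor's note (VMX semantics recap): bits that are set in the CR0 guest/host
                     * mask are host-owned, so guest reads of those bits return the read-shadow
                     * value, while bits clear in the mask are guest-owned and live in the VMCS
                     * guest CR0 field. The reconstruction below therefore takes guest-owned bits
                     * from the VMCS value and host-owned bits from the applicable shadow(s). The
                     * CR4 case further down follows the same pattern.
                     */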
4667 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4668 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4669 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4670 | (pVmcsNstGst->u64GuestCr0.u & pVmcsNstGst->u64Cr0Mask.u)
4671 | (u64Shadow & (pVmcsInfoGst->u64Cr0Mask & ~pVmcsNstGst->u64Cr0Mask.u));
4672 }
4673#endif
4674 VMMRZCallRing3Disable(pVCpu); /* May call into PGM which has Log statements. */
4675 CPUMSetGuestCR0(pVCpu, u64Cr0);
4676 VMMRZCallRing3Enable(pVCpu);
4677 }
4678
4679 if (fWhat & CPUMCTX_EXTRN_CR4)
4680 {
4681 uint64_t u64Cr4;
4682 uint64_t u64Shadow;
4683 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64Cr4); AssertRC(rc);
4684                rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Shadow);   AssertRC(rc);
4685#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4686 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4687 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4688#else
4689 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4690 {
4691 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4692 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4693 }
4694 else
4695 {
4696 /*
4697 * We've merged the guest and nested-guest's CR4 guest/host mask while executing
4698 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4699 * re-construct CR4. See @bugref{9180#c95} for details.
4700 */
4701 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4702 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4703 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4704 | (pVmcsNstGst->u64GuestCr4.u & pVmcsNstGst->u64Cr4Mask.u)
4705 | (u64Shadow & (pVmcsInfoGst->u64Cr4Mask & ~pVmcsNstGst->u64Cr4Mask.u));
4706 }
4707#endif
4708 pCtx->cr4 = u64Cr4;
4709 }
4710
4711 if (fWhat & CPUMCTX_EXTRN_CR3)
4712 {
4713 /* CR0.PG bit changes are always intercepted, so it's up to date. */
4714 if ( pVM->hmr0.s.vmx.fUnrestrictedGuest
4715 || ( pVM->hmr0.s.fNestedPaging
4716 && CPUMIsGuestPagingEnabledEx(pCtx)))
4717 {
4718 uint64_t u64Cr3;
4719 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR3, &u64Cr3); AssertRC(rc);
4720 if (pCtx->cr3 != u64Cr3)
4721 {
4722 pCtx->cr3 = u64Cr3;
4723 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4724 }
4725
4726 /*
4727 * If the guest is in PAE mode, sync back the PDPE's into the guest state.
4728 * CR4.PAE, CR0.PG, EFER MSR changes are always intercepted, so they're up to date.
4729 */
4730 if (CPUMIsGuestInPAEModeEx(pCtx))
4731 {
4732 X86PDPE aPaePdpes[4];
4733 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPaePdpes[0].u); AssertRC(rc);
4734 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPaePdpes[1].u); AssertRC(rc);
4735 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPaePdpes[2].u); AssertRC(rc);
4736 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPaePdpes[3].u); AssertRC(rc);
4737 if (memcmp(&aPaePdpes[0], &pCtx->aPaePdpes[0], sizeof(aPaePdpes)))
4738 {
4739 memcpy(&pCtx->aPaePdpes[0], &aPaePdpes[0], sizeof(aPaePdpes));
4740 /* PGM now updates PAE PDPTEs while updating CR3. */
4741 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4742 }
4743 }
4744 }
4745 }
4746 }
4747
4748#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4749 if (fWhat & CPUMCTX_EXTRN_HWVIRT)
4750 {
4751 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
4752 && !CPUMIsGuestInVmxNonRootMode(pCtx))
4753 {
4754 Assert(CPUMIsGuestInVmxRootMode(pCtx));
4755 rc = vmxHCCopyShadowToNstGstVmcs(pVCpu, pVmcsInfo);
4756 if (RT_SUCCESS(rc))
4757 { /* likely */ }
4758 else
4759 break;
4760 }
4761 }
4762#endif
4763 } while (0);
4764
4765 if (RT_SUCCESS(rc))
4766 {
4767 /* Update fExtrn. */
4768 pCtx->fExtrn &= ~fWhat;
4769
4770 /* If everything has been imported, clear the HM keeper bit. */
4771 if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
4772 {
4773 pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
4774 Assert(!pCtx->fExtrn);
4775 }
4776 }
4777 }
4778 else
4779 AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
4780
4781 /*
4782 * Restore interrupts.
4783 */
4784 ASMSetFlags(fEFlags);
4785
4786    STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
4787
4788 if (RT_SUCCESS(rc))
4789 { /* likely */ }
4790 else
4791 return rc;
4792
4793 /*
4794 * Honor any pending CR3 updates.
4795 *
4796 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> VMXR0CallRing3Callback()
4797 * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
4798 * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
4799 *
4800     * The reason for such complicated handling is that VM-exits that call into PGM expect CR3 to be up-to-date and thus
4801 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
4802 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
4803 * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
4804 *
4805 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
4806 *
4807 * The force-flag is checked first as it's cheaper for potential superfluous calls to this function.
4808 */
4809 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)
4810 && VMMRZCallRing3IsEnabled(pVCpu))
4811 {
4812 Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
4813 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
4814 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
4815 }
4816
4817 return VINF_SUCCESS;
4818}
4819
4820
4821/**
4822 * Saves the guest state from the VMCS into the guest-CPU context.
4823 *
4824 * @returns VBox status code.
4825 * @param pVCpu The cross context virtual CPU structure.
4826 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4827 */
4828VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4829{
4830 AssertPtr(pVCpu);
4831 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4832 return hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fWhat);
4833}
4834
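/*
 * Editor's usage sketch (illustrative only, not part of the build; the helper name is
 * made up): a ring-0 caller that only needs the guest RIP and RFLAGS can import just
 * those fields on demand and leave everything else marked as not-yet-imported in
 * pCtx->fExtrn, instead of pulling in HMVMX_CPUMCTX_EXTRN_ALL.
 */
#if 0
static int sketchImportRipAndRflags(PVMCPUCC pVCpu)
{
    /* Only fields still marked external in pCtx->fExtrn are actually read from the VMCS. */
    int rc = VMXR0ImportStateOnDemand(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
    AssertRCReturn(rc, rc);
    return VINF_SUCCESS;
}
#endif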
4835
4836/**
4837 * Gets VMX VM-exit auxiliary information.
4838 *
4839 * @returns VBox status code.
4840 * @param pVCpu The cross context virtual CPU structure.
4841 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4842 * @param fWhat What to fetch, HMVMX_READ_XXX.
4843 */
4844VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4845{
4846 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4847 if (RT_LIKELY(pVmxTransient))
4848 {
4849 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4850 fWhat &= ~pVmxTransient->fVmcsFieldsRead;
4851
4852 /* The exit reason is always available. */
4853 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4854
4855 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4856 {
4857 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4858 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4859 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4860 }
4861
4862 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4863 {
4864 vmxHCReadIdtVectoringInfoVmcs(pVCpu, pVmxTransient);
4865 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4866 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4867 }
4868
4869 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4870 {
4871 vmxHCReadIdtVectoringErrorCodeVmcs(pVCpu, pVmxTransient);
4872 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4873 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4874 }
4875
4876 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4877 {
4878 vmxHCReadExitInstrLenVmcs(pVCpu, pVmxTransient);
4879 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4880 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4881 }
4882
4883 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4884 {
4885 vmxHCReadExitIntInfoVmcs(pVCpu, pVmxTransient);
4886 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4887 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4888 }
4889
4890 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4891 {
4892 vmxHCReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
4893 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4894 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4895 }
4896
4897 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4898 {
4899 vmxHCReadExitInstrInfoVmcs(pVCpu, pVmxTransient);
4900 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4901 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4902 }
4903
4904 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4905 {
4906 vmxHCReadGuestLinearAddrVmcs(pVCpu, pVmxTransient);
4907 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4908 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4909 }
4910
4911 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4912 {
4913 vmxHCReadGuestPhysicalAddrVmcs(pVCpu, pVmxTransient);
4914 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4915 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4916 }
4917
4918 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4919 {
4920 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4921#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4922 vmxHCReadGuestPendingDbgXctps(pVCpu, pVmxTransient);
4923 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4924#else
4925 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4926#endif
4927 }
4928
4929 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4930 return VINF_SUCCESS;
4931 }
4932 return VERR_NOT_AVAILABLE;
4933}
4934
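/*
 * Editor's usage sketch (illustrative only, not part of the build; the helper name is
 * made up): fetching the exit qualification and instruction length for the current
 * VM-exit. Fields already cached in the VMX-transient structure are not re-read from
 * the VMCS.
 */
#if 0
static int sketchGetExitQualAndInstrLen(PVMCPUCC pVCpu)
{
    VMXEXITAUX ExitAux;
    int rc = VMXR0GetExitAuxInfo(pVCpu, &ExitAux, HMVMX_READ_EXIT_QUALIFICATION | HMVMX_READ_EXIT_INSTR_LEN);
    if (RT_SUCCESS(rc))
        Log4Func(("reason=%u qual=%#RX64 cbInstr=%u\n", ExitAux.uReason, ExitAux.u64Qual, ExitAux.cbInstr));
    return rc;
}
#endif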
4935
4936/**
4937 * Does the necessary state syncing before returning to ring-3 for any reason
4938 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4939 *
4940 * @returns VBox status code.
4941 * @param pVCpu The cross context virtual CPU structure.
4942 * @param fImportState Whether to import the guest state from the VMCS back
4943 * to the guest-CPU context.
4944 *
4945 * @remarks No-long-jmp zone!!!
4946 */
4947static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4948{
4949 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4950 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4951
4952 RTCPUID const idCpu = RTMpCpuId();
4953 Log4Func(("HostCpuId=%u\n", idCpu));
4954
4955 /*
4956 * !!! IMPORTANT !!!
4957 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4958 */
4959
4960 /* Save the guest state if necessary. */
4961 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4962 if (fImportState)
4963 {
4964 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4965 AssertRCReturn(rc, rc);
4966 }
4967
4968 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4969 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4970 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4971
4972 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4973#ifdef VBOX_STRICT
4974 if (CPUMIsHyperDebugStateActive(pVCpu))
4975 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
4976#endif
4977 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4978 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4979 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4980
4981 /* Restore host-state bits that VT-x only restores partially. */
4982 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4983 {
4984 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4985 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4986 }
4987 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4988
4989 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4990 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4991 {
4992 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4993 if (!fImportState)
4994 {
4995 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4996 AssertRCReturn(rc, rc);
4997 }
4998 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4999 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
5000 }
5001 else
5002 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
5003
5004 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5005 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5006
5007 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
5008 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
5009 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
5010 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
5011 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
5012 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
5013 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
5014 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
5015 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
5016 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5017
5018 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5019
5020 /** @todo This partially defeats the purpose of having preemption hooks.
5021 * The problem is that deregistering the hooks should be deferred until the
5022 * EMT is about to be destroyed, not done every time we leave HM
5023 * context.
5024 */
5025 int rc = hmR0VmxClearVmcs(pVmcsInfo);
5026 AssertRCReturn(rc, rc);
5027
5028#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5029 /*
5030 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
5031 * clear a shadow VMCS before allowing that VMCS to become active on another
5032 * logical processor. We may or may not be importing guest state which clears
5033 * it, so cover for it here.
5034 *
5035 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
5036 */
5037 if ( pVmcsInfo->pvShadowVmcs
5038 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
5039 {
5040 rc = vmxHCClearShadowVmcs(pVmcsInfo);
5041 AssertRCReturn(rc, rc);
5042 }
5043
5044 /*
5045 * Flag that we need to re-export the host state if we switch to this VMCS before
5046 * executing guest or nested-guest code.
5047 */
5048 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
5049#endif
5050
5051 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
5052 NOREF(idCpu);
5053 return VINF_SUCCESS;
5054}
5055
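/*
 * Editor's note (recap of the ordering above): hmR0VmxLeave() first optionally imports the
 * full guest state, then restores the host FPU and debug-register state, the partially
 * restored host-state bits and the lazy host MSRs, and only then VMCLEARs the VMCS. The
 * VMCLEAR has to come last so the VMCS is no longer active/current and may safely be made
 * active on another host CPU afterwards.
 */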
5056
5057/**
5058 * Leaves the VT-x session.
5059 *
5060 * @returns VBox status code.
5061 * @param pVCpu The cross context virtual CPU structure.
5062 *
5063 * @remarks No-long-jmp zone!!!
5064 */
5065static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
5066{
5067 HM_DISABLE_PREEMPT(pVCpu);
5068 HMVMX_ASSERT_CPU_SAFE(pVCpu);
5069 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5070 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5071
5072 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
5073 and done this from the VMXR0ThreadCtxCallback(). */
5074 if (!pVCpu->hmr0.s.fLeaveDone)
5075 {
5076 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
5077 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
5078 pVCpu->hmr0.s.fLeaveDone = true;
5079 }
5080 Assert(!pVCpu->cpum.GstCtx.fExtrn);
5081
5082 /*
5083 * !!! IMPORTANT !!!
5084 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
5085 */
5086
5087 /* Deregister hook now that we've left HM context before re-enabling preemption. */
5088 /** @todo Deregistering here means we need to VMCLEAR always
5089     * (longjmp/exit-to-r3) in VT-x which is not efficient; eliminate the need
5090 * for calling VMMR0ThreadCtxHookDisable here! */
5091 VMMR0ThreadCtxHookDisable(pVCpu);
5092
5093 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
5094 int rc = HMR0LeaveCpu(pVCpu);
5095 HM_RESTORE_PREEMPT();
5096 return rc;
5097}
5098
5099
5100/**
5101 * Takes the necessary actions before going back to ring-3.
5102 *
5103 * An action requires us to go back to ring-3. This function does the necessary
5104 * steps before we can safely return to ring-3. This is not the same as longjmps
5105 * to ring-3; this is voluntary and prepares the guest so it may continue
5106 * executing outside HM (recompiler/IEM).
5107 *
5108 * @returns VBox status code.
5109 * @param pVCpu The cross context virtual CPU structure.
5110 * @param rcExit The reason for exiting to ring-3. Can be
5111 * VINF_VMM_UNKNOWN_RING3_CALL.
5112 */
5113static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
5114{
5115 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5116
5117 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5118 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
5119 {
5120 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
5121 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
5122 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
5123 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
5124 }
5125
5126 /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
5127 VMMRZCallRing3Disable(pVCpu);
5128 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
5129
5130 /*
5131 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
5132     * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
5133 *
5134 * This is because execution may continue from ring-3 and we would need to inject
5135 * the event from there (hence place it back in TRPM).
5136 */
5137 if (pVCpu->hm.s.Event.fPending)
5138 {
5139 vmxHCPendingEventToTrpmTrap(pVCpu);
5140 Assert(!pVCpu->hm.s.Event.fPending);
5141
5142 /* Clear the events from the VMCS. */
5143 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
5144 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
5145 }
5146#ifdef VBOX_STRICT
5147 /*
5148 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
5149 * fatal), we don't care about verifying duplicate injection of events. Errors like
5150 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
5151 * function so those should and will be checked below.
5152 */
5153 else if (RT_SUCCESS(rcExit))
5154 {
5155 /*
5156 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
5157 * This can be pretty hard to debug otherwise, interrupts might get injected twice
5158 * occasionally, see @bugref{9180#c42}.
5159 *
5160 * However, if the VM-entry failed, any VM entry-interruption info. field would
5161 * be left unmodified as the event would not have been injected to the guest. In
5162 * such cases, don't assert, we're not going to continue guest execution anyway.
5163 */
5164 uint32_t uExitReason;
5165 uint32_t uEntryIntInfo;
5166 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
5167 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
5168 AssertRC(rc);
5169 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
5170 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
5171 }
5172#endif
5173
5174 /*
5175 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
5176 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
5177 * (e.g. TPR below threshold).
5178 */
5179 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5180 {
5181 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
5182 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
5183 }
5184
5185 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
5186 and if we're injecting an event we should have a TRPM trap pending. */
5187 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5188#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
5189 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5190#endif
5191
5192 /* Save guest state and restore host state bits. */
5193 int rc = hmR0VmxLeaveSession(pVCpu);
5194 AssertRCReturn(rc, rc);
5195 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5196
5197 /* Thread-context hooks are unregistered at this point!!! */
5198 /* Ring-3 callback notifications are unregistered at this point!!! */
5199
5200 /* Sync recompiler state. */
5201 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
5202 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
5203 | CPUM_CHANGED_LDTR
5204 | CPUM_CHANGED_GDTR
5205 | CPUM_CHANGED_IDTR
5206 | CPUM_CHANGED_TR
5207 | CPUM_CHANGED_HIDDEN_SEL_REGS);
5208 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
5209 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
5210 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5211
5212 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
5213
5214 /* Update the exit-to-ring 3 reason. */
5215 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
5216
5217 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
5218 if ( rcExit != VINF_EM_RAW_INTERRUPT
5219 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5220 {
5221 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
5222 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5223 }
5224
5225 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
5226 VMMRZCallRing3Enable(pVCpu);
5227 return rc;
5228}
5229
5230
5231/**
5232 * VMMRZCallRing3() callback wrapper which saves the guest state before we
5233 * longjump due to a ring-0 assertion.
5234 *
5235 * @returns VBox status code.
5236 * @param pVCpu The cross context virtual CPU structure.
5237 */
5238VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
5239{
5240 /*
5241 * !!! IMPORTANT !!!
5242 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
5243 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
5244 */
5245 VMMR0AssertionRemoveNotification(pVCpu);
5246 VMMRZCallRing3Disable(pVCpu);
5247 HM_DISABLE_PREEMPT(pVCpu);
5248
5249 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5250 vmxHCImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
5251 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
5252 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
5253
5254 /* Restore host-state bits that VT-x only restores partially. */
5255 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
5256 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
5257 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
5258
5259 /* Restore the lazy host MSRs as we're leaving VT-x context. */
5260 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
5261 hmR0VmxLazyRestoreHostMsrs(pVCpu);
5262
5263 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5264 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5265 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5266
5267    /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have
5268       been cleared as part of importing the guest state above). */
5269 hmR0VmxClearVmcs(pVmcsInfo);
5270
5271 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
5272 VMMR0ThreadCtxHookDisable(pVCpu);
5273
5274 /* Leave HM context. This takes care of local init (term). */
5275 HMR0LeaveCpu(pVCpu);
5276 HM_RESTORE_PREEMPT();
5277 return VINF_SUCCESS;
5278}
5279
5280
5281/**
5282 * Enters the VT-x session.
5283 *
5284 * @returns VBox status code.
5285 * @param pVCpu The cross context virtual CPU structure.
5286 */
5287VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
5288{
5289 AssertPtr(pVCpu);
5290 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
5291 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5292
5293 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5294 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5295 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5296
5297#ifdef VBOX_STRICT
5298 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
5299 RTCCUINTREG uHostCr4 = ASMGetCR4();
5300 if (!(uHostCr4 & X86_CR4_VMXE))
5301 {
5302 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
5303 return VERR_VMX_X86_CR4_VMXE_CLEARED;
5304 }
5305#endif
5306
5307 /*
5308 * Do the EMT scheduled L1D and MDS flush here if needed.
5309 */
5310 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5311 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5312 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5313 hmR0MdsClear();
5314
5315 /*
5316 * Load the appropriate VMCS as the current and active one.
5317 */
5318 PVMXVMCSINFO pVmcsInfo;
5319 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5320 if (!fInNestedGuestMode)
5321 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5322 else
5323 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5324 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5325 if (RT_SUCCESS(rc))
5326 {
5327 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5328 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5329 pVCpu->hmr0.s.fLeaveDone = false;
5330 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5331 }
5332 return rc;
5333}
5334
5335
5336/**
5337 * The thread-context callback.
5338 *
5339 * This is used together with RTThreadCtxHookCreate() on platforms which
5340 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5341 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5342 *
5343 * @param enmEvent The thread-context event.
5344 * @param pVCpu The cross context virtual CPU structure.
5345 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5346 * @thread EMT(pVCpu)
5347 */
5348VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5349{
5350 AssertPtr(pVCpu);
5351 RT_NOREF1(fGlobalInit);
5352
5353 switch (enmEvent)
5354 {
5355 case RTTHREADCTXEVENT_OUT:
5356 {
5357 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5358 VMCPU_ASSERT_EMT(pVCpu);
5359
5360 /* No longjmps (logger flushes, locks) in this fragile context. */
5361 VMMRZCallRing3Disable(pVCpu);
5362 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5363
5364 /* Restore host-state (FPU, debug etc.) */
5365 if (!pVCpu->hmr0.s.fLeaveDone)
5366 {
5367 /*
5368 * Do -not- import the guest-state here as we might already be in the middle of importing
5369 * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
5370 */
5371 hmR0VmxLeave(pVCpu, false /* fImportState */);
5372 pVCpu->hmr0.s.fLeaveDone = true;
5373 }
5374
5375 /* Leave HM context, takes care of local init (term). */
5376 int rc = HMR0LeaveCpu(pVCpu);
5377 AssertRC(rc);
5378
5379 /* Restore longjmp state. */
5380 VMMRZCallRing3Enable(pVCpu);
5381 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5382 break;
5383 }
5384
5385 case RTTHREADCTXEVENT_IN:
5386 {
5387 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5388 VMCPU_ASSERT_EMT(pVCpu);
5389
5390 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5391 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5392 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5393 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5394 hmR0MdsClear();
5395
5396 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5397 VMMRZCallRing3Disable(pVCpu);
5398 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5399
5400 /* Initialize the bare minimum state required for HM. This takes care of
5401 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5402 int rc = hmR0EnterCpu(pVCpu);
5403 AssertRC(rc);
5404 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5405 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5406
5407 /* Load the active VMCS as the current one. */
5408 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5409 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5410 AssertRC(rc);
5411 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5412 pVCpu->hmr0.s.fLeaveDone = false;
5413
5414 /* Restore longjmp state. */
5415 VMMRZCallRing3Enable(pVCpu);
5416 break;
5417 }
5418
5419 default:
5420 break;
5421 }
5422}
5423
5424
5425/**
5426 * Exports the host state into the VMCS host-state area.
5427 * Sets up the VM-exit MSR-load area.
5428 *
5429 * The CPU state will be loaded from these fields on every successful VM-exit.
5430 *
5431 * @returns VBox status code.
5432 * @param pVCpu The cross context virtual CPU structure.
5433 *
5434 * @remarks No-long-jump zone!!!
5435 */
5436static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5437{
5438 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5439
5440 int rc = VINF_SUCCESS;
5441 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5442 {
5443 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5444
5445 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5446 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5447
5448 hmR0VmxExportHostMsrs(pVCpu);
5449
5450 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5451 }
5452 return rc;
5453}
5454
5455
5456/**
5457 * Saves the host state in the VMCS host-state area.
5458 *
5459 * @returns VBox status code.
5460 * @param pVCpu The cross context virtual CPU structure.
5461 *
5462 * @remarks No-long-jump zone!!!
5463 */
5464VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5465{
5466 AssertPtr(pVCpu);
5467 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5468
5469 /*
5470 * Export the host state here while entering HM context.
5471 * When thread-context hooks are used, we might get preempted and have to re-save the host
5472 * state but most of the time we won't be, so do it here before we disable interrupts.
5473 */
5474 return hmR0VmxExportHostState(pVCpu);
5475}
5476
5477
5478/**
5479 * Exports the guest state into the VMCS guest-state area.
5480 *
5481 * This will typically be done before VM-entry when the guest-CPU state and the
5482 * VMCS state may potentially be out of sync.
5483 *
5484 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5485 * VM-entry controls.
5486 * Sets up the appropriate VMX non-root function to execute guest code based on
5487 * the guest CPU mode.
5488 *
5489 * @returns VBox strict status code.
5490 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5491 * without unrestricted guest execution and the VMMDev is not presently
5492 * mapped (e.g. EFI32).
5493 *
5494 * @param pVCpu The cross context virtual CPU structure.
5495 * @param pVmxTransient The VMX-transient structure.
5496 *
5497 * @remarks No-long-jump zone!!!
5498 */
5499static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5500{
5501 AssertPtr(pVCpu);
5502 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5503 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5504
5505 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5506
5507 /*
5508 * Determine real-on-v86 mode.
5509 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5510 */
5511 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5512 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5513 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5514 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5515 else
5516 {
5517 Assert(!pVmxTransient->fIsNestedGuest);
5518 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5519 }
5520
5521 /*
5522 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5523 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5524 */
5525 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5526 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5527
5528 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5529 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5530
5531 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5532 if (rcStrict == VINF_SUCCESS)
5533 { /* likely */ }
5534 else
5535 {
5536 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5537 return rcStrict;
5538 }
5539
5540 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5541 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5542
5543 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5544 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5545
5546 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5547 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5548 vmxHCExportGuestRip(pVCpu);
5549 hmR0VmxExportGuestRsp(pVCpu);
5550 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5551
5552 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5553 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5554
5555 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
5556 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5557 | HM_CHANGED_GUEST_CR2
5558 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5559 | HM_CHANGED_GUEST_X87
5560 | HM_CHANGED_GUEST_SSE_AVX
5561 | HM_CHANGED_GUEST_OTHER_XSAVE
5562 | HM_CHANGED_GUEST_XCRx
5563 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5564 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5565 | HM_CHANGED_GUEST_TSC_AUX
5566 | HM_CHANGED_GUEST_OTHER_MSRS
5567 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5568
5569 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5570 return rc;
5571}
5572
5573
5574/**
5575 * Exports the state shared between the host and guest into the VMCS.
5576 *
5577 * @param pVCpu The cross context virtual CPU structure.
5578 * @param pVmxTransient The VMX-transient structure.
5579 *
5580 * @remarks No-long-jump zone!!!
5581 */
5582static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5583{
5584 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5585 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5586
5587 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5588 {
5589 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5590 AssertRC(rc);
5591 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5592
5593 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5594 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5595 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5596 }
5597
5598 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5599 {
5600 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5601 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5602 }
5603
5604 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5605 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5606}
5607
5608
5609/**
5610 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5611 *
5612 * @returns Strict VBox status code (i.e. informational status codes too).
5613 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5614 * without unrestricted guest execution and the VMMDev is not presently
5615 * mapped (e.g. EFI32).
5616 *
5617 * @param pVCpu The cross context virtual CPU structure.
5618 * @param pVmxTransient The VMX-transient structure.
5619 *
5620 * @remarks No-long-jump zone!!!
5621 */
5622static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5623{
5624 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5625 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5626
5627#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5628 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5629#endif
5630
5631 /*
5632 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5633 * changes. First try to export only these without going through all other changed-flag checks.
5634 */
5635 VBOXSTRICTRC rcStrict;
5636 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5637 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5638 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5639
5640    /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often). */
5641 if ( (fCtxChanged & fMinimalMask)
5642 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5643 {
5644 vmxHCExportGuestRip(pVCpu);
5645 hmR0VmxExportGuestRsp(pVCpu);
5646 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5647 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5648 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5649 }
5650 /* If anything else also changed, go through the full export routine and export as required. */
5651 else if (fCtxChanged & fCtxMask)
5652 {
5653 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5654 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5655 { /* likely */}
5656 else
5657 {
5658 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5659 VBOXSTRICTRC_VAL(rcStrict)));
5660 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5661 return rcStrict;
5662 }
5663 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5664 }
5665 /* Nothing changed, nothing to load here. */
5666 else
5667 rcStrict = VINF_SUCCESS;
5668
5669#ifdef VBOX_STRICT
5670 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5671 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5672 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5673#endif
5674 return rcStrict;
5675}
5676
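/*
 * Editor's note (illustrative example of the fast path above): if fCtxChanged contains only
 * HM_CHANGED_GUEST_RIP, the minimal branch exports just RIP/RSP/RFLAGS/HWVIRT; if it also
 * contains, say, HM_CHANGED_GUEST_CR0, the check !(fCtxChanged & (fCtxMask & ~fMinimalMask))
 * fails and the full hmR0VmxExportGuestState() path runs instead.
 */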
5677
5678/**
5679 * Map the APIC-access page for virtualizing APIC accesses.
5680 *
5681 * This can cause longjumps to R3 due to the acquisition of the PGM lock. Hence,
5682 * this is not done as part of exporting guest state; see @bugref{8721}.
5683 *
5684 * @returns VBox status code.
5685 * @param pVCpu The cross context virtual CPU structure.
5686 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5687 */
5688static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5689{
5690 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5691 Assert(GCPhysApicBase);
5692
5693    LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5694
5695 /* Unalias the existing mapping. */
5696 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5697 AssertRCReturn(rc, rc);
5698
5699 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5700 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5701 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5702 AssertRCReturn(rc, rc);
5703
5704 return VINF_SUCCESS;
5705}
5706
5707
5708/**
5709 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5710 * CPU.
5711 *
5712 * @param idCpu The ID for the CPU the function is called on.
5713 * @param pvUser1 Null, not used.
5714 * @param pvUser2 Null, not used.
5715 */
5716static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5717{
5718 RT_NOREF3(idCpu, pvUser1, pvUser2);
5719 VMXDispatchHostNmi();
5720}
5721
5722
5723/**
5724 * Dispatches an NMI on the host CPU that received it.
5725 *
5726 * @returns VBox status code.
5727 * @param pVCpu The cross context virtual CPU structure.
5728 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5729 * executing when receiving the host NMI in VMX non-root
5730 * operation.
5731 */
5732static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5733{
5734 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5735 Assert(idCpu != NIL_RTCPUID);
5736
5737 /*
5738 * We don't want to delay dispatching the NMI any more than we have to. However,
5739 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5740 * after executing guest or nested-guest code for the following reasons:
5741 *
5742     *   - We would need to perform VMREADs with interrupts disabled, which is orders of
5743     *     magnitude worse when we run as a nested hypervisor without VMCS shadowing
5744 * supported by the host hypervisor.
5745 *
5746 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5747 * longer period of time just for handling an edge case like host NMIs which do
5748 * not occur nearly as frequently as other VM-exits.
5749 *
5750 * Let's cover the most likely scenario first. Check if we are on the target CPU
5751 * and dispatch the NMI right away. This should be much faster than calling into
5752 * RTMpOnSpecific() machinery.
5753 */
5754 bool fDispatched = false;
5755 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5756 if (idCpu == RTMpCpuId())
5757 {
5758 VMXDispatchHostNmi();
5759 fDispatched = true;
5760 }
5761 ASMSetFlags(fEFlags);
5762 if (fDispatched)
5763 {
5764 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5765 return VINF_SUCCESS;
5766 }
5767
5768 /*
5769 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5770 * there should be no race or recursion even if we are unlucky enough to be preempted
5771 * (to the target CPU) without dispatching the host NMI above.
5772 */
5773 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5774 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5775}
5776
5777
5778#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5779/**
5780 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5781 * nested-guest using hardware-assisted VMX.
5782 *
5783 * @param pVCpu The cross context virtual CPU structure.
5784 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5785 * @param pVmcsInfoGst The guest VMCS info. object.
5786 */
5787static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5788{
5789 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5790 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5791 Assert(pu64MsrBitmap);
5792
5793 /*
5794 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5795 * MSR that is intercepted by the guest is also intercepted while executing the
5796 * nested-guest using hardware-assisted VMX.
5797 *
5798 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5799 * nested-guest VM-exit even if the outer guest is not intercepting some
5800 * MSRs. We cannot assume the caller has initialized the nested-guest
5801 * MSR bitmap in this case.
5802 *
5803 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5804     *       each of its VM-entries, hence initializing it once per-VM while setting
5805 * up the nested-guest VMCS is not sufficient.
5806 */
5807 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5808 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5809 {
5810 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5811 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5812 Assert(pu64MsrBitmapNstGst);
5813 Assert(pu64MsrBitmapGst);
5814
5815 /** @todo Detect and use EVEX.POR? */
5816 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5817 for (uint32_t i = 0; i < cFrags; i++)
5818 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5819 }
5820 else
5821 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5822}
5823
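/*
 * Editor's note (bit semantics of the merge above): in a VMX MSR bitmap a set bit means the
 * corresponding MSR access causes a VM-exit, so OR-ing the guest and nested-guest bitmaps
 * yields the union of the intercepts. E.g. if the outer guest intercepts writes to an MSR
 * that the nested hypervisor passes through, the merged bitmap still has the bit set and the
 * nested-guest write exits to us. Filling the bitmap with 0xffffffff likewise means
 * "intercept everything" when the nested hypervisor does not use an MSR bitmap at all.
 */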
5824
5825/**
5826 * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
5827 * hardware-assisted VMX execution of the nested-guest.
5828 *
5829 * For a guest, we don't modify these controls once we set up the VMCS and hence
5830 * this function is never called.
5831 *
5832 * For nested-guests, since the nested hypervisor provides these controls on every
5833 * nested-guest VM-entry and could potentially change them every time, we need to
5834 * merge them before every nested-guest VM-entry.
5835 *
5836 * @returns VBox status code.
5837 * @param pVCpu The cross context virtual CPU structure.
5838 */
5839static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5840{
5841 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5842 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5843 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5844
5845 /*
5846 * Merge the controls with the requirements of the guest VMCS.
5847 *
5848 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5849 * VMCS with the features supported by the physical CPU as it's already done by the
5850 * VMLAUNCH/VMRESUME instruction emulation.
5851 *
5852 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5853 * derived from the VMX features supported by the physical CPU.
5854 */
5855
5856 /* Pin-based VM-execution controls. */
5857 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5858
5859 /* Processor-based VM-execution controls. */
5860 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5861 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5862 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5863 | VMX_PROC_CTLS_MOV_DR_EXIT
5864 | VMX_PROC_CTLS_USE_TPR_SHADOW
5865 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5866
5867 /* Secondary processor-based VM-execution controls. */
5868 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5869 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5870 | VMX_PROC_CTLS2_INVPCID
5871 | VMX_PROC_CTLS2_VMCS_SHADOWING
5872 | VMX_PROC_CTLS2_RDTSCP
5873 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5874 | VMX_PROC_CTLS2_APIC_REG_VIRT
5875 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5876 | VMX_PROC_CTLS2_VMFUNC));
5877
5878 /*
5879 * VM-entry controls:
5880     *   These controls contain state that depends on the nested-guest state (primarily
5881 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5882 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5883 * properly continue executing the nested-guest if the EFER MSR changes but does not
5884     *   cause a nested-guest VM-exit.
5885 *
5886 * VM-exit controls:
5887 * These controls specify the host state on return. We cannot use the controls from
5888 * the nested hypervisor state as is as it would contain the guest state rather than
5889 * the host state. Since the host state is subject to change (e.g. preemption, trips
5890 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5891 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5892 *
5893 * VM-entry MSR-load:
5894 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5895 * context by the VMLAUNCH/VMRESUME instruction emulation.
5896 *
5897 * VM-exit MSR-store:
5898 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5899 * back into the VM-exit MSR-store area.
5900 *
5901 * VM-exit MSR-load areas:
5902 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5903 * can entirely ignore what the nested hypervisor wants to load here.
5904 */
5905
5906 /*
5907 * Exception bitmap.
5908 *
5909 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5910 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5911 * code more flexible if intercepting exceptions become more dynamic in the future we do
5912 * it as part of exporting the nested-guest state.
5913 */
5914 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5915
5916 /*
5917 * CR0/CR4 guest/host mask.
5918 *
5919 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5920 * cause VM-exits, so we need to merge them here.
5921 */
5922 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5923 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
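    /*
     * Editorial sketch (illustrative only): a bit set in the CR0/CR4 guest/host mask is
     * "host owned" - guest reads of it return the read-shadow value and guest writes that
     * would change it trigger a MOV-CRx VM-exit. OR-ing the masks therefore intercepts a
     * bit if either the outer guest or the nested hypervisor wants to own it. Roughly:
     *
     *     // a MOV to CR0 causes a VM-exit if it changes any host-owned bit
     *     bool const fCausesVmExit = ((uNewCr0 ^ uCr0ReadShadow) & u64Cr0Mask) != 0;
     */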
5924
5925 /*
5926 * Page-fault error-code mask and match.
5927 *
5928 * Although we require unrestricted guest execution (and thereby nested-paging) for
5929 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5930 * normally intercept #PFs, it might intercept them for debugging purposes.
5931 *
5932 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5933 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5934 */
5935 uint32_t u32XcptPFMask;
5936 uint32_t u32XcptPFMatch;
5937 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5938 {
5939 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5940 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5941 }
5942 else
5943 {
5944 u32XcptPFMask = 0;
5945 u32XcptPFMatch = 0;
5946 }
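    /*
     * Editorial sketch (illustrative, per the Intel SDM #PF filtering rule): a #PF with error
     * code uPfec causes a VM-exit iff the mask/match test result equals the #PF bit in the
     * exception bitmap. With mask = match = 0 the test is always true, so keeping bit 14 set
     * in the merged exception bitmap (the "else" path above) intercepts every #PF.
     *
     *     bool const fMaskMatch    = (uPfec & u32XcptPFMask) == u32XcptPFMatch;
     *     bool const fXcptPfBit    = RT_BOOL(u32XcptBitmap & RT_BIT(X86_XCPT_PF));
     *     bool const fCausesVmExit = (fMaskMatch == fXcptPfBit);
     */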
5947
5948 /*
5949 * Pause-Loop exiting.
5950 */
5951 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5952 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
5953 * this will work... */
5954 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5955 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
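    /*
     * Editorial note (illustrative): with pause-loop exiting the CPU raises a PAUSE VM-exit
     * when it detects a spin loop, i.e. successive PAUSEs executed no more than PLE_Gap TSC
     * ticks apart while the accumulated spinning exceeds PLE_Window ticks. Roughly:
     *
     *     if (   tscNow - tscLastPause  <= cPleGapTicks
     *         && tscNow - tscFirstPause >  cPleWindowTicks)
     *         // hardware signals a VM-exit with basic reason VMX_EXIT_PAUSE
     */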
5956
5957 /*
5958 * Pending debug exceptions.
5959 * Currently just copy whatever the nested-guest provides us.
5960 */
5961 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5962
5963 /*
5964 * I/O Bitmap.
5965 *
5966 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5967 * intercept all I/O port accesses.
5968 */
5969 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5970 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5971
5972 /*
5973 * VMCS shadowing.
5974 *
5975 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5976 * enabled while executing the nested-guest.
5977 */
5978 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5979
5980 /*
5981 * APIC-access page.
5982 */
5983 RTHCPHYS HCPhysApicAccess;
5984 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5985 {
5986 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5987 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5988
5989 /** @todo NSTVMX: This is not really correct but currently is required to make
5990 * things work. We need to re-enable the page handler when we fall back to
5991 * IEM execution of the nested-guest! */
5992 PGMHandlerPhysicalPageTempOff(pVM, GCPhysApicAccess, GCPhysApicAccess);
5993
5994 void *pvPage;
5995 PGMPAGEMAPLOCK PgLockApicAccess;
5996 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5997 if (RT_SUCCESS(rc))
5998 {
5999 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
6000 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
6001
6002 /** @todo Handle proper releasing of page-mapping lock later. */
6003 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
6004 }
6005 else
6006 return rc;
6007 }
6008 else
6009 HCPhysApicAccess = 0;
6010
6011 /*
6012 * Virtual-APIC page and TPR threshold.
6013 */
6014 RTHCPHYS HCPhysVirtApic;
6015 uint32_t u32TprThreshold;
6016 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
6017 {
6018 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
6019 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
6020
6021 void *pvPage;
6022 PGMPAGEMAPLOCK PgLockVirtApic;
6023 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
6024 if (RT_SUCCESS(rc))
6025 {
6026 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
6027 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
6028
6029 /** @todo Handle proper releasing of page-mapping lock later. */
6030 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
6031 }
6032 else
6033 return rc;
6034
6035 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
6036 }
6037 else
6038 {
6039 HCPhysVirtApic = 0;
6040 u32TprThreshold = 0;
6041
6042 /*
6043 * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
6044 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
6045 * be taken care of by EPT/shadow paging.
6046 */
6047 if (pVM->hmr0.s.fAllow64BitGuests)
6048 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
6049 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
6050 }
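    /*
     * Editorial note (illustrative): CR8 is the 64-bit architectural alias of the local APIC
     * TPR (CR8[3:0] mirrors TPR[7:4]), so without a TPR shadow every guest access such as
     * "mov cr8, rax" has to trap via a MOV-CRx VM-exit so the virtual APIC TPR stays in sync
     * instead of the guest touching the host's CR8.
     */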
6051
6052 /*
6053 * Validate basic assumptions.
6054 */
6055 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
6056 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
6057 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
6058 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
6059
6060 /*
6061 * Commit it to the nested-guest VMCS.
6062 */
6063 int rc = VINF_SUCCESS;
6064 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
6065 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
6066 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
6067 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
6068 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
6069 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
6070 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
6071 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
6072 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
6073 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
6074 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
6075 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
6076 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
6077 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
6078 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
6079 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
6080 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
6081 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
6082 {
6083 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
6084 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
6085 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
6086 }
6087 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
6088 {
6089 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
6090 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
6091 }
6092 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6093 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
6094 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
6095 AssertRC(rc);
6096
6097 /*
6098 * Update the nested-guest VMCS cache.
6099 */
6100 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
6101 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
6102 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
6103 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
6104 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
6105 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
6106 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
6107 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
6108 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
6109
6110 /*
6111 * We need to flush the TLB if we are switching the APIC-access page address.
6112 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
6113 */
6114 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6115 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
6116
6117 /*
6118 * MSR bitmap.
6119 *
6120 * The MSR bitmap address has already been initialized while setting up the nested-guest
6121 * VMCS; here we need to merge the MSR bitmaps.
6122 */
6123 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
6124 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
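    /*
     * Editorial sketch (illustrative only; the real work is done by hmR0VmxMergeMsrBitmapNested):
     * in MSR bitmaps a set bit means "intercept", so merging boils down to OR-ing the guest and
     * nested-guest bitmaps so an MSR access exits if either party wants it intercepted, e.g.:
     *
     *     // pu64Gst, pu64NstGst and pu64Dst are illustrative names for the 4K bitmap pages
     *     for (uint32_t i = 0; i < X86_PAGE_4K_SIZE / sizeof(uint64_t); i++)
     *         pu64Dst[i] = pu64Gst[i] | pu64NstGst[i];
     */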
6125
6126 return VINF_SUCCESS;
6127}
6128#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6129
6130
6131/**
6132 * Does the preparations before executing guest code in VT-x.
6133 *
6134 * This may cause longjmps to ring-3 and may even result in rescheduling to the
6135 * recompiler/IEM. We must be cautious about committing guest-state information
6136 * into the VMCS on the assumption that we will assuredly execute the
6137 * guest in VT-x mode.
6138 *
6139 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
6140 * the common-state (TRPM/forceflags), we must undo those changes so that the
6141 * recompiler/IEM can (and should) use them when it resumes guest execution.
6142 * Otherwise such operations must be done when we can no longer exit to ring-3.
6143 *
6144 * @returns Strict VBox status code (i.e. informational status codes too).
6145 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
6146 * have been disabled.
6147 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
6148 * pending events).
6149 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
6150 * double-fault into the guest.
6151 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
6152 * dispatched directly.
6153 * @retval VINF_* scheduling changes, we have to go back to ring-3.
6154 *
6155 * @param pVCpu The cross context virtual CPU structure.
6156 * @param pVmxTransient The VMX-transient structure.
6157 * @param fStepping Whether we are single-stepping the guest in the
6158 * hypervisor debugger. Makes us ignore some of the reasons
6159 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
6160 * if event dispatching took place.
6161 */
6162static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
6163{
6164 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6165
6166 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
6167
6168#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
6169 if (pVmxTransient->fIsNestedGuest)
6170 {
6171 RT_NOREF2(pVCpu, fStepping);
6172 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
6173 return VINF_EM_RESCHEDULE_REM;
6174 }
6175#endif
6176
6177 /*
6178 * Check and process force flag actions, some of which might require us to go back to ring-3.
6179 */
6180 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
6181 if (rcStrict == VINF_SUCCESS)
6182 {
6183 /* FFs don't get set all the time. */
6184#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6185 if ( pVmxTransient->fIsNestedGuest
6186 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6187 {
6188 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6189 return VINF_VMX_VMEXIT;
6190 }
6191#endif
6192 }
6193 else
6194 return rcStrict;
6195
6196 /*
6197 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
6198 */
6199 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6200 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
6201 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6202 && PDMHasApic(pVM))
6203 {
6204 /* Get the APIC base MSR from the virtual APIC device. */
6205 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
6206
6207 /* Map the APIC access page. */
6208 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
6209 AssertRCReturn(rc, rc);
6210
6211 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
6212 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
6213 }
6214
6215#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6216 /*
6217 * Merge guest VMCS controls with the nested-guest VMCS controls.
6218 *
6219 * Even if we have not executed the guest prior to this (e.g. when resuming from a
6220 * saved state), we should be okay with merging controls as we initialize the
6221 * guest VMCS controls as part of the VM setup phase.
6222 */
6223 if ( pVmxTransient->fIsNestedGuest
6224 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
6225 {
6226 int rc = hmR0VmxMergeVmcsNested(pVCpu);
6227 AssertRCReturn(rc, rc);
6228 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
6229 }
6230#endif
6231
6232 /*
6233 * Evaluate events to be injected into the guest.
6234 *
6235 * Events in TRPM can be injected without inspecting the guest state.
6236 * If any new events (interrupts/NMI) are pending currently, we try to set up the
6237 * guest to cause a VM-exit the next time they are ready to receive the event.
6238 */
6239 if (TRPMHasTrap(pVCpu))
6240 vmxHCTrpmTrapToPendingEvent(pVCpu);
6241
6242 uint32_t fIntrState;
6243 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6244 &fIntrState);
6245
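    /*
     * Editorial note (illustrative): "setting up the guest to cause a VM-exit when it is ready
     * to receive the event" is typically done with the window-exiting execution controls rather
     * than by polling the guest, along the lines of:
     *
     *     if (fPendingExtInt)   // interrupt pending but RFLAGS.IF=0 or interrupt shadow active
     *         u32ProcCtls |= VMX_PROC_CTLS_INT_WINDOW_EXIT;   // exit as soon as it can be taken
     *     if (fPendingNmi)      // NMI pending while NMIs are blocked
     *         u32ProcCtls |= VMX_PROC_CTLS_NMI_WINDOW_EXIT;
     */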
6246#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6247 /*
6248 * While evaluating pending events if something failed (unlikely) or if we were
6249 * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
6250 */
6251 if (rcStrict != VINF_SUCCESS)
6252 return rcStrict;
6253 if ( pVmxTransient->fIsNestedGuest
6254 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6255 {
6256 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6257 return VINF_VMX_VMEXIT;
6258 }
6259#else
6260 Assert(rcStrict == VINF_SUCCESS);
6261#endif
6262
6263 /*
6264 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
6265 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
6266 * also result in triple-faulting the VM.
6267 *
6268 * With nested-guests, the above does not apply since unrestricted guest execution is a
6269 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
6270 */
6271 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6272 fIntrState, fStepping);
6273 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6274 { /* likely */ }
6275 else
6276 {
6277 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
6278 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
6279 return rcStrict;
6280 }
6281
6282 /*
6283 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
6284 * import CR3 themselves. We will need to update it here, as even a call as late as the
6285 * above vmxHCInjectPendingEvent() may lazily import guest-CPU state on demand, causing
6286 * the force flag checked below to be set.
6287 */
6288 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6289 {
6290 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
6291 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
6292 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
6293 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
6294 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6295 }
6296
6297#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6298 /* Paranoia. */
6299 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6300#endif
6301
6302 /*
6303 * No longjmps to ring-3 from this point on!!!
6304 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
6305 * This also disables flushing of the R0-logger instance (if any).
6306 */
6307 VMMRZCallRing3Disable(pVCpu);
6308
6309 /*
6310 * Export the guest state bits.
6311 *
6312 * We cannot perform longjmps while loading the guest state because we do not preserve the
6313 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
6314 * CPU migration.
6315 *
6316 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
6317 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
6318 */
6319 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6320 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6321 { /* likely */ }
6322 else
6323 {
6324 VMMRZCallRing3Enable(pVCpu);
6325 return rcStrict;
6326 }
6327
6328 /*
6329 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6330 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6331 * preemption disabled for a while. Since this is purely to aid the
6332 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6333 * disable interrupts on NT.
6334 *
6335 * We need to check for force-flags that could've possibly been altered since we last
6336 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6337 * see @bugref{6398}).
6338 *
6339 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6340 * to ring-3 before executing guest code.
6341 */
6342 pVmxTransient->fEFlags = ASMIntDisableFlags();
6343
6344 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6345 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6346 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6347 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6348 {
6349 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6350 {
6351#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6352 /*
6353 * If we are executing a nested-guest, make sure that we intercept subsequent
6354 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6355 * the VM-exit instruction emulation happy.
6356 */
6357 if (pVmxTransient->fIsNestedGuest)
6358 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6359#endif
6360
6361 /*
6362 * We've injected any pending events. This is really the point of no return (to ring-3).
6363 *
6364 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6365 * returns from this function, so do -not- enable them here.
6366 */
6367 pVCpu->hm.s.Event.fPending = false;
6368 return VINF_SUCCESS;
6369 }
6370
6371 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6372 rcStrict = VINF_EM_RAW_INTERRUPT;
6373 }
6374 else
6375 {
6376 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6377 rcStrict = VINF_EM_RAW_TO_R3;
6378 }
6379
6380 ASMSetFlags(pVmxTransient->fEFlags);
6381 VMMRZCallRing3Enable(pVCpu);
6382
6383 return rcStrict;
6384}
6385
6386
6387/**
6388 * Final preparations before executing guest code using hardware-assisted VMX.
6389 *
6390 * We can no longer get preempted to a different host CPU and there are no returns
6391 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6392 * failures), this function is not intended to fail sans unrecoverable hardware
6393 * errors.
6394 *
6395 * @param pVCpu The cross context virtual CPU structure.
6396 * @param pVmxTransient The VMX-transient structure.
6397 *
6398 * @remarks Called with preemption disabled.
6399 * @remarks No-long-jump zone!!!
6400 */
6401static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6402{
6403 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6404 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6405 Assert(!pVCpu->hm.s.Event.fPending);
6406
6407 /*
6408 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6409 */
6410 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6411 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6412
6413 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6414 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6415 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6416 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6417
6418 if (!CPUMIsGuestFPUStateActive(pVCpu))
6419 {
6420 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6421 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6422 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6423 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6424 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6425 }
6426
6427 /*
6428 * Re-export the host state bits as we may've been preempted (only happens when
6429 * thread-context hooks are used or when the VM start function changes) or if
6430 * the host CR0 is modified while loading the guest FPU state above.
6431 *
6432 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6433 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6434 * see @bugref{8432}.
6435 *
6436 * This may also happen when switching to/from a nested-guest VMCS without leaving
6437 * ring-0.
6438 */
6439 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6440 {
6441 hmR0VmxExportHostState(pVCpu);
6442 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6443 }
6444 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6445
6446 /*
6447 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6448 */
6449 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6450 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6451 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6452
6453 /*
6454 * Store status of the shared guest/host debug state at the time of VM-entry.
6455 */
6456 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6457 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6458
6459 /*
6460 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6461 * more than one conditional check. The post-run side of our code shall determine
6462 * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
6463 */
6464 if (pVmcsInfo->pbVirtApic)
6465 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6466
6467 /*
6468 * Update the host MSR values in the VM-exit MSR-load area.
6469 */
6470 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6471 {
6472 if (pVmcsInfo->cExitMsrLoad > 0)
6473 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6474 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6475 }
6476
6477 /*
6478 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6479 * VMX-preemption timer based on the next virtual sync clock deadline.
6480 */
6481 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6482 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6483 {
6484 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6485 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6486 }
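    /*
     * Editorial sketch (illustrative): with "use TSC offsetting" the guest reads
     * host_tsc + TSC_OFFSET, so the offset is simply the difference between the virtual TSC and
     * the host TSC, while the VMX-preemption timer counts down at the TSC rate shifted right by
     * the rate value reported in IA32_VMX_MISC[4:0]. Roughly:
     *
     *     uint64_t const uTscOffset    = uVirtualTscNow - ASMReadTSC();
     *     uint32_t const cPreemptTicks = (uint32_t)(cHostTscTicksToDeadline >> cPreemptShift);
     */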
6487
6488 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6489 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6490 if (!fIsRdtscIntercepted)
6491 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6492 else
6493 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6494
6495 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6496 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6497 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6498 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6499 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6500 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6501
6502 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6503
6504 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6505 as we're about to start executing the guest. */
6506
6507 /*
6508 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6509 *
6510 * This is done this late as updating the TSC offsetting/preemption timer above
6511 * figures out if we can skip intercepting RDTSCP by calculating the number of
6512 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6513 */
6514 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6515 && !fIsRdtscIntercepted)
6516 {
6517 vmxHCImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6518
6519 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6520 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6521 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6522 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6523 AssertRC(rc);
6524 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6525 pVmxTransient->fRemoveTscAuxMsr = true;
6526 }
6527
6528#ifdef VBOX_STRICT
6529 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6530 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6531 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6532 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6533#endif
6534
6535#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6536 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6537 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6538 * see @bugref{9180#c54}. */
6539 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6540 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6541 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6542#endif
6543}
6544
6545
6546/**
6547 * First C routine invoked after running guest code using hardware-assisted VMX.
6548 *
6549 * @param pVCpu The cross context virtual CPU structure.
6550 * @param pVmxTransient The VMX-transient structure.
6551 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6552 *
6553 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6554 *
6555 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6556 * unconditionally when it is safe to do so.
6557 */
6558static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6559{
6560 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6561 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6562 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6563 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6564 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6565 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6566
6567 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6568 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6569 {
6570 uint64_t uGstTsc;
6571 if (!pVmxTransient->fIsNestedGuest)
6572 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6573 else
6574 {
6575 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6576 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6577 }
6578 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6579 }
6580
6581 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6582 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6583 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6584
6585 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6586 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6587#ifdef VBOX_STRICT
6588 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6589#endif
6590 Assert(!ASMIntAreEnabled());
6591 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6592 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6593
6594#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6595 /*
6596 * Clean all the VMCS fields in the transient structure before reading
6597 * anything from the VMCS.
6598 */
6599 pVmxTransient->uExitReason = 0;
6600 pVmxTransient->uExitIntErrorCode = 0;
6601 pVmxTransient->uExitQual = 0;
6602 pVmxTransient->uGuestLinearAddr = 0;
6603 pVmxTransient->uExitIntInfo = 0;
6604 pVmxTransient->cbExitInstr = 0;
6605 pVmxTransient->ExitInstrInfo.u = 0;
6606 pVmxTransient->uEntryIntInfo = 0;
6607 pVmxTransient->uEntryXcptErrorCode = 0;
6608 pVmxTransient->cbEntryInstr = 0;
6609 pVmxTransient->uIdtVectoringInfo = 0;
6610 pVmxTransient->uIdtVectoringErrorCode = 0;
6611#endif
6612
6613 /*
6614 * Save the basic VM-exit reason and check if the VM-entry failed.
6615 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6616 */
6617 uint32_t uExitReason;
6618 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6619 AssertRC(rc);
6620 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6621 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
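    /*
     * Editorial note (illustrative): the 32-bit exit-reason field packs the basic exit reason
     * into bits 15:0 and the "VM-entry failure" flag into bit 31, which is what the two macros
     * above extract; conceptually:
     *
     *     pVmxTransient->uExitReason    = uExitReason & 0xffff;
     *     pVmxTransient->fVMEntryFailed = RT_BOOL(uExitReason & RT_BIT_32(31));
     */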
6622
6623 /*
6624 * Log the VM-exit before logging anything else as otherwise it might be a
6625 * tad confusing what happens before and after the world-switch.
6626 */
6627 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6628
6629 /*
6630 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6631 * bitmap permissions, if it was added before VM-entry.
6632 */
6633 if (pVmxTransient->fRemoveTscAuxMsr)
6634 {
6635 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6636 pVmxTransient->fRemoveTscAuxMsr = false;
6637 }
6638
6639 /*
6640 * Check if VMLAUNCH/VMRESUME succeeded.
6641 * If this failed, we cause a guru meditation and cease further execution.
6642 */
6643 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6644 {
6645 /*
6646 * Update the VM-exit history array here even if the VM-entry failed due to:
6647 * - Invalid guest state.
6648 * - MSR loading.
6649 * - Machine-check event.
6650 *
6651 * In any of the above cases we will still have a "valid" VM-exit reason
6652 * despite @a fVMEntryFailed being true.
6653 *
6654 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6655 *
6656 * Note! We don't have CS or RIP at this point. Will probably address that later
6657 * by amending the history entry added here.
6658 */
6659 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6660 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6661
6662 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6663 {
6664 VMMRZCallRing3Enable(pVCpu);
6665 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6666
6667#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6668 hmR0VmxReadAllRoFieldsVmcs(pVmxTransient);
6669#endif
6670
6671 /*
6672 * Always import the guest-interruptibility state as we need it while evaluating
6673 * injecting events on re-entry.
6674 *
6675 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6676 * checking for real-mode while exporting the state because all bits that cause
6677 * mode changes wrt CR0 are intercepted.
6678 */
6679 uint64_t const fImportMask = CPUMCTX_EXTRN_INHIBIT_INT
6680 | CPUMCTX_EXTRN_INHIBIT_NMI
6681#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6682 | HMVMX_CPUMCTX_EXTRN_ALL
6683#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6684 | CPUMCTX_EXTRN_RFLAGS
6685#endif
6686 ;
6687 rc = vmxHCImportGuestState(pVCpu, pVmcsInfo, fImportMask);
6688 AssertRC(rc);
6689
6690 /*
6691 * Sync the TPR shadow with our APIC state.
6692 */
6693 if ( !pVmxTransient->fIsNestedGuest
6694 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6695 {
6696 Assert(pVmcsInfo->pbVirtApic);
6697 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6698 {
6699 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6700 AssertRC(rc);
6701 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6702 }
6703 }
6704
6705 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6706 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6707 || pVmxTransient->fWasHyperDebugStateActive == false);
6708 return;
6709 }
6710 }
6711#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6712 else if (pVmxTransient->fIsNestedGuest)
6713 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6714#endif
6715 else
6716 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6717
6718 VMMRZCallRing3Enable(pVCpu);
6719}
6720
6721
6722/**
6723 * Runs the guest code using hardware-assisted VMX the normal way.
6724 *
6725 * @returns VBox status code.
6726 * @param pVCpu The cross context virtual CPU structure.
6727 * @param pcLoops Pointer to the number of executed loops.
6728 */
6729static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6730{
6731 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6732 Assert(pcLoops);
6733 Assert(*pcLoops <= cMaxResumeLoops);
6734 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6735
6736#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6737 /*
6738 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6739 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6740 * guest VMCS while entering the VMX ring-0 session.
6741 */
6742 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6743 {
6744 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6745 if (RT_SUCCESS(rc))
6746 { /* likely */ }
6747 else
6748 {
6749 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6750 return rc;
6751 }
6752 }
6753#endif
6754
6755 VMXTRANSIENT VmxTransient;
6756 RT_ZERO(VmxTransient);
6757 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6758
6759 /* Paranoia. */
6760 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6761
6762 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6763 for (;;)
6764 {
6765 Assert(!HMR0SuspendPending());
6766 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6767 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6768
6769 /*
6770 * Preparatory work for running guest code, this may force us to
6771 * return to ring-3.
6772 *
6773 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6774 */
6775 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6776 if (rcStrict != VINF_SUCCESS)
6777 break;
6778
6779 /* Interrupts are disabled at this point! */
6780 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6781 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6782 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6783 /* Interrupts are re-enabled at this point! */
6784
6785 /*
6786 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6787 */
6788 if (RT_SUCCESS(rcRun))
6789 { /* very likely */ }
6790 else
6791 {
6792 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6793 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6794 return rcRun;
6795 }
6796
6797 /*
6798 * Profile the VM-exit.
6799 */
6800 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6801 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6802 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6803 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6804 HMVMX_START_EXIT_DISPATCH_PROF();
6805
6806 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6807
6808 /*
6809 * Handle the VM-exit.
6810 */
6811#ifdef HMVMX_USE_FUNCTION_TABLE
6812 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6813#else
6814 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6815#endif
6816 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6817 if (rcStrict == VINF_SUCCESS)
6818 {
6819 if (++(*pcLoops) <= cMaxResumeLoops)
6820 continue;
6821 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6822 rcStrict = VINF_EM_RAW_INTERRUPT;
6823 }
6824 break;
6825 }
6826
6827 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6828 return rcStrict;
6829}
6830
6831
6832#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6833/**
6834 * Runs the nested-guest code using hardware-assisted VMX.
6835 *
6836 * @returns VBox status code.
6837 * @param pVCpu The cross context virtual CPU structure.
6838 * @param pcLoops Pointer to the number of executed loops.
6839 *
6840 * @sa hmR0VmxRunGuestCodeNormal.
6841 */
6842static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6843{
6844 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6845 Assert(pcLoops);
6846 Assert(*pcLoops <= cMaxResumeLoops);
6847 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6848
6849 /*
6850 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6851 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6852 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6853 */
6854 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6855 {
6856 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6857 if (RT_SUCCESS(rc))
6858 { /* likely */ }
6859 else
6860 {
6861 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6862 return rc;
6863 }
6864 }
6865
6866 VMXTRANSIENT VmxTransient;
6867 RT_ZERO(VmxTransient);
6868 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6869 VmxTransient.fIsNestedGuest = true;
6870
6871 /* Paranoia. */
6872 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6873
6874 /* Setup pointer so PGM/IEM can query VM-exit auxiliary info. on demand in ring-0. */
6875 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6876
6877 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6878 for (;;)
6879 {
6880 Assert(!HMR0SuspendPending());
6881 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6882 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6883
6884 /*
6885 * Preparatory work for running guest code, this may force us to
6886 * return to ring-3.
6887 *
6888 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6889 */
6890 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6891 if (rcStrict != VINF_SUCCESS)
6892 break;
6893
6894 /* Interrupts are disabled at this point! */
6895 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6896 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6897 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6898 /* Interrupts are re-enabled at this point! */
6899
6900 /*
6901 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6902 */
6903 if (RT_SUCCESS(rcRun))
6904 { /* very likely */ }
6905 else
6906 {
6907 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6908 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6909 rcStrict = rcRun;
6910 break;
6911 }
6912
6913 /*
6914 * Undo temporary disabling of the APIC-access page monitoring we did in hmR0VmxMergeVmcsNested.
6915 * This is needed for NestedTrap0eHandler (and IEM) to cause nested-guest APIC-access VM-exits.
6916 */
6917 if (VmxTransient.pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6918 {
6919 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
6920 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
6921 PGMHandlerPhysicalReset(pVCpu->CTX_SUFF(pVM), GCPhysApicAccess);
6922 }
6923
6924 /*
6925 * Profile the VM-exit.
6926 */
6927 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6928 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6929 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6930 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6931 HMVMX_START_EXIT_DISPATCH_PROF();
6932
6933 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6934
6935 /*
6936 * Handle the VM-exit.
6937 */
6938 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6939 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6940 if (rcStrict == VINF_SUCCESS)
6941 {
6942 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6943 {
6944 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6945 rcStrict = VINF_VMX_VMEXIT;
6946 }
6947 else
6948 {
6949 if (++(*pcLoops) <= cMaxResumeLoops)
6950 continue;
6951 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6952 rcStrict = VINF_EM_RAW_INTERRUPT;
6953 }
6954 }
6955 else
6956 Assert(rcStrict != VINF_VMX_VMEXIT);
6957 break;
6958 }
6959
6960 /* Ensure VM-exit auxiliary info. is no longer available. */
6961 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6962
6963 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6964 return rcStrict;
6965}
6966#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6967
6968
6969/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6970 * probes.
6971 *
6972 * The following few functions and associated structure contain the bloat
6973 * necessary for providing detailed debug events and dtrace probes as well as
6974 * reliable host side single stepping. This works on the principle of
6975 * "subclassing" the normal execution loop and workers. We replace the loop
6976 * method completely and override selected helpers to add necessary adjustments
6977 * to their core operation.
6978 *
6979 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6980 * any performance for debug and analysis features.
6981 *
6982 * @{
6983 */
6984
6985/**
6986 * Single steps guest code using hardware-assisted VMX.
6987 *
6988 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
6989 * but single-stepping through the hypervisor debugger.
6990 *
6991 * @returns Strict VBox status code (i.e. informational status codes too).
6992 * @param pVCpu The cross context virtual CPU structure.
6993 * @param pcLoops Pointer to the number of executed loops.
6994 *
6995 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6996 */
6997static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6998{
6999 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
7000 Assert(pcLoops);
7001 Assert(*pcLoops <= cMaxResumeLoops);
7002
7003 VMXTRANSIENT VmxTransient;
7004 RT_ZERO(VmxTransient);
7005 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
7006
7007 /* Set HMCPU indicators. */
7008 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
7009 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
7010 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7011 pVCpu->hmr0.s.fUsingDebugLoop = true;
7012
7013 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
7014 VMXRUNDBGSTATE DbgState;
7015 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
7016 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7017
7018 /*
7019 * The loop.
7020 */
7021 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
7022 for (;;)
7023 {
7024 Assert(!HMR0SuspendPending());
7025 HMVMX_ASSERT_CPU_SAFE(pVCpu);
7026 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7027 bool fStepping = pVCpu->hm.s.fSingleInstruction;
7028
7029 /* Set up VM-execution controls the next two can respond to. */
7030 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7031
7032 /*
7033 * Preparatory work for running guest code, this may force us to
7034 * return to ring-3.
7035 *
7036 * Warning! This bugger disables interrupts on VINF_SUCCESS!
7037 */
7038 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
7039 if (rcStrict != VINF_SUCCESS)
7040 break;
7041
7042 /* Interrupts are disabled at this point! */
7043 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
7044
7045 /* Override any obnoxious code in the above two calls. */
7046 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7047
7048 /*
7049 * Finally execute the guest.
7050 */
7051 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
7052
7053 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
7054 /* Interrupts are re-enabled at this point! */
7055
7056 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7057 if (RT_SUCCESS(rcRun))
7058 { /* very likely */ }
7059 else
7060 {
7061 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
7062 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
7063 return rcRun;
7064 }
7065
7066 /* Profile the VM-exit. */
7067 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7068 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
7069 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7070 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
7071 HMVMX_START_EXIT_DISPATCH_PROF();
7072
7073 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
7074
7075 /*
7076 * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
7077 */
7078 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
7079 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
7080 if (rcStrict != VINF_SUCCESS)
7081 break;
7082 if (++(*pcLoops) > cMaxResumeLoops)
7083 {
7084 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
7085 rcStrict = VINF_EM_RAW_INTERRUPT;
7086 break;
7087 }
7088
7089 /*
7090 * Stepping: Did the RIP change? If so, consider it a single step.
7091 * Otherwise, make sure one of the TFs gets set.
7092 */
7093 if (fStepping)
7094 {
7095 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
7096 AssertRC(rc);
7097 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
7098 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
7099 {
7100 rcStrict = VINF_EM_DBG_STEPPED;
7101 break;
7102 }
7103 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
7104 }
7105
7106 /*
7107 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
7108 */
7109 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
7110 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7111
7112 /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
7113 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
7114 Assert(rcStrict == VINF_SUCCESS);
7115 }
7116
7117 /*
7118 * Clear the X86_EFL_TF if necessary.
7119 */
7120 if (pVCpu->hmr0.s.fClearTrapFlag)
7121 {
7122 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
7123 AssertRC(rc);
7124 pVCpu->hmr0.s.fClearTrapFlag = false;
7125 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
7126 }
7127 /** @todo there seem to be issues with the resume flag when the monitor trap
7128 * flag is pending without being used. Seen early in bios init when
7129 * accessing APIC page in protected mode. */
7130
7131/** @todo we need to do hmR0VmxRunDebugStateRevert here too, in case we broke
7132 * out of the above loop. */
7133
7134 /* Restore HMCPU indicators. */
7135 pVCpu->hmr0.s.fUsingDebugLoop = false;
7136 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7137 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
7138
7139 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7140 return rcStrict;
7141}
7142
7143/** @} */
7144
7145
7146/**
7147 * Checks if any expensive dtrace probes are enabled and we should go to the
7148 * debug loop.
7149 *
7150 * @returns true if we should use debug loop, false if not.
7151 */
7152static bool hmR0VmxAnyExpensiveProbesEnabled(void)
7153{
7154 /* It's probably faster to OR the raw 32-bit counter variables together.
7155 Since the variables are in an array and the probes are next to one
7156 another (more or less), we have good locality. So, better read
7157 eight-nine cache lines every time and only have one conditional, than
7158 128+ conditionals, right? */
7159 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
7160 | VBOXVMM_XCPT_DE_ENABLED_RAW()
7161 | VBOXVMM_XCPT_DB_ENABLED_RAW()
7162 | VBOXVMM_XCPT_BP_ENABLED_RAW()
7163 | VBOXVMM_XCPT_OF_ENABLED_RAW()
7164 | VBOXVMM_XCPT_BR_ENABLED_RAW()
7165 | VBOXVMM_XCPT_UD_ENABLED_RAW()
7166 | VBOXVMM_XCPT_NM_ENABLED_RAW()
7167 | VBOXVMM_XCPT_DF_ENABLED_RAW()
7168 | VBOXVMM_XCPT_TS_ENABLED_RAW()
7169 | VBOXVMM_XCPT_NP_ENABLED_RAW()
7170 | VBOXVMM_XCPT_SS_ENABLED_RAW()
7171 | VBOXVMM_XCPT_GP_ENABLED_RAW()
7172 | VBOXVMM_XCPT_PF_ENABLED_RAW()
7173 | VBOXVMM_XCPT_MF_ENABLED_RAW()
7174 | VBOXVMM_XCPT_AC_ENABLED_RAW()
7175 | VBOXVMM_XCPT_XF_ENABLED_RAW()
7176 | VBOXVMM_XCPT_VE_ENABLED_RAW()
7177 | VBOXVMM_XCPT_SX_ENABLED_RAW()
7178 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
7179 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
7180 ) != 0
7181 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
7182 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
7183 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
7184 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
7185 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
7186 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
7187 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
7188 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
7189 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
7190 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
7191 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
7192 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
7193 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
7194 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
7195 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
7196 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
7197 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
7198 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
7199 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
7200 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
7201 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
7202 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
7203 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
7204 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
7205 | VBOXVMM_INSTR_STR_ENABLED_RAW()
7206 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
7207 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
7208 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
7209 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
7210 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
7211 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
7212 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
7213 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
7214 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
7215 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
7216 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
7217 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
7218 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
7219 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
7220 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
7221 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
7222 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
7223 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
7224 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
7225 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
7226 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
7227 ) != 0
7228 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
7229 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
7230 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
7231 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
7232 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
7233 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
7234 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
7235 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
7236 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
7237 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
7238 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
7239 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
7240 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
7241 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
7242 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
7243 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
7244 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
7245 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
7246 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
7247 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
7248 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
7249 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
7250 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
7251 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
7252 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
7253 | VBOXVMM_EXIT_STR_ENABLED_RAW()
7254 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
7255 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
7256 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
7257 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
7258 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
7259 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
7260 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
7261 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
7262 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
7263 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
7264 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
7265 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
7266 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
7267 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
7268 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
7269 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
7270 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
7271 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
7272 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
7273 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
7274 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
7275 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
7276 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
7277 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
7278 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
7279 ) != 0;
7280}
7281
7282
7283/**
7284 * Runs the guest using hardware-assisted VMX.
7285 *
7286 * @returns Strict VBox status code (i.e. informational status codes too).
7287 * @param pVCpu The cross context virtual CPU structure.
7288 */
7289VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
7290{
7291 AssertPtr(pVCpu);
7292 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7293 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7294 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7295 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
7296
7297 VBOXSTRICTRC rcStrict;
7298 uint32_t cLoops = 0;
7299 for (;;)
7300 {
7301#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7302 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
7303#else
7304 NOREF(pCtx);
7305 bool const fInNestedGuestMode = false;
7306#endif
7307 if (!fInNestedGuestMode)
7308 {
7309 if ( !pVCpu->hm.s.fUseDebugLoop
7310 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
7311 && !DBGFIsStepping(pVCpu)
7312 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
7313 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
7314 else
7315 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
7316 }
7317#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7318 else
7319 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
7320
7321 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7322 {
7323 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7324 continue;
7325 }
7326 if (rcStrict == VINF_VMX_VMEXIT)
7327 {
7328 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7329 continue;
7330 }
7331#endif
7332 break;
7333 }
7334
7335 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7336 switch (rcLoop)
7337 {
7338 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7339 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7340 }
7341
7342 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7343 if (RT_FAILURE(rc2))
7344 {
7345 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7346 rcStrict = rc2;
7347 }
7348 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7349 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7350 return rcStrict;
7351}
7352