VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp @ 94844

Last change: r94844, checked in by vboxsync, 3 years ago:

VMM/IEM,HM: Added a VBOX_WITH_IEM_TLB makefile config variable for quickly enabling/disabling the TLB; fixed associated compilation issue in the VMX code. bugref:9898

1/* $Id: HMVMXR0.cpp 94844 2022-05-05 10:49:39Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#define VMCPU_INCL_CPUM_GST_CTX
24#include <iprt/x86.h>
25#include <iprt/asm-amd64-x86.h>
26#include <iprt/thread.h>
27#include <iprt/mem.h>
28#include <iprt/mp.h>
29
30#include <VBox/vmm/pdmapi.h>
31#include <VBox/vmm/dbgf.h>
32#include <VBox/vmm/iem.h>
33#include <VBox/vmm/iom.h>
34#include <VBox/vmm/tm.h>
35#include <VBox/vmm/em.h>
36#include <VBox/vmm/gim.h>
37#include <VBox/vmm/apic.h>
38#include "HMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include <VBox/vmm/hmvmxinline.h>
41#include "HMVMXR0.h"
42#include "VMXInternal.h"
43#include "dtrace/VBoxVMM.h"
44
45
46/*********************************************************************************************************************************
47* Defined Constants And Macros *
48*********************************************************************************************************************************/
49#ifdef DEBUG_ramshankar
50# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
51# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
52# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
53# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
54# define HMVMX_ALWAYS_CLEAN_TRANSIENT
55# define HMVMX_ALWAYS_CHECK_GUEST_STATE
56# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
57# define HMVMX_ALWAYS_TRAP_PF
58# define HMVMX_ALWAYS_FLUSH_TLB
59# define HMVMX_ALWAYS_SWAP_EFER
60#endif
61
62
63/*********************************************************************************************************************************
64* Structures and Typedefs *
65*********************************************************************************************************************************/
66/**
67 * VMX page allocation information.
68 */
69typedef struct
70{
71 uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
72 uint32_t uPadding0; /**< Padding to ensure the array of these structs is aligned to a multiple of 8. */
73 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
74 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
75} VMXPAGEALLOCINFO;
76/** Pointer to VMX page-allocation info. */
77typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
78/** Pointer to a const VMX page-allocation info. */
79typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
80AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
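/*
 * A caller typically builds a short array of these entries on the stack and hands it to
 * hmR0VmxPagesAllocZ() further below; see hmR0VmxAllocVmcsInfo and hmR0VmxStructsAlloc for
 * the real usage. A trimmed-down sketch of the pattern:
 *
 *     VMXPAGEALLOCINFO aAllocInfo[] =
 *     {
 *         { true,        0, &pVmcsInfo->HCPhysVmcs,      &pVmcsInfo->pvVmcs      },
 *         { fMsrBitmaps, 0, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
 *     };
 *     int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
 *
 * Entries whose fValid member is false are simply skipped by the allocator.
 */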
81
82
83/*********************************************************************************************************************************
84* Internal Functions *
85*********************************************************************************************************************************/
86static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
87static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
88
89
90/**
91 * Checks if the given MSR is part of the last-branch-from-IP MSR stack.
92 * @returns @c true if it's part of the LBR stack, @c false otherwise.
93 *
94 * @param pVM The cross context VM structure.
95 * @param idMsr The MSR.
96 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
97 * Optional, can be NULL.
98 *
99 * @remarks Must only be called when LBR is enabled.
100 */
101DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
102{
103 Assert(pVM->hmr0.s.vmx.fLbr);
104 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
105 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
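    /* MSR ids below idLbrFromIpMsrFirst wrap around to a huge unsigned value here, so the
       single '<' check below rejects MSRs on either side of the LBR range. */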
106 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
107 if (idxMsr < cLbrStack)
108 {
109 if (pidxMsr)
110 *pidxMsr = idxMsr;
111 return true;
112 }
113 return false;
114}
115
116
117/**
118 * Checks if the given MSR is part of the last-branch-to-IP MSR stack.
119 * @returns @c true if it's part of the LBR stack, @c false otherwise.
120 *
121 * @param pVM The cross context VM structure.
122 * @param idMsr The MSR.
123 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
124 * Optional, can be NULL.
125 *
126 * @remarks Must only be called when LBR is enabled and when last-branch-to-IP MSRs
127 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
128 */
129DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
130{
131 Assert(pVM->hmr0.s.vmx.fLbr);
132 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
133 {
134 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
135 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
136 if (idxMsr < cLbrStack)
137 {
138 if (pidxMsr)
139 *pidxMsr = idxMsr;
140 return true;
141 }
142 }
143 return false;
144}
145
146
147/**
148 * Gets the active (in use) VMCS info. object for the specified VCPU.
149 *
150 * This is either the guest or nested-guest VMCS info. and need not necessarily
151 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
152 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
153 * current VMCS while returning to ring-3. However, the VMCS info. object for that
154 * VMCS would still be active and returned here so that we could dump the VMCS
155 * fields to ring-3 for diagnostics. This function is thus only used to
156 * distinguish between the nested-guest and guest VMCS.
157 *
158 * @returns The active VMCS information.
159 * @param pVCpu The cross context virtual CPU structure.
160 *
161 * @thread EMT.
162 * @remarks This function may be called with preemption or interrupts disabled!
163 */
164DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
165{
166 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
167 return &pVCpu->hmr0.s.vmx.VmcsInfo;
168 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
169}
170
171
172/**
173 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
174 * area.
175 *
176 * @returns @c true if it's different, @c false otherwise.
177 * @param pVmcsInfo The VMCS info. object.
178 */
179DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
180{
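    /* Note: hmR0VmxAllocVmcsInfo points the VM-exit MSR-store area at the same page as the
       VM-entry MSR-load area by default, so this typically returns false. */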
181 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
182 && pVmcsInfo->pvGuestMsrStore);
183}
184
185
186/**
187 * Sets the given Processor-based VM-execution controls.
188 *
189 * @param pVmxTransient The VMX-transient structure.
190 * @param uProcCtls The Processor-based VM-execution controls to set.
191 */
192static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
193{
194 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
195 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
196 {
197 pVmcsInfo->u32ProcCtls |= uProcCtls;
198 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
199 AssertRC(rc);
200 }
201}
202
203
204/**
205 * Removes the given Processor-based VM-execution controls.
206 *
207 * @param pVCpu The cross context virtual CPU structure.
208 * @param pVmxTransient The VMX-transient structure.
209 * @param uProcCtls The Processor-based VM-execution controls to remove.
210 *
211 * @remarks When executing a nested-guest, this will not remove any of the specified
212 * controls if the nested hypervisor has set any one of them.
213 */
214static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
215{
216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
217 if (pVmcsInfo->u32ProcCtls & uProcCtls)
218 {
219#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
220 if ( !pVmxTransient->fIsNestedGuest
221 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
222#else
223 NOREF(pVCpu);
224 if (!pVmxTransient->fIsNestedGuest)
225#endif
226 {
227 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
228 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
229 AssertRC(rc);
230 }
231 }
232}
233
234
235/**
236 * Sets the TSC offset for the current VMCS.
237 *
238 * @param uTscOffset The TSC offset to set.
239 * @param pVmcsInfo The VMCS info. object.
240 */
241static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
242{
243 if (pVmcsInfo->u64TscOffset != uTscOffset)
244 {
245 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
246 AssertRC(rc);
247 pVmcsInfo->u64TscOffset = uTscOffset;
248 }
249}
250
251
252/**
253 * Loads the VMCS specified by the VMCS info. object.
254 *
255 * @returns VBox status code.
256 * @param pVmcsInfo The VMCS info. object.
257 *
258 * @remarks Can be called with interrupts disabled.
259 */
260static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
261{
262 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
263 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
264
265 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
266 if (RT_SUCCESS(rc))
267 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
268 return rc;
269}
270
271
272/**
273 * Clears the VMCS specified by the VMCS info. object.
274 *
275 * @returns VBox status code.
276 * @param pVmcsInfo The VMCS info. object.
277 *
278 * @remarks Can be called with interrupts disabled.
279 */
280static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
281{
282 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
283 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
284
285 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
286 if (RT_SUCCESS(rc))
287 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
288 return rc;
289}
290
291
292/**
293 * Checks whether the MSR belongs to the set of guest MSRs that we restore
294 * lazily while leaving VT-x.
295 *
296 * @returns true if it does, false otherwise.
297 * @param pVCpu The cross context virtual CPU structure.
298 * @param idMsr The MSR to check.
299 */
300static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
301{
302 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
303 {
304 switch (idMsr)
305 {
306 case MSR_K8_LSTAR:
307 case MSR_K6_STAR:
308 case MSR_K8_SF_MASK:
309 case MSR_K8_KERNEL_GS_BASE:
310 return true;
311 }
312 }
313 return false;
314}
315
316
317/**
318 * Loads a set of guest MSRs to allow read/write passthru to the guest.
319 *
320 * The name of this function is slightly confusing. This function does NOT
321 * postpone loading, but loads the MSRs right now. "hmR0VmxLazy" is simply a
322 * common prefix for functions dealing with "lazy restoration" of the shared
323 * MSRs.
324 *
325 * @param pVCpu The cross context virtual CPU structure.
326 *
327 * @remarks No-long-jump zone!!!
328 */
329static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
330{
331 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
332 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
333
334 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
335 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
336 {
337 /*
338 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
339 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
340 * we can skip a few MSR writes.
341 *
342 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
343 * guest MSR values in the guest-CPU context might be different to what's currently
344 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
345 * CPU, see @bugref{8728}.
346 */
347 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
348 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
349 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
350 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
351 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
352 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
353 {
354#ifdef VBOX_STRICT
355 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
356 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
357 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
358 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
359#endif
360 }
361 else
362 {
363 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
364 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
365 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
366 /* The system call flag mask register isn't as benign and accepting of all
367 values as the above, so mask it to avoid #GP'ing on corrupted input. */
368 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
369 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
370 }
371 }
372 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
373}
374
375
376/**
377 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
378 *
379 * @returns @c true if found, @c false otherwise.
380 * @param pVmcsInfo The VMCS info. object.
381 * @param idMsr The MSR to find.
382 */
383static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
384{
385 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
386 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
387 Assert(pMsrs);
388 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
389 for (uint32_t i = 0; i < cMsrs; i++)
390 {
391 if (pMsrs[i].u32Msr == idMsr)
392 return true;
393 }
394 return false;
395}
396
397
398/**
399 * Performs lazy restoration of the set of host MSRs if they were previously
400 * loaded with guest MSR values.
401 *
402 * @param pVCpu The cross context virtual CPU structure.
403 *
404 * @remarks No-long-jump zone!!!
405 * @remarks The guest MSRs should have been saved back into the guest-CPU
406 * context by hmR0VmxImportGuestState()!!!
407 */
408static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
409{
410 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
411 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
412
413 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
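    /* VMX_LAZY_MSRS_SAVED_HOST is set by hmR0VmxLazySaveHostMsrs() and VMX_LAZY_MSRS_LOADED_GUEST
       by hmR0VmxLazyLoadGuestMsrs(); both flags are cleared again at the end of this function. */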
414 {
415 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
416 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
417 {
418 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
419 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
420 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
421 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
422 }
423 }
424 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
425}
426
427
428/**
429 * Sets pfnStartVm to the best suited variant.
430 *
431 * This must be called whenever anything changes relative to the hmR0VmxStartVm
432 * variant selection:
433 * - pVCpu->hm.s.fLoadSaveGuestXcr0
434 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
435 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
436 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
437 * - Perhaps: CPUMCTX.fXStateMask (windows only)
438 *
439 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
440 * can be changed at runtime.
441 */
442static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
443{
444 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
445 {
446 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
447 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
448 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
449 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
450 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
451 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
452 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
453 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
454 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
455 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
456 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
457 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
458 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
459 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
460 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
461 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
462 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
463 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
464 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
465 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
466 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
467 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
468 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
469 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
470 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
471 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
472 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
473 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
474 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
475 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
476 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
477 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
478 };
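    /* The table above is indexed by a 5-bit mask built below: bit 0 = XCR0 swapping, bit 1 = IBPB
       on entry, bit 2 = L1D flush on entry, bit 3 = MDS clear on entry, bit 4 = IBPB on exit.
       For example, XCR0 swapping plus IBPB-on-entry yields index 3, i.e. the
       WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit variant. */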
479 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
480 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
481 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
482 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
483 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
484 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
485 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
486 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
487}
488
489
490/**
491 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
492 * stack.
493 *
494 * @returns Strict VBox status code (i.e. informational status codes too).
495 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
496 * @param pVCpu The cross context virtual CPU structure.
497 * @param uValue The value to push to the guest stack.
498 */
499static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
500{
501 /*
502 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
503 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
504 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
505 */
506 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
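    /* Pushing 2 bytes with SP=1 would wrap past the stack segment (see the segment-wraparound
       reference above); treat it as a triple-fault and request a VM reset. */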
507 if (pCtx->sp == 1)
508 return VINF_EM_RESET;
509 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
510 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
511 AssertRC(rc);
512 return rc;
513}
514
515
516/**
517 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
518 * unreferenced local parameters in the template code...
519 */
520DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
521{
522 RT_NOREF(pVCpu);
523 return VMXWriteVmcs16(uFieldEnc, u16Val);
524}
525
526
527/**
528 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
529 * unreferenced local parameters in the template code...
530 */
531DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
532{
533 RT_NOREF(pVCpu);
534 return VMXWriteVmcs32(uFieldEnc, u32Val);
535}
536
537
538/**
539 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
540 * unreferenced local parameters in the template code...
541 */
542DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
543{
544 RT_NOREF(pVCpu);
545 return VMXWriteVmcs64(uFieldEnc, u64Val);
546}
547
548
549/**
550 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
551 * unreferenced local parameters in the template code...
552 */
553DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
554{
555 RT_NOREF(pVCpu);
556 return VMXReadVmcs16(uFieldEnc, pu16Val);
557}
558
559
560/**
561 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
562 * unreferenced local parameters in the template code...
563 */
564DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
565{
566 RT_NOREF(pVCpu);
567 return VMXReadVmcs32(uFieldEnc, pu32Val);
568}
569
570
571/**
572 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
573 * unreferenced local parameters in the template code...
574 */
575DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
576{
577 RT_NOREF(pVCpu);
578 return VMXReadVmcs64(uFieldEnc, pu64Val);
579}
580
581
582/*
583 * Instantiate the code we share with the NEM darwin backend.
584 */
585#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
586#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s
587
588#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
589#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
590#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
591#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr
592
593#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
594#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
595#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
596#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
597
598#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
599#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
600#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
601#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
602
603#include "../VMMAll/VMXAllTemplate.cpp.h"
604
605#undef VMX_VMCS_WRITE_16
606#undef VMX_VMCS_WRITE_32
607#undef VMX_VMCS_WRITE_64
608#undef VMX_VMCS_WRITE_NW
609
610#undef VMX_VMCS_READ_16
611#undef VMX_VMCS_READ_32
612#undef VMX_VMCS_READ_64
613#undef VMX_VMCS_READ_NW
614
615#undef VM_IS_VMX_PREEMPT_TIMER_USED
616#undef VM_IS_VMX_NESTED_PAGING
617#undef VM_IS_VMX_UNRESTRICTED_GUEST
618#undef VCPU_2_VMXSTATS
619#undef VCPU_2_VMXSTATE
620
621
622/**
623 * Updates the VM's last error record.
624 *
625 * If there was a VMX instruction error, reads the error data from the VMCS and
626 * updates VCPU's last error record as well.
627 *
628 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
629 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
630 * VERR_VMX_INVALID_VMCS_FIELD.
631 * @param rc The error code.
632 */
633static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
634{
635 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
636 || rc == VERR_VMX_UNABLE_TO_START_VM)
637 {
638 AssertPtrReturnVoid(pVCpu);
639 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
640 }
641 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
642}
643
644
645/**
646 * Enters VMX root mode operation on the current CPU.
647 *
648 * @returns VBox status code.
649 * @param pHostCpu The HM physical-CPU structure.
650 * @param pVM The cross context VM structure. Can be
651 * NULL, after a resume.
652 * @param HCPhysCpuPage Physical address of the VMXON region.
653 * @param pvCpuPage Pointer to the VMXON region.
654 */
655static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
656{
657 Assert(pHostCpu);
658 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
659 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
660 Assert(pvCpuPage);
661 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
662
663 if (pVM)
664 {
665 /* Write the VMCS revision identifier to the VMXON region. */
666 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
667 }
668
669 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
670 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
671
672 /* Enable the VMX bit in CR4 if necessary. */
673 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
674
675 /* Record whether VMXE was already enabled prior to us enabling it above. */
676 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
677
678 /* Enter VMX root mode. */
679 int rc = VMXEnable(HCPhysCpuPage);
680 if (RT_FAILURE(rc))
681 {
682 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
683 if (!pHostCpu->fVmxeAlreadyEnabled)
684 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
685
686 if (pVM)
687 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
688 }
689
690 /* Restore interrupts. */
691 ASMSetFlags(fEFlags);
692 return rc;
693}
694
695
696/**
697 * Exits VMX root mode operation on the current CPU.
698 *
699 * @returns VBox status code.
700 * @param pHostCpu The HM physical-CPU structure.
701 */
702static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
703{
704 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
705
706 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
707 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
708
709 /* If we're for some reason not in VMX root mode, then don't leave it. */
710 RTCCUINTREG const uHostCr4 = ASMGetCR4();
711
712 int rc;
713 if (uHostCr4 & X86_CR4_VMXE)
714 {
715 /* Exit VMX root mode and clear the VMX bit in CR4. */
716 VMXDisable();
717
718 /* Clear CR4.VMXE only if it was clear prior to us setting it. */
719 if (!pHostCpu->fVmxeAlreadyEnabled)
720 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
721
722 rc = VINF_SUCCESS;
723 }
724 else
725 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
726
727 /* Restore interrupts. */
728 ASMSetFlags(fEFlags);
729 return rc;
730}
731
732
733/**
734 * Allocates pages as specified by an array of VMX page-allocation info
735 * objects.
736 *
737 * The pages' contents are zeroed after allocation.
738 *
739 * @returns VBox status code.
740 * @param phMemObj Where to return the handle to the allocation.
741 * @param paAllocInfo The pointer to the first element of the VMX
742 * page-allocation info object array.
743 * @param cEntries The number of elements in the @a paAllocInfo array.
744 */
745static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
746{
747 *phMemObj = NIL_RTR0MEMOBJ;
748
749 /* Figure out how many pages to allocate. */
750 uint32_t cPages = 0;
751 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
752 cPages += !!paAllocInfo[iPage].fValid;
753
754 /* Allocate the pages. */
755 if (cPages)
756 {
757 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
758 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
759 if (RT_FAILURE(rc))
760 return rc;
761
762 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
763 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
764 RT_BZERO(pvFirstPage, cbPages);
765
766 uint32_t iPage = 0;
767 for (uint32_t i = 0; i < cEntries; i++)
768 if (paAllocInfo[i].fValid)
769 {
770 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
771 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
772 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
773 AssertPtr(pvPage);
774
775 Assert(paAllocInfo[iPage].pHCPhys);
776 Assert(paAllocInfo[iPage].ppVirt);
777 *paAllocInfo[iPage].pHCPhys = HCPhysPage;
778 *paAllocInfo[iPage].ppVirt = pvPage;
779
780 /* Move to next page. */
781 ++iPage;
782 }
783
784 /* Make sure all valid (requested) pages have been assigned. */
785 Assert(iPage == cPages);
786 }
787 return VINF_SUCCESS;
788}
789
790
791/**
792 * Frees pages allocated using hmR0VmxPagesAllocZ.
793 *
794 * @param phMemObj Pointer to the memory object handle. Will be set to
795 * NIL.
796 */
797DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
798{
799 /* We can cleanup wholesale since it's all one allocation. */
800 if (*phMemObj != NIL_RTR0MEMOBJ)
801 {
802 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
803 *phMemObj = NIL_RTR0MEMOBJ;
804 }
805}
806
807
808/**
809 * Initializes a VMCS info. object.
810 *
811 * @param pVmcsInfo The VMCS info. object.
812 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
813 */
814static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
815{
816 RT_ZERO(*pVmcsInfo);
817 RT_ZERO(*pVmcsInfoShared);
818
819 pVmcsInfo->pShared = pVmcsInfoShared;
820 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
821 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
822 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
823 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
824 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
825 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
826 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
827 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
828 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
829 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
830 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
831 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
832}
833
834
835/**
836 * Frees the VT-x structures for a VMCS info. object.
837 *
838 * @param pVmcsInfo The VMCS info. object.
839 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
840 */
841static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
842{
843 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
844 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
845}
846
847
848/**
849 * Allocates the VT-x structures for a VMCS info. object.
850 *
851 * @returns VBox status code.
852 * @param pVCpu The cross context virtual CPU structure.
853 * @param pVmcsInfo The VMCS info. object.
854 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
855 *
856 * @remarks The caller is expected to take care of any and all allocation failures.
857 * This function will not perform any cleanup for failures half-way
858 * through.
859 */
860static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
861{
862 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
863
864 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
865 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
866 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
867 VMXPAGEALLOCINFO aAllocInfo[] =
868 {
869 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
870 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
871 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
872 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
873 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
874 };
875
876 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
877 if (RT_FAILURE(rc))
878 return rc;
879
880 /*
881 * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas,
882 * because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
883 */
884 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
885 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
886 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
887 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
888
889 /*
890 * Get the virtual-APIC page rather than allocating it again.
891 */
892 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
893 {
894 if (!fIsNstGstVmcs)
895 {
896 if (PDMHasApic(pVM))
897 {
898 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
899 if (RT_FAILURE(rc))
900 return rc;
901 Assert(pVmcsInfo->pbVirtApic);
902 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
903 }
904 }
905 else
906 {
907 pVmcsInfo->pbVirtApic = &pVCpu->cpum.GstCtx.hwvirt.vmx.abVirtApicPage[0];
908 pVmcsInfo->HCPhysVirtApic = GVMMR0ConvertGVMPtr2HCPhys(pVM, pVmcsInfo->pbVirtApic);
909 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
910 }
911 }
912
913 return VINF_SUCCESS;
914}
915
916
917/**
918 * Frees all VT-x structures for the VM.
919 *
921 * @param pVM The cross context VM structure.
922 */
923static void hmR0VmxStructsFree(PVMCC pVM)
924{
925 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
926#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
927 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
928 {
929 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
930 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
931 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
932 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
933 }
934#endif
935
936 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
937 {
938 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
939 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
940#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
941 if (pVM->cpum.ro.GuestFeatures.fVmx)
942 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
943#endif
944 }
945}
946
947
948/**
949 * Allocates all VT-x structures for the VM.
950 *
951 * @returns IPRT status code.
952 * @param pVM The cross context VM structure.
953 *
954 * @remarks This function will clean up on memory allocation failures.
955 */
956static int hmR0VmxStructsAlloc(PVMCC pVM)
957{
958 /*
959 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
960 * The VMCS size cannot be more than 4096 bytes.
961 *
962 * See Intel spec. Appendix A.1 "Basic VMX Information".
963 */
964 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
965 if (cbVmcs <= X86_PAGE_4K_SIZE)
966 { /* likely */ }
967 else
968 {
969 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
970 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
971 }
972
973 /*
974 * Allocate per-VM VT-x structures.
975 */
976 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
977 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
978 VMXPAGEALLOCINFO aAllocInfo[] =
979 {
980 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
981 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
982 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
983#ifdef VBOX_WITH_CRASHDUMP_MAGIC
984 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
985#endif
986 };
987
988 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
989 if (RT_SUCCESS(rc))
990 {
991#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
992 /* Allocate the shadow VMCS-fields array. */
993 if (fUseVmcsShadowing)
994 {
995 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
996 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
997 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
998 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
999 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1000 rc = VERR_NO_MEMORY;
1001 }
1002#endif
1003
1004 /*
1005 * Allocate per-VCPU VT-x structures.
1006 */
1007 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1008 {
1009 /* Allocate the guest VMCS structures. */
1010 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1011 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1012
1013#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1014 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1015 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1016 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1017#endif
1018 }
1019 if (RT_SUCCESS(rc))
1020 return VINF_SUCCESS;
1021 }
1022 hmR0VmxStructsFree(pVM);
1023 return rc;
1024}
1025
1026
1027/**
1028 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1029 *
1030 * @param pVM The cross context VM structure.
1031 */
1032static void hmR0VmxStructsInit(PVMCC pVM)
1033{
1034 /* Paranoia. */
1035 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1036#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1037 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1038#endif
1039
1040 /*
1041 * Initialize members up-front so we can cleanup en masse on allocation failures.
1042 */
1043#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1044 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1045#endif
1046 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1047 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1048 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1049 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1050 {
1051 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1052 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1053 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1054 }
1055}
1056
1057#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1058/**
1059 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
1060 *
1061 * @returns @c true if the MSR is intercepted, @c false otherwise.
1062 * @param pbMsrBitmap The MSR bitmap.
1063 * @param offMsr The MSR byte offset.
1064 * @param iBit The bit offset from the byte offset.
1065 */
1066DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
1067{
1068 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
1069 return ASMBitTest(pbMsrBitmap + offMsr, iBit);
1070}
1071#endif
1072
1073/**
1074 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1075 *
1076 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1077 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1078 * VMX execution of the nested-guest, only if the nested-guest is also not intercepting
1079 * the read/write access of this MSR.
1080 *
1081 * @param pVCpu The cross context virtual CPU structure.
1082 * @param pVmcsInfo The VMCS info. object.
1083 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1084 * @param idMsr The MSR value.
1085 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1086 * include both a read -and- a write permission!
1087 *
1088 * @sa CPUMGetVmxMsrPermission.
1089 * @remarks Can be called with interrupts disabled.
1090 */
1091static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1092{
1093 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1094 Assert(pbMsrBitmap);
1095 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1096
1097 /*
1098 * MSR-bitmap Layout:
1099 * Byte index MSR range Interpreted as
1100 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1101 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1102 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1103 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1104 *
1105 * A bit corresponding to an MSR within the above range causes a VM-exit
1106 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls outside
1107 * these ranges, it always causes a VM-exit.
1108 *
1109 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1110 */
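    /* Worked example: for MSR_K8_LSTAR (0xc0000082) the code below yields offMsr = 0x400 and
       iBit = 0x82, i.e. the read bit lives in the 0x400-0x7ff block and the corresponding
       write bit in the 0xc00-0xfff block. */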
1111 uint16_t const offBitmapRead = 0;
1112 uint16_t const offBitmapWrite = 0x800;
1113 uint16_t offMsr;
1114 int32_t iBit;
1115 if (idMsr <= UINT32_C(0x00001fff))
1116 {
1117 offMsr = 0;
1118 iBit = idMsr;
1119 }
1120 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1121 {
1122 offMsr = 0x400;
1123 iBit = idMsr - UINT32_C(0xc0000000);
1124 }
1125 else
1126 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1127
1128 /*
1129 * Set the MSR read permission.
1130 */
1131 uint16_t const offMsrRead = offBitmapRead + offMsr;
1132 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1133 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1134 {
1135#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1136 bool const fClear = !fIsNstGstVmcs ? true
1137 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1138#else
1139 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1140 bool const fClear = true;
1141#endif
1142 if (fClear)
1143 ASMBitClear(pbMsrBitmap + offMsrRead, iBit);
1144 }
1145 else
1146 ASMBitSet(pbMsrBitmap + offMsrRead, iBit);
1147
1148 /*
1149 * Set the MSR write permission.
1150 */
1151 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1152 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1153 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1154 {
1155#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1156 bool const fClear = !fIsNstGstVmcs ? true
1157 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1158#else
1159 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1160 bool const fClear = true;
1161#endif
1162 if (fClear)
1163 ASMBitClear(pbMsrBitmap + offMsrWrite, iBit);
1164 }
1165 else
1166 ASMBitSet(pbMsrBitmap + offMsrWrite, iBit);
1167}
1168
1169
1170/**
1171 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1172 * area.
1173 *
1174 * @returns VBox status code.
1175 * @param pVCpu The cross context virtual CPU structure.
1176 * @param pVmcsInfo The VMCS info. object.
1177 * @param cMsrs The number of MSRs.
1178 */
1179static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1180{
1181 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1182 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1183 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1184 {
1185 /* Commit the MSR counts to the VMCS and update the cache. */
1186 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1187 {
1188 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1189 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1190 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1191 pVmcsInfo->cEntryMsrLoad = cMsrs;
1192 pVmcsInfo->cExitMsrStore = cMsrs;
1193 pVmcsInfo->cExitMsrLoad = cMsrs;
1194 }
1195 return VINF_SUCCESS;
1196 }
1197
1198 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1199 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1200 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1201}
1202
1203
1204/**
1205 * Adds a new (or updates the value of an existing) guest/host MSR
1206 * pair to be swapped during the world-switch as part of the
1207 * auto-load/store MSR area in the VMCS.
1208 *
1209 * @returns VBox status code.
1210 * @param pVCpu The cross context virtual CPU structure.
1211 * @param pVmxTransient The VMX-transient structure.
1212 * @param idMsr The MSR.
1213 * @param uGuestMsrValue Value of the guest MSR.
1214 * @param fSetReadWrite Whether to set the guest read/write access of this
1215 * MSR (thus not causing a VM-exit).
1216 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1217 * necessary.
1218 */
1219static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1220 bool fSetReadWrite, bool fUpdateHostMsr)
1221{
1222 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1223 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1224 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1225 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1226 uint32_t i;
1227
1228 /* Paranoia. */
1229 Assert(pGuestMsrLoad);
1230
1231#ifndef DEBUG_bird
1232 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1233#endif
1234
1235 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1236 for (i = 0; i < cMsrs; i++)
1237 {
1238 if (pGuestMsrLoad[i].u32Msr == idMsr)
1239 break;
1240 }
1241
1242 bool fAdded = false;
1243 if (i == cMsrs)
1244 {
1245 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1246 ++cMsrs;
1247 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1248 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1249
1250 /* Set the guest to read/write this MSR without causing VM-exits. */
1251 if ( fSetReadWrite
1252 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1253 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1254
1255 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1256 fAdded = true;
1257 }
1258
1259 /* Update the MSR value for the newly added or already existing MSR. */
1260 pGuestMsrLoad[i].u32Msr = idMsr;
1261 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1262
1263 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1264 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1265 {
1266 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1267 pGuestMsrStore[i].u32Msr = idMsr;
1268 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1269 }
1270
1271 /* Update the corresponding slot in the host MSR area. */
1272 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1273 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1274 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1275 pHostMsr[i].u32Msr = idMsr;
1276
1277 /*
1278 * Only if the caller requests to update the host MSR value AND we've newly added the
1279 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1280 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1281 *
1282 * We do this for performance reasons since reading MSRs may be quite expensive.
1283 */
1284 if (fAdded)
1285 {
1286 if (fUpdateHostMsr)
1287 {
1288 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1289 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1290 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1291 }
1292 else
1293 {
1294 /* Someone else can do the work. */
1295 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1296 }
1297 }
1298 return VINF_SUCCESS;
1299}
1300
1301
1302/**
1303 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1304 * auto-load/store MSR area in the VMCS.
1305 *
1306 * @returns VBox status code.
1307 * @param pVCpu The cross context virtual CPU structure.
1308 * @param pVmxTransient The VMX-transient structure.
1309 * @param idMsr The MSR.
1310 */
1311static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1312{
1313 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1314 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1315 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1316 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1317
1318#ifndef DEBUG_bird
1319 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1320#endif
1321
1322 for (uint32_t i = 0; i < cMsrs; i++)
1323 {
1324 /* Find the MSR. */
1325 if (pGuestMsrLoad[i].u32Msr == idMsr)
1326 {
1327 /*
1328 * If it's the last MSR, we only need to reduce the MSR count.
1329 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1330 */
1331 if (i < cMsrs - 1)
1332 {
1333 /* Remove it from the VM-entry MSR-load area. */
1334 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1335 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1336
1337 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1338 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1339 {
1340 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1341 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1342 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1343 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1344 }
1345
1346 /* Remove it from the VM-exit MSR-load area. */
1347 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1348 Assert(pHostMsr[i].u32Msr == idMsr);
1349 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1350 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1351 }
1352
1353 /* Reduce the count to reflect the removed MSR and bail. */
1354 --cMsrs;
1355 break;
1356 }
1357 }
1358
1359 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1360 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1361 {
1362 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1363 AssertRCReturn(rc, rc);
1364
1365 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1366 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1367 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1368
1369 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1370 return VINF_SUCCESS;
1371 }
1372
1373 return VERR_NOT_FOUND;
1374}
1375
1376
1377/**
1378 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1379 *
1380 * @param pVCpu The cross context virtual CPU structure.
1381 * @param pVmcsInfo The VMCS info. object.
1382 *
1383 * @remarks No-long-jump zone!!!
1384 */
1385static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1386{
1387 RT_NOREF(pVCpu);
1388 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1389
1390 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1391 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1392 Assert(pHostMsrLoad);
1393 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1394 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1395 for (uint32_t i = 0; i < cMsrs; i++)
1396 {
1397 /*
1398 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1399 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1400 */
1401 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1402 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1403 else
1404 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1405 }
1406}
1407
1408
1409/**
1410 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1411 * perform lazy restoration of the host MSRs while leaving VT-x.
1412 *
1413 * @param pVCpu The cross context virtual CPU structure.
1414 *
1415 * @remarks No-long-jump zone!!!
1416 */
1417static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1418{
1419 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1420
1421 /*
1422 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1423 */
1424 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1425 {
1426 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1427 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1428 {
1429 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1430 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1431 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1432 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1433 }
1434 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1435 }
1436}
1437
1438
1439#ifdef VBOX_STRICT
1440
1441/**
1442 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1443 *
1444 * @param pVmcsInfo The VMCS info. object.
1445 */
1446static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1447{
1448 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1449
1450 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1451 {
1452 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1453 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1454 uint64_t uVmcsEferMsrVmcs;
1455 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1456 AssertRC(rc);
1457
1458 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1459 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1460 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1461 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1462 }
1463}
1464
1465
1466/**
1467 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1468 * VMCS are correct.
1469 *
1470 * @param pVCpu The cross context virtual CPU structure.
1471 * @param pVmcsInfo The VMCS info. object.
1472 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1473 */
1474static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1475{
1476 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1477
1478 /* Read the various MSR-area counts from the VMCS. */
1479 uint32_t cEntryLoadMsrs;
1480 uint32_t cExitStoreMsrs;
1481 uint32_t cExitLoadMsrs;
1482 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1483 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1484 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1485
1486 /* Verify all the MSR counts are the same. */
1487 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1488 Assert(cExitStoreMsrs == cExitLoadMsrs);
1489 uint32_t const cMsrs = cExitLoadMsrs;
1490
1491 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1492 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1493
1494 /* Verify the MSR counts are within the allocated page size. */
1495 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1496
1497 /* Verify the relevant contents of the MSR areas match. */
1498 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1499 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1500 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1501 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1502 for (uint32_t i = 0; i < cMsrs; i++)
1503 {
1504 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1505 if (fSeparateExitMsrStorePage)
1506 {
1507 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1508 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1509 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1510 }
1511
1512 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1513 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1514 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1515
1516 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1517 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1518 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1519 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1520
1521 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1522 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1523 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1524 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1525
1526 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1527 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1528 {
1529 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1530 if (fIsEferMsr)
1531 {
1532 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1533 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1534 }
1535 else
1536 {
1537 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1538 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1539 if ( pVM->hmr0.s.vmx.fLbr
1540 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1541 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1542 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1543 {
1544 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1545 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1546 pGuestMsrLoad->u32Msr, cMsrs));
1547 }
1548 else if (!fIsNstGstVmcs)
1549 {
1550 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1551 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1552 }
1553 else
1554 {
1555 /*
1556 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1557 * execute a nested-guest with MSR passthrough.
1558 *
1559 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1560 * allow passthrough too.
1561 */
1562 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1563 Assert(pvMsrBitmapNstGst);
1564 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1565 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1566 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1567 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1568 }
1569 }
1570 }
1571
1572 /* Move to the next MSR. */
1573 pHostMsrLoad++;
1574 pGuestMsrLoad++;
1575 pGuestMsrStore++;
1576 }
1577}
1578
1579#endif /* VBOX_STRICT */
1580
1581/**
1582 * Flushes the TLB using EPT.
1583 *
1584 * @returns VBox status code.
1585 * @param pVCpu The cross context virtual CPU structure of the calling
1586 * EMT. Can be NULL depending on @a enmTlbFlush.
1587 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1588 * enmTlbFlush.
1589 * @param enmTlbFlush Type of flush.
1590 *
1591 * @remarks Caller is responsible for making sure this function is called only
1592 * when NestedPaging is supported and providing @a enmTlbFlush that is
1593 * supported by the CPU.
1594 * @remarks Can be called with interrupts disabled.
1595 */
1596static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1597{
1598 uint64_t au64Descriptor[2];
1599 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1600 au64Descriptor[0] = 0;
1601 else
1602 {
1603 Assert(pVCpu);
1604 Assert(pVmcsInfo);
1605 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1606 }
1607 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1608
1609 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1610 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1611
1612 if ( RT_SUCCESS(rc)
1613 && pVCpu)
1614 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1615}
1616
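/*
 * Illustrative usage of hmR0VmxFlushEpt() above (a sketch only, not called from here):
 *
 *     hmR0VmxFlushEpt(pVCpu, pVmcsInfo, VMXTLBFLUSHEPT_SINGLE_CONTEXT);
 *         - invalidates mappings tagged with this VMCS's EPTP (descriptor[0]).
 *
 *     hmR0VmxFlushEpt(NULL, NULL, VMXTLBFLUSHEPT_ALL_CONTEXTS);
 *         - the EPTP is ignored for an all-contexts flush, so both pointers may be
 *           NULL as noted in the function remarks.
 */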
1617
1618/**
1619 * Flushes the TLB using VPID.
1620 *
1621 * @returns VBox status code.
1622 * @param pVCpu The cross context virtual CPU structure of the calling
1623 * EMT. Can be NULL depending on @a enmTlbFlush.
1624 * @param enmTlbFlush Type of flush.
1625 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1626 * on @a enmTlbFlush).
1627 *
1628 * @remarks Can be called with interrupts disabled.
1629 */
1630static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1631{
1632 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1633
1634 uint64_t au64Descriptor[2];
1635 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1636 {
1637 au64Descriptor[0] = 0;
1638 au64Descriptor[1] = 0;
1639 }
1640 else
1641 {
1642 AssertPtr(pVCpu);
1643 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1644 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1645 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1646 au64Descriptor[1] = GCPtr;
1647 }
1648
1649 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1650 AssertMsg(rc == VINF_SUCCESS,
1651 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1652
1653 if ( RT_SUCCESS(rc)
1654 && pVCpu)
1655 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1656 NOREF(rc);
1657}
1658
1659
1660/**
1661 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1662 * otherwise there is nothing really to invalidate.
1663 *
1664 * @returns VBox status code.
1665 * @param pVCpu The cross context virtual CPU structure.
1666 * @param GCVirt Guest virtual address of the page to invalidate.
1667 */
1668VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1669{
1670 AssertPtr(pVCpu);
1671 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1672
1673 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1674 {
1675 /*
1676 * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
1677 * the EPT case. See @bugref{6043} and @bugref{6177}.
1678 *
1679 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1680 * as this function may be called in a loop with individual addresses.
1681 */
1682 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1683 if (pVM->hmr0.s.vmx.fVpid)
1684 {
1685 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1686 {
1687 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1688 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1689 }
1690 else
1691 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1692 }
1693 else if (pVM->hmr0.s.fNestedPaging)
1694 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1695 }
1696
1697 return VINF_SUCCESS;
1698}
1699
1700
1701/**
1702 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1703 * case where neither EPT nor VPID is supported by the CPU.
1704 *
1705 * @param pHostCpu The HM physical-CPU structure.
1706 * @param pVCpu The cross context virtual CPU structure.
1707 *
1708 * @remarks Called with interrupts disabled.
1709 */
1710static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1711{
1712 AssertPtr(pVCpu);
1713 AssertPtr(pHostCpu);
1714
1715 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1716
1717 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1718 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1719 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1720 pVCpu->hmr0.s.fForceTLBFlush = false;
1721 return;
1722}
1723
1724
1725/**
1726 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1727 *
1728 * @param pHostCpu The HM physical-CPU structure.
1729 * @param pVCpu The cross context virtual CPU structure.
1730 * @param pVmcsInfo The VMCS info. object.
1731 *
1732 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
1733 * nomenclature. The reason is to avoid confusion in the compare statements,
1734 * since the host-CPU copies are named "ASID".
1735 *
1736 * @remarks Called with interrupts disabled.
1737 */
1738static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1739{
1740#ifdef VBOX_WITH_STATISTICS
1741 bool fTlbFlushed = false;
1742# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1743# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1744 if (!fTlbFlushed) \
1745 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1746 } while (0)
1747#else
1748# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1749# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1750#endif
1751
1752 AssertPtr(pVCpu);
1753 AssertPtr(pHostCpu);
1754 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1755
1756 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1757 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1758 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1759 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1760
1761 /*
1762 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1763 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1764 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1765 * cannot reuse the current ASID anymore.
1766 */
1767 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1768 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1769 {
1770 ++pHostCpu->uCurrentAsid;
1771 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1772 {
1773 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1774 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1775 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1776 }
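        /*
         * Worked example (the limit used here is illustrative, not the real g_uHmMaxAsid):
         * if g_uHmMaxAsid were 16, VPIDs 1..15 are handed out in turn; the next assignment
         * wraps uCurrentAsid back to 1, bumps cTlbFlushes so every VCPU that later runs on
         * this host CPU re-enters this path, and sets fFlushAsidBeforeUse so a reused VPID
         * is flushed before its first use.
         */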
1777
1778 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1779 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1780 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1781
1782 /*
1783 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1784 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1785 */
1786 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1787 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1788 HMVMX_SET_TAGGED_TLB_FLUSHED();
1789 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1790 }
1791 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1792 {
1793 /*
1794 * Changes to the EPT paging structures by the VMM require flushing-by-EPT as the CPU
1795 * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
1796 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1797 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1798 * mappings, see @bugref{6568}.
1799 *
1800 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1801 */
1802 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1803 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1804 HMVMX_SET_TAGGED_TLB_FLUSHED();
1805 }
1806 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1807 {
1808 /*
1809 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1810 * address which requires flushing the TLB of EPT cached structures.
1811 *
1812 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1813 */
1814 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1815 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1816 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1817 HMVMX_SET_TAGGED_TLB_FLUSHED();
1818 }
1819
1820
1821 pVCpu->hmr0.s.fForceTLBFlush = false;
1822 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1823
1824 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1825 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1826 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1827 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1828 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1829 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1830 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1831 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1832 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1833
1834 /* Update VMCS with the VPID. */
1835 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1836 AssertRC(rc);
1837
1838#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1839}
1840
1841
1842/**
1843 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1844 *
1845 * @param pHostCpu The HM physical-CPU structure.
1846 * @param pVCpu The cross context virtual CPU structure.
1847 * @param pVmcsInfo The VMCS info. object.
1848 *
1849 * @remarks Called with interrupts disabled.
1850 */
1851static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1852{
1853 AssertPtr(pVCpu);
1854 AssertPtr(pHostCpu);
1855 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1856 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1857 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1858
1859 /*
1860 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1861 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1862 */
1863 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1864 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1865 {
1866 pVCpu->hmr0.s.fForceTLBFlush = true;
1867 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1868 }
1869
1870 /* Check for explicit TLB flushes. */
1871 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1872 {
1873 pVCpu->hmr0.s.fForceTLBFlush = true;
1874 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1875 }
1876
1877 /* Check for TLB flushes while switching to/from a nested-guest. */
1878 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1879 {
1880 pVCpu->hmr0.s.fForceTLBFlush = true;
1881 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1882 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1883 }
1884
1885 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1886 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1887
1888 if (pVCpu->hmr0.s.fForceTLBFlush)
1889 {
1890 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1891 pVCpu->hmr0.s.fForceTLBFlush = false;
1892 }
1893}
1894
1895
1896/**
1897 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1898 *
1899 * @param pHostCpu The HM physical-CPU structure.
1900 * @param pVCpu The cross context virtual CPU structure.
1901 *
1902 * @remarks Called with interrupts disabled.
1903 */
1904static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1905{
1906 AssertPtr(pVCpu);
1907 AssertPtr(pHostCpu);
1908 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1909 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1910 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1911
1912 /*
1913 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1914 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1915 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1916 * cannot reuse the current ASID anymore.
1917 */
1918 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1919 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1920 {
1921 pVCpu->hmr0.s.fForceTLBFlush = true;
1922 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1923 }
1924
1925 /* Check for explicit TLB flushes. */
1926 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1927 {
1928 /*
1929 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1930 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1931 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1932 * include fExplicitFlush's too) - an obscure corner case.
1933 */
1934 pVCpu->hmr0.s.fForceTLBFlush = true;
1935 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1936 }
1937
1938 /* Check for TLB flushes while switching to/from a nested-guest. */
1939 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1940 {
1941 pVCpu->hmr0.s.fForceTLBFlush = true;
1942 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1943 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1944 }
1945
1946 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1947 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1948 if (pVCpu->hmr0.s.fForceTLBFlush)
1949 {
1950 ++pHostCpu->uCurrentAsid;
1951 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1952 {
1953 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1954 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1955 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1956 }
1957
1958 pVCpu->hmr0.s.fForceTLBFlush = false;
1959 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1960 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1961 if (pHostCpu->fFlushAsidBeforeUse)
1962 {
1963 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1964 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1965 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1966 {
1967 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1968 pHostCpu->fFlushAsidBeforeUse = false;
1969 }
1970 else
1971 {
1972 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1973 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1974 }
1975 }
1976 }
1977
1978 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1979 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1980 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1981 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1982 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1983 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1984 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1985
1986 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1987 AssertRC(rc);
1988}
1989
1990
1991/**
1992 * Flushes the guest TLB entry based on CPU capabilities.
1993 *
1994 * @param pHostCpu The HM physical-CPU structure.
1995 * @param pVCpu The cross context virtual CPU structure.
1996 * @param pVmcsInfo The VMCS info. object.
1997 *
1998 * @remarks Called with interrupts disabled.
1999 */
2000static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2001{
2002#ifdef HMVMX_ALWAYS_FLUSH_TLB
2003 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2004#endif
2005 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2006 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2007 {
2008 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2009 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2010 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2011 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2012 default:
2013 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2014 break;
2015 }
2016 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2017}
2018
2019
2020/**
2021 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2022 * TLB entries from the host TLB before VM-entry.
2023 *
2024 * @returns VBox status code.
2025 * @param pVM The cross context VM structure.
2026 */
2027static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2028{
2029 /*
2030 * Determine optimal flush type for nested paging.
2031 * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
2032 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2033 */
2034 if (pVM->hmr0.s.fNestedPaging)
2035 {
2036 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2037 {
2038 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2039 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2040 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2041 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2042 else
2043 {
2044 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2045 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2046 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2047 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2048 }
2049
2050 /* Make sure the write-back cacheable memory type for EPT is supported. */
2051 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2052 {
2053 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2054 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2055 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2056 }
2057
2058 /* EPT requires a page-walk length of 4. */
2059 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2060 {
2061 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2062 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2063 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2064 }
2065 }
2066 else
2067 {
2068 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2069 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2070 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2071 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2072 }
2073 }
2074
2075 /*
2076 * Determine optimal flush type for VPID.
2077 */
2078 if (pVM->hmr0.s.vmx.fVpid)
2079 {
2080 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2081 {
2082 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2083 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2084 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2085 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2086 else
2087 {
2088 /* Neither SINGLE nor ALL-context flush types for VPID is supported by the CPU. Ignore VPID capability. */
2089 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2090 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2091 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2092 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2093 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2094 pVM->hmr0.s.vmx.fVpid = false;
2095 }
2096 }
2097 else
2098 {
2099 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2100 Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
2101 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2102 pVM->hmr0.s.vmx.fVpid = false;
2103 }
2104 }
2105
2106 /*
2107 * Setup the handler for flushing tagged-TLBs.
2108 */
2109 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2110 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2111 else if (pVM->hmr0.s.fNestedPaging)
2112 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2113 else if (pVM->hmr0.s.vmx.fVpid)
2114 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2115 else
2116 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2117
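    /*
     * Summary of the selection above and the corresponding handlers dispatched
     * from hmR0VmxFlushTaggedTlb() (EPT here means nested paging):
     *
     *      EPT    VPID    enmTlbFlushType              Handler
     *      ---    ----    --------------------------   --------------------------
     *      yes    yes     VMXTLBFLUSHTYPE_EPT_VPID     hmR0VmxFlushTaggedTlbBoth
     *      yes    no      VMXTLBFLUSHTYPE_EPT          hmR0VmxFlushTaggedTlbEpt
     *      no     yes     VMXTLBFLUSHTYPE_VPID         hmR0VmxFlushTaggedTlbVpid
     *      no     no      VMXTLBFLUSHTYPE_NONE         hmR0VmxFlushTaggedTlbNone
     */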
2118
2119 /*
2120 * Copy out the result to ring-3.
2121 */
2122 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2123 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2124 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2125 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2126 return VINF_SUCCESS;
2127}
2128
2129
2130/**
2131 * Sets up the LBR MSR ranges based on the host CPU.
2132 *
2133 * @returns VBox status code.
2134 * @param pVM The cross context VM structure.
2135 *
2136 * @sa nemR3DarwinSetupLbrMsrRange
2137 */
2138static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2139{
2140 Assert(pVM->hmr0.s.vmx.fLbr);
2141 uint32_t idLbrFromIpMsrFirst;
2142 uint32_t idLbrFromIpMsrLast;
2143 uint32_t idLbrToIpMsrFirst;
2144 uint32_t idLbrToIpMsrLast;
2145 uint32_t idLbrTosMsr;
2146
2147 /*
2148 * Determine the LBR MSRs supported for this host CPU family and model.
2149 *
2150 * See Intel spec. 17.4.8 "LBR Stack".
2151 * See Intel "Model-Specific Registers" spec.
2152 */
2153 uint32_t const uFamilyModel = (pVM->cpum.ro.HostFeatures.uFamily << 8)
2154 | pVM->cpum.ro.HostFeatures.uModel;
2155 switch (uFamilyModel)
2156 {
2157 case 0x0f01: case 0x0f02:
2158 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2159 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2160 idLbrToIpMsrFirst = 0x0;
2161 idLbrToIpMsrLast = 0x0;
2162 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2163 break;
2164
2165 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2166 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2167 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2168 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2169 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2170 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2171 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2172 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2173 break;
2174
2175 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2176 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2177 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2178 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2179 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2180 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2181 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2182 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2183 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2184 break;
2185
2186 case 0x0617: case 0x061d: case 0x060f:
2187 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2188 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2189 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2190 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2191 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2192 break;
2193
2194 /* Atom and related microarchitectures we don't care about:
2195 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2196 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2197 case 0x0636: */
2198 /* All other CPUs: */
2199 default:
2200 {
2201 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2202 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2203 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2204 }
2205 }
2206
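    /*
     * Worked example: a family 6, model 0x9E host encodes as uFamilyModel
     * = (6 << 8) | 0x9e = 0x069e, which the switch above maps to the 32-deep
     * MSR_LASTBRANCH_0..31_FROM_IP/TO_IP stack, so cLbrStack below computes to 32.
     */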
2207 /*
2208 * Validate.
2209 */
2210 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2211 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2212 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2213 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2214 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2215 {
2216 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2217 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2218 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2219 }
2220 NOREF(pVCpu0);
2221
2222 /*
2223 * Update the LBR info. in the VM struct. for use later.
2224 */
2225 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2226
2227 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2228 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2229
2230 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2231 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2232 return VINF_SUCCESS;
2233}
2234
2235
2236#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2237/**
2238 * Sets up the shadow VMCS fields arrays.
2239 *
2240 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2241 * executing the guest.
2242 *
2243 * @returns VBox status code.
2244 * @param pVM The cross context VM structure.
2245 */
2246static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2247{
2248 /*
2249 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2250 * when the host does not support it.
2251 */
2252 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2253 if ( !fGstVmwriteAll
2254 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2255 { /* likely. */ }
2256 else
2257 {
2258 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2259 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2260 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2261 }
2262
2263 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2264 uint32_t cRwFields = 0;
2265 uint32_t cRoFields = 0;
2266 for (uint32_t i = 0; i < cVmcsFields; i++)
2267 {
2268 VMXVMCSFIELD VmcsField;
2269 VmcsField.u = g_aVmcsFields[i];
2270
2271 /*
2272 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2273 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2274 * in the shadow VMCS fields array as they would be redundant.
2275 *
2276 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2277 * we must not include it in the shadow VMCS fields array. Guests attempting to
2278 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2279 * the required behavior.
2280 */
2281 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2282 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2283 {
2284 /*
2285 * Read-only fields are placed in a separate array so that while syncing shadow
2286 * VMCS fields later (which is more performance critical) we can avoid branches.
2287 *
2288 * However, if the guest can write to all fields (including read-only fields),
2289 * we treat it as a read/write field. Otherwise, writing to these fields would
2290 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2291 */
2292 if ( fGstVmwriteAll
2293 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2294 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2295 else
2296 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2297 }
2298 }
2299
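    /*
     * For instance, a read/write field such as VMX_VMCS_GUEST_RIP always lands in
     * paShadowVmcsFields, whereas a read-only field such as VMX_VMCS32_RO_EXIT_REASON
     * goes to paShadowVmcsRoFields unless the guest has been given VMWRITE-all (in
     * which case it is treated as read/write as explained above).
     */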
2300 /* Update the counts. */
2301 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2302 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2303 return VINF_SUCCESS;
2304}
2305
2306
2307/**
2308 * Sets up the VMREAD and VMWRITE bitmaps.
2309 *
2310 * @param pVM The cross context VM structure.
2311 */
2312static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2313{
2314 /*
2315 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2316 */
2317 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2318 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2319 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2320 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2321 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2322
2323 /*
2324 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2325 * VMREAD and VMWRITE bitmaps.
2326 */
2327 {
2328 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2329 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2330 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2331 {
2332 uint32_t const uVmcsField = paShadowVmcsFields[i];
2333 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2334 Assert(uVmcsField >> 3 < cbBitmap);
2335 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2336 ASMBitClear(pbVmwriteBitmap + (uVmcsField >> 3), uVmcsField & 7);
2337 }
2338 }
2339
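    /*
     * For example, the guest RIP field (encoding 0x681e) clears bit 0x681e of each
     * bitmap, i.e. bit 6 of byte 0xd03 (0x681e >> 3), so a guest VMREAD/VMWRITE of
     * that field is satisfied from the shadow VMCS instead of causing a VM-exit
     * once VMCS shadowing is enabled.
     */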
2340 /*
2341 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2342 * if the host supports VMWRITE to all supported VMCS fields.
2343 */
2344 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2345 {
2346 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2347 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2348 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2349 {
2350 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2351 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2352 Assert(uVmcsField >> 3 < cbBitmap);
2353 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2354 }
2355 }
2356}
2357#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2358
2359
2360/**
2361 * Sets up the virtual-APIC page address for the VMCS.
2362 *
2363 * @param pVmcsInfo The VMCS info. object.
2364 */
2365DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2366{
2367 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2368 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2369 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2370 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2371 AssertRC(rc);
2372}
2373
2374
2375/**
2376 * Sets up the MSR-bitmap address for the VMCS.
2377 *
2378 * @param pVmcsInfo The VMCS info. object.
2379 */
2380DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2381{
2382 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2383 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2384 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2385 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2386 AssertRC(rc);
2387}
2388
2389
2390/**
2391 * Sets up the APIC-access page address for the VMCS.
2392 *
2393 * @param pVCpu The cross context virtual CPU structure.
2394 */
2395DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2396{
2397 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2398 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2399 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2400 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2401 AssertRC(rc);
2402}
2403
2404#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2405
2406/**
2407 * Sets up the VMREAD bitmap address for the VMCS.
2408 *
2409 * @param pVCpu The cross context virtual CPU structure.
2410 */
2411DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2412{
2413 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2414 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2415 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2416 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2417 AssertRC(rc);
2418}
2419
2420
2421/**
2422 * Sets up the VMWRITE bitmap address for the VMCS.
2423 *
2424 * @param pVCpu The cross context virtual CPU structure.
2425 */
2426DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2427{
2428 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2429 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2430 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2431 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2432 AssertRC(rc);
2433}
2434
2435#endif
2436
2437/**
2438 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2439 * in the VMCS.
2440 *
2441 * @returns VBox status code.
2442 * @param pVmcsInfo The VMCS info. object.
2443 */
2444DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2445{
2446 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2447 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2448 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2449
2450 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2451 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2452 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2453
2454 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2455 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2456 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2457
2458 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2459 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2460 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2461 return VINF_SUCCESS;
2462}
2463
2464
2465/**
2466 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2467 *
2468 * @param pVCpu The cross context virtual CPU structure.
2469 * @param pVmcsInfo The VMCS info. object.
2470 */
2471static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2472{
2473 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2474
2475 /*
2476 * By default, ensure guest attempts to access any MSR cause VM-exits.
2477 * This shall later be relaxed for specific MSRs as necessary.
2478 *
2479 * Note: For nested-guests, the entire bitmap will be merged prior to
2480 * executing the nested-guest using hardware-assisted VMX and hence there
2481 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2482 */
2483 Assert(pVmcsInfo->pvMsrBitmap);
2484 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
2485
2486 /*
2487 * The guest can access the following MSRs (read, write) without causing
2488 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2489 */
2490 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2491 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2492 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2493 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2494 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2495 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2496
2497 /*
2498 * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
2499 * associated with them. We never need to intercept access (writes need to be
2500 * executed without causing a VM-exit, reads will #GP fault anyway).
2501 *
2502 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2503 * read/write them. We swap the guest/host MSR value using the
2504 * auto-load/store MSR area.
2505 */
2506 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2507 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2508 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2509 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2510 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2511 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2512
2513 /*
2514 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2515 * required for 64-bit guests.
2516 */
2517 if (pVM->hmr0.s.fAllow64BitGuests)
2518 {
2519 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2520 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2521 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2522 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2523 }
2524
2525 /*
2526 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2527 */
2528#ifdef VBOX_STRICT
2529 Assert(pVmcsInfo->pvMsrBitmap);
2530 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2531 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2532#endif
2533}
2534
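/*
 * A minimal sketch (never built) of how an MSR read-intercept bit is located in the
 * 4K MSR bitmap, following the Intel SDM layout: bytes 0x000-0x3ff hold read
 * intercepts for MSRs 0x00000000-0x00001fff, 0x400-0x7ff read intercepts for
 * 0xc0000000-0xc0001fff, and 0x800-0xfff the corresponding write intercepts. The
 * helper name is invented for illustration only; the code above uses
 * hmR0VmxSetMsrPermission() and CPUMGetVmxMsrPermission() for this.
 */
#if 0
static bool hmR0VmxSketchIsMsrReadIntercepted(uint8_t const *pbMsrBitmap, uint32_t idMsr)
{
    uint32_t offBitmap;
    if (idMsr <= UINT32_C(0x00001fff))
        offBitmap = 0x000;                      /* Read intercepts for low MSRs. */
    else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
    {
        offBitmap = 0x400;                      /* Read intercepts for high MSRs. */
        idMsr    &= UINT32_C(0x00001fff);
    }
    else
        return true;                            /* MSRs outside both ranges always cause VM-exits. */

    /* E.g. MSR_K8_LSTAR (0xc0000082) maps to bit 2 of byte 0x400 + (0x82 >> 3) = 0x410. */
    return RT_BOOL(pbMsrBitmap[offBitmap + (idMsr >> 3)] & RT_BIT_32(idMsr & 7));
}
#endif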
2535
2536/**
2537 * Sets up pin-based VM-execution controls in the VMCS.
2538 *
2539 * @returns VBox status code.
2540 * @param pVCpu The cross context virtual CPU structure.
2541 * @param pVmcsInfo The VMCS info. object.
2542 */
2543static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2544{
2545 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2546 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2547 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
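    /*
     * Worked example of the allowed0/allowed1 convention used by this and the other
     * control-setup routines below (the values are made up for illustration): with
     * allowed0=0x16 and allowed1=0xff9e, bits 1, 2 and 4 must be set, bits 0, 5 and 6
     * must be clear, and the remaining bits are free. Requesting a control bit that is
     * 0 in allowed1 makes (fVal & fZap) != fVal in the check further down, which we
     * report as VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO.
     */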
2548
2549 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2550 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2551
2552 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2553 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2554
2555 /* Enable the VMX-preemption timer. */
2556 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2557 {
2558 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2559 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2560 }
2561
2562#if 0
2563 /* Enable posted-interrupt processing. */
2564 if (pVM->hm.s.fPostedIntrs)
2565 {
2566 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2567 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2568 fVal |= VMX_PIN_CTLS_POSTED_INT;
2569 }
2570#endif
2571
2572 if ((fVal & fZap) != fVal)
2573 {
2574 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2575 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2576 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2577 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2578 }
2579
2580 /* Commit it to the VMCS and update our cache. */
2581 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2582 AssertRC(rc);
2583 pVmcsInfo->u32PinCtls = fVal;
2584
2585 return VINF_SUCCESS;
2586}
2587
2588
2589/**
2590 * Sets up secondary processor-based VM-execution controls in the VMCS.
2591 *
2592 * @returns VBox status code.
2593 * @param pVCpu The cross context virtual CPU structure.
2594 * @param pVmcsInfo The VMCS info. object.
2595 */
2596static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2597{
2598 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2599 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2600 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2601
2602 /* WBINVD causes a VM-exit. */
2603 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2604 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2605
2606 /* Enable EPT (aka nested-paging). */
2607 if (pVM->hmr0.s.fNestedPaging)
2608 fVal |= VMX_PROC_CTLS2_EPT;
2609
2610 /* Enable the INVPCID instruction if we expose it to the guest and it is supported
2611 by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
2612 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2613 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2614 fVal |= VMX_PROC_CTLS2_INVPCID;
2615
2616 /* Enable VPID. */
2617 if (pVM->hmr0.s.vmx.fVpid)
2618 fVal |= VMX_PROC_CTLS2_VPID;
2619
2620 /* Enable unrestricted guest execution. */
2621 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2622 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2623
2624#if 0
2625 if (pVM->hm.s.fVirtApicRegs)
2626 {
2627 /* Enable APIC-register virtualization. */
2628 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2629 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2630
2631 /* Enable virtual-interrupt delivery. */
2632 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2633 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2634 }
2635#endif
2636
2637 /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
2638 where the TPR shadow resides. */
2639 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2640 * done dynamically. */
2641 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2642 {
2643 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2644 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2645 }
2646
2647 /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
2648 by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
2649 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2650 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2651 fVal |= VMX_PROC_CTLS2_RDTSCP;
2652
2653 /* Enable Pause-Loop exiting. */
2654 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2655 && pVM->hm.s.vmx.cPleGapTicks
2656 && pVM->hm.s.vmx.cPleWindowTicks)
2657 {
2658 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2659
2660 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2661 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2662 }
2663
2664 if ((fVal & fZap) != fVal)
2665 {
2666 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2667 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2668 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2669 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2670 }
2671
2672 /* Commit it to the VMCS and update our cache. */
2673 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2674 AssertRC(rc);
2675 pVmcsInfo->u32ProcCtls2 = fVal;
2676
2677 return VINF_SUCCESS;
2678}
2679
2680
2681/**
2682 * Sets up processor-based VM-execution controls in the VMCS.
2683 *
2684 * @returns VBox status code.
2685 * @param pVCpu The cross context virtual CPU structure.
2686 * @param pVmcsInfo The VMCS info. object.
2687 */
2688static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2689{
2690 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2691 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2692 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2693
2694 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2695 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2696 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2697 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2698 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2699 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2700 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2701
2702 /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not -always- required to be set or cleared. */
2703 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2704 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2705 {
2706 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2707 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2708 }
2709
2710 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2711 if (!pVM->hmr0.s.fNestedPaging)
2712 {
2713 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2714 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2715 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2716 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2717 }
2718
2719 /* Use TPR shadowing if supported by the CPU. */
2720 if ( PDMHasApic(pVM)
2721 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2722 {
2723 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2724 /* CR8 writes cause a VM-exit based on TPR threshold. */
2725 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2726 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2727 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2728 }
2729 else
2730 {
2731 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2732 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2733 if (pVM->hmr0.s.fAllow64BitGuests)
2734 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2735 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2736 }
2737
2738 /* Use MSR-bitmaps if supported by the CPU. */
2739 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2740 {
2741 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2742 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2743 }
2744
2745 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2746 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2747 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2748
2749 if ((fVal & fZap) != fVal)
2750 {
2751 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2752 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2753 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2754 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2755 }
2756
2757 /* Commit it to the VMCS and update our cache. */
2758 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2759 AssertRC(rc);
2760 pVmcsInfo->u32ProcCtls = fVal;
2761
2762 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2763 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2764 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2765
2766 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2767 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2768 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2769
2770 /* Sanity check, should not really happen. */
2771 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2772 { /* likely */ }
2773 else
2774 {
2775 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2776 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2777 }
2778
2779 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2780 return VINF_SUCCESS;
2781}
2782
2783
2784/**
2785 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2786 * Processor-based VM-execution) control fields in the VMCS.
2787 *
2788 * @returns VBox status code.
2789 * @param pVCpu The cross context virtual CPU structure.
2790 * @param pVmcsInfo The VMCS info. object.
2791 */
2792static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2793{
2794#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2795 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2796 {
2797 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2798 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2799 }
2800#endif
2801
2802 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2803 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2804 AssertRC(rc);
2805
2806 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2807 if (RT_SUCCESS(rc))
2808 {
2809 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2810 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2811
2812 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2813 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2814
2815 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2816 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2817
2818 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2819 {
2820 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2821 AssertRC(rc);
2822 }
2823 return VINF_SUCCESS;
2824 }
2825 else
2826 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2827 return rc;
2828}
2829
2830
2831/**
2832 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2833 *
2834 * We shall set up those exception intercepts that don't change during the
2835 * lifetime of the VM here. The rest are done dynamically while loading the
2836 * guest state.
2837 *
2838 * @param pVCpu The cross context virtual CPU structure.
2839 * @param pVmcsInfo The VMCS info. object.
2840 */
2841static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2842{
2843 /*
2844 * The following exceptions are always intercepted:
2845 *
2846 * #AC - To prevent the guest from hanging the CPU and for dealing with
2847 * split-lock detecting host configs.
2848 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2849 * recursive #DBs can cause a CPU hang.
2850 * #PF - To sync our shadow page tables when nested-paging is not used.
2851 */
2852 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2853 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2854 | RT_BIT(X86_XCPT_DB)
2855 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
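    /* E.g. with nested paging the bitmap is RT_BIT(1) | RT_BIT(17) = 0x20002; without it,
       #PF (vector 14) is added as well, giving 0x24002. */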
2856
2857 /* Commit it to the VMCS. */
2858 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2859 AssertRC(rc);
2860
2861 /* Update our cache of the exception bitmap. */
2862 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2863}
2864
2865
2866#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2867/**
2868 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
2869 *
2870 * @returns VBox status code.
2871 * @param pVmcsInfo The VMCS info. object.
2872 */
2873static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
2874{
2875 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2876 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2877 AssertRC(rc);
2878
2879 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2880 if (RT_SUCCESS(rc))
2881 {
2882 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2883 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2884
2885 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
2886 Assert(!pVmcsInfo->u64Cr0Mask);
2887 Assert(!pVmcsInfo->u64Cr4Mask);
2888 return VINF_SUCCESS;
2889 }
2890 LogRelFunc(("Failed to set up the VMCS link pointer in the nested-guest VMCS. rc=%Rrc\n", rc));
2891 return rc;
2892}
2893#endif
2894
2895
2896/**
2897 * Selector FNHMVMXSTARTVM implementation.
2898 */
2899static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2900{
2901 hmR0VmxUpdateStartVmFunction(pVCpu);
2902 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2903}
2904
2905
2906/**
2907 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2908 * VMX.
2909 *
2910 * @returns VBox status code.
2911 * @param pVCpu The cross context virtual CPU structure.
2912 * @param pVmcsInfo The VMCS info. object.
2913 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2914 */
2915static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2916{
2917 Assert(pVmcsInfo->pvVmcs);
2918 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2919
2920 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2921 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2922 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2923
2924 LogFlowFunc(("\n"));
2925
2926 /*
2927 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2928 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2929 */
2930 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2931 if (RT_SUCCESS(rc))
2932 {
2933 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2934 if (RT_SUCCESS(rc))
2935 {
2936 /*
2937 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2938 * The host is always 64-bit since we no longer support 32-bit hosts.
2939 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2940 */
2941 if (!fIsNstGstVmcs)
2942 {
2943 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2944 if (RT_SUCCESS(rc))
2945 {
2946 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2947 if (RT_SUCCESS(rc))
2948 {
2949 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2950 if (RT_SUCCESS(rc))
2951 {
2952 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2953#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2954 /*
2955 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2956 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2957 * making it fit for use when VMCS shadowing is later enabled.
2958 */
2959 if (pVmcsInfo->pvShadowVmcs)
2960 {
2961 VMXVMCSREVID VmcsRevId;
2962 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2963 VmcsRevId.n.fIsShadowVmcs = 1;
2964 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2965 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2966 if (RT_SUCCESS(rc))
2967 { /* likely */ }
2968 else
2969 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2970 }
2971#endif
2972 }
2973 else
2974 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2975 }
2976 else
2977 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2978 }
2979 else
2980 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
2981 }
2982 else
2983 {
2984#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2985 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
2986 if (RT_SUCCESS(rc))
2987 { /* likely */ }
2988 else
2989 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
2990#else
2991 AssertFailed();
2992#endif
2993 }
2994 }
2995 else
2996 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc));
2997 }
2998 else
2999 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc));
3000
3001 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3002 if (RT_SUCCESS(rc))
3003 {
3004 rc = hmR0VmxClearVmcs(pVmcsInfo);
3005 if (RT_SUCCESS(rc))
3006 { /* likely */ }
3007 else
3008 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", pszVmcs, rc));
3009 }
3010
3011 /*
3012 * Update the last-error record both for failures and success, so we
3013 * can propagate the status code back to ring-3 for diagnostics.
3014 */
3015 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3016 NOREF(pszVmcs);
3017 return rc;
3018}
3019
3020
3021/**
3022 * Does global VT-x initialization (called during module initialization).
3023 *
3024 * @returns VBox status code.
3025 */
3026VMMR0DECL(int) VMXR0GlobalInit(void)
3027{
3028#ifdef HMVMX_USE_FUNCTION_TABLE
3029 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3030# ifdef VBOX_STRICT
3031 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3032 Assert(g_aVMExitHandlers[i].pfn);
3033# endif
3034#endif
3035 return VINF_SUCCESS;
3036}
3037
3038
3039/**
3040 * Does global VT-x termination (called during module termination).
3041 */
3042VMMR0DECL(void) VMXR0GlobalTerm()
3043{
3044 /* Nothing to do currently. */
3045}
3046
3047
3048/**
3049 * Sets up and activates VT-x on the current CPU.
3050 *
3051 * @returns VBox status code.
3052 * @param pHostCpu The HM physical-CPU structure.
3053 * @param pVM The cross context VM structure. Can be
3054 * NULL after a host resume operation.
3055 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3056 * fEnabledByHost is @c true).
3057 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3058 * @a fEnabledByHost is @c true).
3059 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3060 * enable VT-x on the host.
3061 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3062 */
3063VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3064 PCSUPHWVIRTMSRS pHwvirtMsrs)
3065{
3066 AssertPtr(pHostCpu);
3067 AssertPtr(pHwvirtMsrs);
3068 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3069
3070 /* Enable VT-x if it's not already enabled by the host. */
3071 if (!fEnabledByHost)
3072 {
3073 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3074 if (RT_FAILURE(rc))
3075 return rc;
3076 }
3077
3078 /*
3079 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
3080 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3081 * invalidated when flushing by VPID.
3082 */
3083 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3084 {
3085 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3086 pHostCpu->fFlushAsidBeforeUse = false;
3087 }
3088 else
3089 pHostCpu->fFlushAsidBeforeUse = true;
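    /* Note: the all-contexts INVEPT type invalidates EPT-derived (guest-physical and combined)
       mappings for every EPTP on this CPU; when it isn't available we instead flag that each
       VPID/ASID must be flushed before its first use on this CPU. */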
3090
3091 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3092 ++pHostCpu->cTlbFlushes;
3093
3094 return VINF_SUCCESS;
3095}
3096
3097
3098/**
3099 * Deactivates VT-x on the current CPU.
3100 *
3101 * @returns VBox status code.
3102 * @param pHostCpu The HM physical-CPU structure.
3103 * @param pvCpuPage Pointer to the VMXON region.
3104 * @param HCPhysCpuPage Physical address of the VMXON region.
3105 *
3106 * @remarks This function should never be called when SUPR0EnableVTx() or
3107 * similar was used to enable VT-x on the host.
3108 */
3109VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3110{
3111 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3112
3113 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3114 return hmR0VmxLeaveRootMode(pHostCpu);
3115}
3116
3117
3118/**
3119 * Does per-VM VT-x initialization.
3120 *
3121 * @returns VBox status code.
3122 * @param pVM The cross context VM structure.
3123 */
3124VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3125{
3126 AssertPtr(pVM);
3127 LogFlowFunc(("pVM=%p\n", pVM));
3128
3129 hmR0VmxStructsInit(pVM);
3130 int rc = hmR0VmxStructsAlloc(pVM);
3131 if (RT_FAILURE(rc))
3132 {
3133 LogRelFunc(("Failed to allocate VMX structures. rc=%Rrc\n", rc));
3134 return rc;
3135 }
3136
3137 /* Setup the crash dump page. */
3138#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3139 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3140 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3141#endif
3142 return VINF_SUCCESS;
3143}
3144
3145
3146/**
3147 * Does per-VM VT-x termination.
3148 *
3149 * @returns VBox status code.
3150 * @param pVM The cross context VM structure.
3151 */
3152VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3153{
3154 AssertPtr(pVM);
3155 LogFlowFunc(("pVM=%p\n", pVM));
3156
3157#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3158 if (pVM->hmr0.s.vmx.pbScratch)
3159 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3160#endif
3161 hmR0VmxStructsFree(pVM);
3162 return VINF_SUCCESS;
3163}
3164
3165
3166/**
3167 * Sets up the VM for execution using hardware-assisted VMX.
3168 * This function is only called once per-VM during initialization.
3169 *
3170 * @returns VBox status code.
3171 * @param pVM The cross context VM structure.
3172 */
3173VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3174{
3175 AssertPtr(pVM);
3176 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3177
3178 LogFlowFunc(("pVM=%p\n", pVM));
3179
3180 /*
3181 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3182 * without causing a #GP.
3183 */
3184 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3185 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3186 { /* likely */ }
3187 else
3188 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3189
3190 /*
3191 * Check that nested paging is supported if enabled and copy over the flag to the
3192 * ring-0 only structure.
3193 */
3194 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3195 AssertReturn( !fNestedPaging
3196 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3197 VERR_INCOMPATIBLE_CONFIG);
3198 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3199 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3200
3201 /*
3202 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3203 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3204 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3205 */
3206 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3207 AssertReturn( !fUnrestrictedGuest
3208 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3209 && fNestedPaging),
3210 VERR_INCOMPATIBLE_CONFIG);
3211 if ( !fUnrestrictedGuest
3212 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3213 || !pVM->hm.s.vmx.pRealModeTSS))
3214 {
3215 LogRelFunc(("Invalid real-on-v86 state.\n"));
3216 return VERR_INTERNAL_ERROR;
3217 }
3218 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3219
3220 /* Initialize these always, see hmR3InitFinalizeR0().*/
3221 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3222 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3223
3224 /* Setup the tagged-TLB flush handlers. */
3225 int rc = hmR0VmxSetupTaggedTlb(pVM);
3226 if (RT_FAILURE(rc))
3227 {
3228 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3229 return rc;
3230 }
3231
3232 /* Determine LBR capabilities. */
3233 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3234 if (pVM->hmr0.s.vmx.fLbr)
3235 {
3236 rc = hmR0VmxSetupLbrMsrRange(pVM);
3237 if (RT_FAILURE(rc))
3238 {
3239 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3240 return rc;
3241 }
3242 }
3243
3244#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3245 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3246 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3247 {
3248 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3249 if (RT_SUCCESS(rc))
3250 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3251 else
3252 {
3253 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3254 return rc;
3255 }
3256 }
3257#endif
3258
3259 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3260 {
3261 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3262 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3263
3264 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3265
3266 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3267 if (RT_SUCCESS(rc))
3268 {
3269#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3270 if (pVM->cpum.ro.GuestFeatures.fVmx)
3271 {
3272 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3273 if (RT_SUCCESS(rc))
3274 { /* likely */ }
3275 else
3276 {
3277 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3278 return rc;
3279 }
3280 }
3281#endif
3282 }
3283 else
3284 {
3285 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3286 return rc;
3287 }
3288 }
3289
3290 return VINF_SUCCESS;
3291}
3292
3293
3294/**
3295 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3296 * the VMCS.
3297 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3298 */
3299static uint64_t hmR0VmxExportHostControlRegs(void)
3300{
3301 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3302 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3303 uint64_t uHostCr4 = ASMGetCR4();
3304 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3305 return uHostCr4;
3306}
3307
3308
3309/**
3310 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3311 * the host-state area in the VMCS.
3312 *
3313 * @returns VBox status code.
3314 * @param pVCpu The cross context virtual CPU structure.
3315 * @param uHostCr4 The host CR4 value.
3316 */
3317static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3318{
3319 /*
3320 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3321 * will be messed up. We should -not- save the messed up state without restoring
3322 * the original host-state, see @bugref{7240}.
3323 *
3324 * This apparently can happen (most likely the FPU changes), so deal with it rather than
3325 * asserting. Was observed booting Solaris 10u10 32-bit guest.
3326 */
3327 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3328 {
3329 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3330 pVCpu->idCpu));
3331 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3332 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3333 }
3334
3335 /*
3336 * Get all the host info.
3337 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3338 * without also checking the cpuid bit.
3339 */
3340 uint32_t fRestoreHostFlags;
3341#if RT_INLINE_ASM_EXTERNAL
3342 if (uHostCr4 & X86_CR4_FSGSBASE)
3343 {
3344 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3345 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3346 }
3347 else
3348 {
3349 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3350 fRestoreHostFlags = 0;
3351 }
3352 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3353 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3354 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3355 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3356#else
3357 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3358 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3359 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3360 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3361 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3362 if (uHostCr4 & X86_CR4_FSGSBASE)
3363 {
3364 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3365 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3366 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3367 }
3368 else
3369 {
3370 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3371 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3372 fRestoreHostFlags = 0;
3373 }
3374 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3375 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3376 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3377 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3378 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3379#endif
3380
3381 /*
3382 * Determine if the host segment registers are suitable for VT-x. Otherwise load zero so
3383 * VM-entry succeeds, and restore the real values before we get preempted.
3384 *
3385 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3386 */
3387 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3388 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3389 {
3390 if (!(uSelAll & X86_SEL_LDT))
3391 {
3392#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3393 do { \
3394 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3395 if ((a_uVmcsVar) & X86_SEL_RPL) \
3396 { \
3397 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3398 (a_uVmcsVar) = 0; \
3399 } \
3400 } while (0)
3401 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3402 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3403 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3404 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3405#undef VMXLOCAL_ADJUST_HOST_SEG
3406 }
3407 else
3408 {
3409#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3410 do { \
3411 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3412 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3413 { \
3414 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3415 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3416 else \
3417 { \
3418 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3419 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3420 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3421 } \
3422 (a_uVmcsVar) = 0; \
3423 } \
3424 } while (0)
3425 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3426 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3427 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3428 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3429#undef VMXLOCAL_ADJUST_HOST_SEG
3430 }
3431 }
3432
3433 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3434 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3435 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3436 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3437 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3438 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3439 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3440 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3441
3442 /*
3443 * Determine if we need to manually restore the GDTR and IDTR limits, as VT-x zaps
3444 * them to the maximum limit (0xffff) on every VM-exit.
3445 */
3446 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3447 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3448
3449 /*
3450 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3451 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3452 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3453 * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
3454 * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3455 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3456 * at 0xffff on hosts where we are sure it won't cause trouble.
3457 */
3458#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3459 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3460#else
3461 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3462#endif
3463 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3464
3465 /*
3466 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3467 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3468 * RPL should be too in most cases.
3469 */
3470 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3471 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3472 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3473 VERR_VMX_INVALID_HOST_STATE);
3474
3475 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3476 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
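    /* Note: in 64-bit mode the TR descriptor is a 16-byte system descriptor; X86DESC64_BASE
       assembles the 64-bit base from the three base fields in the low 8 bytes plus the upper
       32 base bits held in the high 8 bytes. */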
3477
3478 /*
3479 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3480 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3481 * restoration if the host has something else. Task switching is not supported in 64-bit
3482 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3483 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3484 *
3485 * [1] See Intel spec. 3.5 "System Descriptor Types".
3486 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3487 */
3488 Assert(pDesc->System.u4Type == 11);
3489 if ( pDesc->System.u16LimitLow != 0x67
3490 || pDesc->System.u4LimitHigh)
3491 {
3492 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3493
3494 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3495 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3496 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3497 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3498 {
3499 /* The GDT is read-only but the writable GDT is available. */
3500 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3501 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3502 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3503 AssertRCReturn(rc, rc);
3504 }
3505 }
3506
3507 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3508
3509 /*
3510 * Do all the VMCS updates in one block to assist nested virtualization.
3511 */
3512 int rc;
3513 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3514 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3515 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3516 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3517 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3518 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3519 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3520 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3521 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3522 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3523 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3524 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3525
3526 return VINF_SUCCESS;
3527}
3528
3529
3530/**
3531 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3532 * host-state area of the VMCS.
3533 *
3534 * These MSRs will be automatically restored on the host after every successful
3535 * VM-exit.
3536 *
3537 * @param pVCpu The cross context virtual CPU structure.
3538 *
3539 * @remarks No-long-jump zone!!!
3540 */
3541static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3542{
3543 AssertPtr(pVCpu);
3544
3545 /*
3546 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3547 * rather than swapping them on every VM-entry.
3548 */
3549 hmR0VmxLazySaveHostMsrs(pVCpu);
3550
3551 /*
3552 * Host Sysenter MSRs.
3553 */
3554 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3555 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3556 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3557
3558 /*
3559 * Host EFER MSR.
3560 *
3561 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3562 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3563 */
3564 if (g_fHmVmxSupportsVmcsEfer)
3565 {
3566 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3567 AssertRC(rc);
3568 }
3569
3570 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3571 * hmR0VmxExportGuestEntryExitCtls(). */
3572}
3573
3574
3575/**
3576 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3577 *
3578 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3579 * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
3580 *
3581 * @returns true if we need to load guest EFER, false otherwise.
3582 * @param pVCpu The cross context virtual CPU structure.
3583 * @param pVmxTransient The VMX-transient structure.
3584 *
3585 * @remarks Requires EFER, CR4.
3586 * @remarks No-long-jump zone!!!
3587 */
3588static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3589{
3590#ifdef HMVMX_ALWAYS_SWAP_EFER
3591 RT_NOREF2(pVCpu, pVmxTransient);
3592 return true;
3593#else
3594 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3595 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3596 uint64_t const u64GuestEfer = pCtx->msrEFER;
3597
3598# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3599 /*
3600 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3601 * the nested-guest.
3602 */
3603 if ( pVmxTransient->fIsNestedGuest
3604 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3605 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3606 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3607 return true;
3608# else
3609 RT_NOREF(pVmxTransient);
3610#endif
3611
3612 /*
3613 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3614 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3615 */
3616 if ( CPUMIsGuestInLongModeEx(pCtx)
3617 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3618 return true;
3619
3620 /*
3621 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3622 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3623 *
3624 * See Intel spec. 4.5 "IA-32e Paging".
3625 * See Intel spec. 4.1.1 "Three Paging Modes".
3626 *
3627 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3628 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3629 */
3630 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3631 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3632 if ( (pCtx->cr4 & X86_CR4_PAE)
3633 && (pCtx->cr0 & X86_CR0_PG))
3634 {
3635 /*
3636 * If nested paging is not used, verify that the guest paging mode matches the
3637 * shadow paging mode which is/will be placed in the VMCS (which is what will
3638 * actually be used while executing the guest and not the CR4 shadow value).
3639 */
3640 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3641 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3642 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3643 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3644 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3645 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3646 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3647 {
3648 /* Verify that the host is NX capable. */
3649 Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute);
3650 return true;
3651 }
3652 }
3653
3654 return false;
3655#endif
3656}
3657
3658
3659/**
3660 * Exports the guest's RSP into the guest-state area in the VMCS.
3661 *
3662 * @param pVCpu The cross context virtual CPU structure.
3663 *
3664 * @remarks No-long-jump zone!!!
3665 */
3666static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3667{
3668 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3669 {
3670 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3671
3672 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3673 AssertRC(rc);
3674
3675 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3676 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3677 }
3678}
3679
3680
3681/**
3682 * Exports the guest hardware-virtualization state.
3683 *
3684 * @returns VBox status code.
3685 * @param pVCpu The cross context virtual CPU structure.
3686 * @param pVmxTransient The VMX-transient structure.
3687 *
3688 * @remarks No-long-jump zone!!!
3689 */
3690static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3691{
3692 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3693 {
3694#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3695 /*
3696 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3697 * VMCS shadowing.
3698 */
3699 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3700 {
3701 /*
3702 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3703 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3704 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3705 *
3706 * We check for VMX root mode here in case the guest executes VMXOFF without
3707 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3708 * not clear the current VMCS pointer.
3709 */
3710 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3711 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3712 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3713 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3714 {
3715 /* Paranoia. */
3716 Assert(!pVmxTransient->fIsNestedGuest);
3717
3718 /*
3719 * For performance reasons, also check if the nested hypervisor's current VMCS
3720 * was newly loaded or modified before copying it to the shadow VMCS.
3721 */
3722 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3723 {
3724 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3725 AssertRCReturn(rc, rc);
3726 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3727 }
3728 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3729 }
3730 else
3731 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3732 }
3733#else
3734 NOREF(pVmxTransient);
3735#endif
3736 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3737 }
3738 return VINF_SUCCESS;
3739}
3740
3741
3742/**
3743 * Exports the guest debug registers into the guest-state area in the VMCS.
3744 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3745 *
3746 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3747 *
3748 * @returns VBox status code.
3749 * @param pVCpu The cross context virtual CPU structure.
3750 * @param pVmxTransient The VMX-transient structure.
3751 *
3752 * @remarks No-long-jump zone!!!
3753 */
3754static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3755{
3756 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3757
3758 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3759 * stepping. */
3760 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3761 if (pVmxTransient->fIsNestedGuest)
3762 {
3763 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3764 AssertRC(rc);
3765
3766 /*
3767 * We don't want to always intercept MOV DRx for nested-guests as it causes
3768 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3769 * Instead, they are strictly only requested when the nested hypervisor intercepts
3770 * them -- handled while merging VMCS controls.
3771 *
3772 * If neither the outer hypervisor nor the nested hypervisor is intercepting MOV DRx,
3773 * then the nested-guest debug state should be actively loaded on the host so that the
3774 * nested-guest reads its own debug registers without causing VM-exits.
3775 */
3776 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3777 && !CPUMIsGuestDebugStateActive(pVCpu))
3778 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3779 return VINF_SUCCESS;
3780 }
3781
3782#ifdef VBOX_STRICT
3783 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3784 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3785 {
3786 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3787 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3788 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3789 }
3790#endif
3791
3792 bool fSteppingDB = false;
3793 bool fInterceptMovDRx = false;
3794 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3795 if (pVCpu->hm.s.fSingleInstruction)
3796 {
3797 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3798 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3799 {
3800 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3801 Assert(fSteppingDB == false);
3802 }
3803 else
3804 {
3805 pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
3806 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3807 pVCpu->hmr0.s.fClearTrapFlag = true;
3808 fSteppingDB = true;
3809 }
3810 }
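    /* Note: with the monitor trap flag set the CPU takes a dedicated MTF VM-exit after executing
       at most one guest instruction; the EFLAGS.TF fallback instead relies on the #DB intercept
       that is always present in the exception bitmap (see hmR0VmxSetupVmcsXcptBitmap). */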
3811
3812 uint64_t u64GuestDr7;
3813 if ( fSteppingDB
3814 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3815 {
3816 /*
3817 * Use the combined guest and host DRx values found in the hypervisor register set
3818 * because the hypervisor debugger has breakpoints active or someone is single stepping
3819 * on the host side without a monitor trap flag.
3820 *
3821 * Note! DBGF expects a clean DR6 state before executing guest code.
3822 */
3823 if (!CPUMIsHyperDebugStateActive(pVCpu))
3824 {
3825 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3826 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3827 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3828 }
3829
3830 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3831 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3832 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3833 fInterceptMovDRx = true;
3834 }
3835 else
3836 {
3837 /*
3838 * If the guest has enabled debug registers, we need to load them prior to
3839 * executing guest code so they'll trigger at the right time.
3840 */
3841 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3842 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3843 {
3844 if (!CPUMIsGuestDebugStateActive(pVCpu))
3845 {
3846 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3847 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3848 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3849 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3850 }
3851 Assert(!fInterceptMovDRx);
3852 }
3853 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3854 {
3855 /*
3856 * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3857 * must intercept #DB in order to maintain a correct DR6 guest value, and
3858 * because we need to intercept it to prevent nested #DBs from hanging the
3859 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3860 */
3861 fInterceptMovDRx = true;
3862 }
3863
3864 /* Update DR7 with the actual guest value. */
3865 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3866 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3867 }
3868
3869 if (fInterceptMovDRx)
3870 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3871 else
3872 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3873
3874 /*
3875 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3876 * monitor-trap flag and update our cache.
3877 */
3878 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3879 {
3880 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3881 AssertRC(rc);
3882 pVmcsInfo->u32ProcCtls = uProcCtls;
3883 }
3884
3885 /*
3886 * Update guest DR7.
3887 */
3888 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3889 AssertRC(rc);
3890
3891 /*
3892 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3893 * we need to clear any interrupt inhibition, as otherwise it causes a VM-entry failure.
3894 *
3895 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3896 */
3897 if (fSteppingDB)
3898 {
3899 Assert(pVCpu->hm.s.fSingleInstruction);
3900 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3901
3902 uint32_t fIntrState = 0;
3903 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3904 AssertRC(rc);
3905
3906 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3907 {
3908 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3909 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3910 AssertRC(rc);
3911 }
3912 }
3913
3914 return VINF_SUCCESS;
3915}
3916
3917
3918/**
3919 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3920 * areas.
3921 *
3922 * These MSRs will automatically be loaded to the host CPU on every successful
3923 * VM-entry and stored from the host CPU on every successful VM-exit.
3924 *
3925 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
3926 * actual host MSR values are not updated here for performance reasons. See
3927 * hmR0VmxExportHostMsrs().
3928 *
3929 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
3930 *
3931 * @returns VBox status code.
3932 * @param pVCpu The cross context virtual CPU structure.
3933 * @param pVmxTransient The VMX-transient structure.
3934 *
3935 * @remarks No-long-jump zone!!!
3936 */
3937static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3938{
3939 AssertPtr(pVCpu);
3940 AssertPtr(pVmxTransient);
3941
3942 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3943 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3944
3945 /*
3946 * MSRs for which we use the auto-load/store MSR area in the VMCS.
3947 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
3948 * nothing to do here. The host MSR values are updated when it's safe in
3949 * hmR0VmxLazySaveHostMsrs().
3950 *
3951 * For nested-guests, the guest MSRs from the VM-entry MSR-load area are already
3952 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
3953 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
3954 * for any MSRs that are not part of the lazy MSRs, so we do not need to place
3955 * those MSRs into the auto-load/store MSR area. Nothing to do here.
3956 */
3957 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
3958 {
3959 /* No auto-load/store MSRs currently. */
3960 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
3961 }
3962
3963 /*
3964 * Guest Sysenter MSRs.
3965 */
3966 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
3967 {
3968 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
3969
3970 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
3971 {
3972 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
3973 AssertRC(rc);
3974 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
3975 }
3976
3977 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
3978 {
3979 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
3980 AssertRC(rc);
3981 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
3982 }
3983
3984 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
3985 {
3986 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
3987 AssertRC(rc);
3988 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
3989 }
3990 }
3991
3992 /*
3993 * Guest/host EFER MSR.
3994 */
3995 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
3996 {
3997 /* Whether we are using the VMCS to swap the EFER MSR must have been
3998 determined earlier while exporting VM-entry/VM-exit controls. */
3999 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4000 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4001
4002 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4003 {
4004 /*
4005 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4006 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4007 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4008 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4009 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4010 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4011 * during VM-entry.
4012 */
4013 uint64_t uGuestEferMsr = pCtx->msrEFER;
4014 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4015 {
4016 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4017 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4018 else
4019 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4020 }
4021
4022 /*
4023 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4024 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4025 */
4026 if (g_fHmVmxSupportsVmcsEfer)
4027 {
4028 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4029 AssertRC(rc);
4030 }
4031 else
4032 {
4033 /*
4034 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4035 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4036 */
4037 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4038 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4039 AssertRCReturn(rc, rc);
4040 }
4041
4042 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4043 }
4044 else if (!g_fHmVmxSupportsVmcsEfer)
4045 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4046
4047 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4048 }
4049
4050 /*
4051 * Other MSRs.
4052 */
4053 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4054 {
4055 /* Speculation Control (R/W). */
4056 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4057 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4058 {
4059 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4060 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4061 AssertRCReturn(rc, rc);
4062 }
4063
4064 /* Last Branch Record. */
4065 if (pVM->hmr0.s.vmx.fLbr)
4066 {
4067 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4068 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4069 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4070 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4071 Assert(cLbrStack <= 32);
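            /* Note: the stack depth is derived from the MSR ID range; e.g. on CPUs whose
               from-IP MSRs span 0x680..0x69F this yields a 32-deep LBR stack. */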
4072 for (uint32_t i = 0; i < cLbrStack; i++)
4073 {
4074 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4075 pVmcsInfoShared->au64LbrFromIpMsr[i],
4076 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4077 AssertRCReturn(rc, rc);
4078
4079 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4080 if (idToIpMsrStart != 0)
4081 {
4082 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4083 pVmcsInfoShared->au64LbrToIpMsr[i],
4084 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4085 AssertRCReturn(rc, rc);
4086 }
4087 }
4088
4089 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4090 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4091 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4092 false /* fUpdateHostMsr */);
4093 AssertRCReturn(rc, rc);
4094 }
4095
4096 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4097 }
4098
4099 return VINF_SUCCESS;
4100}
4101
4102
4103/**
4104 * Wrapper for running the guest code in VT-x.
4105 *
4106 * @returns VBox status code, no informational status codes.
4107 * @param pVCpu The cross context virtual CPU structure.
4108 * @param pVmxTransient The VMX-transient structure.
4109 *
4110 * @remarks No-long-jump zone!!!
4111 */
4112DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4113{
4114 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4115 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4116
4117 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4118 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
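    /* Note: VMLAUNCH requires the current VMCS launch state to be "clear" while VMRESUME
       requires it to be "launched", hence the first entry after a VMCLEAR must use VMLAUNCH
       and all subsequent entries use VMRESUME. */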
4119#ifdef VBOX_WITH_STATISTICS
4120 if (fResumeVM)
4121 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4122 else
4123 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4124#endif
4125 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4126 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4127 return rc;
4128}
4129
4130
4131/**
4132 * Reports world-switch error and dumps some useful debug info.
4133 *
4134 * @param pVCpu The cross context virtual CPU structure.
4135 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4136 * @param pVmxTransient The VMX-transient structure (only
4137 * exitReason updated).
4138 */
4139static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4140{
4141 Assert(pVCpu);
4142 Assert(pVmxTransient);
4143 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4144
4145 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4146 switch (rcVMRun)
4147 {
4148 case VERR_VMX_INVALID_VMXON_PTR:
4149 AssertFailed();
4150 break;
4151 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4152 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4153 {
4154 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4155 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4156 AssertRC(rc);
4157 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4158
4159 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4160 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4161 Cannot do it here as we may have been long preempted. */
4162
4163#ifdef VBOX_STRICT
4164 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4165 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4166 pVmxTransient->uExitReason));
4167 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4168 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4169 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4170 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4171 else
4172 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4173 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4174 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4175
4176 static struct
4177 {
4178 /** Name of the field to log. */
4179 const char *pszName;
4180 /** The VMCS field. */
4181 uint32_t uVmcsField;
4182 /** Whether host support of this field needs to be checked. */
4183 bool fCheckSupport;
4184 } const s_aVmcsFields[] =
4185 {
4186 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4187 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4188 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4189 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4190 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4191 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4192 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4193 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4194 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4195 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4196 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4197 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4198 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4199 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4200 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4201 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4202 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4203 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4204 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4205 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4206 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4207 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4208 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4209 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4210 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4211 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4212 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4213 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4214 /* The order of selector fields below are fixed! */
4215 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4216 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4217 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4218 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4219 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4220 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4221 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4222 /* End of ordered selector fields. */
4223 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4224 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4225 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4226 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4227 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4228 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4229 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4230 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4231 };
4232
4233 RTGDTR HostGdtr;
4234 ASMGetGDTR(&HostGdtr);
4235
4236 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4237 for (uint32_t i = 0; i < cVmcsFields; i++)
4238 {
4239 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4240
4241 bool fSupported;
4242 if (!s_aVmcsFields[i].fCheckSupport)
4243 fSupported = true;
4244 else
4245 {
4246 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4247 switch (uVmcsField)
4248 {
4249 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4250 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4251 case VMX_VMCS32_CTRL_PROC_EXEC2:
4252 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4253 break;
4254 default:
4255 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4256 }
4257 }
4258
4259 if (fSupported)
4260 {
4261 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
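                    /* Note: bits 14:13 of a VMCS field encoding give the access width
                       (0=16-bit, 1=64-bit, 2=32-bit, 3=natural-width), which is what selects
                       the VMXReadVmcs16/32/64 variant below. */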
4262 switch (uWidth)
4263 {
4264 case VMX_VMCSFIELD_WIDTH_16BIT:
4265 {
4266 uint16_t u16Val;
4267 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4268 AssertRC(rc);
4269 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4270
4271 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4272 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4273 {
4274 if (u16Val < HostGdtr.cbGdt)
4275 {
4276 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4277 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4278 "Host FS", "Host GS", "Host TR" };
4279 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4280 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4281 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4282 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4283 }
4284 else
4285 Log4((" Selector value exceeds GDT limit!\n"));
4286 }
4287 break;
4288 }
4289
4290 case VMX_VMCSFIELD_WIDTH_32BIT:
4291 {
4292 uint32_t u32Val;
4293 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4294 AssertRC(rc);
4295 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4296 break;
4297 }
4298
4299 case VMX_VMCSFIELD_WIDTH_64BIT:
4300 case VMX_VMCSFIELD_WIDTH_NATURAL:
4301 {
4302 uint64_t u64Val;
4303 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4304 AssertRC(rc);
4305 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4306 break;
4307 }
4308 }
4309 }
4310 }
4311
4312 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4313 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4314 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4315 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4316 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4317 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4318#endif /* VBOX_STRICT */
4319 break;
4320 }
4321
4322 default:
4323 /* Impossible */
4324 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4325 break;
4326 }
4327}
4328
4329
4330/**
4331 * Sets up the usage of TSC-offsetting and updates the VMCS.
4332 *
4333 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4334 * VMX-preemption timer.
4335 *
4336 * @returns VBox status code.
4337 * @param pVCpu The cross context virtual CPU structure.
4338 * @param pVmxTransient The VMX-transient structure.
4339 * @param idCurrentCpu The current CPU number.
4340 *
4341 * @remarks No-long-jump zone!!!
4342 */
4343static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4344{
4345 bool fOffsettedTsc;
4346 bool fParavirtTsc;
4347 uint64_t uTscOffset;
4348 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4349 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4350
4351 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4352 {
4353 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4354 every entry slowed down the bs2-test1 CPUID testcase by ~33% on a 10980xe). */
4355 uint64_t cTicksToDeadline;
4356 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4357 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4358 {
4359 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4360 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4361 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4362 if ((int64_t)cTicksToDeadline > 0)
4363 { /* hopefully */ }
4364 else
4365 {
4366 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4367 cTicksToDeadline = 0;
4368 }
4369 }
4370 else
4371 {
4372 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4373 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4374 &pVCpu->hmr0.s.vmx.uTscDeadline,
4375 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4376 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4377 if (cTicksToDeadline >= 128)
4378 { /* hopefully */ }
4379 else
4380 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4381 }
4382
4383 /* Make sure the returned values have sane upper and lower boundaries. */
4384 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4385 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4386 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
4387 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
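        /* Note: the VMX-preemption timer counts down at the TSC rate divided by 2^N, where N is
           reported in IA32_VMX_MISC[4:0]; hence the right shift above. E.g. on a 3 GHz CPU the
           clamps amount to roughly 46.9M TSC ticks (15.625ms) and ~91.5K TSC ticks (~30.5us). */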
4388
4389 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4390 * preemption timers here. We probably need to clamp the preemption timer,
4391 * after converting the timer value to the host. */
4392 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4393 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4394 AssertRC(rc);
4395 }
4396 else
4397 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4398
4399 if (fParavirtTsc)
4400 {
4401 /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
4402 information before every VM-entry, hence it is disabled here for performance reasons. */
4403#if 0
4404 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4405 AssertRC(rc);
4406#endif
4407 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4408 }
4409
4410 if ( fOffsettedTsc
4411 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4412 {
4413 if (pVmxTransient->fIsNestedGuest)
4414 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4415 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
4416 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4417 }
4418 else
4419 {
4420 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4421 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4422 }
4423}
4424
4425
4426/**
4427 * Worker for VMXR0ImportStateOnDemand.
4428 *
4429 * @returns VBox status code.
4430 * @param pVCpu The cross context virtual CPU structure.
4431 * @param pVmcsInfo The VMCS info. object.
4432 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
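 *
 * @note    Callers typically request only the state they need, e.g.
 *          CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS for the lazily
 *          loaded MSRs, or HMVMX_CPUMCTX_EXTRN_ALL when leaving the VT-x session.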
4433 */
4434static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat)
4435{
4436 int rc = VINF_SUCCESS;
4437 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4438 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4439 uint32_t u32Val;
4440
4441 /*
4442 * Note! This is a hack to work around a mysterious BSOD observed with release builds
4443 * on Windows 10 64-bit hosts. Profile and debug builds are not affected and
4444 * neither are other host platforms.
4445 *
4446 * Committing this temporarily as it prevents BSOD.
4447 *
4448 * Update: This is very likely a compiler optimization bug, see @bugref{9180}.
4449 */
4450#ifdef RT_OS_WINDOWS
4451 if (pVM == 0 || pVM == (void *)(uintptr_t)-1)
4452 return VERR_HM_IPE_1;
4453#endif
4454
4455 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
4456
4457 /*
4458 * We disable interrupts to make the updating of the state and in particular
4459 * the fExtrn modification atomic with respect to preemption hooks.
4460 */
4461 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
4462
4463 fWhat &= pCtx->fExtrn;
4464 if (fWhat)
4465 {
4466 do
4467 {
4468 if (fWhat & CPUMCTX_EXTRN_RIP)
4469 vmxHCImportGuestRip(pVCpu);
4470
4471 if (fWhat & CPUMCTX_EXTRN_RFLAGS)
4472 vmxHCImportGuestRFlags(pVCpu, pVmcsInfo);
4473
4474 if (fWhat & (CPUMCTX_EXTRN_INHIBIT_INT | CPUMCTX_EXTRN_INHIBIT_NMI))
4475 vmxHCImportGuestIntrState(pVCpu, pVmcsInfo);
4476
4477 if (fWhat & CPUMCTX_EXTRN_RSP)
4478 {
4479 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RSP, &pCtx->rsp);
4480 AssertRC(rc);
4481 }
4482
4483 if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
4484 {
4485 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4486 bool const fRealOnV86Active = pVmcsInfoShared->RealMode.fRealOnV86Active;
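            /* When real mode is emulated using virtual-8086 mode, the VMCS holds v86-style segment
               attributes, so the guest's real-mode attributes are restored from the shared copy. */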
4487 if (fWhat & CPUMCTX_EXTRN_CS)
4488 {
4489 vmxHCImportGuestSegReg(pVCpu, X86_SREG_CS);
4490 vmxHCImportGuestRip(pVCpu);
4491 if (fRealOnV86Active)
4492 pCtx->cs.Attr.u = pVmcsInfoShared->RealMode.AttrCS.u;
4493 EMHistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true /* fFlattened */);
4494 }
4495 if (fWhat & CPUMCTX_EXTRN_SS)
4496 {
4497 vmxHCImportGuestSegReg(pVCpu, X86_SREG_SS);
4498 if (fRealOnV86Active)
4499 pCtx->ss.Attr.u = pVmcsInfoShared->RealMode.AttrSS.u;
4500 }
4501 if (fWhat & CPUMCTX_EXTRN_DS)
4502 {
4503 vmxHCImportGuestSegReg(pVCpu, X86_SREG_DS);
4504 if (fRealOnV86Active)
4505 pCtx->ds.Attr.u = pVmcsInfoShared->RealMode.AttrDS.u;
4506 }
4507 if (fWhat & CPUMCTX_EXTRN_ES)
4508 {
4509 vmxHCImportGuestSegReg(pVCpu, X86_SREG_ES);
4510 if (fRealOnV86Active)
4511 pCtx->es.Attr.u = pVmcsInfoShared->RealMode.AttrES.u;
4512 }
4513 if (fWhat & CPUMCTX_EXTRN_FS)
4514 {
4515 vmxHCImportGuestSegReg(pVCpu, X86_SREG_FS);
4516 if (fRealOnV86Active)
4517 pCtx->fs.Attr.u = pVmcsInfoShared->RealMode.AttrFS.u;
4518 }
4519 if (fWhat & CPUMCTX_EXTRN_GS)
4520 {
4521 vmxHCImportGuestSegReg(pVCpu, X86_SREG_GS);
4522 if (fRealOnV86Active)
4523 pCtx->gs.Attr.u = pVmcsInfoShared->RealMode.AttrGS.u;
4524 }
4525 }
4526
4527 if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
4528 {
4529 if (fWhat & CPUMCTX_EXTRN_LDTR)
4530 vmxHCImportGuestLdtr(pVCpu);
4531
4532 if (fWhat & CPUMCTX_EXTRN_GDTR)
4533 {
4534 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &pCtx->gdtr.pGdt); AssertRC(rc);
4535 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRC(rc);
4536 pCtx->gdtr.cbGdt = u32Val;
4537 }
4538
4539 /* Guest IDTR. */
4540 if (fWhat & CPUMCTX_EXTRN_IDTR)
4541 {
4542 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &pCtx->idtr.pIdt); AssertRC(rc);
4543 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRC(rc);
4544 pCtx->idtr.cbIdt = u32Val;
4545 }
4546
4547 /* Guest TR. */
4548 if (fWhat & CPUMCTX_EXTRN_TR)
4549 {
4550 /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR,
4551 so we don't need to import it. */
4552 if (!pVmcsInfo->pShared->RealMode.fRealOnV86Active)
4553 vmxHCImportGuestTr(pVCpu);
4554 }
4555 }
4556
4557 if (fWhat & CPUMCTX_EXTRN_DR7)
4558 {
4559 if (!pVCpu->hmr0.s.fUsingHyperDR7)
4560 {
4561 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_DR7, &pCtx->dr[7]);
4562 AssertRC(rc);
4563 }
4564 }
4565
4566 if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
4567 {
4568 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); AssertRC(rc);
4569 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); AssertRC(rc);
4570 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRC(rc);
4571 pCtx->SysEnter.cs = u32Val;
4572 }
4573
4574 if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
4575 {
4576 if ( pVM->hmr0.s.fAllow64BitGuests
4577 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4578 pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
4579 }
4580
4581 if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
4582 {
4583 if ( pVM->hmr0.s.fAllow64BitGuests
4584 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4585 {
4586 pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
4587 pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
4588 pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
4589 }
4590 }
4591
4592 if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
4593 {
4594 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4595 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
4596 uint32_t const cMsrs = pVmcsInfo->cExitMsrStore;
4597 Assert(pMsrs);
4598 Assert(cMsrs <= VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
4599 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
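                /* Walk the VM-exit MSR-store area and transfer the values the CPU saved on
                   VM-exit back into the guest-CPU context (or the shared VMCS info for LBR MSRs). */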
4600 for (uint32_t i = 0; i < cMsrs; i++)
4601 {
4602 uint32_t const idMsr = pMsrs[i].u32Msr;
4603 switch (idMsr)
4604 {
4605 case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsrs[i].u64Value); break;
4606 case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsrs[i].u64Value); break;
4607 case MSR_K6_EFER: /* Can't be changed without causing a VM-exit */ break;
4608 default:
4609 {
4610 uint32_t idxLbrMsr;
4611 if (pVM->hmr0.s.vmx.fLbr)
4612 {
4613 if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbrMsr))
4614 {
4615 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrFromIpMsr));
4616 pVmcsInfoShared->au64LbrFromIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4617 break;
4618 }
4619 if (hmR0VmxIsLbrBranchToMsr(pVM, idMsr, &idxLbrMsr))
4620 {
4621 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrToIpMsr));
4622 pVmcsInfoShared->au64LbrToIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4623 break;
4624 }
4625 if (idMsr == pVM->hmr0.s.vmx.idLbrTosMsr)
4626 {
4627 pVmcsInfoShared->u64LbrTosMsr = pMsrs[i].u64Value;
4628 break;
4629 }
4630 /* Fallthru (no break) */
4631 }
4632 pCtx->fExtrn = 0;
4633 pVCpu->hm.s.u32HMError = pMsrs->u32Msr;
4634 ASMSetFlags(fEFlags);
4635 AssertMsgFailed(("Unexpected MSR in auto-load/store area. idMsr=%#RX32 cMsrs=%u\n", idMsr, cMsrs));
4636 return VERR_HM_UNEXPECTED_LD_ST_MSR;
4637 }
4638 }
4639 }
4640 }
4641
4642 if (fWhat & CPUMCTX_EXTRN_CR_MASK)
4643 {
4644 if (fWhat & CPUMCTX_EXTRN_CR0)
4645 {
4646 uint64_t u64Cr0;
4647 uint64_t u64Shadow;
4648 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Cr0); AssertRC(rc);
4649 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Shadow); AssertRC(rc);
4650#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4651 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4652 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4653#else
4654 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4655 {
4656 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4657 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4658 }
4659 else
4660 {
4661 /*
4662 * We've merged the guest and nested-guest's CR0 guest/host mask while executing
4663 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4664 * re-construct CR0. See @bugref{9180#c95} for details.
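                     *
                     * The merged mask is the union of our mask and the nested hypervisor's mask:
                     * bits outside it come straight from the hardware VMCS guest CR0, bits owned
                     * by the nested hypervisor come from the nested-guest VMCS, and bits owned
                     * only by us come from our CR0 read shadow.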
4665 */
4666 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4667 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4668 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4669 | (pVmcsNstGst->u64GuestCr0.u & pVmcsNstGst->u64Cr0Mask.u)
4670 | (u64Shadow & (pVmcsInfoGst->u64Cr0Mask & ~pVmcsNstGst->u64Cr0Mask.u));
4671 }
4672#endif
4673 VMMRZCallRing3Disable(pVCpu); /* May call into PGM which has Log statements. */
4674 CPUMSetGuestCR0(pVCpu, u64Cr0);
4675 VMMRZCallRing3Enable(pVCpu);
4676 }
4677
4678 if (fWhat & CPUMCTX_EXTRN_CR4)
4679 {
4680 uint64_t u64Cr4;
4681 uint64_t u64Shadow;
4682 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64Cr4); AssertRC(rc);
4683 rc |= VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Shadow); AssertRC(rc);
4684#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4685 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4686 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4687#else
4688 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4689 {
4690 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4691 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4692 }
4693 else
4694 {
4695 /*
4696 * We've merged the guest and nested-guest's CR4 guest/host mask while executing
4697 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4698 * re-construct CR4. See @bugref{9180#c95} for details.
4699 */
4700 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4701 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4702 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4703 | (pVmcsNstGst->u64GuestCr4.u & pVmcsNstGst->u64Cr4Mask.u)
4704 | (u64Shadow & (pVmcsInfoGst->u64Cr4Mask & ~pVmcsNstGst->u64Cr4Mask.u));
4705 }
4706#endif
4707 pCtx->cr4 = u64Cr4;
4708 }
4709
4710 if (fWhat & CPUMCTX_EXTRN_CR3)
4711 {
4712 /* CR0.PG bit changes are always intercepted, so it's up to date. */
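                /* The VMCS guest CR3 only holds the guest's own value with unrestricted guest
                   execution, or with nested paging once the guest has enabled paging; with
                   shadow paging it points to our shadow page tables instead. */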
4713 if ( pVM->hmr0.s.vmx.fUnrestrictedGuest
4714 || ( pVM->hmr0.s.fNestedPaging
4715 && CPUMIsGuestPagingEnabledEx(pCtx)))
4716 {
4717 uint64_t u64Cr3;
4718 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR3, &u64Cr3); AssertRC(rc);
4719 if (pCtx->cr3 != u64Cr3)
4720 {
4721 pCtx->cr3 = u64Cr3;
4722 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4723 }
4724
4725 /*
4726 * If the guest is in PAE mode, sync back the PDPE's into the guest state.
4727 * CR4.PAE, CR0.PG, EFER MSR changes are always intercepted, so they're up to date.
4728 */
4729 if (CPUMIsGuestInPAEModeEx(pCtx))
4730 {
4731 X86PDPE aPaePdpes[4];
4732 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPaePdpes[0].u); AssertRC(rc);
4733 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPaePdpes[1].u); AssertRC(rc);
4734 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPaePdpes[2].u); AssertRC(rc);
4735 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPaePdpes[3].u); AssertRC(rc);
4736 if (memcmp(&aPaePdpes[0], &pCtx->aPaePdpes[0], sizeof(aPaePdpes)))
4737 {
4738 memcpy(&pCtx->aPaePdpes[0], &aPaePdpes[0], sizeof(aPaePdpes));
4739 /* PGM now updates PAE PDPTEs while updating CR3. */
4740 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4741 }
4742 }
4743 }
4744 }
4745 }
4746
4747#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4748 if (fWhat & CPUMCTX_EXTRN_HWVIRT)
4749 {
4750 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
4751 && !CPUMIsGuestInVmxNonRootMode(pCtx))
4752 {
4753 Assert(CPUMIsGuestInVmxRootMode(pCtx));
4754 rc = vmxHCCopyShadowToNstGstVmcs(pVCpu, pVmcsInfo);
4755 if (RT_SUCCESS(rc))
4756 { /* likely */ }
4757 else
4758 break;
4759 }
4760 }
4761#endif
4762 } while (0);
4763
4764 if (RT_SUCCESS(rc))
4765 {
4766 /* Update fExtrn. */
4767 pCtx->fExtrn &= ~fWhat;
4768
4769 /* If everything has been imported, clear the HM keeper bit. */
4770 if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
4771 {
4772 pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
4773 Assert(!pCtx->fExtrn);
4774 }
4775 }
4776 }
4777 else
4778 AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
4779
4780 /*
4781 * Restore interrupts.
4782 */
4783 ASMSetFlags(fEFlags);
4784
4785 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
4786
4787 if (RT_SUCCESS(rc))
4788 { /* likely */ }
4789 else
4790 return rc;
4791
4792 /*
4793 * Honor any pending CR3 updates.
4794 *
4795 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> VMXR0CallRing3Callback()
4796 * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
4797 * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
4798 *
4799 * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
4800 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
4801 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
4802 * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
4803 *
4804 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
4805 *
4806 * The force-flag is checked first as it's cheaper for potential superfluous calls to this function.
4807 */
4808 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)
4809 && VMMRZCallRing3IsEnabled(pVCpu))
4810 {
4811 Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
4812 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
4813 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
4814 }
4815
4816 return VINF_SUCCESS;
4817}
4818
4819
4820/**
4821 * Saves the guest state from the VMCS into the guest-CPU context.
4822 *
4823 * @returns VBox status code.
4824 * @param pVCpu The cross context virtual CPU structure.
4825 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4826 */
4827VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4828{
4829 AssertPtr(pVCpu);
4830 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4831 return hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fWhat);
4832}
4833
4834
4835/**
4836 * Gets VMX VM-exit auxiliary information.
4837 *
4838 * @returns VBox status code.
4839 * @param pVCpu The cross context virtual CPU structure.
4840 * @param pVmxExitAux Where to store the VM-exit auxiliary info.
4841 * @param fWhat What to fetch, HMVMX_READ_XXX.
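 *
 * @note    Returns VERR_NOT_AVAILABLE when no VM-exit is currently being handled,
 *          i.e. when the VCPU has no active VMX-transient structure.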
4842 */
4843VMMR0DECL(int) VMXR0GetExitAuxInfo(PVMCPUCC pVCpu, PVMXEXITAUX pVmxExitAux, uint32_t fWhat)
4844{
4845 PVMXTRANSIENT pVmxTransient = pVCpu->hmr0.s.vmx.pVmxTransient;
4846 if (RT_LIKELY(pVmxTransient))
4847 {
4848 AssertCompile(sizeof(fWhat) == sizeof(pVmxTransient->fVmcsFieldsRead));
4849 fWhat &= ~pVmxTransient->fVmcsFieldsRead;
4850
4851 /* The exit reason is always available. */
4852 pVmxExitAux->uReason = pVmxTransient->uExitReason;
4853
4854 if (fWhat & HMVMX_READ_EXIT_QUALIFICATION)
4855 {
4856 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4857 fWhat &= ~HMVMX_READ_EXIT_QUALIFICATION;
4858 pVmxExitAux->u64Qual = pVmxTransient->uExitQual;
4859 }
4860
4861 if (fWhat & HMVMX_READ_IDT_VECTORING_INFO)
4862 {
4863 vmxHCReadIdtVectoringInfoVmcs(pVCpu, pVmxTransient);
4864 fWhat &= ~HMVMX_READ_IDT_VECTORING_INFO;
4865 pVmxExitAux->uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
4866 }
4867
4868 if (fWhat & HMVMX_READ_IDT_VECTORING_ERROR_CODE)
4869 {
4870 vmxHCReadIdtVectoringErrorCodeVmcs(pVCpu, pVmxTransient);
4871 fWhat &= ~HMVMX_READ_IDT_VECTORING_ERROR_CODE;
4872 pVmxExitAux->uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
4873 }
4874
4875 if (fWhat & HMVMX_READ_EXIT_INSTR_LEN)
4876 {
4877 vmxHCReadExitInstrLenVmcs(pVCpu, pVmxTransient);
4878 fWhat &= ~HMVMX_READ_EXIT_INSTR_LEN;
4879 pVmxExitAux->cbInstr = pVmxTransient->cbExitInstr;
4880 }
4881
4882 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_INFO)
4883 {
4884 vmxHCReadExitIntInfoVmcs(pVCpu, pVmxTransient);
4885 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_INFO;
4886 pVmxExitAux->uExitIntInfo = pVmxTransient->uExitIntInfo;
4887 }
4888
4889 if (fWhat & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)
4890 {
4891 vmxHCReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
4892 fWhat &= ~HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
4893 pVmxExitAux->uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
4894 }
4895
4896 if (fWhat & HMVMX_READ_EXIT_INSTR_INFO)
4897 {
4898 vmxHCReadExitInstrInfoVmcs(pVCpu, pVmxTransient);
4899 fWhat &= ~HMVMX_READ_EXIT_INSTR_INFO;
4900 pVmxExitAux->InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
4901 }
4902
4903 if (fWhat & HMVMX_READ_GUEST_LINEAR_ADDR)
4904 {
4905 vmxHCReadGuestLinearAddrVmcs(pVCpu, pVmxTransient);
4906 fWhat &= ~HMVMX_READ_GUEST_LINEAR_ADDR;
4907 pVmxExitAux->u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
4908 }
4909
4910 if (fWhat & HMVMX_READ_GUEST_PHYSICAL_ADDR)
4911 {
4912 vmxHCReadGuestPhysicalAddrVmcs(pVCpu, pVmxTransient);
4913 fWhat &= ~HMVMX_READ_GUEST_PHYSICAL_ADDR;
4914 pVmxExitAux->u64GuestPhysAddr = pVmxTransient->uGuestPhysicalAddr;
4915 }
4916
4917 if (fWhat & HMVMX_READ_GUEST_PENDING_DBG_XCPTS)
4918 {
4919 fWhat &= ~HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
4920#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4921 vmxHCReadGuestPendingDbgXctps(pVCpu, pVmxTransient);
4922 pVmxExitAux->u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
4923#else
4924 pVmxExitAux->u64GuestPendingDbgXcpts = 0;
4925#endif
4926 }
4927
4928 AssertMsg(!fWhat, ("fWhat=%#RX32 fVmcsFieldsRead=%#RX32\n", fWhat, pVmxTransient->fVmcsFieldsRead));
4929 return VINF_SUCCESS;
4930 }
4931 return VERR_NOT_AVAILABLE;
4932}
4933
4934
4935/**
4936 * Does the necessary state syncing before returning to ring-3 for any reason
4937 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4938 *
4939 * @returns VBox status code.
4940 * @param pVCpu The cross context virtual CPU structure.
4941 * @param fImportState Whether to import the guest state from the VMCS back
4942 * to the guest-CPU context.
4943 *
4944 * @remarks No-long-jmp zone!!!
4945 */
4946static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4947{
4948 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4949 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4950
4951 RTCPUID const idCpu = RTMpCpuId();
4952 Log4Func(("HostCpuId=%u\n", idCpu));
4953
4954 /*
4955 * !!! IMPORTANT !!!
4956 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4957 */
4958
4959 /* Save the guest state if necessary. */
4960 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4961 if (fImportState)
4962 {
4963 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4964 AssertRCReturn(rc, rc);
4965 }
4966
4967 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4968 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4969 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4970
4971 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4972#ifdef VBOX_STRICT
4973 if (CPUMIsHyperDebugStateActive(pVCpu))
4974 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
4975#endif
4976 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4977 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4978 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4979
4980 /* Restore host-state bits that VT-x only restores partially. */
4981 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4982 {
4983 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4984 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4985 }
4986 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4987
4988 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4989 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4990 {
4991 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4992 if (!fImportState)
4993 {
4994 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4995 AssertRCReturn(rc, rc);
4996 }
4997 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4998 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4999 }
5000 else
5001 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
5002
5003 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5004 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5005
5006 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
5007 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
5008 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
5009 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
5010 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
5011 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
5012 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
5013 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
5014 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
5015 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5016
5017 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5018
5019 /** @todo This partially defeats the purpose of having preemption hooks.
5020 * The problem is, deregistering the hooks should be moved to a place that
5021 * lasts until the EMT is about to be destroyed, not done every time we leave HM
5022 * context.
5023 */
5024 int rc = hmR0VmxClearVmcs(pVmcsInfo);
5025 AssertRCReturn(rc, rc);
5026
5027#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5028 /*
5029 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
5030 * clear a shadow VMCS before allowing that VMCS to become active on another
5031 * logical processor. We may or may not be importing guest state which clears
5032 * it, so cover for it here.
5033 *
5034 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
5035 */
5036 if ( pVmcsInfo->pvShadowVmcs
5037 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
5038 {
5039 rc = vmxHCClearShadowVmcs(pVmcsInfo);
5040 AssertRCReturn(rc, rc);
5041 }
5042
5043 /*
5044 * Flag that we need to re-export the host state if we switch to this VMCS before
5045 * executing guest or nested-guest code.
5046 */
5047 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
5048#endif
5049
5050 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
5051 NOREF(idCpu);
5052 return VINF_SUCCESS;
5053}
5054
5055
5056/**
5057 * Leaves the VT-x session.
5058 *
5059 * @returns VBox status code.
5060 * @param pVCpu The cross context virtual CPU structure.
5061 *
5062 * @remarks No-long-jmp zone!!!
5063 */
5064static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
5065{
5066 HM_DISABLE_PREEMPT(pVCpu);
5067 HMVMX_ASSERT_CPU_SAFE(pVCpu);
5068 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5069 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5070
5071 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
5072 and done this from the VMXR0ThreadCtxCallback(). */
5073 if (!pVCpu->hmr0.s.fLeaveDone)
5074 {
5075 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
5076 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
5077 pVCpu->hmr0.s.fLeaveDone = true;
5078 }
5079 Assert(!pVCpu->cpum.GstCtx.fExtrn);
5080
5081 /*
5082 * !!! IMPORTANT !!!
5083 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
5084 */
5085
5086 /* Deregister hook now that we've left HM context before re-enabling preemption. */
5087 /** @todo Deregistering here means we need to VMCLEAR always
5088 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
5089 * for calling VMMR0ThreadCtxHookDisable here! */
5090 VMMR0ThreadCtxHookDisable(pVCpu);
5091
5092 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
5093 int rc = HMR0LeaveCpu(pVCpu);
5094 HM_RESTORE_PREEMPT();
5095 return rc;
5096}
5097
5098
5099/**
5100 * Takes the necessary actions before going back to ring-3.
5101 *
5102 * An action requires us to go back to ring-3. This function does the necessary
5103 * steps before we can safely return to ring-3. This is not the same as longjmps
5104 * to ring-3; this is voluntary and prepares the guest so it may continue
5105 * executing outside HM (recompiler/IEM).
5106 *
5107 * @returns VBox status code.
5108 * @param pVCpu The cross context virtual CPU structure.
5109 * @param rcExit The reason for exiting to ring-3. Can be
5110 * VINF_VMM_UNKNOWN_RING3_CALL.
5111 */
5112static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
5113{
5114 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5115
5116 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5117 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
5118 {
5119 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
5120 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
5121 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
5122 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
5123 }
5124
5125 /* Please, no longjumps here (a log flush could jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
5126 VMMRZCallRing3Disable(pVCpu);
5127 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
5128
5129 /*
5130 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
5131 * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
5132 *
5133 * This is because execution may continue from ring-3 and we would need to inject
5134 * the event from there (hence place it back in TRPM).
5135 */
5136 if (pVCpu->hm.s.Event.fPending)
5137 {
5138 vmxHCPendingEventToTrpmTrap(pVCpu);
5139 Assert(!pVCpu->hm.s.Event.fPending);
5140
5141 /* Clear the events from the VMCS. */
5142 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
5143 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
5144 }
5145#ifdef VBOX_STRICT
5146 /*
5147 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
5148 * fatal), we don't care about verifying duplicate injection of events. Errors like
5149 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
5150 * function so those should and will be checked below.
5151 */
5152 else if (RT_SUCCESS(rcExit))
5153 {
5154 /*
5155 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
5156 * This can be pretty hard to debug otherwise; interrupts might occasionally get
5157 * injected twice, see @bugref{9180#c42}.
5158 *
5159 * However, if the VM-entry failed, any VM entry-interruption info. field would
5160 * be left unmodified as the event would not have been injected to the guest. In
5161 * such cases, don't assert, we're not going to continue guest execution anyway.
5162 */
5163 uint32_t uExitReason;
5164 uint32_t uEntryIntInfo;
5165 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
5166 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
5167 AssertRC(rc);
5168 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
5169 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
5170 }
5171#endif
5172
5173 /*
5174 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
5175 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
5176 * (e.g. TPR below threshold).
5177 */
5178 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5179 {
5180 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
5181 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
5182 }
5183
5184 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
5185 and if we're injecting an event we should have a TRPM trap pending. */
5186 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5187#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
5188 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5189#endif
5190
5191 /* Save guest state and restore host state bits. */
5192 int rc = hmR0VmxLeaveSession(pVCpu);
5193 AssertRCReturn(rc, rc);
5194 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5195
5196 /* Thread-context hooks are unregistered at this point!!! */
5197 /* Ring-3 callback notifications are unregistered at this point!!! */
5198
5199 /* Sync recompiler state. */
5200 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
5201 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
5202 | CPUM_CHANGED_LDTR
5203 | CPUM_CHANGED_GDTR
5204 | CPUM_CHANGED_IDTR
5205 | CPUM_CHANGED_TR
5206 | CPUM_CHANGED_HIDDEN_SEL_REGS);
5207 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
5208 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
5209 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5210
5211 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
5212
5213 /* Update the exit-to-ring 3 reason. */
5214 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
5215
5216 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
5217 if ( rcExit != VINF_EM_RAW_INTERRUPT
5218 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5219 {
5220 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
5221 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5222 }
5223
5224 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
5225 VMMRZCallRing3Enable(pVCpu);
5226 return rc;
5227}
5228
5229
5230/**
5231 * VMMRZCallRing3() callback wrapper which saves the guest state before we
5232 * longjump due to a ring-0 assertion.
5233 *
5234 * @returns VBox status code.
5235 * @param pVCpu The cross context virtual CPU structure.
5236 */
5237VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
5238{
5239 /*
5240 * !!! IMPORTANT !!!
5241 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
5242 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
5243 */
5244 VMMR0AssertionRemoveNotification(pVCpu);
5245 VMMRZCallRing3Disable(pVCpu);
5246 HM_DISABLE_PREEMPT(pVCpu);
5247
5248 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5249 vmxHCImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
5250 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
5251 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
5252
5253 /* Restore host-state bits that VT-x only restores partially. */
5254 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
5255 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
5256 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
5257
5258 /* Restore the lazy host MSRs as we're leaving VT-x context. */
5259 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
5260 hmR0VmxLazyRestoreHostMsrs(pVCpu);
5261
5262 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5263 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5264 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5265
5266 /* Clear the current VMCS data back to memory (shadow VMCS if any would have been
5267 cleared as part of importing the guest state above). */
5268 hmR0VmxClearVmcs(pVmcsInfo);
5269
5270 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
5271 VMMR0ThreadCtxHookDisable(pVCpu);
5272
5273 /* Leave HM context. This takes care of local init (term). */
5274 HMR0LeaveCpu(pVCpu);
5275 HM_RESTORE_PREEMPT();
5276 return VINF_SUCCESS;
5277}
5278
5279
5280/**
5281 * Enters the VT-x session.
5282 *
5283 * @returns VBox status code.
5284 * @param pVCpu The cross context virtual CPU structure.
5285 */
5286VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
5287{
5288 AssertPtr(pVCpu);
5289 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
5290 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5291
5292 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5293 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5294 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5295
5296#ifdef VBOX_STRICT
5297 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
5298 RTCCUINTREG uHostCr4 = ASMGetCR4();
5299 if (!(uHostCr4 & X86_CR4_VMXE))
5300 {
5301 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
5302 return VERR_VMX_X86_CR4_VMXE_CLEARED;
5303 }
5304#endif
5305
5306 /*
5307 * Do the EMT scheduled L1D and MDS flush here if needed.
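     * The L1D flush (via IA32_FLUSH_CMD) mitigates L1TF, while the MDS clearing
     * sequence mitigates microarchitectural data sampling leaks before (re)entering
     * guest context.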
5308 */
5309 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5310 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5311 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5312 hmR0MdsClear();
5313
5314 /*
5315 * Load the appropriate VMCS as the current and active one.
5316 */
5317 PVMXVMCSINFO pVmcsInfo;
5318 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5319 if (!fInNestedGuestMode)
5320 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5321 else
5322 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5323 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5324 if (RT_SUCCESS(rc))
5325 {
5326 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5327 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5328 pVCpu->hmr0.s.fLeaveDone = false;
5329 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5330 }
5331 return rc;
5332}
5333
5334
5335/**
5336 * The thread-context callback.
5337 *
5338 * This is used together with RTThreadCtxHookCreate() on platforms which
5339 * support it, and directly from VMMR0EmtPrepareForBlocking() and
5340 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5341 *
5342 * @param enmEvent The thread-context event.
5343 * @param pVCpu The cross context virtual CPU structure.
5344 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5345 * @thread EMT(pVCpu)
5346 */
5347VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5348{
5349 AssertPtr(pVCpu);
5350 RT_NOREF1(fGlobalInit);
5351
5352 switch (enmEvent)
5353 {
5354 case RTTHREADCTXEVENT_OUT:
5355 {
5356 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5357 VMCPU_ASSERT_EMT(pVCpu);
5358
5359 /* No longjmps (logger flushes, locks) in this fragile context. */
5360 VMMRZCallRing3Disable(pVCpu);
5361 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5362
5363 /* Restore host-state (FPU, debug etc.) */
5364 if (!pVCpu->hmr0.s.fLeaveDone)
5365 {
5366 /*
5367 * Do -not- import the guest-state here as we might already be in the middle of importing
5368 * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
5369 */
5370 hmR0VmxLeave(pVCpu, false /* fImportState */);
5371 pVCpu->hmr0.s.fLeaveDone = true;
5372 }
5373
5374 /* Leave HM context, takes care of local init (term). */
5375 int rc = HMR0LeaveCpu(pVCpu);
5376 AssertRC(rc);
5377
5378 /* Restore longjmp state. */
5379 VMMRZCallRing3Enable(pVCpu);
5380 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5381 break;
5382 }
5383
5384 case RTTHREADCTXEVENT_IN:
5385 {
5386 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5387 VMCPU_ASSERT_EMT(pVCpu);
5388
5389 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5390 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5391 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5392 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5393 hmR0MdsClear();
5394
5395 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5396 VMMRZCallRing3Disable(pVCpu);
5397 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5398
5399 /* Initialize the bare minimum state required for HM. This takes care of
5400 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5401 int rc = hmR0EnterCpu(pVCpu);
5402 AssertRC(rc);
5403 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5404 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5405
5406 /* Load the active VMCS as the current one. */
5407 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5408 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5409 AssertRC(rc);
5410 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5411 pVCpu->hmr0.s.fLeaveDone = false;
5412
5413 /* Restore longjmp state. */
5414 VMMRZCallRing3Enable(pVCpu);
5415 break;
5416 }
5417
5418 default:
5419 break;
5420 }
5421}
5422
5423
5424/**
5425 * Exports the host state into the VMCS host-state area.
5426 * Sets up the VM-exit MSR-load area.
5427 *
5428 * The CPU state will be loaded from these fields on every successful VM-exit.
5429 *
5430 * @returns VBox status code.
5431 * @param pVCpu The cross context virtual CPU structure.
5432 *
5433 * @remarks No-long-jump zone!!!
5434 */
5435static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5436{
5437 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5438
5439 int rc = VINF_SUCCESS;
5440 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5441 {
5442 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5443
5444 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5445 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5446
5447 hmR0VmxExportHostMsrs(pVCpu);
5448
5449 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5450 }
5451 return rc;
5452}
5453
5454
5455/**
5456 * Saves the host state in the VMCS host-state.
5457 *
5458 * @returns VBox status code.
5459 * @param pVCpu The cross context virtual CPU structure.
5460 *
5461 * @remarks No-long-jump zone!!!
5462 */
5463VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5464{
5465 AssertPtr(pVCpu);
5466 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5467
5468 /*
5469 * Export the host state here while entering HM context.
5470 * When thread-context hooks are used, we might get preempted and have to re-save the host
5471 * state but most of the time we won't be, so do it here before we disable interrupts.
5472 */
5473 return hmR0VmxExportHostState(pVCpu);
5474}
5475
5476
5477/**
5478 * Exports the guest state into the VMCS guest-state area.
5479 *
5480 * This will typically be done before VM-entry when the guest-CPU state and the
5481 * VMCS state may potentially be out of sync.
5482 *
5483 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5484 * VM-entry controls.
5485 * Sets up the appropriate VMX non-root function to execute guest code based on
5486 * the guest CPU mode.
5487 *
5488 * @returns VBox strict status code.
5489 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5490 * without unrestricted guest execution and the VMMDev is not presently
5491 * mapped (e.g. EFI32).
5492 *
5493 * @param pVCpu The cross context virtual CPU structure.
5494 * @param pVmxTransient The VMX-transient structure.
5495 *
5496 * @remarks No-long-jump zone!!!
5497 */
5498static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5499{
5500 AssertPtr(pVCpu);
5501 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5502 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5503
5504 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5505
5506 /*
5507 * Determine real-on-v86 mode.
5508 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5509 */
5510 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5511 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5512 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5513 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5514 else
5515 {
5516 Assert(!pVmxTransient->fIsNestedGuest);
5517 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5518 }
5519
5520 /*
5521 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5522 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5523 */
5524 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5525 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5526
5527 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5528 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5529
5530 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5531 if (rcStrict == VINF_SUCCESS)
5532 { /* likely */ }
5533 else
5534 {
5535 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5536 return rcStrict;
5537 }
5538
5539 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5540 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5541
5542 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5543 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5544
5545 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5546 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5547 vmxHCExportGuestRip(pVCpu);
5548 hmR0VmxExportGuestRsp(pVCpu);
5549 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5550
5551 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5552 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5553
5554 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
5555 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5556 | HM_CHANGED_GUEST_CR2
5557 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5558 | HM_CHANGED_GUEST_X87
5559 | HM_CHANGED_GUEST_SSE_AVX
5560 | HM_CHANGED_GUEST_OTHER_XSAVE
5561 | HM_CHANGED_GUEST_XCRx
5562 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5563 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5564 | HM_CHANGED_GUEST_TSC_AUX
5565 | HM_CHANGED_GUEST_OTHER_MSRS
5566 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5567
5568 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5569 return rc;
5570}
5571
5572
5573/**
5574 * Exports the state shared between the host and guest into the VMCS.
5575 *
5576 * @param pVCpu The cross context virtual CPU structure.
5577 * @param pVmxTransient The VMX-transient structure.
5578 *
5579 * @remarks No-long-jump zone!!!
5580 */
5581static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5582{
5583 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5584 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5585
5586 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5587 {
5588 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5589 AssertRC(rc);
5590 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5591
5592 /* Loading shared debug bits might have changed the eflags.TF bit for debugging purposes. */
5593 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5594 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5595 }
5596
5597 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5598 {
5599 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5600 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5601 }
5602
5603 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5604 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5605}
5606
5607
5608/**
5609 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5610 *
5611 * @returns Strict VBox status code (i.e. informational status codes too).
5612 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5613 * without unrestricted guest execution and the VMMDev is not presently
5614 * mapped (e.g. EFI32).
5615 *
5616 * @param pVCpu The cross context virtual CPU structure.
5617 * @param pVmxTransient The VMX-transient structure.
5618 *
5619 * @remarks No-long-jump zone!!!
5620 */
5621static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5622{
5623 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5624 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5625
5626#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5627 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5628#endif
5629
5630 /*
5631 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5632 * changes. First try to export only these without going through all other changed-flag checks.
5633 */
5634 VBOXSTRICTRC rcStrict;
5635 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5636 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5637 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5638
5639 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often). */
5640 if ( (fCtxChanged & fMinimalMask)
5641 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5642 {
5643 vmxHCExportGuestRip(pVCpu);
5644 hmR0VmxExportGuestRsp(pVCpu);
5645 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5646 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5647 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5648 }
5649 /* If anything else also changed, go through the full export routine and export as required. */
5650 else if (fCtxChanged & fCtxMask)
5651 {
5652 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5653 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5654 { /* likely */}
5655 else
5656 {
5657 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5658 VBOXSTRICTRC_VAL(rcStrict)));
5659 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5660 return rcStrict;
5661 }
5662 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5663 }
5664 /* Nothing changed, nothing to load here. */
5665 else
5666 rcStrict = VINF_SUCCESS;
5667
5668#ifdef VBOX_STRICT
5669 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5670 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5671 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5672#endif
5673 return rcStrict;
5674}
5675
5676
5677/**
5678 * Map the APIC-access page for virtualizing APIC accesses.
5679 *
5680 * This can cause a longjmp to ring-3 due to the acquisition of the PGM lock. Hence,
5681 * this is not done as part of exporting guest state, see @bugref{8721}.
5682 *
5683 * @returns VBox status code.
5684 * @param pVCpu The cross context virtual CPU structure.
5685 * @param GCPhysApicBase The guest-physical address of the APIC access page.
5686 */
5687static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu, RTGCPHYS GCPhysApicBase)
5688{
5689 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5690 Assert(GCPhysApicBase);
5691
5692 LogFunc(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5693
5694 /* Unalias the existing mapping. */
5695 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5696 AssertRCReturn(rc, rc);
5697
5698 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5699 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5700 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5701 AssertRCReturn(rc, rc);
5702
5703 return VINF_SUCCESS;
5704}
5705
5706
5707/**
5708 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5709 * CPU.
5710 *
5711 * @param idCpu The ID for the CPU the function is called on.
5712 * @param pvUser1 Null, not used.
5713 * @param pvUser2 Null, not used.
5714 */
5715static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5716{
5717 RT_NOREF3(idCpu, pvUser1, pvUser2);
5718 VMXDispatchHostNmi();
5719}
5720
5721
5722/**
5723 * Dispatches an NMI on the host CPU that received it.
5724 *
5725 * @returns VBox status code.
5726 * @param pVCpu The cross context virtual CPU structure.
5727 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5728 * executing when receiving the host NMI in VMX non-root
5729 * operation.
5730 */
5731static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5732{
5733 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5734 Assert(idCpu != NIL_RTCPUID);
5735
5736 /*
5737 * We don't want to delay dispatching the NMI any more than we have to. However,
5738 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5739 * after executing guest or nested-guest code for the following reasons:
5740 *
5741 * - We would need to perform VMREADs with interrupts disabled, which is orders of
5742 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
5743 * supported by the host hypervisor.
5744 *
5745 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5746 * longer period of time just for handling an edge case like host NMIs which do
5747 * not occur nearly as frequently as other VM-exits.
5748 *
5749 * Let's cover the most likely scenario first. Check if we are on the target CPU
5750 * and dispatch the NMI right away. This should be much faster than calling into
5751 * RTMpOnSpecific() machinery.
5752 */
5753 bool fDispatched = false;
5754 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5755 if (idCpu == RTMpCpuId())
5756 {
5757 VMXDispatchHostNmi();
5758 fDispatched = true;
5759 }
5760 ASMSetFlags(fEFlags);
5761 if (fDispatched)
5762 {
5763 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5764 return VINF_SUCCESS;
5765 }
5766
5767 /*
5768 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5769 * there should be no race or recursion even if we are unlucky enough to be preempted
5770 * (to the target CPU) without dispatching the host NMI above.
5771 */
5772 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5773 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5774}
5775
5776
5777#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5778/**
5779 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5780 * nested-guest using hardware-assisted VMX.
5781 *
5782 * @param pVCpu The cross context virtual CPU structure.
5783 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5784 * @param pVmcsInfoGst The guest VMCS info. object.
5785 */
5786static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5787{
5788 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5789 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5790 Assert(pu64MsrBitmap);
5791
5792 /*
5793 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5794 * MSR that is intercepted by the guest is also intercepted while executing the
5795 * nested-guest using hardware-assisted VMX.
5796 *
5797 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5798 * nested-guest VM-exit even if the outer guest is not intercepting some
5799 * MSRs. We cannot assume the caller has initialized the nested-guest
5800 * MSR bitmap in this case.
5801 *
5802 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5803 * each of its VM-entries, hence initializing it once per-VM while setting
5804 * up the nested-guest VMCS is not sufficient.
5805 */
5806 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5807 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5808 {
5809 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5810 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5811 Assert(pu64MsrBitmapNstGst);
5812 Assert(pu64MsrBitmapGst);
5813
5814 /** @todo Detect and use EVEX.POR? */
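        /* A set bit in an MSR bitmap means "intercept / cause a VM-exit", so OR-ing the two
           bitmaps intercepts an MSR if either the outer guest or the nested hypervisor does. */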
5815 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5816 for (uint32_t i = 0; i < cFrags; i++)
5817 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5818 }
5819 else
5820 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5821}
5822
5823
5824/**
5825 * Merges the guest VMCS into the nested-guest VMCS controls in preparation of
5826 * hardware-assisted VMX execution of the nested-guest.
5827 *
5828 * For a guest, we don't modify these controls once we set up the VMCS and hence
5829 * this function is never called.
5830 *
5831 * For nested-guests, since the nested hypervisor provides these controls on every
5832 * nested-guest VM-entry and could potentially change them every time, we need to
5833 * merge them before every nested-guest VM-entry.
5834 *
5835 * @returns VBox status code.
5836 * @param pVCpu The cross context virtual CPU structure.
5837 */
5838static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5839{
5840 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5841 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5842 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5843
5844 /*
5845 * Merge the controls with the requirements of the guest VMCS.
5846 *
5847 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5848 * VMCS with the features supported by the physical CPU as it's already done by the
5849 * VMLAUNCH/VMRESUME instruction emulation.
5850 *
5851 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5852 * derived from the VMX features supported by the physical CPU.
5853 */
5854
5855 /* Pin-based VM-execution controls. */
5856 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5857
5858 /* Processor-based VM-execution controls. */
5859 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5860 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5861 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5862 | VMX_PROC_CTLS_MOV_DR_EXIT
5863 | VMX_PROC_CTLS_USE_TPR_SHADOW
5864 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5865
5866 /* Secondary processor-based VM-execution controls. */
5867 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5868 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5869 | VMX_PROC_CTLS2_INVPCID
5870 | VMX_PROC_CTLS2_VMCS_SHADOWING
5871 | VMX_PROC_CTLS2_RDTSCP
5872 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5873 | VMX_PROC_CTLS2_APIC_REG_VIRT
5874 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5875 | VMX_PROC_CTLS2_VMFUNC));
5876
5877 /*
5878 * VM-entry controls:
5879 * These controls contain state that depends on the nested-guest state (primarily
5880 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5881 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5882 * properly continue executing the nested-guest if the EFER MSR changes but does not
5883 * cause a nested-guest VM-exit.
5884 *
5885 * VM-exit controls:
5886 * These controls specify the host state on return. We cannot use the controls from
5887 * the nested hypervisor state as is as it would contain the guest state rather than
5888 * the host state. Since the host state is subject to change (e.g. preemption, trips
5889 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5890 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5891 *
5892 * VM-entry MSR-load:
5893 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5894 * context by the VMLAUNCH/VMRESUME instruction emulation.
5895 *
5896 * VM-exit MSR-store:
5897 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5898 * back into the VM-exit MSR-store area.
5899 *
5900 * VM-exit MSR-load areas:
5901 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5902 * can entirely ignore what the nested hypervisor wants to load here.
5903 */
5904
5905 /*
5906 * Exception bitmap.
5907 *
5908 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5909 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5910 * code more flexible if intercepting exceptions becomes more dynamic in the future, we do
5911 * it as part of exporting the nested-guest state.
5912 */
5913 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5914
5915 /*
5916 * CR0/CR4 guest/host mask.
5917 *
5918 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5919 * cause VM-exits, so we need to merge them here.
5920 */
5921 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5922 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
5923
5924 /*
5925 * Page-fault error-code mask and match.
5926 *
5927 * Although we require unrestricted guest execution (and thereby nested-paging) for
5928 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5929 * normally intercept #PFs, it might intercept them for debugging purposes.
5930 *
5931 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5932 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5933 */
5934 uint32_t u32XcptPFMask;
5935 uint32_t u32XcptPFMatch;
5936 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5937 {
5938 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5939 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5940 }
5941 else
5942 {
5943 u32XcptPFMask = 0;
5944 u32XcptPFMatch = 0;
5945 }
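
   /*
    * For reference (hedged summary of the SDM rule): when the #PF bit is set in the
    * exception bitmap, a guest #PF causes a VM-exit iff (error-code & PFEC_MASK) ==
    * PFEC_MATCH; when the bit is clear the condition is inverted. Hence the zero
    * mask/match chosen for the intercept-all case above makes every #PF match once
    * the #PF bit is set in the merged exception bitmap.
    */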
5946
5947 /*
5948 * Pause-Loop exiting.
5949 */
5950 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5951 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
5952 * this will work... */
5953 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5954 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5955
5956 /*
5957 * Pending debug exceptions.
5958 * Currently just copy whatever the nested-guest provides us.
5959 */
5960 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5961
5962 /*
5963 * I/O Bitmap.
5964 *
5965 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5966 * intercept all I/O port accesses.
5967 */
5968 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5969 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5970
5971 /*
5972 * VMCS shadowing.
5973 *
5974 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5975 * enabled while executing the nested-guest.
5976 */
5977 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5978
5979 /*
5980 * APIC-access page.
5981 */
5982 RTHCPHYS HCPhysApicAccess;
5983 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5984 {
5985 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5986 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5987
5988 /** @todo NSTVMX: This is not really correct but currently is required to make
5989 * things work. We need to re-enable the page handler when we fallback to
5990 * IEM execution of the nested-guest! */
5991 PGMHandlerPhysicalPageTempOff(pVM, GCPhysApicAccess, GCPhysApicAccess);
5992
5993 void *pvPage;
5994 PGMPAGEMAPLOCK PgLockApicAccess;
5995 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5996 if (RT_SUCCESS(rc))
5997 {
5998 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5999 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
6000
6001 /** @todo Handle proper releasing of page-mapping lock later. */
6002 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
6003 }
6004 else
6005 return rc;
6006 }
6007 else
6008 HCPhysApicAccess = 0;
6009
6010 /*
6011 * Virtual-APIC page and TPR threshold.
6012 */
6013 RTHCPHYS HCPhysVirtApic;
6014 uint32_t u32TprThreshold;
6015 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
6016 {
6017 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
6018 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
6019
6020 void *pvPage;
6021 PGMPAGEMAPLOCK PgLockVirtApic;
6022 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
6023 if (RT_SUCCESS(rc))
6024 {
6025 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
6026 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
6027
6028 /** @todo Handle proper releasing of page-mapping lock later. */
6029 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
6030 }
6031 else
6032 return rc;
6033
6034 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
6035 }
6036 else
6037 {
6038 HCPhysVirtApic = 0;
6039 u32TprThreshold = 0;
6040
6041 /*
6042 * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
6043 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
6044 * be taken care of by EPT/shadow paging.
6045 */
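       /* (Hedged note: CR8 is only architecturally accessible in 64-bit mode, which is
           presumably why these exiting controls are only forced for 64-bit guests.) */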
6046 if (pVM->hmr0.s.fAllow64BitGuests)
6047 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
6048 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
6049 }
6050
6051 /*
6052 * Validate basic assumptions.
6053 */
6054 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
6055 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
6056 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
6057 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
6058
6059 /*
6060 * Commit it to the nested-guest VMCS.
6061 */
6062 int rc = VINF_SUCCESS;
6063 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
6064 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
6065 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
6066 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
6067 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
6068 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
6069 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
6070 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
6071 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
6072 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
6073 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
6074 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
6075 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
6076 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
6077 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
6078 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
6079 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
6080 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
6081 {
6082 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
6083 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
6084 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
6085 }
6086 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
6087 {
6088 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
6089 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
6090 }
6091 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6092 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
6093 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
6094 AssertRC(rc);
6095
6096 /*
6097 * Update the nested-guest VMCS cache.
6098 */
6099 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
6100 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
6101 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
6102 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
6103 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
6104 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
6105 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
6106 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
6107 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
6108
6109 /*
6110 * We need to flush the TLB if we are switching the APIC-access page address.
6111 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
6112 */
6113 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6114 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
6115
6116 /*
6117 * MSR bitmap.
6118 *
6119 * The MSR bitmap address has already been initialized while setting up the nested-guest
6120 * VMCS, here we need to merge the MSR bitmaps.
6121 */
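   /* (Hedged note: the merge helper effectively ORs the two read/write intercept
       bitmaps, so an MSR access exits if either the outer-guest setup or the nested
       hypervisor wants it intercepted.) */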
6122 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
6123 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
6124
6125 return VINF_SUCCESS;
6126}
6127#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6128
6129
6130/**
6131 * Does the preparations before executing guest code in VT-x.
6132 *
6133 * This may cause longjmps to ring-3 and may even result in rescheduling to the
6134 * recompiler/IEM. We must be cautious about committing guest-state
6135 * information into the VMCS here, since we cannot assume that we will
6136 * assuredly end up executing the guest in VT-x mode.
6137 *
6138 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
6139 * the common-state (TRPM/forceflags), we must undo those changes so that the
6140 * recompiler/IEM can (and should) use them when it resumes guest execution.
6141 * Otherwise such operations must be done when we can no longer exit to ring-3.
6142 *
6143 * @returns Strict VBox status code (i.e. informational status codes too).
6144 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
6145 * have been disabled.
6146 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
6147 * pending events).
6148 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
6149 * double-fault into the guest.
6150 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
6151 * dispatched directly.
6152 * @retval VINF_* scheduling changes, we have to go back to ring-3.
6153 *
6154 * @param pVCpu The cross context virtual CPU structure.
6155 * @param pVmxTransient The VMX-transient structure.
6156 * @param fStepping Whether we are single-stepping the guest in the
6157 * hypervisor debugger. Makes us ignore some of the reasons
6158 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
6159 * if event dispatching took place.
6160 */
6161static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
6162{
6163 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6164
6165 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
6166
6167#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
6168 if (pVmxTransient->fIsNestedGuest)
6169 {
6170 RT_NOREF2(pVCpu, fStepping);
6171 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
6172 return VINF_EM_RESCHEDULE_REM;
6173 }
6174#endif
6175
6176 /*
6177 * Check and process force flag actions, some of which might require us to go back to ring-3.
6178 */
6179 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
6180 if (rcStrict == VINF_SUCCESS)
6181 {
6182 /* FFs don't get set all the time. */
6183#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6184 if ( pVmxTransient->fIsNestedGuest
6185 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6186 {
6187 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6188 return VINF_VMX_VMEXIT;
6189 }
6190#endif
6191 }
6192 else
6193 return rcStrict;
6194
6195 /*
6196 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
6197 */
6198 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6199 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
6200 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6201 && PDMHasApic(pVM))
6202 {
6203 /* Get the APIC base MSR from the virtual APIC device. */
6204 uint64_t const uApicBaseMsr = APICGetBaseMsrNoCheck(pVCpu);
6205
6206 /* Map the APIC access page. */
6207 int rc = hmR0VmxMapHCApicAccessPage(pVCpu, uApicBaseMsr & ~(RTGCPHYS)GUEST_PAGE_OFFSET_MASK);
6208 AssertRCReturn(rc, rc);
6209
6210 /* Update the per-VCPU cache of the APIC base MSR corresponding to the mapped APIC access page. */
6211 pVCpu->hm.s.vmx.u64GstMsrApicBase = uApicBaseMsr;
6212 }
6213
6214#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6215 /*
6216 * Merge guest VMCS controls with the nested-guest VMCS controls.
6217 *
6218 * Even if we have not executed the guest prior to this (e.g. when resuming from a
6219 * saved state), we should be okay with merging controls as we initialize the
6220 * guest VMCS controls as part of VM setup phase.
6221 */
6222 if ( pVmxTransient->fIsNestedGuest
6223 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
6224 {
6225 int rc = hmR0VmxMergeVmcsNested(pVCpu);
6226 AssertRCReturn(rc, rc);
6227 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
6228 }
6229#endif
6230
6231 /*
6232 * Evaluate events to be injected into the guest.
6233 *
6234 * Events in TRPM can be injected without inspecting the guest state.
6235 * If any new events (interrupts/NMI) are pending currently, we try to set up the
6236 * guest to cause a VM-exit the next time they are ready to receive the event.
6237 */
6238 if (TRPMHasTrap(pVCpu))
6239 vmxHCTrpmTrapToPendingEvent(pVCpu);
6240
6241 uint32_t fIntrState;
6242 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6243 &fIntrState);
6244
6245#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6246 /*
6247 * While evaluating pending events if something failed (unlikely) or if we were
6248 * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
6249 */
6250 if (rcStrict != VINF_SUCCESS)
6251 return rcStrict;
6252 if ( pVmxTransient->fIsNestedGuest
6253 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6254 {
6255 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6256 return VINF_VMX_VMEXIT;
6257 }
6258#else
6259 Assert(rcStrict == VINF_SUCCESS);
6260#endif
6261
6262 /*
6263 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
6264 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
6265 * also result in triple-faulting the VM.
6266 *
6267 * With nested-guests, the above does not apply since unrestricted guest execution is a
6268 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
6269 */
6270 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6271 fIntrState, fStepping);
6272 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6273 { /* likely */ }
6274 else
6275 {
6276 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
6277 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
6278 return rcStrict;
6279 }
6280
6281 /*
6282 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
6283 * import CR3 themselves. We will need to update it here, as even the above
6284 * vmxHCInjectPendingEvent() call may lazily import guest-CPU state on demand, causing
6285 * the below force flags to be set.
6286 */
6287 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6288 {
6289 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
6290 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
6291 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
6292 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
6293 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6294 }
6295
6296#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6297 /* Paranoia. */
6298 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6299#endif
6300
6301 /*
6302 * No longjmps to ring-3 from this point on!!!
6303 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
6304 * This also disables flushing of the R0-logger instance (if any).
6305 */
6306 VMMRZCallRing3Disable(pVCpu);
6307
6308 /*
6309 * Export the guest state bits.
6310 *
6311 * We cannot perform longjmps while loading the guest state because we do not preserve the
6312 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
6313 * CPU migration.
6314 *
6315 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
6316 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
6317 */
6318 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6319 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6320 { /* likely */ }
6321 else
6322 {
6323 VMMRZCallRing3Enable(pVCpu);
6324 return rcStrict;
6325 }
6326
6327 /*
6328 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6329 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6330 * preemption disabled for a while. Since this is purely to aid the
6331 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6332 * disable interrupts on NT.
6333 *
6334 * We need to check for force-flags that could've possibly been altered since we last
6335 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6336 * see @bugref{6398}).
6337 *
6338 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6339 * to ring-3 before executing guest code.
6340 */
6341 pVmxTransient->fEFlags = ASMIntDisableFlags();
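   /* (The returned flags are cached in the transient structure so the original
       interrupt state can be restored via ASMSetFlags() below and after the world
       switch.) */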
6342
6343 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6344 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6345 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6346 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6347 {
6348 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6349 {
6350#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6351 /*
6352 * If we are executing a nested-guest, make sure that we intercept subsequent
6353 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6354 * the VM-exit instruction emulation happy.
6355 */
6356 if (pVmxTransient->fIsNestedGuest)
6357 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6358#endif
6359
6360 /*
6361 * We've injected any pending events. This is really the point of no return (to ring-3).
6362 *
6363 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6364 * returns from this function, so do -not- enable them here.
6365 */
6366 pVCpu->hm.s.Event.fPending = false;
6367 return VINF_SUCCESS;
6368 }
6369
6370 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6371 rcStrict = VINF_EM_RAW_INTERRUPT;
6372 }
6373 else
6374 {
6375 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6376 rcStrict = VINF_EM_RAW_TO_R3;
6377 }
6378
6379 ASMSetFlags(pVmxTransient->fEFlags);
6380 VMMRZCallRing3Enable(pVCpu);
6381
6382 return rcStrict;
6383}
6384
6385
6386/**
6387 * Final preparations before executing guest code using hardware-assisted VMX.
6388 *
6389 * We can no longer get preempted to a different host CPU and there are no returns
6390 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6391 * failures); this function is not intended to fail barring unrecoverable hardware
6392 * errors.
6393 *
6394 * @param pVCpu The cross context virtual CPU structure.
6395 * @param pVmxTransient The VMX-transient structure.
6396 *
6397 * @remarks Called with preemption disabled.
6398 * @remarks No-long-jump zone!!!
6399 */
6400static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6401{
6402 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6403 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6404 Assert(!pVCpu->hm.s.Event.fPending);
6405
6406 /*
6407 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6408 */
6409 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6410 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6411
6412 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6413 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6414 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6415 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6416
6417 if (!CPUMIsGuestFPUStateActive(pVCpu))
6418 {
6419 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6420 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6421 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
6422 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6423 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6424 }
6425
6426 /*
6427 * Re-export the host state bits as we may've been preempted (only happens when
6428 * thread-context hooks are used or when the VM start function changes) or if
6429 * the host CR0 is modified while loading the guest FPU state above.
6430 *
6431 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6432 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6433 * see @bugref{8432}.
6434 *
6435 * This may also happen when switching to/from a nested-guest VMCS without leaving
6436 * ring-0.
6437 */
6438 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6439 {
6440 hmR0VmxExportHostState(pVCpu);
6441 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6442 }
6443 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6444
6445 /*
6446 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6447 */
6448 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6449 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6450 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
6451
6452 /*
6453 * Store status of the shared guest/host debug state at the time of VM-entry.
6454 */
6455 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6456 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6457
6458 /*
6459 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6460 * more than one conditional check. The post-run side of our code shall determine
6461 * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
6462 */
6463 if (pVmcsInfo->pbVirtApic)
6464 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6465
6466 /*
6467 * Update the host MSRs values in the VM-exit MSR-load area.
6468 */
6469 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6470 {
6471 if (pVmcsInfo->cExitMsrLoad > 0)
6472 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6473 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6474 }
6475
6476 /*
6477 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6478 * VMX-preemption timer based on the next virtual sync clock deadline.
6479 */
6480 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6481 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6482 {
6483 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6484 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6485 }
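   /* (Hedged note: the recomputation above is also forced when we are on a different
       host CPU than last time, presumably because the TSC offset and preemption-timer
       deadline were derived from the previous CPU's TSC.) */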
6486
6487 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6488 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6489 if (!fIsRdtscIntercepted)
6490 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6491 else
6492 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6493
6494 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6495 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6496 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6497 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6498 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6499 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6500
6501 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6502
6503 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6504 as we're about to start executing the guest. */
6505
6506 /*
6507 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6508 *
6509 * This is done this late as updating the TSC offsetting/preemption timer above
6510 * figures out if we can skip intercepting RDTSCP by calculating the number of
6511 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6512 */
6513 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6514 && !fIsRdtscIntercepted)
6515 {
6516 vmxHCImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6517
6518 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6519 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6520 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6521 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6522 AssertRC(rc);
6523 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6524 pVmxTransient->fRemoveTscAuxMsr = true;
6525 }
6526
6527#ifdef VBOX_STRICT
6528 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6529 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6530 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6531 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6532#endif
6533
6534#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6535 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6536 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6537 * see @bugref{9180#c54}. */
6538 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6539 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6540 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6541#endif
6542}
6543
6544
6545/**
6546 * First C routine invoked after running guest code using hardware-assisted VMX.
6547 *
6548 * @param pVCpu The cross context virtual CPU structure.
6549 * @param pVmxTransient The VMX-transient structure.
6550 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6551 *
6552 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6553 *
6554 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6555 * unconditionally when it is safe to do so.
6556 */
6557static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6558{
6559 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6560 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6561 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
6562 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6563 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6564 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6565
6566 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
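   /* When RDTSC is not intercepted the guest has been running with the VMCS TSC offset
      applied, i.e. roughly: guest_tsc = host_tsc_at_exit + u64TscOffset. For a nested
      guest the offset programmed into the VMCS is the combined one, so the nested
      hypervisor's own offset is removed again below to arrive at the outer guest TSC. */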
6567 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
6568 {
6569 uint64_t uGstTsc;
6570 if (!pVmxTransient->fIsNestedGuest)
6571 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6572 else
6573 {
6574 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6575 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
6576 }
6577 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6578 }
6579
6580 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6581 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6582 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6583
6584 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6585 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6586#ifdef VBOX_STRICT
6587 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6588#endif
6589 Assert(!ASMIntAreEnabled());
6590 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
6591 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6592
6593#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6594 /*
6595 * Clean all the VMCS fields in the transient structure before reading
6596 * anything from the VMCS.
6597 */
6598 pVmxTransient->uExitReason = 0;
6599 pVmxTransient->uExitIntErrorCode = 0;
6600 pVmxTransient->uExitQual = 0;
6601 pVmxTransient->uGuestLinearAddr = 0;
6602 pVmxTransient->uExitIntInfo = 0;
6603 pVmxTransient->cbExitInstr = 0;
6604 pVmxTransient->ExitInstrInfo.u = 0;
6605 pVmxTransient->uEntryIntInfo = 0;
6606 pVmxTransient->uEntryXcptErrorCode = 0;
6607 pVmxTransient->cbEntryInstr = 0;
6608 pVmxTransient->uIdtVectoringInfo = 0;
6609 pVmxTransient->uIdtVectoringErrorCode = 0;
6610#endif
6611
6612 /*
6613 * Save the basic VM-exit reason and check if the VM-entry failed.
6614 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6615 */
6616 uint32_t uExitReason;
6617 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6618 AssertRC(rc);
6619 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6620 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
6621
6622 /*
6623 * Log the VM-exit before logging anything else as otherwise it might be a
6624 * tad confusing what happens before and after the world-switch.
6625 */
6626 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6627
6628 /*
6629 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6630 * bitmap permissions, if it was added before VM-entry.
6631 */
6632 if (pVmxTransient->fRemoveTscAuxMsr)
6633 {
6634 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6635 pVmxTransient->fRemoveTscAuxMsr = false;
6636 }
6637
6638 /*
6639 * Check if VMLAUNCH/VMRESUME succeeded.
6640 * If this failed, we cause a guru meditation and cease further execution.
6641 */
6642 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6643 {
6644 /*
6645 * Update the VM-exit history array here even if the VM-entry failed due to:
6646 * - Invalid guest state.
6647 * - MSR loading.
6648 * - Machine-check event.
6649 *
6650 * In any of the above cases we will still have a "valid" VM-exit reason
6651 * despite @a fVMEntryFailed being true.
6652 *
6653 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6654 *
6655 * Note! We don't have CS or RIP at this point. Will probably address that later
6656 * by amending the history entry added here.
6657 */
6658 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6659 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6660
6661 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6662 {
6663 VMMRZCallRing3Enable(pVCpu);
6664 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6665
6666#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6667 hmR0VmxReadAllRoFieldsVmcs(pVmxTransient);
6668#endif
6669
6670 /*
6671 * Always import the guest-interruptibility state as we need it while evaluating
6672 * events for injection on re-entry.
6673 *
6674 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6675 * checking for real-mode while exporting the state because all bits that cause
6676 * mode changes wrt CR0 are intercepted.
6677 */
6678 uint64_t const fImportMask = CPUMCTX_EXTRN_INHIBIT_INT
6679 | CPUMCTX_EXTRN_INHIBIT_NMI
6680#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6681 | HMVMX_CPUMCTX_EXTRN_ALL
6682#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6683 | CPUMCTX_EXTRN_RFLAGS
6684#endif
6685 ;
6686 rc = vmxHCImportGuestState(pVCpu, pVmcsInfo, fImportMask);
6687 AssertRC(rc);
6688
6689 /*
6690 * Sync the TPR shadow with our APIC state.
6691 */
6692 if ( !pVmxTransient->fIsNestedGuest
6693 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6694 {
6695 Assert(pVmcsInfo->pbVirtApic);
6696 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
6697 {
6698 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6699 AssertRC(rc);
6700 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6701 }
6702 }
6703
6704 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6705 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6706 || pVmxTransient->fWasHyperDebugStateActive == false);
6707 return;
6708 }
6709 }
6710#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6711 else if (pVmxTransient->fIsNestedGuest)
6712 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6713#endif
6714 else
6715 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6716
6717 VMMRZCallRing3Enable(pVCpu);
6718}
6719
6720
6721/**
6722 * Runs the guest code using hardware-assisted VMX the normal way.
6723 *
6724 * @returns VBox status code.
6725 * @param pVCpu The cross context virtual CPU structure.
6726 * @param pcLoops Pointer to the number of executed loops.
6727 */
6728static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6729{
6730 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6731 Assert(pcLoops);
6732 Assert(*pcLoops <= cMaxResumeLoops);
6733 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6734
6735#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6736 /*
6737 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6738 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6739 * guest VMCS while entering the VMX ring-0 session.
6740 */
6741 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6742 {
6743 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6744 if (RT_SUCCESS(rc))
6745 { /* likely */ }
6746 else
6747 {
6748 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6749 return rc;
6750 }
6751 }
6752#endif
6753
6754 VMXTRANSIENT VmxTransient;
6755 RT_ZERO(VmxTransient);
6756 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6757
6758 /* Paranoia. */
6759 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6760
6761 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6762 for (;;)
6763 {
6764 Assert(!HMR0SuspendPending());
6765 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6766 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6767
6768 /*
6769 * Preparatory work for running guest code; this may force us to
6770 * return to ring-3.
6771 *
6772 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6773 */
6774 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6775 if (rcStrict != VINF_SUCCESS)
6776 break;
6777
6778 /* Interrupts are disabled at this point! */
6779 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6780 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6781 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6782 /* Interrupts are re-enabled at this point! */
6783
6784 /*
6785 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6786 */
6787 if (RT_SUCCESS(rcRun))
6788 { /* very likely */ }
6789 else
6790 {
6791 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6792 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6793 return rcRun;
6794 }
6795
6796 /*
6797 * Profile the VM-exit.
6798 */
6799 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6800 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6801 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6802 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6803 HMVMX_START_EXIT_DISPATCH_PROF();
6804
6805 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6806
6807 /*
6808 * Handle the VM-exit.
6809 */
6810#ifdef HMVMX_USE_FUNCTION_TABLE
6811 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6812#else
6813 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6814#endif
6815 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6816 if (rcStrict == VINF_SUCCESS)
6817 {
6818 if (++(*pcLoops) <= cMaxResumeLoops)
6819 continue;
6820 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6821 rcStrict = VINF_EM_RAW_INTERRUPT;
6822 }
6823 break;
6824 }
6825
6826 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6827 return rcStrict;
6828}
6829
6830
6831#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6832/**
6833 * Runs the nested-guest code using hardware-assisted VMX.
6834 *
6835 * @returns VBox status code.
6836 * @param pVCpu The cross context virtual CPU structure.
6837 * @param pcLoops Pointer to the number of executed loops.
6838 *
6839 * @sa hmR0VmxRunGuestCodeNormal.
6840 */
6841static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
6842{
6843 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6844 Assert(pcLoops);
6845 Assert(*pcLoops <= cMaxResumeLoops);
6846 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6847
6848 /*
6849 * Switch to the nested-guest VMCS as we may have transitioned from executing the
6850 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
6851 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
6852 */
6853 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6854 {
6855 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
6856 if (RT_SUCCESS(rc))
6857 { /* likely */ }
6858 else
6859 {
6860 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
6861 return rc;
6862 }
6863 }
6864
6865 VMXTRANSIENT VmxTransient;
6866 RT_ZERO(VmxTransient);
6867 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6868 VmxTransient.fIsNestedGuest = true;
6869
6870 /* Paranoia. */
6871 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
6872
6873 /* Set up the pointer so PGM/IEM can query VM-exit auxiliary info. on demand in ring-0. */
6874 pVCpu->hmr0.s.vmx.pVmxTransient = &VmxTransient;
6875
6876 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6877 for (;;)
6878 {
6879 Assert(!HMR0SuspendPending());
6880 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6881 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6882
6883 /*
6884 * Preparatory work for running nested-guest code; this may force us to
6885 * return to ring-3.
6886 *
6887 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6888 */
6889 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6890 if (rcStrict != VINF_SUCCESS)
6891 break;
6892
6893 /* Interrupts are disabled at this point! */
6894 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6895 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6896 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6897 /* Interrupts are re-enabled at this point! */
6898
6899 /*
6900 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6901 */
6902 if (RT_SUCCESS(rcRun))
6903 { /* very likely */ }
6904 else
6905 {
6906 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6907 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6908 rcStrict = rcRun;
6909 break;
6910 }
6911
6912 /*
6913 * Undo temporary disabling of the APIC-access page monitoring we did in hmR0VmxMergeVmcsNested.
6914 * This is needed for NestedTrap0eHandler (and IEM) to cause nested-guest APIC-access VM-exits.
6915 */
6916 if (VmxTransient.pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6917 {
6918 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
6919 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
6920 PGMHandlerPhysicalReset(pVCpu->CTX_SUFF(pVM), GCPhysApicAccess);
6921 }
6922
6923 /*
6924 * Profile the VM-exit.
6925 */
6926 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6927 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
6928 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6929 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6930 HMVMX_START_EXIT_DISPATCH_PROF();
6931
6932 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6933
6934 /*
6935 * Handle the VM-exit.
6936 */
6937 rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
6938 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6939 if (rcStrict == VINF_SUCCESS)
6940 {
6941 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6942 {
6943 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6944 rcStrict = VINF_VMX_VMEXIT;
6945 }
6946 else
6947 {
6948 if (++(*pcLoops) <= cMaxResumeLoops)
6949 continue;
6950 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6951 rcStrict = VINF_EM_RAW_INTERRUPT;
6952 }
6953 }
6954 else
6955 Assert(rcStrict != VINF_VMX_VMEXIT);
6956 break;
6957 }
6958
6959 /* Ensure VM-exit auxiliary info. is no longer available. */
6960 pVCpu->hmr0.s.vmx.pVmxTransient = NULL;
6961
6962 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6963 return rcStrict;
6964}
6965#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6966
6967
6968/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6969 * probes.
6970 *
6971 * The following few functions and associated structure contain the bloat
6972 * necessary for providing detailed debug events and dtrace probes as well as
6973 * reliable host side single stepping. This works on the principle of
6974 * "subclassing" the normal execution loop and workers. We replace the loop
6975 * method completely and override selected helpers to add necessary adjustments
6976 * to their core operation.
6977 *
6978 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6979 * any performance for debug and analysis features.
6980 *
6981 * @{
6982 */
6983
6984/**
6985 * Single steps guest code using hardware-assisted VMX.
6986 *
6987 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
6988 * but single-stepping through the hypervisor debugger.
6989 *
6990 * @returns Strict VBox status code (i.e. informational status codes too).
6991 * @param pVCpu The cross context virtual CPU structure.
6992 * @param pcLoops Pointer to the number of executed loops.
6993 *
6994 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
6995 */
6996static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
6997{
6998 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6999 Assert(pcLoops);
7000 Assert(*pcLoops <= cMaxResumeLoops);
7001
7002 VMXTRANSIENT VmxTransient;
7003 RT_ZERO(VmxTransient);
7004 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
7005
7006 /* Set HMCPU indicators. */
7007 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
7008 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
7009 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7010 pVCpu->hmr0.s.fUsingDebugLoop = true;
7011
7012 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
7013 VMXRUNDBGSTATE DbgState;
7014 vmxHCRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
7015 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7016
7017 /*
7018 * The loop.
7019 */
7020 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
7021 for (;;)
7022 {
7023 Assert(!HMR0SuspendPending());
7024 HMVMX_ASSERT_CPU_SAFE(pVCpu);
7025 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7026 bool fStepping = pVCpu->hm.s.fSingleInstruction;
7027
7028 /* Set up VM-execution controls the next two can respond to. */
7029 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7030
7031 /*
7032 * Preparatory work for running guest code; this may force us to
7033 * return to ring-3.
7034 *
7035 * Warning! This bugger disables interrupts on VINF_SUCCESS!
7036 */
7037 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
7038 if (rcStrict != VINF_SUCCESS)
7039 break;
7040
7041 /* Interrupts are disabled at this point! */
7042 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
7043
7044 /* Override any obnoxious code in the above two calls. */
7045 vmxHCPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7046
7047 /*
7048 * Finally execute the guest.
7049 */
7050 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
7051
7052 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
7053 /* Interrupts are re-enabled at this point! */
7054
7055 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7056 if (RT_SUCCESS(rcRun))
7057 { /* very likely */ }
7058 else
7059 {
7060 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
7061 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
7062 return rcRun;
7063 }
7064
7065 /* Profile the VM-exit. */
7066 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7067 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
7068 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7069 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
7070 HMVMX_START_EXIT_DISPATCH_PROF();
7071
7072 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
7073
7074 /*
7075 * Handle the VM-exit - we quit earlier on certain VM-exits, see vmxHCRunDebugHandleExit().
7076 */
7077 rcStrict = vmxHCRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
7078 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
7079 if (rcStrict != VINF_SUCCESS)
7080 break;
7081 if (++(*pcLoops) > cMaxResumeLoops)
7082 {
7083 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
7084 rcStrict = VINF_EM_RAW_INTERRUPT;
7085 break;
7086 }
7087
7088 /*
7089 * Stepping: Did the RIP change? If so, consider it a single step.
7090 * Otherwise, make sure one of the TFs gets set.
7091 */
7092 if (fStepping)
7093 {
7094 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
7095 AssertRC(rc);
7096 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
7097 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
7098 {
7099 rcStrict = VINF_EM_DBG_STEPPED;
7100 break;
7101 }
7102 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
7103 }
7104
7105 /*
7106 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
7107 */
7108 * Update when dtrace settings change (DBGF kicks us, so no need to check).
7109 vmxHCPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7110
7111 /* Restore all controls applied by vmxHCPreRunGuestDebugStateApply above. */
7112 rcStrict = vmxHCRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
7113 Assert(rcStrict == VINF_SUCCESS);
7114 }
7115
7116 /*
7117 * Clear the X86_EFL_TF if necessary.
7118 */
7119 if (pVCpu->hmr0.s.fClearTrapFlag)
7120 {
7121 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
7122 AssertRC(rc);
7123 pVCpu->hmr0.s.fClearTrapFlag = false;
7124 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
7125 }
7126 /** @todo there seem to be issues with the resume flag when the monitor trap
7127 * flag is pending without being used. Seen early in bios init when
7128 * accessing APIC page in protected mode. */
7129
7130/** @todo we need to do hmR0VmxRunDebugStateRevert here too, in case we broke
7131 * out of the above loop. */
7132
7133 /* Restore HMCPU indicators. */
7134 pVCpu->hmr0.s.fUsingDebugLoop = false;
7135 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7136 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
7137
7138 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7139 return rcStrict;
7140}
7141
7142/** @} */
7143
7144
7145/**
7146 * Checks if any expensive dtrace probes are enabled and we should go to the
7147 * debug loop.
7148 *
7149 * @returns true if we should use debug loop, false if not.
7150 */
7151static bool hmR0VmxAnyExpensiveProbesEnabled(void)
7152{
7153 /* It's probably faster to OR the raw 32-bit counter variables together.
7154 Since the variables are in an array and the probes are next to one
7155 another (more or less), we have good locality. So, better read
7156 eight or nine cache lines every time and only have one conditional, than
7157 128+ conditionals, right? */
7158 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
7159 | VBOXVMM_XCPT_DE_ENABLED_RAW()
7160 | VBOXVMM_XCPT_DB_ENABLED_RAW()
7161 | VBOXVMM_XCPT_BP_ENABLED_RAW()
7162 | VBOXVMM_XCPT_OF_ENABLED_RAW()
7163 | VBOXVMM_XCPT_BR_ENABLED_RAW()
7164 | VBOXVMM_XCPT_UD_ENABLED_RAW()
7165 | VBOXVMM_XCPT_NM_ENABLED_RAW()
7166 | VBOXVMM_XCPT_DF_ENABLED_RAW()
7167 | VBOXVMM_XCPT_TS_ENABLED_RAW()
7168 | VBOXVMM_XCPT_NP_ENABLED_RAW()
7169 | VBOXVMM_XCPT_SS_ENABLED_RAW()
7170 | VBOXVMM_XCPT_GP_ENABLED_RAW()
7171 | VBOXVMM_XCPT_PF_ENABLED_RAW()
7172 | VBOXVMM_XCPT_MF_ENABLED_RAW()
7173 | VBOXVMM_XCPT_AC_ENABLED_RAW()
7174 | VBOXVMM_XCPT_XF_ENABLED_RAW()
7175 | VBOXVMM_XCPT_VE_ENABLED_RAW()
7176 | VBOXVMM_XCPT_SX_ENABLED_RAW()
7177 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
7178 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
7179 ) != 0
7180 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
7181 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
7182 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
7183 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
7184 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
7185 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
7186 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
7187 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
7188 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
7189 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
7190 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
7191 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
7192 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
7193 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
7194 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
7195 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
7196 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
7197 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
7198 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
7199 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
7200 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
7201 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
7202 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
7203 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
7204 | VBOXVMM_INSTR_STR_ENABLED_RAW()
7205 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
7206 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
7207 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
7208 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
7209 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
7210 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
7211 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
7212 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
7213 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
7214 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
7215 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
7216 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
7217 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
7218 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
7219 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
7220 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
7221 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
7222 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
7223 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
7224 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
7225 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
7226 ) != 0
7227 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
7228 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
7229 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
7230 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
7231 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
7232 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
7233 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
7234 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
7235 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
7236 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
7237 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
7238 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
7239 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
7240 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
7241 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
7242 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
7243 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
7244 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
7245 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
7246 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
7247 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
7248 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
7249 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
7250 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
7251 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
7252 | VBOXVMM_EXIT_STR_ENABLED_RAW()
7253 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
7254 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
7255 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
7256 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
7257 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
7258 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
7259 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
7260 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
7261 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
7262 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
7263 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
7264 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
7265 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
7266 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
7267 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
7268 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
7269 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
7270 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
7271 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
7272 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
7273 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
7274 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
7275 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
7276 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
7277 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
7278 ) != 0;
7279}
7280
7281
7282/**
7283 * Runs the guest using hardware-assisted VMX.
7284 *
7285 * @returns Strict VBox status code (i.e. informational status codes too).
7286 * @param pVCpu The cross context virtual CPU structure.
7287 */
7288VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
7289{
7290 AssertPtr(pVCpu);
7291 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7292 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7293 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7294 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
7295
7296 VBOXSTRICTRC rcStrict;
7297 uint32_t cLoops = 0;
7298 for (;;)
7299 {
7300#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7301 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
7302#else
7303 NOREF(pCtx);
7304 bool const fInNestedGuestMode = false;
7305#endif
7306 if (!fInNestedGuestMode)
7307 {
7308 if ( !pVCpu->hm.s.fUseDebugLoop
7309 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
7310 && !DBGFIsStepping(pVCpu)
7311 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
7312 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
7313 else
7314 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
7315 }
7316#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7317 else
7318 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
7319
7320 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
7321 {
7322 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
7323 continue;
7324 }
7325 if (rcStrict == VINF_VMX_VMEXIT)
7326 {
7327 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
7328 continue;
7329 }
7330#endif
7331 break;
7332 }
7333
7334 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
7335 switch (rcLoop)
7336 {
7337 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
7338 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
7339 }
7340
7341 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
7342 if (RT_FAILURE(rc2))
7343 {
7344 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
7345 rcStrict = rc2;
7346 }
7347 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
7348 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
7349 return rcStrict;
7350}
7351