VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@ 93705

Last change on this file since 93705 was 93574, checked in by vboxsync, 3 years ago

VMM/HM: Nested VMX: bugref:10092 Separate out VM-exit counters for each guest run-loop (normal, debug, nested).

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 345.0 KB
Line 
1/* $Id: HMVMXR0.cpp 93574 2022-02-03 11:27:27Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#define VMCPU_INCL_CPUM_GST_CTX
24#include <iprt/x86.h>
25#include <iprt/asm-amd64-x86.h>
26#include <iprt/thread.h>
27#include <iprt/mem.h>
28#include <iprt/mp.h>
29
30#include <VBox/vmm/pdmapi.h>
31#include <VBox/vmm/dbgf.h>
32#include <VBox/vmm/iem.h>
33#include <VBox/vmm/iom.h>
34#include <VBox/vmm/tm.h>
35#include <VBox/vmm/em.h>
36#include <VBox/vmm/gim.h>
37#include <VBox/vmm/apic.h>
38#include "HMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include <VBox/vmm/hmvmxinline.h>
41#include "HMVMXR0.h"
42#include "VMXInternal.h"
43#include "dtrace/VBoxVMM.h"
44
45#ifdef DEBUG_ramshankar
46# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
47# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
48# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
49# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
50# define HMVMX_ALWAYS_CLEAN_TRANSIENT
51# define HMVMX_ALWAYS_CHECK_GUEST_STATE
52# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
53# define HMVMX_ALWAYS_TRAP_PF
54# define HMVMX_ALWAYS_FLUSH_TLB
55# define HMVMX_ALWAYS_SWAP_EFER
56#endif
57
58
59/*********************************************************************************************************************************
60* Defined Constants And Macros *
61*********************************************************************************************************************************/
62
63
64/*********************************************************************************************************************************
65* Structures and Typedefs *
66*********************************************************************************************************************************/
67
/**
 * VMX page allocation information.
 *
 * One entry describes a single page that hmR0VmxPagesAllocZ() may allocate and
 * where the resulting host-physical and host-virtual addresses are to be stored.
 */
typedef struct
{
    uint32_t fValid; /**< Whether to allocate this page (e.g., based on a CPU feature). */
    uint32_t uPadding0; /**< Padding to ensure array of these structs are aligned to a multiple of 8. */
    PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
    PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
} VMXPAGEALLOCINFO;
/** Pointer to VMX page-allocation info. */
typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
/** Pointer to a const VMX page-allocation info. */
typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
/* Compile-time check that the structure size is a multiple of 8 bytes (see uPadding0). */
AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
83
84
85/*********************************************************************************************************************************
86* Internal Functions *
87*********************************************************************************************************************************/
88
89
90/*********************************************************************************************************************************
91* Global Variables *
92*********************************************************************************************************************************/
/* Forward declarations of file-local (static) helpers.
   NOTE(review): these are function prototypes although they sit under the
   "Global Variables" banner; the "Internal Functions" section above is empty. */
static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient);
static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo);
95
96
97/**
98 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
99 * @returns @c true if it's part of LBR stack, @c false otherwise.
100 *
101 * @param pVM The cross context VM structure.
102 * @param idMsr The MSR.
103 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
104 * Optional, can be NULL.
105 *
106 * @remarks Must only be called when LBR is enabled.
107 */
108DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
109{
110 Assert(pVM->hmr0.s.vmx.fLbr);
111 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
112 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
113 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
114 if (idxMsr < cLbrStack)
115 {
116 if (pidxMsr)
117 *pidxMsr = idxMsr;
118 return true;
119 }
120 return false;
121}
122
123
124/**
125 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
126 * @returns @c true if it's part of LBR stack, @c false otherwise.
127 *
128 * @param pVM The cross context VM structure.
129 * @param idMsr The MSR.
130 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
131 * Optional, can be NULL.
132 *
133 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
134 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
135 */
136DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
137{
138 Assert(pVM->hmr0.s.vmx.fLbr);
139 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
140 {
141 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
142 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
143 if (idxMsr < cLbrStack)
144 {
145 if (pidxMsr)
146 *pidxMsr = idxMsr;
147 return true;
148 }
149 }
150 return false;
151}
152
153
154/**
155 * Gets the active (in use) VMCS info. object for the specified VCPU.
156 *
157 * This is either the guest or nested-guest VMCS info. and need not necessarily
158 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
159 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
160 * current VMCS while returning to ring-3. However, the VMCS info. object for that
161 * VMCS would still be active and returned here so that we could dump the VMCS
162 * fields to ring-3 for diagnostics. This function is thus only used to
163 * distinguish between the nested-guest or guest VMCS.
164 *
165 * @returns The active VMCS information.
166 * @param pVCpu The cross context virtual CPU structure.
167 *
168 * @thread EMT.
169 * @remarks This function may be called with preemption or interrupts disabled!
170 */
171DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
172{
173 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
174 return &pVCpu->hmr0.s.vmx.VmcsInfo;
175 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
176}
177
178
179/**
180 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
181 * area.
182 *
183 * @returns @c true if it's different, @c false otherwise.
184 * @param pVmcsInfo The VMCS info. object.
185 */
186DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
187{
188 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
189 && pVmcsInfo->pvGuestMsrStore);
190}
191
192
193/**
194 * Sets the given Processor-based VM-execution controls.
195 *
196 * @param pVmxTransient The VMX-transient structure.
197 * @param uProcCtls The Processor-based VM-execution controls to set.
198 */
199static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
200{
201 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
202 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
203 {
204 pVmcsInfo->u32ProcCtls |= uProcCtls;
205 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
206 AssertRC(rc);
207 }
208}
209
210
211/**
212 * Removes the given Processor-based VM-execution controls.
213 *
214 * @param pVCpu The cross context virtual CPU structure.
215 * @param pVmxTransient The VMX-transient structure.
216 * @param uProcCtls The Processor-based VM-execution controls to remove.
217 *
218 * @remarks When executing a nested-guest, this will not remove any of the specified
219 * controls if the nested hypervisor has set any one of them.
220 */
221static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
222{
223 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
224 if (pVmcsInfo->u32ProcCtls & uProcCtls)
225 {
226#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
227 if ( !pVmxTransient->fIsNestedGuest
228 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
229#else
230 NOREF(pVCpu);
231 if (!pVmxTransient->fIsNestedGuest)
232#endif
233 {
234 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
235 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
236 AssertRC(rc);
237 }
238 }
239}
240
241
242/**
243 * Sets the TSC offset for the current VMCS.
244 *
245 * @param uTscOffset The TSC offset to set.
246 * @param pVmcsInfo The VMCS info. object.
247 */
248static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
249{
250 if (pVmcsInfo->u64TscOffset != uTscOffset)
251 {
252 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
253 AssertRC(rc);
254 pVmcsInfo->u64TscOffset = uTscOffset;
255 }
256}
257
258
259/**
260 * Loads the VMCS specified by the VMCS info. object.
261 *
262 * @returns VBox status code.
263 * @param pVmcsInfo The VMCS info. object.
264 *
265 * @remarks Can be called with interrupts disabled.
266 */
267static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
268{
269 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
270 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
271
272 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
273 if (RT_SUCCESS(rc))
274 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
275 return rc;
276}
277
278
279/**
280 * Clears the VMCS specified by the VMCS info. object.
281 *
282 * @returns VBox status code.
283 * @param pVmcsInfo The VMCS info. object.
284 *
285 * @remarks Can be called with interrupts disabled.
286 */
287static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
288{
289 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
290 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
291
292 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
293 if (RT_SUCCESS(rc))
294 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
295 return rc;
296}
297
298
299/**
300 * Checks whether the MSR belongs to the set of guest MSRs that we restore
301 * lazily while leaving VT-x.
302 *
303 * @returns true if it does, false otherwise.
304 * @param pVCpu The cross context virtual CPU structure.
305 * @param idMsr The MSR to check.
306 */
307static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
308{
309 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
310 {
311 switch (idMsr)
312 {
313 case MSR_K8_LSTAR:
314 case MSR_K6_STAR:
315 case MSR_K8_SF_MASK:
316 case MSR_K8_KERNEL_GS_BASE:
317 return true;
318 }
319 }
320 return false;
321}
322
323
324/**
325 * Loads a set of guests MSRs to allow read/passthru to the guest.
326 *
327 * The name of this function is slightly confusing. This function does NOT
328 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
329 * common prefix for functions dealing with "lazy restoration" of the shared
330 * MSRs.
331 *
332 * @param pVCpu The cross context virtual CPU structure.
333 *
334 * @remarks No-long-jump zone!!!
335 */
336static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
337{
338 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
339 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
340
341 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
342 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
343 {
344 /*
345 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
346 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
347 * we can skip a few MSR writes.
348 *
349 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
350 * guest MSR values in the guest-CPU context might be different to what's currently
351 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
352 * CPU, see @bugref{8728}.
353 */
354 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
355 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
356 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
357 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
358 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
359 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
360 {
361#ifdef VBOX_STRICT
362 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
363 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
364 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
365 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
366#endif
367 }
368 else
369 {
370 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
371 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
372 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
373 /* The system call flag mask register isn't as benign and accepting of all
374 values as the above, so mask it to avoid #GP'ing on corrupted input. */
375 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
376 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
377 }
378 }
379 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
380}
381
382
383/**
384 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
385 *
386 * @returns @c true if found, @c false otherwise.
387 * @param pVmcsInfo The VMCS info. object.
388 * @param idMsr The MSR to find.
389 */
390static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
391{
392 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
393 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
394 Assert(pMsrs);
395 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
396 for (uint32_t i = 0; i < cMsrs; i++)
397 {
398 if (pMsrs[i].u32Msr == idMsr)
399 return true;
400 }
401 return false;
402}
403
404
405/**
406 * Performs lazy restoration of the set of host MSRs if they were previously
407 * loaded with guest MSR values.
408 *
409 * @param pVCpu The cross context virtual CPU structure.
410 *
411 * @remarks No-long-jump zone!!!
412 * @remarks The guest MSRs should have been saved back into the guest-CPU
413 * context by hmR0VmxImportGuestState()!!!
414 */
415static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
416{
417 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
418 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
419
420 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
421 {
422 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
423 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
424 {
425 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
426 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
427 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
428 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
429 }
430 }
431 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
432}
433
434
435/**
436 * Sets pfnStartVm to the best suited variant.
437 *
438 * This must be called whenever anything changes relative to the hmR0VmXStartVm
439 * variant selection:
440 * - pVCpu->hm.s.fLoadSaveGuestXcr0
441 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
442 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
443 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
444 * - Perhaps: CPUMCTX.fXStateMask (windows only)
445 *
446 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
447 * cannot be changed at runtime.
448 */
449static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
450{
451 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
452 {
453 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
454 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
455 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
456 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
457 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
458 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
459 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
460 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
461 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
462 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
463 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
464 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
465 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
466 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
467 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
468 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
469 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
470 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
471 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
472 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
473 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
474 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
475 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
476 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
477 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
478 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
479 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
480 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
481 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
482 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
483 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
484 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
485 };
486 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
487 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
488 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
489 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
490 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
491 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
492 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
493 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
494}
495
496
497/**
498 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
499 * stack.
500 *
501 * @returns Strict VBox status code (i.e. informational status codes too).
502 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
503 * @param pVCpu The cross context virtual CPU structure.
504 * @param uValue The value to push to the guest stack.
505 */
506static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
507{
508 /*
509 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
510 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
511 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
512 */
513 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
514 if (pCtx->sp == 1)
515 return VINF_EM_RESET;
516 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
517 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
518 AssertRC(rc);
519 return rc;
520}
521
522
523/**
524 * Wrapper around VMXWriteVmcs16 taking a pVCpu parameter so VCC doesn't complain about
525 * unreferenced local parameters in the template code...
526 */
527DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t u16Val)
528{
529 RT_NOREF(pVCpu);
530 return VMXWriteVmcs16(uFieldEnc, u16Val);
531}
532
533
534/**
535 * Wrapper around VMXWriteVmcs32 taking a pVCpu parameter so VCC doesn't complain about
536 * unreferenced local parameters in the template code...
537 */
538DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t u32Val)
539{
540 RT_NOREF(pVCpu);
541 return VMXWriteVmcs32(uFieldEnc, u32Val);
542}
543
544
545/**
546 * Wrapper around VMXWriteVmcs64 taking a pVCpu parameter so VCC doesn't complain about
547 * unreferenced local parameters in the template code...
548 */
549DECL_FORCE_INLINE(int) hmR0VmxWriteVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t u64Val)
550{
551 RT_NOREF(pVCpu);
552 return VMXWriteVmcs64(uFieldEnc, u64Val);
553}
554
555
556/**
557 * Wrapper around VMXReadVmcs16 taking a pVCpu parameter so VCC doesn't complain about
558 * unreferenced local parameters in the template code...
559 */
560DECL_FORCE_INLINE(int) hmR0VmxReadVmcs16(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint16_t *pu16Val)
561{
562 RT_NOREF(pVCpu);
563 return VMXReadVmcs16(uFieldEnc, pu16Val);
564}
565
566
567/**
568 * Wrapper around VMXReadVmcs32 taking a pVCpu parameter so VCC doesn't complain about
569 * unreferenced local parameters in the template code...
570 */
571DECL_FORCE_INLINE(int) hmR0VmxReadVmcs32(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint32_t *pu32Val)
572{
573 RT_NOREF(pVCpu);
574 return VMXReadVmcs32(uFieldEnc, pu32Val);
575}
576
577
578/**
579 * Wrapper around VMXReadVmcs64 taking a pVCpu parameter so VCC doesn't complain about
580 * unreferenced local parameters in the template code...
581 */
582DECL_FORCE_INLINE(int) hmR0VmxReadVmcs64(PCVMCPUCC pVCpu, uint32_t uFieldEnc, uint64_t *pu64Val)
583{
584 RT_NOREF(pVCpu);
585 return VMXReadVmcs64(uFieldEnc, pu64Val);
586}
587
588
/*
 * Instantiate the code we share with the NEM darwin backend.
 *
 * The macros below adapt the shared template to this ring-0 backend: where the
 * per-VCPU VMX state and statistics live, how VM-wide feature flags are
 * queried, and which helpers perform VMCS reads and writes.  They are defined
 * right before including the template and (mostly) undefined again right after.
 */
/* Per-VCPU state and statistics both map onto the hm.s member. */
#define VCPU_2_VMXSTATE(a_pVCpu) (a_pVCpu)->hm.s
#define VCPU_2_VMXSTATS(a_pVCpu) (a_pVCpu)->hm.s

/* VM-wide feature queries, all answered from the ring-0 HM state (hmr0.s). */
#define VM_IS_VMX_UNRESTRICTED_GUEST(a_pVM) (a_pVM)->hmr0.s.vmx.fUnrestrictedGuest
#define VM_IS_VMX_NESTED_PAGING(a_pVM) (a_pVM)->hmr0.s.fNestedPaging
#define VM_IS_VMX_PREEMPT_TIMER_USED(a_pVM) (a_pVM)->hmr0.s.vmx.fUsePreemptTimer
#define VM_IS_VMX_LBR(a_pVM) (a_pVM)->hmr0.s.vmx.fLbr

/* VMCS writers; the natural-width (NW) variant maps onto the 64-bit helper. */
#define VMX_VMCS_WRITE_16(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs16((a_pVCpu), (a_FieldEnc), (a_Val))
#define VMX_VMCS_WRITE_32(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs32((a_pVCpu), (a_FieldEnc), (a_Val))
#define VMX_VMCS_WRITE_64(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))
#define VMX_VMCS_WRITE_NW(a_pVCpu, a_FieldEnc, a_Val) hmR0VmxWriteVmcs64((a_pVCpu), (a_FieldEnc), (a_Val))

/* VMCS readers; the natural-width (NW) variant maps onto the 64-bit helper. */
#define VMX_VMCS_READ_16(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs16((a_pVCpu), (a_FieldEnc), (a_pVal))
#define VMX_VMCS_READ_32(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs32((a_pVCpu), (a_FieldEnc), (a_pVal))
#define VMX_VMCS_READ_64(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))
#define VMX_VMCS_READ_NW(a_pVCpu, a_FieldEnc, a_pVal) hmR0VmxReadVmcs64((a_pVCpu), (a_FieldEnc), (a_pVal))

#include "../VMMAll/VMXAllTemplate.cpp.h"

/* Drop the template configuration again. */
#undef VMX_VMCS_WRITE_16
#undef VMX_VMCS_WRITE_32
#undef VMX_VMCS_WRITE_64
#undef VMX_VMCS_WRITE_NW

#undef VMX_VMCS_READ_16
#undef VMX_VMCS_READ_32
#undef VMX_VMCS_READ_64
#undef VMX_VMCS_READ_NW

/* NOTE(review): VM_IS_VMX_LBR is defined above but, unlike its siblings, is not
   #undef'd here - confirm whether later code relies on it before changing this. */
#undef VM_IS_VMX_PREEMPT_TIMER_USED
#undef VM_IS_VMX_NESTED_PAGING
#undef VM_IS_VMX_UNRESTRICTED_GUEST
#undef VCPU_2_VMXSTATS
#undef VCPU_2_VMXSTATE
627
628
629/**
630 * Updates the VM's last error record.
631 *
632 * If there was a VMX instruction error, reads the error data from the VMCS and
633 * updates VCPU's last error record as well.
634 *
635 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
636 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
637 * VERR_VMX_INVALID_VMCS_FIELD.
638 * @param rc The error code.
639 */
640static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
641{
642 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
643 || rc == VERR_VMX_UNABLE_TO_START_VM)
644 {
645 AssertPtrReturnVoid(pVCpu);
646 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
647 }
648 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
649}
650
651
652/**
653 * Enters VMX root mode operation on the current CPU.
654 *
655 * @returns VBox status code.
656 * @param pHostCpu The HM physical-CPU structure.
657 * @param pVM The cross context VM structure. Can be
658 * NULL, after a resume.
659 * @param HCPhysCpuPage Physical address of the VMXON region.
660 * @param pvCpuPage Pointer to the VMXON region.
661 */
662static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
663{
664 Assert(pHostCpu);
665 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
666 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
667 Assert(pvCpuPage);
668 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
669
670 if (pVM)
671 {
672 /* Write the VMCS revision identifier to the VMXON region. */
673 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
674 }
675
676 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
677 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
678
679 /* Enable the VMX bit in CR4 if necessary. */
680 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
681
682 /* Record whether VMXE was already prior to us enabling it above. */
683 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
684
685 /* Enter VMX root mode. */
686 int rc = VMXEnable(HCPhysCpuPage);
687 if (RT_FAILURE(rc))
688 {
689 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
690 if (!pHostCpu->fVmxeAlreadyEnabled)
691 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
692
693 if (pVM)
694 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
695 }
696
697 /* Restore interrupts. */
698 ASMSetFlags(fEFlags);
699 return rc;
700}
701
702
703/**
704 * Exits VMX root mode operation on the current CPU.
705 *
706 * @returns VBox status code.
707 * @param pHostCpu The HM physical-CPU structure.
708 */
709static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
710{
711 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
712
713 /* Paranoid: Disable interrupts as, in theory, interrupts handlers might mess with CR4. */
714 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
715
716 /* If we're for some reason not in VMX root mode, then don't leave it. */
717 RTCCUINTREG const uHostCr4 = ASMGetCR4();
718
719 int rc;
720 if (uHostCr4 & X86_CR4_VMXE)
721 {
722 /* Exit VMX root mode and clear the VMX bit in CR4. */
723 VMXDisable();
724
725 /* Clear CR4.VMXE only if it was clear prior to use setting it. */
726 if (!pHostCpu->fVmxeAlreadyEnabled)
727 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
728
729 rc = VINF_SUCCESS;
730 }
731 else
732 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
733
734 /* Restore interrupts. */
735 ASMSetFlags(fEFlags);
736 return rc;
737}
738
739
740/**
741 * Allocates pages specified as specified by an array of VMX page allocation info
742 * objects.
743 *
744 * The pages contents are zero'd after allocation.
745 *
746 * @returns VBox status code.
747 * @param phMemObj Where to return the handle to the allocation.
748 * @param paAllocInfo The pointer to the first element of the VMX
749 * page-allocation info object array.
750 * @param cEntries The number of elements in the @a paAllocInfo array.
751 */
752static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
753{
754 *phMemObj = NIL_RTR0MEMOBJ;
755
756 /* Figure out how many pages to allocate. */
757 uint32_t cPages = 0;
758 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
759 cPages += !!paAllocInfo[iPage].fValid;
760
761 /* Allocate the pages. */
762 if (cPages)
763 {
764 size_t const cbPages = cPages << HOST_PAGE_SHIFT;
765 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
766 if (RT_FAILURE(rc))
767 return rc;
768
769 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
770 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
771 RT_BZERO(pvFirstPage, cbPages);
772
773 uint32_t iPage = 0;
774 for (uint32_t i = 0; i < cEntries; i++)
775 if (paAllocInfo[i].fValid)
776 {
777 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
778 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
779 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
780 AssertPtr(pvPage);
781
782 Assert(paAllocInfo[iPage].pHCPhys);
783 Assert(paAllocInfo[iPage].ppVirt);
784 *paAllocInfo[iPage].pHCPhys = HCPhysPage;
785 *paAllocInfo[iPage].ppVirt = pvPage;
786
787 /* Move to next page. */
788 ++iPage;
789 }
790
791 /* Make sure all valid (requested) pages have been assigned. */
792 Assert(iPage == cPages);
793 }
794 return VINF_SUCCESS;
795}
796
797
798/**
799 * Frees pages allocated using hmR0VmxPagesAllocZ.
800 *
801 * @param phMemObj Pointer to the memory object handle. Will be set to
802 * NIL.
803 */
804DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
805{
806 /* We can cleanup wholesale since it's all one allocation. */
807 if (*phMemObj != NIL_RTR0MEMOBJ)
808 {
809 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
810 *phMemObj = NIL_RTR0MEMOBJ;
811 }
812}
813
814
815/**
816 * Initializes a VMCS info. object.
817 *
818 * @param pVmcsInfo The VMCS info. object.
819 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
820 */
821static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
822{
823 RT_ZERO(*pVmcsInfo);
824 RT_ZERO(*pVmcsInfoShared);
825
826 pVmcsInfo->pShared = pVmcsInfoShared;
827 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
828 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
829 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
830 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
831 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
832 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
833 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
834 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
835 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
836 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
837 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
838 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
839}
840
841
842/**
843 * Frees the VT-x structures for a VMCS info. object.
844 *
845 * @param pVmcsInfo The VMCS info. object.
846 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
847 */
848static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
849{
850 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
851 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
852}
853
854
855/**
856 * Allocates the VT-x structures for a VMCS info. object.
857 *
858 * @returns VBox status code.
859 * @param pVCpu The cross context virtual CPU structure.
860 * @param pVmcsInfo The VMCS info. object.
861 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
862 *
863 * @remarks The caller is expected to take care of any and all allocation failures.
864 * This function will not perform any cleanup for failures half-way
865 * through.
866 */
867static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
868{
869 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
870
871 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
872 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
873 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
874 VMXPAGEALLOCINFO aAllocInfo[] =
875 {
876 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
877 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
878 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
879 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
880 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
881 };
882
883 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
884 if (RT_FAILURE(rc))
885 return rc;
886
887 /*
888 * We use the same page for VM-entry MSR-load and VM-exit MSR store areas.
889 * Because they contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
890 */
891 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
892 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
893 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
894 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
895
896 /*
897 * Get the virtual-APIC page rather than allocating them again.
898 */
899 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
900 {
901 if (!fIsNstGstVmcs)
902 {
903 if (PDMHasApic(pVM))
904 {
905 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
906 if (RT_FAILURE(rc))
907 return rc;
908 Assert(pVmcsInfo->pbVirtApic);
909 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
910 }
911 }
912 else
913 {
914 pVmcsInfo->pbVirtApic = &pVCpu->cpum.GstCtx.hwvirt.vmx.abVirtApicPage[0];
915 pVmcsInfo->HCPhysVirtApic = GVMMR0ConvertGVMPtr2HCPhys(pVM, pVmcsInfo->pbVirtApic);
916 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
917 }
918 }
919
920 return VINF_SUCCESS;
921}
922
923
924/**
925 * Free all VT-x structures for the VM.
926 *
927 * @returns IPRT status code.
928 * @param pVM The cross context VM structure.
929 */
930static void hmR0VmxStructsFree(PVMCC pVM)
931{
932 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
933#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
934 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
935 {
936 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
937 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
938 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
939 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
940 }
941#endif
942
943 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
944 {
945 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
946 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
947#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
948 if (pVM->cpum.ro.GuestFeatures.fVmx)
949 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
950#endif
951 }
952}
953
954
955/**
956 * Allocate all VT-x structures for the VM.
957 *
958 * @returns IPRT status code.
959 * @param pVM The cross context VM structure.
960 *
961 * @remarks This functions will cleanup on memory allocation failures.
962 */
963static int hmR0VmxStructsAlloc(PVMCC pVM)
964{
965 /*
966 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
967 * The VMCS size cannot be more than 4096 bytes.
968 *
969 * See Intel spec. Appendix A.1 "Basic VMX Information".
970 */
971 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
972 if (cbVmcs <= X86_PAGE_4K_SIZE)
973 { /* likely */ }
974 else
975 {
976 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
977 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
978 }
979
980 /*
981 * Allocate per-VM VT-x structures.
982 */
983 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
984 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
985 VMXPAGEALLOCINFO aAllocInfo[] =
986 {
987 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
988 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
989 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
990#ifdef VBOX_WITH_CRASHDUMP_MAGIC
991 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
992#endif
993 };
994
995 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
996 if (RT_SUCCESS(rc))
997 {
998#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
999 /* Allocate the shadow VMCS-fields array. */
1000 if (fUseVmcsShadowing)
1001 {
1002 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
1003 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
1004 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1005 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
1006 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
1007 rc = VERR_NO_MEMORY;
1008 }
1009#endif
1010
1011 /*
1012 * Allocate per-VCPU VT-x structures.
1013 */
1014 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
1015 {
1016 /* Allocate the guest VMCS structures. */
1017 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1018 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
1019
1020#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1021 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
1022 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
1023 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
1024#endif
1025 }
1026 if (RT_SUCCESS(rc))
1027 return VINF_SUCCESS;
1028 }
1029 hmR0VmxStructsFree(pVM);
1030 return rc;
1031}
1032
1033
1034/**
1035 * Pre-initializes non-zero fields in VMX structures that will be allocated.
1036 *
1037 * @param pVM The cross context VM structure.
1038 */
1039static void hmR0VmxStructsInit(PVMCC pVM)
1040{
1041 /* Paranoia. */
1042 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
1043#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1044 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
1045#endif
1046
1047 /*
1048 * Initialize members up-front so we can cleanup en masse on allocation failures.
1049 */
1050#ifdef VBOX_WITH_CRASHDUMP_MAGIC
1051 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
1052#endif
1053 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
1054 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
1055 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
1056 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1057 {
1058 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
1059 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
1060 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
1061 }
1062}
1063
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
/**
 * Checks whether the intercept bit of an MSR is set in an MSR bitmap.
 *
 * @returns @c true if the MSR is intercepted (bit set), @c false otherwise.
 * @param   pbMsrBitmap     The MSR bitmap.
 * @param   offMsr          The MSR byte offset into the bitmap.
 * @param   iBit            The bit offset, relative to the byte offset.
 */
DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
{
    /* The byte holding the bit must not lie beyond the bitmap page. */
    Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);

    uint8_t const *pbStart = pbMsrBitmap + offMsr;
    return ASMBitTest(pbStart, iBit);
}
#endif
1079
1080/**
1081 * Sets the permission bits for the specified MSR in the given MSR bitmap.
1082 *
1083 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
1084 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
1085 * VMX execution of the nested-guest, only if nested-guest is also not intercepting
1086 * the read/write access of this MSR.
1087 *
1088 * @param pVCpu The cross context virtual CPU structure.
1089 * @param pVmcsInfo The VMCS info. object.
1090 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1091 * @param idMsr The MSR value.
1092 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
1093 * include both a read -and- a write permission!
1094 *
1095 * @sa CPUMGetVmxMsrPermission.
1096 * @remarks Can be called with interrupts disabled.
1097 */
1098static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
1099{
1100 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
1101 Assert(pbMsrBitmap);
1102 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
1103
1104 /*
1105 * MSR-bitmap Layout:
1106 * Byte index MSR range Interpreted as
1107 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
1108 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
1109 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
1110 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
1111 *
1112 * A bit corresponding to an MSR within the above range causes a VM-exit
1113 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls out of
1114 * the MSR range, it always cause a VM-exit.
1115 *
1116 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
1117 */
1118 uint16_t const offBitmapRead = 0;
1119 uint16_t const offBitmapWrite = 0x800;
1120 uint16_t offMsr;
1121 int32_t iBit;
1122 if (idMsr <= UINT32_C(0x00001fff))
1123 {
1124 offMsr = 0;
1125 iBit = idMsr;
1126 }
1127 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
1128 {
1129 offMsr = 0x400;
1130 iBit = idMsr - UINT32_C(0xc0000000);
1131 }
1132 else
1133 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
1134
1135 /*
1136 * Set the MSR read permission.
1137 */
1138 uint16_t const offMsrRead = offBitmapRead + offMsr;
1139 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
1140 if (fMsrpm & VMXMSRPM_ALLOW_RD)
1141 {
1142#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1143 bool const fClear = !fIsNstGstVmcs ? true
1144 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
1145#else
1146 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1147 bool const fClear = true;
1148#endif
1149 if (fClear)
1150 ASMBitClear(pbMsrBitmap + offMsrRead, iBit);
1151 }
1152 else
1153 ASMBitSet(pbMsrBitmap + offMsrRead, iBit);
1154
1155 /*
1156 * Set the MSR write permission.
1157 */
1158 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
1159 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
1160 if (fMsrpm & VMXMSRPM_ALLOW_WR)
1161 {
1162#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1163 bool const fClear = !fIsNstGstVmcs ? true
1164 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
1165#else
1166 RT_NOREF2(pVCpu, fIsNstGstVmcs);
1167 bool const fClear = true;
1168#endif
1169 if (fClear)
1170 ASMBitClear(pbMsrBitmap + offMsrWrite, iBit);
1171 }
1172 else
1173 ASMBitSet(pbMsrBitmap + offMsrWrite, iBit);
1174}
1175
1176
1177/**
1178 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1179 * area.
1180 *
1181 * @returns VBox status code.
1182 * @param pVCpu The cross context virtual CPU structure.
1183 * @param pVmcsInfo The VMCS info. object.
1184 * @param cMsrs The number of MSRs.
1185 */
1186static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
1187{
1188 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1189 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
1190 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
1191 {
1192 /* Commit the MSR counts to the VMCS and update the cache. */
1193 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
1194 {
1195 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1196 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
1197 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
1198 pVmcsInfo->cEntryMsrLoad = cMsrs;
1199 pVmcsInfo->cExitMsrStore = cMsrs;
1200 pVmcsInfo->cExitMsrLoad = cMsrs;
1201 }
1202 return VINF_SUCCESS;
1203 }
1204
1205 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
1206 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1207 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1208}
1209
1210
1211/**
1212 * Adds a new (or updates the value of an existing) guest/host MSR
1213 * pair to be swapped during the world-switch as part of the
1214 * auto-load/store MSR area in the VMCS.
1215 *
1216 * @returns VBox status code.
1217 * @param pVCpu The cross context virtual CPU structure.
1218 * @param pVmxTransient The VMX-transient structure.
1219 * @param idMsr The MSR.
1220 * @param uGuestMsrValue Value of the guest MSR.
1221 * @param fSetReadWrite Whether to set the guest read/write access of this
1222 * MSR (thus not causing a VM-exit).
1223 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1224 * necessary.
1225 */
1226static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
1227 bool fSetReadWrite, bool fUpdateHostMsr)
1228{
1229 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1230 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1231 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1232 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1233 uint32_t i;
1234
1235 /* Paranoia. */
1236 Assert(pGuestMsrLoad);
1237
1238#ifndef DEBUG_bird
1239 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
1240#endif
1241
1242 /* Check if the MSR already exists in the VM-entry MSR-load area. */
1243 for (i = 0; i < cMsrs; i++)
1244 {
1245 if (pGuestMsrLoad[i].u32Msr == idMsr)
1246 break;
1247 }
1248
1249 bool fAdded = false;
1250 if (i == cMsrs)
1251 {
1252 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
1253 ++cMsrs;
1254 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1255 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
1256
1257 /* Set the guest to read/write this MSR without causing VM-exits. */
1258 if ( fSetReadWrite
1259 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
1260 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
1261
1262 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1263 fAdded = true;
1264 }
1265
1266 /* Update the MSR value for the newly added or already existing MSR. */
1267 pGuestMsrLoad[i].u32Msr = idMsr;
1268 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
1269
1270 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
1271 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1272 {
1273 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1274 pGuestMsrStore[i].u32Msr = idMsr;
1275 pGuestMsrStore[i].u64Value = uGuestMsrValue;
1276 }
1277
1278 /* Update the corresponding slot in the host MSR area. */
1279 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1280 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
1281 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
1282 pHostMsr[i].u32Msr = idMsr;
1283
1284 /*
1285 * Only if the caller requests to update the host MSR value AND we've newly added the
1286 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
1287 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
1288 *
1289 * We do this for performance reasons since reading MSRs may be quite expensive.
1290 */
1291 if (fAdded)
1292 {
1293 if (fUpdateHostMsr)
1294 {
1295 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1296 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1297 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
1298 }
1299 else
1300 {
1301 /* Someone else can do the work. */
1302 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1303 }
1304 }
1305 return VINF_SUCCESS;
1306}
1307
1308
1309/**
1310 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1311 * auto-load/store MSR area in the VMCS.
1312 *
1313 * @returns VBox status code.
1314 * @param pVCpu The cross context virtual CPU structure.
1315 * @param pVmxTransient The VMX-transient structure.
1316 * @param idMsr The MSR.
1317 */
1318static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
1319{
1320 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1321 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
1322 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1323 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
1324
1325#ifndef DEBUG_bird
1326 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
1327#endif
1328
1329 for (uint32_t i = 0; i < cMsrs; i++)
1330 {
1331 /* Find the MSR. */
1332 if (pGuestMsrLoad[i].u32Msr == idMsr)
1333 {
1334 /*
1335 * If it's the last MSR, we only need to reduce the MSR count.
1336 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
1337 */
1338 if (i < cMsrs - 1)
1339 {
1340 /* Remove it from the VM-entry MSR-load area. */
1341 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
1342 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
1343
1344 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
1345 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
1346 {
1347 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1348 Assert(pGuestMsrStore[i].u32Msr == idMsr);
1349 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
1350 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
1351 }
1352
1353 /* Remove it from the VM-exit MSR-load area. */
1354 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1355 Assert(pHostMsr[i].u32Msr == idMsr);
1356 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
1357 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
1358 }
1359
1360 /* Reduce the count to reflect the removed MSR and bail. */
1361 --cMsrs;
1362 break;
1363 }
1364 }
1365
1366 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
1367 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
1368 {
1369 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
1370 AssertRCReturn(rc, rc);
1371
1372 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1373 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1374 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
1375
1376 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
1377 return VINF_SUCCESS;
1378 }
1379
1380 return VERR_NOT_FOUND;
1381}
1382
1383
1384/**
1385 * Updates the value of all host MSRs in the VM-exit MSR-load area.
1386 *
1387 * @param pVCpu The cross context virtual CPU structure.
1388 * @param pVmcsInfo The VMCS info. object.
1389 *
1390 * @remarks No-long-jump zone!!!
1391 */
1392static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1393{
1394 RT_NOREF(pVCpu);
1395 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1396
1397 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1398 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
1399 Assert(pHostMsrLoad);
1400 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
1401 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
1402 for (uint32_t i = 0; i < cMsrs; i++)
1403 {
1404 /*
1405 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1406 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1407 */
1408 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
1409 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
1410 else
1411 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
1412 }
1413}
1414
1415
1416/**
1417 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1418 * perform lazy restoration of the host MSRs while leaving VT-x.
1419 *
1420 * @param pVCpu The cross context virtual CPU structure.
1421 *
1422 * @remarks No-long-jump zone!!!
1423 */
1424static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
1425{
1426 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1427
1428 /*
1429 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
1430 */
1431 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
1432 {
1433 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
1434 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
1435 {
1436 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
1437 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
1438 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
1439 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1440 }
1441 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1442 }
1443}
1444
1445
1446#ifdef VBOX_STRICT
1447
1448/**
1449 * Verifies that our cached host EFER MSR value has not changed since we cached it.
1450 *
1451 * @param pVmcsInfo The VMCS info. object.
1452 */
1453static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
1454{
1455 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1456
1457 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
1458 {
1459 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
1460 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
1461 uint64_t uVmcsEferMsrVmcs;
1462 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
1463 AssertRC(rc);
1464
1465 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
1466 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
1467 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
1468 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
1469 }
1470}
1471
1472
1473/**
1474 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1475 * VMCS are correct.
1476 *
1477 * @param pVCpu The cross context virtual CPU structure.
1478 * @param pVmcsInfo The VMCS info. object.
1479 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1480 */
1481static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1482{
1483 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1484
1485 /* Read the various MSR-area counts from the VMCS. */
1486 uint32_t cEntryLoadMsrs;
1487 uint32_t cExitStoreMsrs;
1488 uint32_t cExitLoadMsrs;
1489 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
1490 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
1491 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
1492
1493 /* Verify all the MSR counts are the same. */
1494 Assert(cEntryLoadMsrs == cExitStoreMsrs);
1495 Assert(cExitStoreMsrs == cExitLoadMsrs);
1496 uint32_t const cMsrs = cExitLoadMsrs;
1497
1498 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
1499 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
1500
1501 /* Verify the MSR counts are within the allocated page size. */
1502 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
1503
1504 /* Verify the relevant contents of the MSR areas match. */
1505 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
1506 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
1507 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
1508 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
1509 for (uint32_t i = 0; i < cMsrs; i++)
1510 {
1511 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1512 if (fSeparateExitMsrStorePage)
1513 {
1514 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
1515 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
1516 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
1517 }
1518
1519 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
1520 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
1521 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
1522
1523 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
1524 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
1525 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1526 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
1527
1528 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
1529 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
1530 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
1531 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
1532
1533 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
1534 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
1535 {
1536 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
1537 if (fIsEferMsr)
1538 {
1539 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
1540 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
1541 }
1542 else
1543 {
1544 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
1545 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1546 if ( pVM->hmr0.s.vmx.fLbr
1547 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1548 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
1549 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
1550 {
1551 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
1552 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
1553 pGuestMsrLoad->u32Msr, cMsrs));
1554 }
1555 else if (!fIsNstGstVmcs)
1556 {
1557 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
1558 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
1559 }
1560 else
1561 {
1562 /*
1563 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
1564 * execute a nested-guest with MSR passthrough.
1565 *
1566 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
1567 * allow passthrough too.
1568 */
1569 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
1570 Assert(pvMsrBitmapNstGst);
1571 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
1572 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
1573 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
1574 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
1575 }
1576 }
1577 }
1578
1579 /* Move to the next MSR. */
1580 pHostMsrLoad++;
1581 pGuestMsrLoad++;
1582 pGuestMsrStore++;
1583 }
1584}
1585
1586#endif /* VBOX_STRICT */
1587
1588/**
1589 * Flushes the TLB using EPT.
1590 *
1591 * @returns VBox status code.
1592 * @param pVCpu The cross context virtual CPU structure of the calling
1593 * EMT. Can be NULL depending on @a enmTlbFlush.
1594 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
1595 * enmTlbFlush.
1596 * @param enmTlbFlush Type of flush.
1597 *
1598 * @remarks Caller is responsible for making sure this function is called only
1599 * when NestedPaging is supported and providing @a enmTlbFlush that is
1600 * supported by the CPU.
1601 * @remarks Can be called with interrupts disabled.
1602 */
1603static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
1604{
1605 uint64_t au64Descriptor[2];
1606 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
1607 au64Descriptor[0] = 0;
1608 else
1609 {
1610 Assert(pVCpu);
1611 Assert(pVmcsInfo);
1612 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
1613 }
1614 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1615
1616 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
1617 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
1618
1619 if ( RT_SUCCESS(rc)
1620 && pVCpu)
1621 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1622}
1623
1624
1625/**
1626 * Flushes the TLB using VPID.
1627 *
1628 * @returns VBox status code.
1629 * @param pVCpu The cross context virtual CPU structure of the calling
1630 * EMT. Can be NULL depending on @a enmTlbFlush.
1631 * @param enmTlbFlush Type of flush.
1632 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1633 * on @a enmTlbFlush).
1634 *
1635 * @remarks Can be called with interrupts disabled.
1636 */
1637static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
1638{
1639 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
1640
1641 uint64_t au64Descriptor[2];
1642 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1643 {
1644 au64Descriptor[0] = 0;
1645 au64Descriptor[1] = 0;
1646 }
1647 else
1648 {
1649 AssertPtr(pVCpu);
1650 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1651 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
1652 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
1653 au64Descriptor[1] = GCPtr;
1654 }
1655
1656 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
1657 AssertMsg(rc == VINF_SUCCESS,
1658 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
1659
1660 if ( RT_SUCCESS(rc)
1661 && pVCpu)
1662 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1663 NOREF(rc);
1664}
1665
1666
1667/**
1668 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
1669 * otherwise there is nothing really to invalidate.
1670 *
1671 * @returns VBox status code.
1672 * @param pVCpu The cross context virtual CPU structure.
1673 * @param GCVirt Guest virtual address of the page to invalidate.
1674 */
1675VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
1676{
1677 AssertPtr(pVCpu);
1678 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
1679
1680 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
1681 {
1682 /*
1683 * We must invalidate the guest TLB entry in either case, we cannot ignore it even for
1684 * the EPT case. See @bugref{6043} and @bugref{6177}.
1685 *
1686 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
1687 * as this function maybe called in a loop with individual addresses.
1688 */
1689 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1690 if (pVM->hmr0.s.vmx.fVpid)
1691 {
1692 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1693 {
1694 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
1695 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1696 }
1697 else
1698 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1699 }
1700 else if (pVM->hmr0.s.fNestedPaging)
1701 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1702 }
1703
1704 return VINF_SUCCESS;
1705}
1706
1707
1708/**
1709 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1710 * case where neither EPT nor VPID is supported by the CPU.
1711 *
1712 * @param pHostCpu The HM physical-CPU structure.
1713 * @param pVCpu The cross context virtual CPU structure.
1714 *
1715 * @remarks Called with interrupts disabled.
1716 */
1717static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1718{
1719 AssertPtr(pVCpu);
1720 AssertPtr(pHostCpu);
1721
1722 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1723
1724 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1725 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1726 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1727 pVCpu->hmr0.s.fForceTLBFlush = false;
1728 return;
1729}
1730
1731
1732/**
1733 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1734 *
1735 * @param pHostCpu The HM physical-CPU structure.
1736 * @param pVCpu The cross context virtual CPU structure.
1737 * @param pVmcsInfo The VMCS info. object.
1738 *
1739 * @remarks All references to "ASID" in this function pertains to "VPID" in Intel's
1740 * nomenclature. The reason is, to avoid confusion in compare statements
1741 * since the host-CPU copies are named "ASID".
1742 *
1743 * @remarks Called with interrupts disabled.
1744 */
1745static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1746{
1747#ifdef VBOX_WITH_STATISTICS
1748 bool fTlbFlushed = false;
1749# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1750# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1751 if (!fTlbFlushed) \
1752 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1753 } while (0)
1754#else
1755# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1756# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1757#endif
1758
1759 AssertPtr(pVCpu);
1760 AssertPtr(pHostCpu);
1761 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1762
1763 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1764 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
1765 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1766 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
1767
1768 /*
1769 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
1770 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1771 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1772 * cannot reuse the current ASID anymore.
1773 */
1774 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1775 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1776 {
1777 ++pHostCpu->uCurrentAsid;
1778 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1779 {
1780 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1781 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1782 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1783 }
1784
1785 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1786 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1787 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1788
1789 /*
1790 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1791 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1792 */
1793 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1794 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1795 HMVMX_SET_TAGGED_TLB_FLUSHED();
1796 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1797 }
1798 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
1799 {
1800 /*
1801 * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU
1802 * creates guest-physical (ie. only EPT-tagged) mappings while traversing the EPT
1803 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
1804 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
1805 * mappings, see @bugref{6568}.
1806 *
1807 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
1808 */
1809 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1810 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1811 HMVMX_SET_TAGGED_TLB_FLUSHED();
1812 }
1813 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1814 {
1815 /*
1816 * The nested-guest specifies its own guest-physical address to use as the APIC-access
1817 * address which requires flushing the TLB of EPT cached structures.
1818 *
1819 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
1820 */
1821 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
1822 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1823 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1824 HMVMX_SET_TAGGED_TLB_FLUSHED();
1825 }
1826
1827
1828 pVCpu->hmr0.s.fForceTLBFlush = false;
1829 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1830
1831 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
1832 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
1833 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1834 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1835 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1836 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1837 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1838 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1839 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1840
1841 /* Update VMCS with the VPID. */
1842 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1843 AssertRC(rc);
1844
1845#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1846}
1847
1848
1849/**
1850 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1851 *
1852 * @param pHostCpu The HM physical-CPU structure.
1853 * @param pVCpu The cross context virtual CPU structure.
1854 * @param pVmcsInfo The VMCS info. object.
1855 *
1856 * @remarks Called with interrupts disabled.
1857 */
1858static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
1859{
1860 AssertPtr(pVCpu);
1861 AssertPtr(pHostCpu);
1862 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1863 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
1864 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
1865
1866 /*
1867 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1868 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1869 */
1870 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1871 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1872 {
1873 pVCpu->hmr0.s.fForceTLBFlush = true;
1874 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1875 }
1876
1877 /* Check for explicit TLB flushes. */
1878 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1879 {
1880 pVCpu->hmr0.s.fForceTLBFlush = true;
1881 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1882 }
1883
1884 /* Check for TLB flushes while switching to/from a nested-guest. */
1885 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1886 {
1887 pVCpu->hmr0.s.fForceTLBFlush = true;
1888 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1889 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1890 }
1891
1892 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1893 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1894
1895 if (pVCpu->hmr0.s.fForceTLBFlush)
1896 {
1897 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
1898 pVCpu->hmr0.s.fForceTLBFlush = false;
1899 }
1900}
1901
1902
1903/**
1904 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1905 *
1906 * @param pHostCpu The HM physical-CPU structure.
1907 * @param pVCpu The cross context virtual CPU structure.
1908 *
1909 * @remarks Called with interrupts disabled.
1910 */
1911static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
1912{
1913 AssertPtr(pVCpu);
1914 AssertPtr(pHostCpu);
1915 Assert(pHostCpu->idCpu != NIL_RTCPUID);
1916 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
1917 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
1918
1919 /*
1920 * Force a TLB flush for the first world switch if the current CPU differs from the one we
1921 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
1922 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
1923 * cannot reuse the current ASID anymore.
1924 */
1925 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
1926 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
1927 {
1928 pVCpu->hmr0.s.fForceTLBFlush = true;
1929 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1930 }
1931
1932 /* Check for explicit TLB flushes. */
1933 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1934 {
1935 /*
1936 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
1937 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
1938 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
1939 * include fExplicitFlush's too) - an obscure corner case.
1940 */
1941 pVCpu->hmr0.s.fForceTLBFlush = true;
1942 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1943 }
1944
1945 /* Check for TLB flushes while switching to/from a nested-guest. */
1946 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
1947 {
1948 pVCpu->hmr0.s.fForceTLBFlush = true;
1949 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
1950 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
1951 }
1952
1953 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1954 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
1955 if (pVCpu->hmr0.s.fForceTLBFlush)
1956 {
1957 ++pHostCpu->uCurrentAsid;
1958 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
1959 {
1960 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1961 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1962 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1963 }
1964
1965 pVCpu->hmr0.s.fForceTLBFlush = false;
1966 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
1967 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
1968 if (pHostCpu->fFlushAsidBeforeUse)
1969 {
1970 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
1971 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
1972 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
1973 {
1974 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
1975 pHostCpu->fFlushAsidBeforeUse = false;
1976 }
1977 else
1978 {
1979 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
1980 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
1981 }
1982 }
1983 }
1984
1985 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
1986 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
1987 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
1988 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
1989 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
1990 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
1991 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
1992
1993 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
1994 AssertRC(rc);
1995}
1996
1997
1998/**
1999 * Flushes the guest TLB entry based on CPU capabilities.
2000 *
2001 * @param pHostCpu The HM physical-CPU structure.
2002 * @param pVCpu The cross context virtual CPU structure.
2003 * @param pVmcsInfo The VMCS info. object.
2004 *
2005 * @remarks Called with interrupts disabled.
2006 */
2007static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2008{
2009#ifdef HMVMX_ALWAYS_FLUSH_TLB
2010 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2011#endif
2012 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2013 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
2014 {
2015 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
2016 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
2017 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
2018 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
2019 default:
2020 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2021 break;
2022 }
2023 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2024}
2025
2026
2027/**
2028 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2029 * TLB entries from the host TLB before VM-entry.
2030 *
2031 * @returns VBox status code.
2032 * @param pVM The cross context VM structure.
2033 */
2034static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
2035{
2036 /*
2037 * Determine optimal flush type for nested paging.
2038 * We cannot ignore EPT if no suitable flush-types is supported by the CPU as we've already setup
2039 * unrestricted guest execution (see hmR3InitFinalizeR0()).
2040 */
2041 if (pVM->hmr0.s.fNestedPaging)
2042 {
2043 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2044 {
2045 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2046 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
2047 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2048 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
2049 else
2050 {
2051 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
2052 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2053 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2054 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2055 }
2056
2057 /* Make sure the write-back cacheable memory type for EPT is supported. */
2058 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
2059 {
2060 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2061 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2062 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2063 }
2064
2065 /* EPT requires a page-walk length of 4. */
2066 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2067 {
2068 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2069 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2070 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2071 }
2072 }
2073 else
2074 {
2075 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2076 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
2077 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2078 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2079 }
2080 }
2081
2082 /*
2083 * Determine optimal flush type for VPID.
2084 */
2085 if (pVM->hmr0.s.vmx.fVpid)
2086 {
2087 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2088 {
2089 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2090 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
2091 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2092 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
2093 else
2094 {
2095 /* Neither SINGLE nor ALL-context flush types for VPID is supported by the CPU. Ignore VPID capability. */
2096 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2097 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
2098 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2099 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2100 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2101 pVM->hmr0.s.vmx.fVpid = false;
2102 }
2103 }
2104 else
2105 {
2106 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2107 Log4Func(("VPID supported without INVEPT support. Ignoring VPID.\n"));
2108 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
2109 pVM->hmr0.s.vmx.fVpid = false;
2110 }
2111 }
2112
2113 /*
2114 * Setup the handler for flushing tagged-TLBs.
2115 */
2116 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
2117 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
2118 else if (pVM->hmr0.s.fNestedPaging)
2119 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
2120 else if (pVM->hmr0.s.vmx.fVpid)
2121 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
2122 else
2123 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
2124
2125
2126 /*
2127 * Copy out the result to ring-3.
2128 */
2129 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
2130 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
2131 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
2132 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
2133 return VINF_SUCCESS;
2134}
2135
2136
2137/**
2138 * Sets up the LBR MSR ranges based on the host CPU.
2139 *
2140 * @returns VBox status code.
2141 * @param pVM The cross context VM structure.
2142 */
2143static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
2144{
2145 Assert(pVM->hmr0.s.vmx.fLbr);
2146 uint32_t idLbrFromIpMsrFirst;
2147 uint32_t idLbrFromIpMsrLast;
2148 uint32_t idLbrToIpMsrFirst;
2149 uint32_t idLbrToIpMsrLast;
2150 uint32_t idLbrTosMsr;
2151
2152 /*
2153 * Determine the LBR MSRs supported for this host CPU family and model.
2154 *
2155 * See Intel spec. 17.4.8 "LBR Stack".
2156 * See Intel "Model-Specific Registers" spec.
2157 */
2158 uint32_t const uFamilyModel = (pVM->cpum.ro.HostFeatures.uFamily << 8)
2159 | pVM->cpum.ro.HostFeatures.uModel;
2160 switch (uFamilyModel)
2161 {
2162 case 0x0f01: case 0x0f02:
2163 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
2164 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
2165 idLbrToIpMsrFirst = 0x0;
2166 idLbrToIpMsrLast = 0x0;
2167 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
2168 break;
2169
2170 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
2171 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
2172 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
2173 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2174 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
2175 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2176 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
2177 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2178 break;
2179
2180 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
2181 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
2182 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
2183 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
2184 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
2185 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
2186 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
2187 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
2188 idLbrTosMsr = MSR_LASTBRANCH_TOS;
2189 break;
2190
2191 case 0x0617: case 0x061d: case 0x060f:
2192 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
2193 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
2194 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
2195 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
2196 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
2197 break;
2198
2199 /* Atom and related microarchitectures we don't care about:
2200 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
2201 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
2202 case 0x0636: */
2203 /* All other CPUs: */
2204 default:
2205 {
2206 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
2207 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
2208 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2209 }
2210 }
2211
2212 /*
2213 * Validate.
2214 */
2215 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
2216 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
2217 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
2218 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
2219 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
2220 {
2221 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
2222 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
2223 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2224 }
2225 NOREF(pVCpu0);
2226
2227 /*
2228 * Update the LBR info. to the VM struct. for use later.
2229 */
2230 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
2231
2232 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
2233 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
2234
2235 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
2236 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
2237 return VINF_SUCCESS;
2238}
2239
2240
2241#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2242/**
2243 * Sets up the shadow VMCS fields arrays.
2244 *
2245 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
2246 * executing the guest.
2247 *
2248 * @returns VBox status code.
2249 * @param pVM The cross context VM structure.
2250 */
2251static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
2252{
2253 /*
2254 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
2255 * when the host does not support it.
2256 */
2257 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
2258 if ( !fGstVmwriteAll
2259 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
2260 { /* likely. */ }
2261 else
2262 {
2263 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
2264 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
2265 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2266 }
2267
2268 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
2269 uint32_t cRwFields = 0;
2270 uint32_t cRoFields = 0;
2271 for (uint32_t i = 0; i < cVmcsFields; i++)
2272 {
2273 VMXVMCSFIELD VmcsField;
2274 VmcsField.u = g_aVmcsFields[i];
2275
2276 /*
2277 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
2278 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
2279 * in the shadow VMCS fields array as they would be redundant.
2280 *
2281 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
2282 * we must not include it in the shadow VMCS fields array. Guests attempting to
2283 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
2284 * the required behavior.
2285 */
2286 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
2287 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
2288 {
2289 /*
2290 * Read-only fields are placed in a separate array so that while syncing shadow
2291 * VMCS fields later (which is more performance critical) we can avoid branches.
2292 *
2293 * However, if the guest can write to all fields (including read-only fields),
2294 * we treat it a as read/write field. Otherwise, writing to these fields would
2295 * cause a VMWRITE instruction error while syncing the shadow VMCS.
2296 */
2297 if ( fGstVmwriteAll
2298 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
2299 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
2300 else
2301 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
2302 }
2303 }
2304
2305 /* Update the counts. */
2306 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
2307 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
2308 return VINF_SUCCESS;
2309}
2310
2311
2312/**
2313 * Sets up the VMREAD and VMWRITE bitmaps.
2314 *
2315 * @param pVM The cross context VM structure.
2316 */
2317static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
2318{
2319 /*
2320 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
2321 */
2322 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
2323 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
2324 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
2325 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
2326 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
2327
2328 /*
2329 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
2330 * VMREAD and VMWRITE bitmaps.
2331 */
2332 {
2333 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
2334 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
2335 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
2336 {
2337 uint32_t const uVmcsField = paShadowVmcsFields[i];
2338 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2339 Assert(uVmcsField >> 3 < cbBitmap);
2340 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2341 ASMBitClear(pbVmwriteBitmap + (uVmcsField >> 3), uVmcsField & 7);
2342 }
2343 }
2344
2345 /*
2346 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
2347 * if the host supports VMWRITE to all supported VMCS fields.
2348 */
2349 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
2350 {
2351 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
2352 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
2353 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
2354 {
2355 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
2356 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
2357 Assert(uVmcsField >> 3 < cbBitmap);
2358 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
2359 }
2360 }
2361}
2362#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
2363
2364
2365/**
2366 * Sets up the virtual-APIC page address for the VMCS.
2367 *
2368 * @param pVmcsInfo The VMCS info. object.
2369 */
2370DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
2371{
2372 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
2373 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
2374 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2375 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
2376 AssertRC(rc);
2377}
2378
2379
2380/**
2381 * Sets up the MSR-bitmap address for the VMCS.
2382 *
2383 * @param pVmcsInfo The VMCS info. object.
2384 */
2385DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
2386{
2387 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
2388 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
2389 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2390 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
2391 AssertRC(rc);
2392}
2393
2394
2395/**
2396 * Sets up the APIC-access page address for the VMCS.
2397 *
2398 * @param pVCpu The cross context virtual CPU structure.
2399 */
2400DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
2401{
2402 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
2403 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
2404 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2405 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
2406 AssertRC(rc);
2407}
2408
2409#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2410
2411/**
2412 * Sets up the VMREAD bitmap address for the VMCS.
2413 *
2414 * @param pVCpu The cross context virtual CPU structure.
2415 */
2416DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
2417{
2418 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
2419 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
2420 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2421 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
2422 AssertRC(rc);
2423}
2424
2425
2426/**
2427 * Sets up the VMWRITE bitmap address for the VMCS.
2428 *
2429 * @param pVCpu The cross context virtual CPU structure.
2430 */
2431DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
2432{
2433 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
2434 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
2435 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2436 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
2437 AssertRC(rc);
2438}
2439
2440#endif
2441
2442/**
2443 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
2444 * in the VMCS.
2445 *
2446 * @returns VBox status code.
2447 * @param pVmcsInfo The VMCS info. object.
2448 */
2449DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
2450{
2451 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
2452 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
2453 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2454
2455 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
2456 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
2457 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
2458
2459 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
2460 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
2461 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
2462
2463 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
2464 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
2465 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
2466 return VINF_SUCCESS;
2467}
2468
2469
2470/**
2471 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
2472 *
2473 * @param pVCpu The cross context virtual CPU structure.
2474 * @param pVmcsInfo The VMCS info. object.
2475 */
2476static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2477{
2478 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
2479
2480 /*
2481 * By default, ensure guest attempts to access any MSR cause VM-exits.
2482 * This shall later be relaxed for specific MSRs as necessary.
2483 *
2484 * Note: For nested-guests, the entire bitmap will be merged prior to
2485 * executing the nested-guest using hardware-assisted VMX and hence there
2486 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
2487 */
2488 Assert(pVmcsInfo->pvMsrBitmap);
2489 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
2490
2491 /*
2492 * The guest can access the following MSRs (read, write) without causing
2493 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
2494 */
2495 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2496 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
2497 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
2498 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
2499 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2500 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
2501
2502 /*
2503 * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and has no state
2504 * associated with then. We never need to intercept access (writes need to be
2505 * executed without causing a VM-exit, reads will #GP fault anyway).
2506 *
2507 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
2508 * read/write them. We swap the guest/host MSR value using the
2509 * auto-load/store MSR area.
2510 */
2511 if (pVM->cpum.ro.GuestFeatures.fIbpb)
2512 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
2513 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
2514 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
2515 if (pVM->cpum.ro.GuestFeatures.fIbrs)
2516 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
2517
2518 /*
2519 * Allow full read/write access for the following MSRs (mandatory for VT-x)
2520 * required for 64-bit guests.
2521 */
2522 if (pVM->hmr0.s.fAllow64BitGuests)
2523 {
2524 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
2525 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
2526 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
2527 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
2528 }
2529
2530 /*
2531 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
2532 */
2533#ifdef VBOX_STRICT
2534 Assert(pVmcsInfo->pvMsrBitmap);
2535 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
2536 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
2537#endif
2538}
2539
2540
2541/**
2542 * Sets up pin-based VM-execution controls in the VMCS.
2543 *
2544 * @returns VBox status code.
2545 * @param pVCpu The cross context virtual CPU structure.
2546 * @param pVmcsInfo The VMCS info. object.
2547 */
2548static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2549{
2550 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2551 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
2552 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2553
2554 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2555 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2556
2557 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
2558 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2559
2560 /* Enable the VMX-preemption timer. */
2561 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
2562 {
2563 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
2564 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
2565 }
2566
2567#if 0
2568 /* Enable posted-interrupt processing. */
2569 if (pVM->hm.s.fPostedIntrs)
2570 {
2571 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
2572 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
2573 fVal |= VMX_PIN_CTLS_POSTED_INT;
2574 }
2575#endif
2576
2577 if ((fVal & fZap) != fVal)
2578 {
2579 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2580 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
2581 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2582 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2583 }
2584
2585 /* Commit it to the VMCS and update our cache. */
2586 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
2587 AssertRC(rc);
2588 pVmcsInfo->u32PinCtls = fVal;
2589
2590 return VINF_SUCCESS;
2591}
2592
2593
2594/**
2595 * Sets up secondary processor-based VM-execution controls in the VMCS.
2596 *
2597 * @returns VBox status code.
2598 * @param pVCpu The cross context virtual CPU structure.
2599 * @param pVmcsInfo The VMCS info. object.
2600 */
2601static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2602{
2603 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2604 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
2605 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2606
2607 /* WBINVD causes a VM-exit. */
2608 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
2609 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
2610
2611 /* Enable EPT (aka nested-paging). */
2612 if (pVM->hmr0.s.fNestedPaging)
2613 fVal |= VMX_PROC_CTLS2_EPT;
2614
2615 /* Enable the INVPCID instruction if we expose it to the guest and is supported
2616 by the hardware. Without this, guest executing INVPCID would cause a #UD. */
2617 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
2618 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
2619 fVal |= VMX_PROC_CTLS2_INVPCID;
2620
2621 /* Enable VPID. */
2622 if (pVM->hmr0.s.vmx.fVpid)
2623 fVal |= VMX_PROC_CTLS2_VPID;
2624
2625 /* Enable unrestricted guest execution. */
2626 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
2627 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
2628
2629#if 0
2630 if (pVM->hm.s.fVirtApicRegs)
2631 {
2632 /* Enable APIC-register virtualization. */
2633 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
2634 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
2635
2636 /* Enable virtual-interrupt delivery. */
2637 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
2638 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
2639 }
2640#endif
2641
2642 /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is
2643 where the TPR shadow resides. */
2644 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2645 * done dynamically. */
2646 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
2647 {
2648 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
2649 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
2650 }
2651
2652 /* Enable the RDTSCP instruction if we expose it to the guest and is supported
2653 by the hardware. Without this, guest executing RDTSCP would cause a #UD. */
2654 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
2655 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
2656 fVal |= VMX_PROC_CTLS2_RDTSCP;
2657
2658 /* Enable Pause-Loop exiting. */
2659 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
2660 && pVM->hm.s.vmx.cPleGapTicks
2661 && pVM->hm.s.vmx.cPleWindowTicks)
2662 {
2663 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
2664
2665 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
2666 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
2667 }
2668
2669 if ((fVal & fZap) != fVal)
2670 {
2671 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2672 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
2673 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2674 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2675 }
2676
2677 /* Commit it to the VMCS and update our cache. */
2678 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
2679 AssertRC(rc);
2680 pVmcsInfo->u32ProcCtls2 = fVal;
2681
2682 return VINF_SUCCESS;
2683}
2684
2685
2686/**
2687 * Sets up processor-based VM-execution controls in the VMCS.
2688 *
2689 * @returns VBox status code.
2690 * @param pVCpu The cross context virtual CPU structure.
2691 * @param pVmcsInfo The VMCS info. object.
2692 */
2693static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2694{
2695 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2696 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
2697 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2698
2699 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
2700 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2701 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2702 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2703 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2704 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2705 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2706
2707 /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later, check if it's not -always- needed to be set or clear. */
2708 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
2709 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
2710 {
2711 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2712 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2713 }
2714
2715 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2716 if (!pVM->hmr0.s.fNestedPaging)
2717 {
2718 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
2719 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
2720 | VMX_PROC_CTLS_CR3_LOAD_EXIT
2721 | VMX_PROC_CTLS_CR3_STORE_EXIT;
2722 }
2723
2724 /* Use TPR shadowing if supported by the CPU. */
2725 if ( PDMHasApic(pVM)
2726 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
2727 {
2728 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2729 /* CR8 writes cause a VM-exit based on TPR threshold. */
2730 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
2731 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
2732 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
2733 }
2734 else
2735 {
2736 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
2737 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
2738 if (pVM->hmr0.s.fAllow64BitGuests)
2739 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2740 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2741 }
2742
2743 /* Use MSR-bitmaps if supported by the CPU. */
2744 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2745 {
2746 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
2747 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
2748 }
2749
2750 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2751 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2752 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
2753
2754 if ((fVal & fZap) != fVal)
2755 {
2756 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
2757 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
2758 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2759 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2760 }
2761
2762 /* Commit it to the VMCS and update our cache. */
2763 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
2764 AssertRC(rc);
2765 pVmcsInfo->u32ProcCtls = fVal;
2766
2767 /* Set up MSR permissions that don't change through the lifetime of the VM. */
2768 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2769 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
2770
2771 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
2772 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2773 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
2774
2775 /* Sanity check, should not really happen. */
2776 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
2777 { /* likely */ }
2778 else
2779 {
2780 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2781 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2782 }
2783
2784 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
2785 return VINF_SUCCESS;
2786}
2787
2788
2789/**
2790 * Sets up miscellaneous (everything other than Pin, Processor and secondary
2791 * Processor-based VM-execution) control fields in the VMCS.
2792 *
2793 * @returns VBox status code.
2794 * @param pVCpu The cross context virtual CPU structure.
2795 * @param pVmcsInfo The VMCS info. object.
2796 */
2797static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2798{
2799#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2800 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
2801 {
2802 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
2803 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
2804 }
2805#endif
2806
2807 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
2808 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
2809 AssertRC(rc);
2810
2811 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
2812 if (RT_SUCCESS(rc))
2813 {
2814 uint64_t const u64Cr0Mask = vmxHCGetFixedCr0Mask(pVCpu);
2815 uint64_t const u64Cr4Mask = vmxHCGetFixedCr4Mask(pVCpu);
2816
2817 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
2818 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
2819
2820 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
2821 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
2822
2823 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
2824 {
2825 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
2826 AssertRC(rc);
2827 }
2828 return VINF_SUCCESS;
2829 }
2830 else
2831 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
2832 return rc;
2833}
2834
2835
2836/**
2837 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2838 *
2839 * We shall setup those exception intercepts that don't change during the
2840 * lifetime of the VM here. The rest are done dynamically while loading the
2841 * guest state.
2842 *
2843 * @param pVCpu The cross context virtual CPU structure.
2844 * @param pVmcsInfo The VMCS info. object.
2845 */
2846static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
2847{
2848 /*
2849 * The following exceptions are always intercepted:
2850 *
2851 * #AC - To prevent the guest from hanging the CPU and for dealing with
2852 * split-lock detecting host configs.
2853 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
2854 * recursive #DBs can cause a CPU hang.
2855 * #PF - To sync our shadow page tables when nested-paging is not used.
2856 */
2857 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
2858 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
2859 | RT_BIT(X86_XCPT_DB)
2860 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
2861
2862 /* Commit it to the VMCS. */
2863 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
2864 AssertRC(rc);
2865
2866 /* Update our cache of the exception bitmap. */
2867 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
2868}
2869
2870
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
/**
 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
 *
 * Writes the VMCS link pointer (NIL), the auto-load/store MSR area addresses
 * and, when the CPU supports MSR bitmaps, the MSR-bitmap address.  The CR0/CR4
 * guest/host masks are deliberately left untouched, they are initialized while
 * merging the VMCS.
 *
 * @returns VBox status code.
 * @param   pVmcsInfo   The VMCS info. object.
 */
static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
{
    Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
    int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
    AssertRC(rc);

    rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
    if (RT_SUCCESS(rc))
    {
        if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
            hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);

        /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
        Assert(!pVmcsInfo->u64Cr0Mask);
        Assert(!pVmcsInfo->u64Cr4Mask);
        return VINF_SUCCESS;
    }

    /* The status code being reported here stems from setting up the auto-load/store
       MSR addresses (the link pointer write above is only asserted), so say so. */
    LogRelFunc(("Failed to set up the auto-load/store MSR addresses in the nested-guest VMCS. rc=%Rrc\n", rc));
    return rc;
}
#endif
2899
2900
2901/**
2902 * Selector FNHMSVMVMRUN implementation.
2903 */
2904static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
2905{
2906 hmR0VmxUpdateStartVmFunction(pVCpu);
2907 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
2908}
2909
2910
2911/**
2912 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
2913 * VMX.
2914 *
2915 * @returns VBox status code.
2916 * @param pVCpu The cross context virtual CPU structure.
2917 * @param pVmcsInfo The VMCS info. object.
2918 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2919 */
2920static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2921{
2922 Assert(pVmcsInfo->pvVmcs);
2923 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2924
2925 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
2926 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2927 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
2928
2929 LogFlowFunc(("\n"));
2930
2931 /*
2932 * Initialize the VMCS using VMCLEAR before loading the VMCS.
2933 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
2934 */
2935 int rc = hmR0VmxClearVmcs(pVmcsInfo);
2936 if (RT_SUCCESS(rc))
2937 {
2938 rc = hmR0VmxLoadVmcs(pVmcsInfo);
2939 if (RT_SUCCESS(rc))
2940 {
2941 /*
2942 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
2943 * The host is always 64-bit since we no longer support 32-bit hosts.
2944 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
2945 */
2946 if (!fIsNstGstVmcs)
2947 {
2948 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
2949 if (RT_SUCCESS(rc))
2950 {
2951 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
2952 if (RT_SUCCESS(rc))
2953 {
2954 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
2955 if (RT_SUCCESS(rc))
2956 {
2957 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
2958#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2959 /*
2960 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
2961 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
2962 * making it fit for use when VMCS shadowing is later enabled.
2963 */
2964 if (pVmcsInfo->pvShadowVmcs)
2965 {
2966 VMXVMCSREVID VmcsRevId;
2967 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
2968 VmcsRevId.n.fIsShadowVmcs = 1;
2969 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
2970 rc = vmxHCClearShadowVmcs(pVmcsInfo);
2971 if (RT_SUCCESS(rc))
2972 { /* likely */ }
2973 else
2974 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
2975 }
2976#endif
2977 }
2978 else
2979 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
2980 }
2981 else
2982 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
2983 }
2984 else
2985 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
2986 }
2987 else
2988 {
2989#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2990 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
2991 if (RT_SUCCESS(rc))
2992 { /* likely */ }
2993 else
2994 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
2995#else
2996 AssertFailed();
2997#endif
2998 }
2999 }
3000 else
3001 LogRelFunc(("Failed to load the %s. rc=%Rrc\n", rc, pszVmcs));
3002 }
3003 else
3004 LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", rc, pszVmcs));
3005
3006 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
3007 if (RT_SUCCESS(rc))
3008 {
3009 rc = hmR0VmxClearVmcs(pVmcsInfo);
3010 if (RT_SUCCESS(rc))
3011 { /* likely */ }
3012 else
3013 LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", rc, pszVmcs));
3014 }
3015
3016 /*
3017 * Update the last-error record both for failures and success, so we
3018 * can propagate the status code back to ring-3 for diagnostics.
3019 */
3020 hmR0VmxUpdateErrorRecord(pVCpu, rc);
3021 NOREF(pszVmcs);
3022 return rc;
3023}
3024
3025
3026/**
3027 * Does global VT-x initialization (called during module initialization).
3028 *
3029 * @returns VBox status code.
3030 */
3031VMMR0DECL(int) VMXR0GlobalInit(void)
3032{
3033#ifdef HMVMX_USE_FUNCTION_TABLE
3034 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
3035# ifdef VBOX_STRICT
3036 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
3037 Assert(g_aVMExitHandlers[i].pfn);
3038# endif
3039#endif
3040 return VINF_SUCCESS;
3041}
3042
3043
3044/**
3045 * Does global VT-x termination (called during module termination).
3046 */
3047VMMR0DECL(void) VMXR0GlobalTerm()
3048{
3049 /* Nothing to do currently. */
3050}
3051
3052
3053/**
3054 * Sets up and activates VT-x on the current CPU.
3055 *
3056 * @returns VBox status code.
3057 * @param pHostCpu The HM physical-CPU structure.
3058 * @param pVM The cross context VM structure. Can be
3059 * NULL after a host resume operation.
3060 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
3061 * fEnabledByHost is @c true).
3062 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
3063 * @a fEnabledByHost is @c true).
3064 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
3065 * enable VT-x on the host.
3066 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
3067 */
3068VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
3069 PCSUPHWVIRTMSRS pHwvirtMsrs)
3070{
3071 AssertPtr(pHostCpu);
3072 AssertPtr(pHwvirtMsrs);
3073 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3074
3075 /* Enable VT-x if it's not already enabled by the host. */
3076 if (!fEnabledByHost)
3077 {
3078 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
3079 if (RT_FAILURE(rc))
3080 return rc;
3081 }
3082
3083 /*
3084 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor have been
3085 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
3086 * invalidated when flushing by VPID.
3087 */
3088 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3089 {
3090 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
3091 pHostCpu->fFlushAsidBeforeUse = false;
3092 }
3093 else
3094 pHostCpu->fFlushAsidBeforeUse = true;
3095
3096 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
3097 ++pHostCpu->cTlbFlushes;
3098
3099 return VINF_SUCCESS;
3100}
3101
3102
3103/**
3104 * Deactivates VT-x on the current CPU.
3105 *
3106 * @returns VBox status code.
3107 * @param pHostCpu The HM physical-CPU structure.
3108 * @param pvCpuPage Pointer to the VMXON region.
3109 * @param HCPhysCpuPage Physical address of the VMXON region.
3110 *
3111 * @remarks This function should never be called when SUPR0EnableVTx() or
3112 * similar was used to enable VT-x on the host.
3113 */
3114VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
3115{
3116 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
3117
3118 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3119 return hmR0VmxLeaveRootMode(pHostCpu);
3120}
3121
3122
3123/**
3124 * Does per-VM VT-x initialization.
3125 *
3126 * @returns VBox status code.
3127 * @param pVM The cross context VM structure.
3128 */
3129VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
3130{
3131 AssertPtr(pVM);
3132 LogFlowFunc(("pVM=%p\n", pVM));
3133
3134 hmR0VmxStructsInit(pVM);
3135 int rc = hmR0VmxStructsAlloc(pVM);
3136 if (RT_FAILURE(rc))
3137 {
3138 LogRelFunc(("Failed to allocated VMX structures. rc=%Rrc\n", rc));
3139 return rc;
3140 }
3141
3142 /* Setup the crash dump page. */
3143#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3144 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
3145 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
3146#endif
3147 return VINF_SUCCESS;
3148}
3149
3150
3151/**
3152 * Does per-VM VT-x termination.
3153 *
3154 * @returns VBox status code.
3155 * @param pVM The cross context VM structure.
3156 */
3157VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
3158{
3159 AssertPtr(pVM);
3160 LogFlowFunc(("pVM=%p\n", pVM));
3161
3162#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3163 if (pVM->hmr0.s.vmx.pbScratch)
3164 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
3165#endif
3166 hmR0VmxStructsFree(pVM);
3167 return VINF_SUCCESS;
3168}
3169
3170
3171/**
3172 * Sets up the VM for execution using hardware-assisted VMX.
3173 * This function is only called once per-VM during initialization.
3174 *
3175 * @returns VBox status code.
3176 * @param pVM The cross context VM structure.
3177 */
3178VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
3179{
3180 AssertPtr(pVM);
3181 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3182
3183 LogFlowFunc(("pVM=%p\n", pVM));
3184
3185 /*
3186 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
3187 * without causing a #GP.
3188 */
3189 RTCCUINTREG const uHostCr4 = ASMGetCR4();
3190 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
3191 { /* likely */ }
3192 else
3193 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
3194
3195 /*
3196 * Check that nested paging is supported if enabled and copy over the flag to the
3197 * ring-0 only structure.
3198 */
3199 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
3200 AssertReturn( !fNestedPaging
3201 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
3202 VERR_INCOMPATIBLE_CONFIG);
3203 pVM->hmr0.s.fNestedPaging = fNestedPaging;
3204 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
3205
3206 /*
3207 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
3208 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
3209 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
3210 */
3211 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
3212 AssertReturn( !fUnrestrictedGuest
3213 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
3214 && fNestedPaging),
3215 VERR_INCOMPATIBLE_CONFIG);
3216 if ( !fUnrestrictedGuest
3217 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
3218 || !pVM->hm.s.vmx.pRealModeTSS))
3219 {
3220 LogRelFunc(("Invalid real-on-v86 state.\n"));
3221 return VERR_INTERNAL_ERROR;
3222 }
3223 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
3224
3225 /* Initialize these always, see hmR3InitFinalizeR0().*/
3226 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
3227 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
3228
3229 /* Setup the tagged-TLB flush handlers. */
3230 int rc = hmR0VmxSetupTaggedTlb(pVM);
3231 if (RT_FAILURE(rc))
3232 {
3233 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
3234 return rc;
3235 }
3236
3237 /* Determine LBR capabilities. */
3238 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
3239 if (pVM->hmr0.s.vmx.fLbr)
3240 {
3241 rc = hmR0VmxSetupLbrMsrRange(pVM);
3242 if (RT_FAILURE(rc))
3243 {
3244 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
3245 return rc;
3246 }
3247 }
3248
3249#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3250 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
3251 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
3252 {
3253 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
3254 if (RT_SUCCESS(rc))
3255 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
3256 else
3257 {
3258 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
3259 return rc;
3260 }
3261 }
3262#endif
3263
3264 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
3265 {
3266 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
3267 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
3268
3269 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
3270
3271 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
3272 if (RT_SUCCESS(rc))
3273 {
3274#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3275 if (pVM->cpum.ro.GuestFeatures.fVmx)
3276 {
3277 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
3278 if (RT_SUCCESS(rc))
3279 { /* likely */ }
3280 else
3281 {
3282 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
3283 return rc;
3284 }
3285 }
3286#endif
3287 }
3288 else
3289 {
3290 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
3291 return rc;
3292 }
3293 }
3294
3295 return VINF_SUCCESS;
3296}
3297
3298
3299/**
3300 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
3301 * the VMCS.
3302 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
3303 */
3304static uint64_t hmR0VmxExportHostControlRegs(void)
3305{
3306 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
3307 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
3308 uint64_t uHostCr4 = ASMGetCR4();
3309 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
3310 return uHostCr4;
3311}
3312
3313
3314/**
3315 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
3316 * the host-state area in the VMCS.
3317 *
3318 * @returns VBox status code.
3319 * @param pVCpu The cross context virtual CPU structure.
3320 * @param uHostCr4 The host CR4 value.
3321 */
3322static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
3323{
3324 /*
3325 * If we've executed guest code using hardware-assisted VMX, the host-state bits
3326 * will be messed up. We should -not- save the messed up state without restoring
3327 * the original host-state, see @bugref{7240}.
3328 *
3329 * This apparently can happen (most likely the FPU changes), deal with it rather than
3330 * asserting. Was observed booting Solaris 10u10 32-bit guest.
3331 */
3332 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
3333 {
3334 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
3335 pVCpu->idCpu));
3336 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
3337 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
3338 }
3339
3340 /*
3341 * Get all the host info.
3342 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
3343 * without also checking the cpuid bit.
3344 */
3345 uint32_t fRestoreHostFlags;
3346#if RT_INLINE_ASM_EXTERNAL
3347 if (uHostCr4 & X86_CR4_FSGSBASE)
3348 {
3349 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
3350 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3351 }
3352 else
3353 {
3354 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
3355 fRestoreHostFlags = 0;
3356 }
3357 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
3358 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
3359 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
3360 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
3361#else
3362 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
3363 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
3364 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
3365 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
3366 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
3367 if (uHostCr4 & X86_CR4_FSGSBASE)
3368 {
3369 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
3370 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
3371 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
3372 }
3373 else
3374 {
3375 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
3376 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
3377 fRestoreHostFlags = 0;
3378 }
3379 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
3380 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
3381 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
3382 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
3383 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
3384#endif
3385
3386 /*
3387 * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to
3388 * gain VM-entry and restore them before we get preempted.
3389 *
3390 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
3391 */
3392 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
3393 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
3394 {
3395 if (!(uSelAll & X86_SEL_LDT))
3396 {
3397#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3398 do { \
3399 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3400 if ((a_uVmcsVar) & X86_SEL_RPL) \
3401 { \
3402 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3403 (a_uVmcsVar) = 0; \
3404 } \
3405 } while (0)
3406 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3407 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3408 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3409 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3410#undef VMXLOCAL_ADJUST_HOST_SEG
3411 }
3412 else
3413 {
3414#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
3415 do { \
3416 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
3417 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
3418 { \
3419 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
3420 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3421 else \
3422 { \
3423 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
3424 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
3425 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
3426 } \
3427 (a_uVmcsVar) = 0; \
3428 } \
3429 } while (0)
3430 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
3431 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
3432 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
3433 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
3434#undef VMXLOCAL_ADJUST_HOST_SEG
3435 }
3436 }
3437
3438 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
3439 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
3440 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
3441 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
3442 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
3443 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
3444 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
3445 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
3446
3447 /*
3448 * Determine if we need to manually need to restore the GDTR and IDTR limits as VT-x zaps
3449 * them to the maximum limit (0xffff) on every VM-exit.
3450 */
3451 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
3452 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
3453
3454 /*
3455 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
3456 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
3457 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
3458 * However, several hosts either insists on 0xfff being the limit (Windows Patch Guard) or
3459 * uses the limit for other purposes (darwin puts the CPU ID in there but botches sidt
3460 * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left
3461 * at 0xffff on hosts where we are sure it won't cause trouble.
3462 */
3463#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
3464 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
3465#else
3466 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
3467#endif
3468 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
3469
3470 /*
3471 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
3472 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
3473 * RPL should be too in most cases.
3474 */
3475 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
3476 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
3477 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
3478 VERR_VMX_INVALID_HOST_STATE);
3479
3480 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
3481 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
3482
3483 /*
3484 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
3485 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
3486 * restoration if the host has something else. Task switching is not supported in 64-bit
3487 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
3488 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
3489 *
3490 * [1] See Intel spec. 3.5 "System Descriptor Types".
3491 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
3492 */
3493 Assert(pDesc->System.u4Type == 11);
3494 if ( pDesc->System.u16LimitLow != 0x67
3495 || pDesc->System.u4LimitHigh)
3496 {
3497 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
3498
3499 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
3500 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
3501 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
3502 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
3503 {
3504 /* The GDT is read-only but the writable GDT is available. */
3505 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
3506 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
3507 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
3508 AssertRCReturn(rc, rc);
3509 }
3510 }
3511
3512 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
3513
3514 /*
3515 * Do all the VMCS updates in one block to assist nested virtualization.
3516 */
3517 int rc;
3518 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
3519 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
3520 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
3521 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
3522 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
3523 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
3524 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
3525 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
3526 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
3527 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
3528 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
3529 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
3530
3531 return VINF_SUCCESS;
3532}
3533
3534
3535/**
3536 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
3537 * host-state area of the VMCS.
3538 *
3539 * These MSRs will be automatically restored on the host after every successful
3540 * VM-exit.
3541 *
3542 * @param pVCpu The cross context virtual CPU structure.
3543 *
3544 * @remarks No-long-jump zone!!!
3545 */
3546static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
3547{
3548 AssertPtr(pVCpu);
3549
3550 /*
3551 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
3552 * rather than swapping them on every VM-entry.
3553 */
3554 hmR0VmxLazySaveHostMsrs(pVCpu);
3555
3556 /*
3557 * Host Sysenter MSRs.
3558 */
3559 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
3560 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
3561 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
3562
3563 /*
3564 * Host EFER MSR.
3565 *
3566 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
3567 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
3568 */
3569 if (g_fHmVmxSupportsVmcsEfer)
3570 {
3571 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
3572 AssertRC(rc);
3573 }
3574
3575 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3576 * hmR0VmxExportGuestEntryExitCtls(). */
3577}
3578
3579
3580/**
3581 * Figures out if we need to swap the EFER MSR which is particularly expensive.
3582 *
3583 * We check all relevant bits. For now, that's everything besides LMA/LME, as
3584 * these two bits are handled by VM-entry, see hmR0VMxExportGuestEntryExitCtls().
3585 *
3586 * @returns true if we need to load guest EFER, false otherwise.
3587 * @param pVCpu The cross context virtual CPU structure.
3588 * @param pVmxTransient The VMX-transient structure.
3589 *
3590 * @remarks Requires EFER, CR4.
3591 * @remarks No-long-jump zone!!!
3592 */
3593static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3594{
3595#ifdef HMVMX_ALWAYS_SWAP_EFER
3596 RT_NOREF2(pVCpu, pVmxTransient);
3597 return true;
3598#else
3599 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3600 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
3601 uint64_t const u64GuestEfer = pCtx->msrEFER;
3602
3603# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3604 /*
3605 * For nested-guests, we shall honor swapping the EFER MSR when requested by
3606 * the nested-guest.
3607 */
3608 if ( pVmxTransient->fIsNestedGuest
3609 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
3610 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
3611 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
3612 return true;
3613# else
3614 RT_NOREF(pVmxTransient);
3615#endif
3616
3617 /*
3618 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
3619 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
3620 */
3621 if ( CPUMIsGuestInLongModeEx(pCtx)
3622 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3623 return true;
3624
3625 /*
3626 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
3627 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
3628 *
3629 * See Intel spec. 4.5 "IA-32e Paging".
3630 * See Intel spec. 4.1.1 "Three Paging Modes".
3631 *
3632 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
3633 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
3634 */
3635 Assert(vmxHCGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
3636 Assert(vmxHCGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
3637 if ( (pCtx->cr4 & X86_CR4_PAE)
3638 && (pCtx->cr0 & X86_CR0_PG))
3639 {
3640 /*
3641 * If nested paging is not used, verify that the guest paging mode matches the
3642 * shadow paging mode which is/will be placed in the VMCS (which is what will
3643 * actually be used while executing the guest and not the CR4 shadow value).
3644 */
3645 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
3646 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
3647 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
3648 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
3649 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
3650 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
3651 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3652 {
3653 /* Verify that the host is NX capable. */
3654 Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute);
3655 return true;
3656 }
3657 }
3658
3659 return false;
3660#endif
3661}
3662
3663
3664/**
3665 * Exports the guest's RSP into the guest-state area in the VMCS.
3666 *
3667 * @param pVCpu The cross context virtual CPU structure.
3668 *
3669 * @remarks No-long-jump zone!!!
3670 */
3671static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
3672{
3673 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
3674 {
3675 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
3676
3677 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
3678 AssertRC(rc);
3679
3680 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
3681 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
3682 }
3683}
3684
3685
3686/**
3687 * Exports the guest hardware-virtualization state.
3688 *
3689 * @returns VBox status code.
3690 * @param pVCpu The cross context virtual CPU structure.
3691 * @param pVmxTransient The VMX-transient structure.
3692 *
3693 * @remarks No-long-jump zone!!!
3694 */
3695static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3696{
3697 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
3698 {
3699#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3700 /*
3701 * Check if the VMX feature is exposed to the guest and if the host CPU supports
3702 * VMCS shadowing.
3703 */
3704 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
3705 {
3706 /*
3707 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
3708 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
3709 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
3710 *
3711 * We check for VMX root mode here in case the guest executes VMXOFF without
3712 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
3713 * not clear the current VMCS pointer.
3714 */
3715 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3716 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
3717 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
3718 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
3719 {
3720 /* Paranoia. */
3721 Assert(!pVmxTransient->fIsNestedGuest);
3722
3723 /*
3724 * For performance reasons, also check if the nested hypervisor's current VMCS
3725 * was newly loaded or modified before copying it to the shadow VMCS.
3726 */
3727 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
3728 {
3729 int rc = vmxHCCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
3730 AssertRCReturn(rc, rc);
3731 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
3732 }
3733 vmxHCEnableVmcsShadowing(pVCpu, pVmcsInfo);
3734 }
3735 else
3736 vmxHCDisableVmcsShadowing(pVCpu, pVmcsInfo);
3737 }
3738#else
3739 NOREF(pVmxTransient);
3740#endif
3741 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
3742 }
3743 return VINF_SUCCESS;
3744}
3745
3746
3747/**
3748 * Exports the guest debug registers into the guest-state area in the VMCS.
3749 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3750 *
3751 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
3752 *
3753 * @returns VBox status code.
3754 * @param pVCpu The cross context virtual CPU structure.
3755 * @param pVmxTransient The VMX-transient structure.
3756 *
3757 * @remarks No-long-jump zone!!!
3758 */
3759static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
3760{
3761 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3762
3763 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
3764 * stepping. */
3765 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
3766 if (pVmxTransient->fIsNestedGuest)
3767 {
3768 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
3769 AssertRC(rc);
3770
3771 /*
3772 * We don't want to always intercept MOV DRx for nested-guests as it causes
3773 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
3774 * Instead, they are strictly only requested when the nested hypervisor intercepts
3775 * them -- handled while merging VMCS controls.
3776 *
3777 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
3778 * then the nested-guest debug state should be actively loaded on the host so that
3779 * nested-guest reads its own debug registers without causing VM-exits.
3780 */
3781 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
3782 && !CPUMIsGuestDebugStateActive(pVCpu))
3783 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3784 return VINF_SUCCESS;
3785 }
3786
3787#ifdef VBOX_STRICT
3788 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3789 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
3790 {
3791 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3792 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
3793 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
3794 }
3795#endif
3796
3797 bool fSteppingDB = false;
3798 bool fInterceptMovDRx = false;
3799 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
3800 if (pVCpu->hm.s.fSingleInstruction)
3801 {
3802 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3803 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
3804 {
3805 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
3806 Assert(fSteppingDB == false);
3807 }
3808 else
3809 {
3810 pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
3811 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
3812 pVCpu->hmr0.s.fClearTrapFlag = true;
3813 fSteppingDB = true;
3814 }
3815 }
3816
3817 uint64_t u64GuestDr7;
3818 if ( fSteppingDB
3819 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3820 {
3821 /*
3822 * Use the combined guest and host DRx values found in the hypervisor register set
3823 * because the hypervisor debugger has breakpoints active or someone is single stepping
3824 * on the host side without a monitor trap flag.
3825 *
3826 * Note! DBGF expects a clean DR6 state before executing guest code.
3827 */
3828 if (!CPUMIsHyperDebugStateActive(pVCpu))
3829 {
3830 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3831 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3832 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3833 }
3834
3835 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
3836 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
3837 pVCpu->hmr0.s.fUsingHyperDR7 = true;
3838 fInterceptMovDRx = true;
3839 }
3840 else
3841 {
3842 /*
3843 * If the guest has enabled debug registers, we need to load them prior to
3844 * executing guest code so they'll trigger at the right time.
3845 */
3846 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
3847 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
3848 {
3849 if (!CPUMIsGuestDebugStateActive(pVCpu))
3850 {
3851 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3852 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3853 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3854 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3855 }
3856 Assert(!fInterceptMovDRx);
3857 }
3858 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3859 {
3860 /*
3861 * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we
3862 * must intercept #DB in order to maintain a correct DR6 guest value, and
3863 * because we need to intercept it to prevent nested #DBs from hanging the
3864 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
3865 */
3866 fInterceptMovDRx = true;
3867 }
3868
3869 /* Update DR7 with the actual guest value. */
3870 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
3871 pVCpu->hmr0.s.fUsingHyperDR7 = false;
3872 }
3873
3874 if (fInterceptMovDRx)
3875 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
3876 else
3877 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
3878
3879 /*
3880 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
3881 * monitor-trap flag and update our cache.
3882 */
3883 if (uProcCtls != pVmcsInfo->u32ProcCtls)
3884 {
3885 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
3886 AssertRC(rc);
3887 pVmcsInfo->u32ProcCtls = uProcCtls;
3888 }
3889
3890 /*
3891 * Update guest DR7.
3892 */
3893 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
3894 AssertRC(rc);
3895
3896 /*
3897 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
3898 * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
3899 *
3900 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
3901 */
3902 if (fSteppingDB)
3903 {
3904 Assert(pVCpu->hm.s.fSingleInstruction);
3905 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
3906
3907 uint32_t fIntrState = 0;
3908 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
3909 AssertRC(rc);
3910
3911 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
3912 {
3913 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
3914 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
3915 AssertRC(rc);
3916 }
3917 }
3918
3919 return VINF_SUCCESS;
3920}
3921
3922
3923/**
3924 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3925 * areas.
3926 *
3927 * These MSRs will automatically be loaded to the host CPU on every successful
3928 * VM-entry and stored from the host CPU on every successful VM-exit.
3929 *
3930 * We creates/updates MSR slots for the host MSRs in the VM-exit MSR-load area. The
3931 * actual host MSR values are not- updated here for performance reasons. See
3932 * hmR0VmxExportHostMsrs().
3933 *
3934 * We also exports the guest sysenter MSRs into the guest-state area in the VMCS.
3935 *
3936 * @returns VBox status code.
3937 * @param pVCpu The cross context virtual CPU structure.
3938 * @param pVmxTransient The VMX-transient structure.
3939 *
3940 * @remarks No-long-jump zone!!!
3941 */
3942static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
3943{
3944 AssertPtr(pVCpu);
3945 AssertPtr(pVmxTransient);
3946
3947 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3948 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
3949
3950 /*
3951 * MSRs that we use the auto-load/store MSR area in the VMCS.
3952 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
3953 * nothing to do here. The host MSR values are updated when it's safe in
3954 * hmR0VmxLazySaveHostMsrs().
3955 *
3956 * For nested-guests, the guests MSRs from the VM-entry MSR-load area are already
3957 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
3958 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
3959 * for any MSR that are not part of the lazy MSRs so we do not need to place
3960 * those MSRs into the auto-load/store MSR area. Nothing to do here.
3961 */
3962 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
3963 {
3964 /* No auto-load/store MSRs currently. */
3965 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
3966 }
3967
3968 /*
3969 * Guest Sysenter MSRs.
3970 */
3971 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
3972 {
3973 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
3974
3975 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
3976 {
3977 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
3978 AssertRC(rc);
3979 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
3980 }
3981
3982 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
3983 {
3984 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
3985 AssertRC(rc);
3986 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
3987 }
3988
3989 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
3990 {
3991 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
3992 AssertRC(rc);
3993 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
3994 }
3995 }
3996
3997 /*
3998 * Guest/host EFER MSR.
3999 */
4000 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
4001 {
4002 /* Whether we are using the VMCS to swap the EFER MSR must have been
4003 determined earlier while exporting VM-entry/VM-exit controls. */
4004 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
4005 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
4006
4007 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
4008 {
4009 /*
4010 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
4011 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
4012 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
4013 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
4014 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
4015 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
4016 * during VM-entry.
4017 */
4018 uint64_t uGuestEferMsr = pCtx->msrEFER;
4019 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
4020 {
4021 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
4022 uGuestEferMsr &= ~MSR_K6_EFER_LME;
4023 else
4024 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
4025 }
4026
4027 /*
4028 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4029 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4030 */
4031 if (g_fHmVmxSupportsVmcsEfer)
4032 {
4033 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
4034 AssertRC(rc);
4035 }
4036 else
4037 {
4038 /*
4039 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
4040 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
4041 */
4042 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
4043 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4044 AssertRCReturn(rc, rc);
4045 }
4046
4047 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
4048 }
4049 else if (!g_fHmVmxSupportsVmcsEfer)
4050 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
4051
4052 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
4053 }
4054
4055 /*
4056 * Other MSRs.
4057 */
4058 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
4059 {
4060 /* Speculation Control (R/W). */
4061 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
4062 if (pVM->cpum.ro.GuestFeatures.fIbrs)
4063 {
4064 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
4065 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4066 AssertRCReturn(rc, rc);
4067 }
4068
4069 /* Last Branch Record. */
4070 if (pVM->hmr0.s.vmx.fLbr)
4071 {
4072 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
4073 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
4074 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
4075 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
4076 Assert(cLbrStack <= 32);
4077 for (uint32_t i = 0; i < cLbrStack; i++)
4078 {
4079 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
4080 pVmcsInfoShared->au64LbrFromIpMsr[i],
4081 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4082 AssertRCReturn(rc, rc);
4083
4084 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
4085 if (idToIpMsrStart != 0)
4086 {
4087 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
4088 pVmcsInfoShared->au64LbrToIpMsr[i],
4089 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
4090 AssertRCReturn(rc, rc);
4091 }
4092 }
4093
4094 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
4095 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
4096 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
4097 false /* fUpdateHostMsr */);
4098 AssertRCReturn(rc, rc);
4099 }
4100
4101 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
4102 }
4103
4104 return VINF_SUCCESS;
4105}
4106
4107
4108/**
4109 * Wrapper for running the guest code in VT-x.
4110 *
4111 * @returns VBox status code, no informational status codes.
4112 * @param pVCpu The cross context virtual CPU structure.
4113 * @param pVmxTransient The VMX-transient structure.
4114 *
4115 * @remarks No-long-jump zone!!!
4116 */
4117DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4118{
4119 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
4120 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
4121
4122 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
4123 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
4124#ifdef VBOX_WITH_STATISTICS
4125 if (fResumeVM)
4126 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
4127 else
4128 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
4129#endif
4130 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
4131 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
4132 return rc;
4133}
4134
4135
4136/**
4137 * Reports world-switch error and dumps some useful debug info.
4138 *
4139 * @param pVCpu The cross context virtual CPU structure.
4140 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4141 * @param pVmxTransient The VMX-transient structure (only
4142 * exitReason updated).
4143 */
4144static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
4145{
4146 Assert(pVCpu);
4147 Assert(pVmxTransient);
4148 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
4149
4150 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
4151 switch (rcVMRun)
4152 {
4153 case VERR_VMX_INVALID_VMXON_PTR:
4154 AssertFailed();
4155 break;
4156 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4157 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4158 {
4159 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4160 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4161 AssertRC(rc);
4162 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
4163
4164 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
4165 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4166 Cannot do it here as we may have been long preempted. */
4167
4168#ifdef VBOX_STRICT
4169 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4170 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4171 pVmxTransient->uExitReason));
4172 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
4173 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4174 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4175 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4176 else
4177 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4178 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4179 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4180
4181 static struct
4182 {
4183 /** Name of the field to log. */
4184 const char *pszName;
4185 /** The VMCS field. */
4186 uint32_t uVmcsField;
4187 /** Whether host support of this field needs to be checked. */
4188 bool fCheckSupport;
4189 } const s_aVmcsFields[] =
4190 {
4191 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
4192 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
4193 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
4194 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
4195 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
4196 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
4197 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
4198 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
4199 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
4200 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
4201 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
4202 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
4203 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
4204 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
4205 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
4206 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
4207 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
4208 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
4209 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
4210 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
4211 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
4212 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
4213 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
4214 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
4215 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
4216 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
4217 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
4218 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
4219 /* The order of selector fields below are fixed! */
4220 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
4221 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
4222 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
4223 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
4224 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
4225 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
4226 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
4227 /* End of ordered selector fields. */
4228 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
4229 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
4230 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
4231 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
4232 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
4233 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
4234 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
4235 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
4236 };
4237
4238 RTGDTR HostGdtr;
4239 ASMGetGDTR(&HostGdtr);
4240
4241 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
4242 for (uint32_t i = 0; i < cVmcsFields; i++)
4243 {
4244 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
4245
4246 bool fSupported;
4247 if (!s_aVmcsFields[i].fCheckSupport)
4248 fSupported = true;
4249 else
4250 {
4251 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4252 switch (uVmcsField)
4253 {
4254 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
4255 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
4256 case VMX_VMCS32_CTRL_PROC_EXEC2:
4257 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
4258 break;
4259 default:
4260 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
4261 }
4262 }
4263
4264 if (fSupported)
4265 {
4266 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
4267 switch (uWidth)
4268 {
4269 case VMX_VMCSFIELD_WIDTH_16BIT:
4270 {
4271 uint16_t u16Val;
4272 rc = VMXReadVmcs16(uVmcsField, &u16Val);
4273 AssertRC(rc);
4274 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
4275
4276 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
4277 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
4278 {
4279 if (u16Val < HostGdtr.cbGdt)
4280 {
4281 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
4282 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
4283 "Host FS", "Host GS", "Host TR" };
4284 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
4285 Assert(idxSel < RT_ELEMENTS(s_apszSel));
4286 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
4287 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
4288 }
4289 else
4290 Log4((" Selector value exceeds GDT limit!\n"));
4291 }
4292 break;
4293 }
4294
4295 case VMX_VMCSFIELD_WIDTH_32BIT:
4296 {
4297 uint32_t u32Val;
4298 rc = VMXReadVmcs32(uVmcsField, &u32Val);
4299 AssertRC(rc);
4300 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
4301 break;
4302 }
4303
4304 case VMX_VMCSFIELD_WIDTH_64BIT:
4305 case VMX_VMCSFIELD_WIDTH_NATURAL:
4306 {
4307 uint64_t u64Val;
4308 rc = VMXReadVmcs64(uVmcsField, &u64Val);
4309 AssertRC(rc);
4310 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
4311 break;
4312 }
4313 }
4314 }
4315 }
4316
4317 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4318 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4319 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4320 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4321 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4322 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4323#endif /* VBOX_STRICT */
4324 break;
4325 }
4326
4327 default:
4328 /* Impossible */
4329 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4330 break;
4331 }
4332}
4333
4334
4335/**
4336 * Sets up the usage of TSC-offsetting and updates the VMCS.
4337 *
4338 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
4339 * VMX-preemption timer.
4340 *
4341 * @returns VBox status code.
4342 * @param pVCpu The cross context virtual CPU structure.
4343 * @param pVmxTransient The VMX-transient structure.
4344 * @param idCurrentCpu The current CPU number.
4345 *
4346 * @remarks No-long-jump zone!!!
4347 */
4348static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
4349{
4350 bool fOffsettedTsc;
4351 bool fParavirtTsc;
4352 uint64_t uTscOffset;
4353 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4354 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4355
4356 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
4357 {
4358 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive (calling it on
4359 every entry slowed down the bs2-test1 CPUID testcase by ~33% (on an 10980xe). */
4360 uint64_t cTicksToDeadline;
4361 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
4362 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
4363 {
4364 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
4365 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4366 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
4367 if ((int64_t)cTicksToDeadline > 0)
4368 { /* hopefully */ }
4369 else
4370 {
4371 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
4372 cTicksToDeadline = 0;
4373 }
4374 }
4375 else
4376 {
4377 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
4378 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
4379 &pVCpu->hmr0.s.vmx.uTscDeadline,
4380 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
4381 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
4382 if (cTicksToDeadline >= 128)
4383 { /* hopefully */ }
4384 else
4385 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
4386 }
4387
4388 /* Make sure the returned values have sane upper and lower boundaries. */
4389 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
4390 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
4391 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32678); /* 1/32768th of a second, ~30us. */
4392 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
4393
4394 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
4395 * preemption timers here. We probably need to clamp the preemption timer,
4396 * after converting the timer value to the host. */
4397 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4398 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
4399 AssertRC(rc);
4400 }
4401 else
4402 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
4403
4404 if (fParavirtTsc)
4405 {
4406 /* Currently neither Hyper-V nor KVM need to update their paravirt. TSC
4407 information before every VM-entry, hence disable it for performance sake. */
4408#if 0
4409 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
4410 AssertRC(rc);
4411#endif
4412 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
4413 }
4414
4415 if ( fOffsettedTsc
4416 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
4417 {
4418 if (pVmxTransient->fIsNestedGuest)
4419 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
4420 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
4421 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4422 }
4423 else
4424 {
4425 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4426 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
4427 }
4428}
4429
4430
4431/**
4432 * Worker for VMXR0ImportStateOnDemand.
4433 *
4434 * @returns VBox status code.
4435 * @param pVCpu The cross context virtual CPU structure.
4436 * @param pVmcsInfo The VMCS info. object.
4437 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4438 */
4439static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat)
4440{
4441 int rc = VINF_SUCCESS;
4442 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
4443 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4444 uint32_t u32Val;
4445
4446 /*
4447 * Note! This is hack to workaround a mysterious BSOD observed with release builds
4448 * on Windows 10 64-bit hosts. Profile and debug builds are not affected and
4449 * neither are other host platforms.
4450 *
4451 * Committing this temporarily as it prevents BSOD.
4452 *
4453 * Update: This is very likely a compiler optimization bug, see @bugref{9180}.
4454 */
4455#ifdef RT_OS_WINDOWS
4456 if (pVM == 0 || pVM == (void *)(uintptr_t)-1)
4457 return VERR_HM_IPE_1;
4458#endif
4459
4460 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
4461
4462 /*
4463 * We disable interrupts to make the updating of the state and in particular
4464 * the fExtrn modification atomic wrt to preemption hooks.
4465 */
4466 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
4467
4468 fWhat &= pCtx->fExtrn;
4469 if (fWhat)
4470 {
4471 do
4472 {
4473 if (fWhat & CPUMCTX_EXTRN_RIP)
4474 vmxHCImportGuestRip(pVCpu);
4475
4476 if (fWhat & CPUMCTX_EXTRN_RFLAGS)
4477 vmxHCImportGuestRFlags(pVCpu, pVmcsInfo);
4478
4479 if (fWhat & (CPUMCTX_EXTRN_INHIBIT_INT | CPUMCTX_EXTRN_INHIBIT_NMI))
4480 vmxHCImportGuestIntrState(pVCpu, pVmcsInfo);
4481
4482 if (fWhat & CPUMCTX_EXTRN_RSP)
4483 {
4484 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RSP, &pCtx->rsp);
4485 AssertRC(rc);
4486 }
4487
4488 if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
4489 {
4490 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4491 bool const fRealOnV86Active = pVmcsInfoShared->RealMode.fRealOnV86Active;
4492 if (fWhat & CPUMCTX_EXTRN_CS)
4493 {
4494 vmxHCImportGuestSegReg(pVCpu, X86_SREG_CS);
4495 vmxHCImportGuestRip(pVCpu);
4496 if (fRealOnV86Active)
4497 pCtx->cs.Attr.u = pVmcsInfoShared->RealMode.AttrCS.u;
4498 EMHistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true /* fFlattened */);
4499 }
4500 if (fWhat & CPUMCTX_EXTRN_SS)
4501 {
4502 vmxHCImportGuestSegReg(pVCpu, X86_SREG_SS);
4503 if (fRealOnV86Active)
4504 pCtx->ss.Attr.u = pVmcsInfoShared->RealMode.AttrSS.u;
4505 }
4506 if (fWhat & CPUMCTX_EXTRN_DS)
4507 {
4508 vmxHCImportGuestSegReg(pVCpu, X86_SREG_DS);
4509 if (fRealOnV86Active)
4510 pCtx->ds.Attr.u = pVmcsInfoShared->RealMode.AttrDS.u;
4511 }
4512 if (fWhat & CPUMCTX_EXTRN_ES)
4513 {
4514 vmxHCImportGuestSegReg(pVCpu, X86_SREG_ES);
4515 if (fRealOnV86Active)
4516 pCtx->es.Attr.u = pVmcsInfoShared->RealMode.AttrES.u;
4517 }
4518 if (fWhat & CPUMCTX_EXTRN_FS)
4519 {
4520 vmxHCImportGuestSegReg(pVCpu, X86_SREG_FS);
4521 if (fRealOnV86Active)
4522 pCtx->fs.Attr.u = pVmcsInfoShared->RealMode.AttrFS.u;
4523 }
4524 if (fWhat & CPUMCTX_EXTRN_GS)
4525 {
4526 vmxHCImportGuestSegReg(pVCpu, X86_SREG_GS);
4527 if (fRealOnV86Active)
4528 pCtx->gs.Attr.u = pVmcsInfoShared->RealMode.AttrGS.u;
4529 }
4530 }
4531
4532 if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
4533 {
4534 if (fWhat & CPUMCTX_EXTRN_LDTR)
4535 vmxHCImportGuestLdtr(pVCpu);
4536
4537 if (fWhat & CPUMCTX_EXTRN_GDTR)
4538 {
4539 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &pCtx->gdtr.pGdt); AssertRC(rc);
4540 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRC(rc);
4541 pCtx->gdtr.cbGdt = u32Val;
4542 }
4543
4544 /* Guest IDTR. */
4545 if (fWhat & CPUMCTX_EXTRN_IDTR)
4546 {
4547 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &pCtx->idtr.pIdt); AssertRC(rc);
4548 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRC(rc);
4549 pCtx->idtr.cbIdt = u32Val;
4550 }
4551
4552 /* Guest TR. */
4553 if (fWhat & CPUMCTX_EXTRN_TR)
4554 {
4555 /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR,
4556 don't need to import that one. */
4557 if (!pVmcsInfo->pShared->RealMode.fRealOnV86Active)
4558 vmxHCImportGuestTr(pVCpu);
4559 }
4560 }
4561
4562 if (fWhat & CPUMCTX_EXTRN_DR7)
4563 {
4564 if (!pVCpu->hmr0.s.fUsingHyperDR7)
4565 {
4566 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_DR7, &pCtx->dr[7]);
4567 AssertRC(rc);
4568 }
4569 }
4570
4571 if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
4572 {
4573 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); AssertRC(rc);
4574 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); AssertRC(rc);
4575 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRC(rc);
4576 pCtx->SysEnter.cs = u32Val;
4577 }
4578
4579 if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
4580 {
4581 if ( pVM->hmr0.s.fAllow64BitGuests
4582 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4583 pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
4584 }
4585
4586 if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
4587 {
4588 if ( pVM->hmr0.s.fAllow64BitGuests
4589 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
4590 {
4591 pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
4592 pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
4593 pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
4594 }
4595 }
4596
4597 if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
4598 {
4599 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
4600 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
4601 uint32_t const cMsrs = pVmcsInfo->cExitMsrStore;
4602 Assert(pMsrs);
4603 Assert(cMsrs <= VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
4604 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
4605 for (uint32_t i = 0; i < cMsrs; i++)
4606 {
4607 uint32_t const idMsr = pMsrs[i].u32Msr;
4608 switch (idMsr)
4609 {
4610 case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsrs[i].u64Value); break;
4611 case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsrs[i].u64Value); break;
4612 case MSR_K6_EFER: /* Can't be changed without causing a VM-exit */ break;
4613 default:
4614 {
4615 uint32_t idxLbrMsr;
4616 if (pVM->hmr0.s.vmx.fLbr)
4617 {
4618 if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbrMsr))
4619 {
4620 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrFromIpMsr));
4621 pVmcsInfoShared->au64LbrFromIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4622 break;
4623 }
4624 if (hmR0VmxIsLbrBranchToMsr(pVM, idMsr, &idxLbrMsr))
4625 {
4626 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrFromIpMsr));
4627 pVmcsInfoShared->au64LbrToIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
4628 break;
4629 }
4630 if (idMsr == pVM->hmr0.s.vmx.idLbrTosMsr)
4631 {
4632 pVmcsInfoShared->u64LbrTosMsr = pMsrs[i].u64Value;
4633 break;
4634 }
4635 /* Fallthru (no break) */
4636 }
4637 pCtx->fExtrn = 0;
4638 pVCpu->hm.s.u32HMError = pMsrs->u32Msr;
4639 ASMSetFlags(fEFlags);
4640 AssertMsgFailed(("Unexpected MSR in auto-load/store area. idMsr=%#RX32 cMsrs=%u\n", idMsr, cMsrs));
4641 return VERR_HM_UNEXPECTED_LD_ST_MSR;
4642 }
4643 }
4644 }
4645 }
4646
4647 if (fWhat & CPUMCTX_EXTRN_CR_MASK)
4648 {
4649 if (fWhat & CPUMCTX_EXTRN_CR0)
4650 {
4651 uint64_t u64Cr0;
4652 uint64_t u64Shadow;
4653 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Cr0); AssertRC(rc);
4654 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Shadow); AssertRC(rc);
4655#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4656 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4657 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4658#else
4659 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4660 {
4661 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4662 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
4663 }
4664 else
4665 {
4666 /*
4667 * We've merged the guest and nested-guest's CR0 guest/host mask while executing
4668 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4669 * re-construct CR0. See @bugref{9180#c95} for details.
4670 */
4671 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4672 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4673 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
4674 | (pVmcsNstGst->u64GuestCr0.u & pVmcsNstGst->u64Cr0Mask.u)
4675 | (u64Shadow & (pVmcsInfoGst->u64Cr0Mask & ~pVmcsNstGst->u64Cr0Mask.u));
4676 }
4677#endif
4678 VMMRZCallRing3Disable(pVCpu); /* May call into PGM which has Log statements. */
4679 CPUMSetGuestCR0(pVCpu, u64Cr0);
4680 VMMRZCallRing3Enable(pVCpu);
4681 }
4682
4683 if (fWhat & CPUMCTX_EXTRN_CR4)
4684 {
4685 uint64_t u64Cr4;
4686 uint64_t u64Shadow;
4687 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64Cr4); AssertRC(rc);
4688 rc |= VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Shadow); AssertRC(rc);
4689#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
4690 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4691 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4692#else
4693 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
4694 {
4695 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4696 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
4697 }
4698 else
4699 {
4700 /*
4701 * We've merged the guest and nested-guest's CR4 guest/host mask while executing
4702 * the nested-guest using hardware-assisted VMX. Accordingly we need to
4703 * re-construct CR4. See @bugref{9180#c95} for details.
4704 */
4705 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
4706 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
4707 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
4708 | (pVmcsNstGst->u64GuestCr4.u & pVmcsNstGst->u64Cr4Mask.u)
4709 | (u64Shadow & (pVmcsInfoGst->u64Cr4Mask & ~pVmcsNstGst->u64Cr4Mask.u));
4710 }
4711#endif
4712 pCtx->cr4 = u64Cr4;
4713 }
4714
4715 if (fWhat & CPUMCTX_EXTRN_CR3)
4716 {
4717 /* CR0.PG bit changes are always intercepted, so it's up to date. */
4718 if ( pVM->hmr0.s.vmx.fUnrestrictedGuest
4719 || ( pVM->hmr0.s.fNestedPaging
4720 && CPUMIsGuestPagingEnabledEx(pCtx)))
4721 {
4722 uint64_t u64Cr3;
4723 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR3, &u64Cr3); AssertRC(rc);
4724 if (pCtx->cr3 != u64Cr3)
4725 {
4726 pCtx->cr3 = u64Cr3;
4727 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4728 }
4729
4730 /*
4731 * If the guest is in PAE mode, sync back the PDPE's into the guest state.
4732 * CR4.PAE, CR0.PG, EFER MSR changes are always intercepted, so they're up to date.
4733 */
4734 if (CPUMIsGuestInPAEModeEx(pCtx))
4735 {
4736 X86PDPE aPaePdpes[4];
4737 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPaePdpes[0].u); AssertRC(rc);
4738 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPaePdpes[1].u); AssertRC(rc);
4739 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPaePdpes[2].u); AssertRC(rc);
4740 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPaePdpes[3].u); AssertRC(rc);
4741 if (memcmp(&aPaePdpes[0], &pCtx->aPaePdpes[0], sizeof(aPaePdpes)))
4742 {
4743 memcpy(&pCtx->aPaePdpes[0], &aPaePdpes[0], sizeof(aPaePdpes));
4744 /* PGM now updates PAE PDPTEs while updating CR3. */
4745 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
4746 }
4747 }
4748 }
4749 }
4750 }
4751
4752#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4753 if (fWhat & CPUMCTX_EXTRN_HWVIRT)
4754 {
4755 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
4756 && !CPUMIsGuestInVmxNonRootMode(pCtx))
4757 {
4758 Assert(CPUMIsGuestInVmxRootMode(pCtx));
4759 rc = vmxHCCopyShadowToNstGstVmcs(pVCpu, pVmcsInfo);
4760 if (RT_SUCCESS(rc))
4761 { /* likely */ }
4762 else
4763 break;
4764 }
4765 }
4766#endif
4767 } while (0);
4768
4769 if (RT_SUCCESS(rc))
4770 {
4771 /* Update fExtrn. */
4772 pCtx->fExtrn &= ~fWhat;
4773
4774 /* If everything has been imported, clear the HM keeper bit. */
4775 if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
4776 {
4777 pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
4778 Assert(!pCtx->fExtrn);
4779 }
4780 }
4781 }
4782 else
4783 AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
4784
4785 /*
4786 * Restore interrupts.
4787 */
4788 ASMSetFlags(fEFlags);
4789
4790 STAM_PROFILE_ADV_STOP(& pVCpu->hm.s.StatImportGuestState, x);
4791
4792 if (RT_SUCCESS(rc))
4793 { /* likely */ }
4794 else
4795 return rc;
4796
4797 /*
4798 * Honor any pending CR3 updates.
4799 *
4800 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> VMXR0CallRing3Callback()
4801 * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
4802 * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
4803 *
4804 * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
4805 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
4806 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
4807 * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
4808 *
4809 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
4810 *
4811 * The force-flag is checked first as it's cheaper for potential superfluous calls to this function.
4812 */
4813 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)
4814 && VMMRZCallRing3IsEnabled(pVCpu))
4815 {
4816 Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
4817 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
4818 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
4819 }
4820
4821 return VINF_SUCCESS;
4822}
4823
4824
4825/**
4826 * Saves the guest state from the VMCS into the guest-CPU context.
4827 *
4828 * @returns VBox status code.
4829 * @param pVCpu The cross context virtual CPU structure.
4830 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
4831 */
4832VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
4833{
4834 AssertPtr(pVCpu);
4835 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4836 return hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fWhat);
4837}
4838
4839
4840/**
4841 * Does the necessary state syncing before returning to ring-3 for any reason
4842 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
4843 *
4844 * @returns VBox status code.
4845 * @param pVCpu The cross context virtual CPU structure.
4846 * @param fImportState Whether to import the guest state from the VMCS back
4847 * to the guest-CPU context.
4848 *
4849 * @remarks No-long-jmp zone!!!
4850 */
4851static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
4852{
4853 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4854 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4855
4856 RTCPUID const idCpu = RTMpCpuId();
4857 Log4Func(("HostCpuId=%u\n", idCpu));
4858
4859 /*
4860 * !!! IMPORTANT !!!
4861 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
4862 */
4863
4864 /* Save the guest state if necessary. */
4865 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
4866 if (fImportState)
4867 {
4868 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
4869 AssertRCReturn(rc, rc);
4870 }
4871
4872 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
4873 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
4874 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
4875
4876 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
4877#ifdef VBOX_STRICT
4878 if (CPUMIsHyperDebugStateActive(pVCpu))
4879 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
4880#endif
4881 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
4882 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4883 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4884
4885 /* Restore host-state bits that VT-x only restores partially. */
4886 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4887 {
4888 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
4889 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4890 }
4891 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4892
4893 /* Restore the lazy host MSRs as we're leaving VT-x context. */
4894 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
4895 {
4896 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
4897 if (!fImportState)
4898 {
4899 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
4900 AssertRCReturn(rc, rc);
4901 }
4902 hmR0VmxLazyRestoreHostMsrs(pVCpu);
4903 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
4904 }
4905 else
4906 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
4907
4908 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
4909 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
4910
4911 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
4912 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
4913 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
4914 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
4915 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
4916 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
4917 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
4918 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
4919 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
4920 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
4921
4922 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
4923
4924 /** @todo This partially defeats the purpose of having preemption hooks.
4925 * The problem is, deregistering the hooks should be moved to a place that
4926 * lasts until the EMT is about to be destroyed not everytime while leaving HM
4927 * context.
4928 */
4929 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4930 AssertRCReturn(rc, rc);
4931
4932#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4933 /*
4934 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
4935 * clear a shadow VMCS before allowing that VMCS to become active on another
4936 * logical processor. We may or may not be importing guest state which clears
4937 * it, so cover for it here.
4938 *
4939 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
4940 */
4941 if ( pVmcsInfo->pvShadowVmcs
4942 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
4943 {
4944 rc = vmxHCClearShadowVmcs(pVmcsInfo);
4945 AssertRCReturn(rc, rc);
4946 }
4947
4948 /*
4949 * Flag that we need to re-export the host state if we switch to this VMCS before
4950 * executing guest or nested-guest code.
4951 */
4952 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
4953#endif
4954
4955 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
4956 NOREF(idCpu);
4957 return VINF_SUCCESS;
4958}
4959
4960
4961/**
4962 * Leaves the VT-x session.
4963 *
4964 * @returns VBox status code.
4965 * @param pVCpu The cross context virtual CPU structure.
4966 *
4967 * @remarks No-long-jmp zone!!!
4968 */
4969static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
4970{
4971 HM_DISABLE_PREEMPT(pVCpu);
4972 HMVMX_ASSERT_CPU_SAFE(pVCpu);
4973 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
4974 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4975
4976 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
4977 and done this from the VMXR0ThreadCtxCallback(). */
4978 if (!pVCpu->hmr0.s.fLeaveDone)
4979 {
4980 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
4981 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
4982 pVCpu->hmr0.s.fLeaveDone = true;
4983 }
4984 Assert(!pVCpu->cpum.GstCtx.fExtrn);
4985
4986 /*
4987 * !!! IMPORTANT !!!
4988 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
4989 */
4990
4991 /* Deregister hook now that we've left HM context before re-enabling preemption. */
4992 /** @todo Deregistering here means we need to VMCLEAR always
4993 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
4994 * for calling VMMR0ThreadCtxHookDisable here! */
4995 VMMR0ThreadCtxHookDisable(pVCpu);
4996
4997 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
4998 int rc = HMR0LeaveCpu(pVCpu);
4999 HM_RESTORE_PREEMPT();
5000 return rc;
5001}
5002
5003
5004/**
5005 * Take necessary actions before going back to ring-3.
5006 *
5007 * An action requires us to go back to ring-3. This function does the necessary
5008 * steps before we can safely return to ring-3. This is not the same as longjmps
5009 * to ring-3, this is voluntary and prepares the guest so it may continue
5010 * executing outside HM (recompiler/IEM).
5011 *
5012 * @returns VBox status code.
5013 * @param pVCpu The cross context virtual CPU structure.
5014 * @param rcExit The reason for exiting to ring-3. Can be
5015 * VINF_VMM_UNKNOWN_RING3_CALL.
5016 */
5017static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
5018{
5019 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5020
5021 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5022 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
5023 {
5024 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
5025 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
5026 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
5027 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
5028 }
5029
5030 /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
5031 VMMRZCallRing3Disable(pVCpu);
5032 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
5033
5034 /*
5035 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
5036 * We need to do this only on returns to ring-3 and not for longjmps to ring3.
5037 *
5038 * This is because execution may continue from ring-3 and we would need to inject
5039 * the event from there (hence place it back in TRPM).
5040 */
5041 if (pVCpu->hm.s.Event.fPending)
5042 {
5043 vmxHCPendingEventToTrpmTrap(pVCpu);
5044 Assert(!pVCpu->hm.s.Event.fPending);
5045
5046 /* Clear the events from the VMCS. */
5047 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
5048 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
5049 }
5050#ifdef VBOX_STRICT
5051 /*
5052 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
5053 * fatal), we don't care about verifying duplicate injection of events. Errors like
5054 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
5055 * function so those should and will be checked below.
5056 */
5057 else if (RT_SUCCESS(rcExit))
5058 {
5059 /*
5060 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
5061 * This can be pretty hard to debug otherwise, interrupts might get injected twice
5062 * occasionally, see @bugref{9180#c42}.
5063 *
5064 * However, if the VM-entry failed, any VM entry-interruption info. field would
5065 * be left unmodified as the event would not have been injected to the guest. In
5066 * such cases, don't assert, we're not going to continue guest execution anyway.
5067 */
5068 uint32_t uExitReason;
5069 uint32_t uEntryIntInfo;
5070 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
5071 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
5072 AssertRC(rc);
5073 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
5074 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
5075 }
5076#endif
5077
5078 /*
5079 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
5080 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
5081 * (e.g. TPR below threshold).
5082 */
5083 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5084 {
5085 vmxHCClearIntWindowExitVmcs(pVCpu, pVmcsInfo);
5086 vmxHCClearNmiWindowExitVmcs(pVCpu, pVmcsInfo);
5087 }
5088
5089 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
5090 and if we're injecting an event we should have a TRPM trap pending. */
5091 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5092#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
5093 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
5094#endif
5095
5096 /* Save guest state and restore host state bits. */
5097 int rc = hmR0VmxLeaveSession(pVCpu);
5098 AssertRCReturn(rc, rc);
5099 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
5100
5101 /* Thread-context hooks are unregistered at this point!!! */
5102 /* Ring-3 callback notifications are unregistered at this point!!! */
5103
5104 /* Sync recompiler state. */
5105 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
5106 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
5107 | CPUM_CHANGED_LDTR
5108 | CPUM_CHANGED_GDTR
5109 | CPUM_CHANGED_IDTR
5110 | CPUM_CHANGED_TR
5111 | CPUM_CHANGED_HIDDEN_SEL_REGS);
5112 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
5113 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
5114 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
5115
5116 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
5117
5118 /* Update the exit-to-ring 3 reason. */
5119 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
5120
5121 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
5122 if ( rcExit != VINF_EM_RAW_INTERRUPT
5123 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
5124 {
5125 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
5126 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5127 }
5128
5129 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
5130 VMMRZCallRing3Enable(pVCpu);
5131 return rc;
5132}
5133
5134
5135/**
5136 * VMMRZCallRing3() callback wrapper which saves the guest state before we
5137 * longjump due to a ring-0 assertion.
5138 *
5139 * @returns VBox status code.
5140 * @param pVCpu The cross context virtual CPU structure.
5141 */
5142VMMR0DECL(int) VMXR0AssertionCallback(PVMCPUCC pVCpu)
5143{
5144 /*
5145 * !!! IMPORTANT !!!
5146 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
5147 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
5148 */
5149 VMMR0AssertionRemoveNotification(pVCpu);
5150 VMMRZCallRing3Disable(pVCpu);
5151 HM_DISABLE_PREEMPT(pVCpu);
5152
5153 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5154 vmxHCImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
5155 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
5156 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
5157
5158 /* Restore host-state bits that VT-x only restores partially. */
5159 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
5160 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
5161 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
5162
5163 /* Restore the lazy host MSRs as we're leaving VT-x context. */
5164 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
5165 hmR0VmxLazyRestoreHostMsrs(pVCpu);
5166
5167 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
5168 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
5169 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
5170
5171 /* Clear the current VMCS data back to memory (shadow VMCS if any would have been
5172 cleared as part of importing the guest state above. */
5173 hmR0VmxClearVmcs(pVmcsInfo);
5174
5175 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
5176 VMMR0ThreadCtxHookDisable(pVCpu);
5177
5178 /* Leave HM context. This takes care of local init (term). */
5179 HMR0LeaveCpu(pVCpu);
5180 HM_RESTORE_PREEMPT();
5181 return VINF_SUCCESS;
5182}
5183
5184
5185/**
5186 * Enters the VT-x session.
5187 *
5188 * @returns VBox status code.
5189 * @param pVCpu The cross context virtual CPU structure.
5190 */
5191VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
5192{
5193 AssertPtr(pVCpu);
5194 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
5195 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5196
5197 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5198 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5199 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5200
5201#ifdef VBOX_STRICT
5202 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
5203 RTCCUINTREG uHostCr4 = ASMGetCR4();
5204 if (!(uHostCr4 & X86_CR4_VMXE))
5205 {
5206 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
5207 return VERR_VMX_X86_CR4_VMXE_CLEARED;
5208 }
5209#endif
5210
5211 /*
5212 * Do the EMT scheduled L1D and MDS flush here if needed.
5213 */
5214 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5215 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5216 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5217 hmR0MdsClear();
5218
5219 /*
5220 * Load the appropriate VMCS as the current and active one.
5221 */
5222 PVMXVMCSINFO pVmcsInfo;
5223 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
5224 if (!fInNestedGuestMode)
5225 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
5226 else
5227 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5228 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
5229 if (RT_SUCCESS(rc))
5230 {
5231 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
5232 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
5233 pVCpu->hmr0.s.fLeaveDone = false;
5234 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5235 }
5236 return rc;
5237}
5238
5239
5240/**
5241 * The thread-context callback.
5242 *
5243 * This is used together with RTThreadCtxHookCreate() on platforms which
5244 * supports it, and directly from VMMR0EmtPrepareForBlocking() and
5245 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
5246 *
5247 * @param enmEvent The thread-context event.
5248 * @param pVCpu The cross context virtual CPU structure.
5249 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
5250 * @thread EMT(pVCpu)
5251 */
5252VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
5253{
5254 AssertPtr(pVCpu);
5255 RT_NOREF1(fGlobalInit);
5256
5257 switch (enmEvent)
5258 {
5259 case RTTHREADCTXEVENT_OUT:
5260 {
5261 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5262 VMCPU_ASSERT_EMT(pVCpu);
5263
5264 /* No longjmps (logger flushes, locks) in this fragile context. */
5265 VMMRZCallRing3Disable(pVCpu);
5266 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
5267
5268 /* Restore host-state (FPU, debug etc.) */
5269 if (!pVCpu->hmr0.s.fLeaveDone)
5270 {
5271 /*
5272 * Do -not- import the guest-state here as we might already be in the middle of importing
5273 * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
5274 */
5275 hmR0VmxLeave(pVCpu, false /* fImportState */);
5276 pVCpu->hmr0.s.fLeaveDone = true;
5277 }
5278
5279 /* Leave HM context, takes care of local init (term). */
5280 int rc = HMR0LeaveCpu(pVCpu);
5281 AssertRC(rc);
5282
5283 /* Restore longjmp state. */
5284 VMMRZCallRing3Enable(pVCpu);
5285 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
5286 break;
5287 }
5288
5289 case RTTHREADCTXEVENT_IN:
5290 {
5291 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5292 VMCPU_ASSERT_EMT(pVCpu);
5293
5294 /* Do the EMT scheduled L1D and MDS flush here if needed. */
5295 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
5296 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
5297 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
5298 hmR0MdsClear();
5299
5300 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
5301 VMMRZCallRing3Disable(pVCpu);
5302 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
5303
5304 /* Initialize the bare minimum state required for HM. This takes care of
5305 initializing VT-x if necessary (onlined CPUs, local init etc.) */
5306 int rc = hmR0EnterCpu(pVCpu);
5307 AssertRC(rc);
5308 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
5309 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
5310
5311 /* Load the active VMCS as the current one. */
5312 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
5313 rc = hmR0VmxLoadVmcs(pVmcsInfo);
5314 AssertRC(rc);
5315 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
5316 pVCpu->hmr0.s.fLeaveDone = false;
5317
5318 /* Restore longjmp state. */
5319 VMMRZCallRing3Enable(pVCpu);
5320 break;
5321 }
5322
5323 default:
5324 break;
5325 }
5326}
5327
5328
5329/**
5330 * Exports the host state into the VMCS host-state area.
5331 * Sets up the VM-exit MSR-load area.
5332 *
5333 * The CPU state will be loaded from these fields on every successful VM-exit.
5334 *
5335 * @returns VBox status code.
5336 * @param pVCpu The cross context virtual CPU structure.
5337 *
5338 * @remarks No-long-jump zone!!!
5339 */
5340static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
5341{
5342 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5343
5344 int rc = VINF_SUCCESS;
5345 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
5346 {
5347 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
5348
5349 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
5350 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5351
5352 hmR0VmxExportHostMsrs(pVCpu);
5353
5354 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
5355 }
5356 return rc;
5357}
5358
5359
5360/**
5361 * Saves the host state in the VMCS host-state.
5362 *
5363 * @returns VBox status code.
5364 * @param pVCpu The cross context virtual CPU structure.
5365 *
5366 * @remarks No-long-jump zone!!!
5367 */
5368VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
5369{
5370 AssertPtr(pVCpu);
5371 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5372
5373 /*
5374 * Export the host state here while entering HM context.
5375 * When thread-context hooks are used, we might get preempted and have to re-save the host
5376 * state but most of the time we won't be, so do it here before we disable interrupts.
5377 */
5378 return hmR0VmxExportHostState(pVCpu);
5379}
5380
5381
5382/**
5383 * Exports the guest state into the VMCS guest-state area.
5384 *
5385 * The will typically be done before VM-entry when the guest-CPU state and the
5386 * VMCS state may potentially be out of sync.
5387 *
5388 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
5389 * VM-entry controls.
5390 * Sets up the appropriate VMX non-root function to execute guest code based on
5391 * the guest CPU mode.
5392 *
5393 * @returns VBox strict status code.
5394 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5395 * without unrestricted guest execution and the VMMDev is not presently
5396 * mapped (e.g. EFI32).
5397 *
5398 * @param pVCpu The cross context virtual CPU structure.
5399 * @param pVmxTransient The VMX-transient structure.
5400 *
5401 * @remarks No-long-jump zone!!!
5402 */
5403static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5404{
5405 AssertPtr(pVCpu);
5406 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5407 LogFlowFunc(("pVCpu=%p\n", pVCpu));
5408
5409 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
5410
5411 /*
5412 * Determine real-on-v86 mode.
5413 * Used when the guest is in real-mode and unrestricted guest execution is not used.
5414 */
5415 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
5416 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
5417 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
5418 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
5419 else
5420 {
5421 Assert(!pVmxTransient->fIsNestedGuest);
5422 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
5423 }
5424
5425 /*
5426 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
5427 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
5428 */
5429 int rc = vmxHCExportGuestEntryExitCtls(pVCpu, pVmxTransient);
5430 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5431
5432 rc = vmxHCExportGuestCR0(pVCpu, pVmxTransient);
5433 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5434
5435 VBOXSTRICTRC rcStrict = vmxHCExportGuestCR3AndCR4(pVCpu, pVmxTransient);
5436 if (rcStrict == VINF_SUCCESS)
5437 { /* likely */ }
5438 else
5439 {
5440 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
5441 return rcStrict;
5442 }
5443
5444 rc = vmxHCExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
5445 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5446
5447 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
5448 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5449
5450 vmxHCExportGuestApicTpr(pVCpu, pVmxTransient);
5451 vmxHCExportGuestXcptIntercepts(pVCpu, pVmxTransient);
5452 vmxHCExportGuestRip(pVCpu);
5453 hmR0VmxExportGuestRsp(pVCpu);
5454 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5455
5456 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5457 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
5458
5459 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
5460 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
5461 | HM_CHANGED_GUEST_CR2
5462 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
5463 | HM_CHANGED_GUEST_X87
5464 | HM_CHANGED_GUEST_SSE_AVX
5465 | HM_CHANGED_GUEST_OTHER_XSAVE
5466 | HM_CHANGED_GUEST_XCRx
5467 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
5468 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
5469 | HM_CHANGED_GUEST_TSC_AUX
5470 | HM_CHANGED_GUEST_OTHER_MSRS
5471 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
5472
5473 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
5474 return rc;
5475}
5476
5477
5478/**
5479 * Exports the state shared between the host and guest into the VMCS.
5480 *
5481 * @param pVCpu The cross context virtual CPU structure.
5482 * @param pVmxTransient The VMX-transient structure.
5483 *
5484 * @remarks No-long-jump zone!!!
5485 */
5486static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5487{
5488 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5489 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5490
5491 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
5492 {
5493 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
5494 AssertRC(rc);
5495 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
5496
5497 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
5498 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
5499 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5500 }
5501
5502 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
5503 {
5504 hmR0VmxLazyLoadGuestMsrs(pVCpu);
5505 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
5506 }
5507
5508 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
5509 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
5510}
5511
5512
5513/**
5514 * Worker for loading the guest-state bits in the inner VT-x execution loop.
5515 *
5516 * @returns Strict VBox status code (i.e. informational status codes too).
5517 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5518 * without unrestricted guest execution and the VMMDev is not presently
5519 * mapped (e.g. EFI32).
5520 *
5521 * @param pVCpu The cross context virtual CPU structure.
5522 * @param pVmxTransient The VMX-transient structure.
5523 *
5524 * @remarks No-long-jump zone!!!
5525 */
5526static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
5527{
5528 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
5529 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5530
5531#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
5532 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
5533#endif
5534
5535 /*
5536 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
5537 * changes. First try to export only these without going through all other changed-flag checks.
5538 */
5539 VBOXSTRICTRC rcStrict;
5540 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
5541 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
5542 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5543
5544 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
5545 if ( (fCtxChanged & fMinimalMask)
5546 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
5547 {
5548 vmxHCExportGuestRip(pVCpu);
5549 hmR0VmxExportGuestRsp(pVCpu);
5550 vmxHCExportGuestRflags(pVCpu, pVmxTransient);
5551 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
5552 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
5553 }
5554 /* If anything else also changed, go through the full export routine and export as required. */
5555 else if (fCtxChanged & fCtxMask)
5556 {
5557 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
5558 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
5559 { /* likely */}
5560 else
5561 {
5562 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
5563 VBOXSTRICTRC_VAL(rcStrict)));
5564 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
5565 return rcStrict;
5566 }
5567 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
5568 }
5569 /* Nothing changed, nothing to load here. */
5570 else
5571 rcStrict = VINF_SUCCESS;
5572
5573#ifdef VBOX_STRICT
5574 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
5575 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
5576 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
5577#endif
5578 return rcStrict;
5579}
5580
5581
5582/**
5583 * Map the APIC-access page for virtualizing APIC accesses.
5584 *
5585 * This can cause a longjumps to R3 due to the acquisition of the PGM lock. Hence,
5586 * this not done as part of exporting guest state, see @bugref{8721}.
5587 *
5588 * @returns VBox status code.
5589 * @param pVCpu The cross context virtual CPU structure.
5590 */
5591static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu)
5592{
5593 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5594 uint64_t const u64MsrApicBase = APICGetBaseMsrNoCheck(pVCpu);
5595
5596 Assert(PDMHasApic(pVM));
5597 Assert(u64MsrApicBase);
5598
5599 RTGCPHYS const GCPhysApicBase = u64MsrApicBase & PAGE_BASE_GC_MASK;
5600 Log4Func(("Mappping HC APIC-access page at %#RGp\n", GCPhysApicBase));
5601
5602 /* Unalias the existing mapping. */
5603 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
5604 AssertRCReturn(rc, rc);
5605
5606 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
5607 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
5608 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
5609 AssertRCReturn(rc, rc);
5610
5611 /* Update the per-VCPU cache of the APIC base MSR. */
5612 pVCpu->hm.s.vmx.u64GstMsrApicBase = u64MsrApicBase;
5613 return VINF_SUCCESS;
5614}
5615
5616
5617/**
5618 * Worker function passed to RTMpOnSpecific() that is to be called on the target
5619 * CPU.
5620 *
5621 * @param idCpu The ID for the CPU the function is called on.
5622 * @param pvUser1 Null, not used.
5623 * @param pvUser2 Null, not used.
5624 */
5625static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
5626{
5627 RT_NOREF3(idCpu, pvUser1, pvUser2);
5628 VMXDispatchHostNmi();
5629}
5630
5631
5632/**
5633 * Dispatching an NMI on the host CPU that received it.
5634 *
5635 * @returns VBox status code.
5636 * @param pVCpu The cross context virtual CPU structure.
5637 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
5638 * executing when receiving the host NMI in VMX non-root
5639 * operation.
5640 */
5641static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
5642{
5643 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
5644 Assert(idCpu != NIL_RTCPUID);
5645
5646 /*
5647 * We don't want to delay dispatching the NMI any more than we have to. However,
5648 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
5649 * after executing guest or nested-guest code for the following reasons:
5650 *
5651 * - We would need to perform VMREADs with interrupts disabled and is orders of
5652 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
5653 * supported by the host hypervisor.
5654 *
5655 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
5656 * longer period of time just for handling an edge case like host NMIs which do
5657 * not occur nearly as frequently as other VM-exits.
5658 *
5659 * Let's cover the most likely scenario first. Check if we are on the target CPU
5660 * and dispatch the NMI right away. This should be much faster than calling into
5661 * RTMpOnSpecific() machinery.
5662 */
5663 bool fDispatched = false;
5664 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5665 if (idCpu == RTMpCpuId())
5666 {
5667 VMXDispatchHostNmi();
5668 fDispatched = true;
5669 }
5670 ASMSetFlags(fEFlags);
5671 if (fDispatched)
5672 {
5673 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
5674 return VINF_SUCCESS;
5675 }
5676
5677 /*
5678 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
5679 * there should be no race or recursion even if we are unlucky enough to be preempted
5680 * (to the target CPU) without dispatching the host NMI above.
5681 */
5682 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
5683 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
5684}
5685
5686
5687#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5688/**
5689 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
5690 * nested-guest using hardware-assisted VMX.
5691 *
5692 * @param pVCpu The cross context virtual CPU structure.
5693 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
5694 * @param pVmcsInfoGst The guest VMCS info. object.
5695 */
5696static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
5697{
5698 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
5699 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
5700 Assert(pu64MsrBitmap);
5701
5702 /*
5703 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
5704 * MSR that is intercepted by the guest is also intercepted while executing the
5705 * nested-guest using hardware-assisted VMX.
5706 *
5707 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
5708 * nested-guest VM-exit even if the outer guest is not intercepting some
5709 * MSRs. We cannot assume the caller has initialized the nested-guest
5710 * MSR bitmap in this case.
5711 *
5712 * The nested hypervisor may also switch whether it uses MSR bitmaps for
5713 * each of its VM-entry, hence initializing it once per-VM while setting
5714 * up the nested-guest VMCS is not sufficient.
5715 */
5716 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5717 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
5718 {
5719 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
5720 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
5721 Assert(pu64MsrBitmapNstGst);
5722 Assert(pu64MsrBitmapGst);
5723
5724 /** @todo Detect and use EVEX.POR? */
5725 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
5726 for (uint32_t i = 0; i < cFrags; i++)
5727 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
5728 }
5729 else
5730 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
5731}
5732
5733
5734/**
5735 * Merges the guest VMCS in to the nested-guest VMCS controls in preparation of
5736 * hardware-assisted VMX execution of the nested-guest.
5737 *
5738 * For a guest, we don't modify these controls once we set up the VMCS and hence
5739 * this function is never called.
5740 *
5741 * For nested-guests since the nested hypervisor provides these controls on every
5742 * nested-guest VM-entry and could potentially change them everytime we need to
5743 * merge them before every nested-guest VM-entry.
5744 *
5745 * @returns VBox status code.
5746 * @param pVCpu The cross context virtual CPU structure.
5747 */
5748static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
5749{
5750 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5751 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
5752 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5753
5754 /*
5755 * Merge the controls with the requirements of the guest VMCS.
5756 *
5757 * We do not need to validate the nested-guest VMX features specified in the nested-guest
5758 * VMCS with the features supported by the physical CPU as it's already done by the
5759 * VMLAUNCH/VMRESUME instruction emulation.
5760 *
5761 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
5762 * derived from the VMX features supported by the physical CPU.
5763 */
5764
5765 /* Pin-based VM-execution controls. */
5766 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
5767
5768 /* Processor-based VM-execution controls. */
5769 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
5770 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
5771 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
5772 | VMX_PROC_CTLS_MOV_DR_EXIT
5773 | VMX_PROC_CTLS_USE_TPR_SHADOW
5774 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
5775
5776 /* Secondary processor-based VM-execution controls. */
5777 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
5778 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
5779 | VMX_PROC_CTLS2_INVPCID
5780 | VMX_PROC_CTLS2_VMCS_SHADOWING
5781 | VMX_PROC_CTLS2_RDTSCP
5782 | VMX_PROC_CTLS2_XSAVES_XRSTORS
5783 | VMX_PROC_CTLS2_APIC_REG_VIRT
5784 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
5785 | VMX_PROC_CTLS2_VMFUNC));
5786
5787 /*
5788 * VM-entry controls:
5789 * These controls contains state that depends on the nested-guest state (primarily
5790 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
5791 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
5792 * properly continue executing the nested-guest if the EFER MSR changes but does not
5793 * cause a nested-guest VM-exits.
5794 *
5795 * VM-exit controls:
5796 * These controls specify the host state on return. We cannot use the controls from
5797 * the nested hypervisor state as is as it would contain the guest state rather than
5798 * the host state. Since the host state is subject to change (e.g. preemption, trips
5799 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
5800 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
5801 *
5802 * VM-entry MSR-load:
5803 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
5804 * context by the VMLAUNCH/VMRESUME instruction emulation.
5805 *
5806 * VM-exit MSR-store:
5807 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
5808 * back into the VM-exit MSR-store area.
5809 *
5810 * VM-exit MSR-load areas:
5811 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
5812 * can entirely ignore what the nested hypervisor wants to load here.
5813 */
5814
5815 /*
5816 * Exception bitmap.
5817 *
5818 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
5819 * here (and avoid doing anything while exporting nested-guest state), but to keep the
5820 * code more flexible if intercepting exceptions become more dynamic in the future we do
5821 * it as part of exporting the nested-guest state.
5822 */
5823 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
5824
5825 /*
5826 * CR0/CR4 guest/host mask.
5827 *
5828 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
5829 * cause VM-exits, so we need to merge them here.
5830 */
5831 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
5832 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
5833
5834 /*
5835 * Page-fault error-code mask and match.
5836 *
5837 * Although we require unrestricted guest execution (and thereby nested-paging) for
5838 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
5839 * normally intercept #PFs, it might intercept them for debugging purposes.
5840 *
5841 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
5842 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
5843 */
5844 uint32_t u32XcptPFMask;
5845 uint32_t u32XcptPFMatch;
5846 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
5847 {
5848 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
5849 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
5850 }
5851 else
5852 {
5853 u32XcptPFMask = 0;
5854 u32XcptPFMatch = 0;
5855 }
5856
5857 /*
5858 * Pause-Loop exiting.
5859 */
5860 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
5861 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
5862 * this will work... */
5863 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
5864 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
5865
5866 /*
5867 * Pending debug exceptions.
5868 * Currently just copy whatever the nested-guest provides us.
5869 */
5870 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
5871
5872 /*
5873 * I/O Bitmap.
5874 *
5875 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
5876 * intercept all I/O port accesses.
5877 */
5878 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
5879 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
5880
5881 /*
5882 * VMCS shadowing.
5883 *
5884 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
5885 * enabled while executing the nested-guest.
5886 */
5887 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
5888
5889 /*
5890 * APIC-access page.
5891 */
5892 RTHCPHYS HCPhysApicAccess;
5893 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
5894 {
5895 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
5896 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
5897
5898 /** @todo NSTVMX: This is not really correct but currently is required to make
5899 * things work. We need to re-enable the page handler when we fallback to
5900 * IEM execution of the nested-guest! */
5901 PGMHandlerPhysicalPageTempOff(pVM, GCPhysApicAccess, GCPhysApicAccess);
5902
5903 void *pvPage;
5904 PGMPAGEMAPLOCK PgLockApicAccess;
5905 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
5906 if (RT_SUCCESS(rc))
5907 {
5908 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
5909 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
5910
5911 /** @todo Handle proper releasing of page-mapping lock later. */
5912 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
5913 }
5914 else
5915 return rc;
5916 }
5917 else
5918 HCPhysApicAccess = 0;
5919
5920 /*
5921 * Virtual-APIC page and TPR threshold.
5922 */
5923 RTHCPHYS HCPhysVirtApic;
5924 uint32_t u32TprThreshold;
5925 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5926 {
5927 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
5928 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
5929
5930 void *pvPage;
5931 PGMPAGEMAPLOCK PgLockVirtApic;
5932 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
5933 if (RT_SUCCESS(rc))
5934 {
5935 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
5936 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
5937
5938 /** @todo Handle proper releasing of page-mapping lock later. */
5939 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
5940 }
5941 else
5942 return rc;
5943
5944 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
5945 }
5946 else
5947 {
5948 HCPhysVirtApic = 0;
5949 u32TprThreshold = 0;
5950
5951 /*
5952 * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
5953 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
5954 * be taken care of by EPT/shadow paging.
5955 */
5956 if (pVM->hmr0.s.fAllow64BitGuests)
5957 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
5958 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
5959 }
5960
5961 /*
5962 * Validate basic assumptions.
5963 */
5964 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
5965 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
5966 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
5967 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
5968
5969 /*
5970 * Commit it to the nested-guest VMCS.
5971 */
5972 int rc = VINF_SUCCESS;
5973 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
5974 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
5975 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
5976 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
5977 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
5978 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
5979 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
5980 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
5981 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
5982 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
5983 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
5984 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
5985 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
5986 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
5987 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
5988 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
5989 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
5990 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
5991 {
5992 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
5993 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
5994 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
5995 }
5996 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5997 {
5998 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
5999 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
6000 }
6001 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6002 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
6003 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
6004 AssertRC(rc);
6005
6006 /*
6007 * Update the nested-guest VMCS cache.
6008 */
6009 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
6010 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
6011 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
6012 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
6013 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
6014 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
6015 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
6016 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
6017 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
6018
6019 /*
6020 * We need to flush the TLB if we are switching the APIC-access page address.
6021 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
6022 */
6023 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6024 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
6025
6026 /*
6027 * MSR bitmap.
6028 *
6029 * The MSR bitmap address has already been initialized while setting up the nested-guest
6030 * VMCS, here we need to merge the MSR bitmaps.
6031 */
6032 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
6033 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
6034
6035 return VINF_SUCCESS;
6036}
6037#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6038
6039
6040/**
6041 * Does the preparations before executing guest code in VT-x.
6042 *
6043 * This may cause longjmps to ring-3 and may even result in rescheduling to the
6044 * recompiler/IEM. We must be cautious what we do here regarding committing
6045 * guest-state information into the VMCS assuming we assuredly execute the
6046 * guest in VT-x mode.
6047 *
6048 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
6049 * the common-state (TRPM/forceflags), we must undo those changes so that the
6050 * recompiler/IEM can (and should) use them when it resumes guest execution.
6051 * Otherwise such operations must be done when we can no longer exit to ring-3.
6052 *
6053 * @returns Strict VBox status code (i.e. informational status codes too).
6054 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
6055 * and longjmps to ring-3 have been disabled.
6056 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
6057 * pending events).
6058 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
6059 * double-fault into the guest.
6060 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
6061 * dispatched directly.
6062 * @retval VINF_* scheduling changes, we have to go back to ring-3 (e.g. VINF_EM_RAW_TO_R3 for pending force-flags, VINF_EM_RAW_INTERRUPT for a pending host preemption).
6063 *
6064 * @param pVCpu The cross context virtual CPU structure.
6065 * @param pVmxTransient The VMX-transient structure.
6066 * @param fStepping Whether we are single-stepping the guest in the
6067 * hypervisor debugger. Makes us ignore some of the reasons
6068 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
6069 * if event dispatching took place.
6070 */
6071static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
6072{
6073 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6074
6075 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
6076
6077#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
6078 if (pVmxTransient->fIsNestedGuest)
6079 {
6080 RT_NOREF2(pVCpu, fStepping);
6081 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
6082 return VINF_EM_RESCHEDULE_REM;
6083 }
6084#endif
6085
6086 /*
6087 * Check and process force flag actions, some of which might require us to go back to ring-3.
6088 */
6089 VBOXSTRICTRC rcStrict = vmxHCCheckForceFlags(pVCpu, pVmxTransient->fIsNestedGuest, fStepping);
6090 if (rcStrict == VINF_SUCCESS)
6091 {
6092 /* FFs don't get set all the time. Bail with VINF_VMX_VMEXIT if we were to run a nested-guest but the guest is no longer in VMX non-root mode. */
6093#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6094 if ( pVmxTransient->fIsNestedGuest
6095 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6096 {
6097 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6098 return VINF_VMX_VMEXIT;
6099 }
6100#endif
6101 }
6102 else
6103 return rcStrict;
6104
6105 /*
6106 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
6107 */
6108 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6109 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
6110 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
6111 && PDMHasApic(pVM))
6112 {
6113 int rc = hmR0VmxMapHCApicAccessPage(pVCpu);
6114 AssertRCReturn(rc, rc);
6115 }
6116
6117#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6118 /*
6119 * Merge guest VMCS controls with the nested-guest VMCS controls.
6120 *
6121 * Even if we have not executed the guest prior to this (e.g. when resuming from a
6122 * saved state), we should be okay with merging controls as we initialize the
6123 * guest VMCS controls as part of VM setup phase.
6124 */
6125 if ( pVmxTransient->fIsNestedGuest
6126 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
6127 {
6128 int rc = hmR0VmxMergeVmcsNested(pVCpu);
6129 AssertRCReturn(rc, rc);
6130 pVCpu->hm.s.vmx.fMergedNstGstCtls = true; /* Skip the merge on subsequent calls while this flag stays set. */
6131 }
6132#endif
6133
6134 /*
6135 * Evaluate events to be injected into the guest.
6136 *
6137 * Events in TRPM can be injected without inspecting the guest state.
6138 * If any new events (interrupts/NMI) are pending currently, we try to set up the
6139 * guest to cause a VM-exit the next time they are ready to receive the event.
6140 */
6141 if (TRPMHasTrap(pVCpu))
6142 vmxHCTrpmTrapToPendingEvent(pVCpu);
6143
6144 uint32_t fIntrState;
6145 rcStrict = vmxHCEvaluatePendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6146 &fIntrState);
6147
6148#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6149 /*
6150 * While evaluating pending events if something failed (unlikely) or if we were
6151 * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
6152 */
6153 if (rcStrict != VINF_SUCCESS)
6154 return rcStrict;
6155 if ( pVmxTransient->fIsNestedGuest
6156 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
6157 {
6158 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
6159 return VINF_VMX_VMEXIT;
6160 }
6161#else
6162 Assert(rcStrict == VINF_SUCCESS);
6163#endif
6164
6165 /*
6166 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
6167 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
6168 * also result in triple-faulting the VM.
6169 *
6170 * With nested-guests, the above does not apply since unrestricted guest execution is a
6171 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
6172 */
6173 rcStrict = vmxHCInjectPendingEvent(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->fIsNestedGuest,
6174 fIntrState, fStepping);
6175 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6176 { /* likely */ }
6177 else
6178 {
6179 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
6180 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
6181 return rcStrict;
6182 }
6183
6184 /*
6185 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
6186 * import CR3 themselves. We will need to update it here, as even as late as the above
6187 * vmxHCInjectPendingEvent() call may lazily import guest-CPU state on demand causing
6188 * the below force flag to be set.
6189 */
6190 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6191 {
6192 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
6193 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
6194 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
6195 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
6196 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6197 }
6198
6199#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6200 /* Paranoia. */
6201 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6202#endif
6203
6204 /*
6205 * No longjmps to ring-3 from this point on!!!
6206 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
6207 * This also disables flushing of the R0-logger instance (if any).
6208 */
6209 VMMRZCallRing3Disable(pVCpu);
6210
6211 /*
6212 * Export the guest state bits.
6213 *
6214 * We cannot perform longjmps while loading the guest state because we do not preserve the
6215 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
6216 * CPU migration.
6217 *
6218 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
6219 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
6220 */
6221 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
6222 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
6223 { /* likely */ }
6224 else
6225 {
6226 VMMRZCallRing3Enable(pVCpu); /* Undo the VMMRZCallRing3Disable() above before bailing out. */
6227 return rcStrict;
6228 }
6229
6230 /*
6231 * We disable interrupts so that we don't miss any interrupts that would flag preemption
6232 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
6233 * preemption disabled for a while. Since this is purely to aid the
6234 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
6235 * disable interrupt on NT.
6236 *
6237 * We need to check for force-flags that could've possible been altered since we last
6238 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
6239 * see @bugref{6398}).
6240 *
6241 * We also check a couple of other force-flags as a last opportunity to get the EMT back
6242 * to ring-3 before executing guest code.
6243 */
6244 pVmxTransient->fEFlags = ASMIntDisableFlags();
6245
6246 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
6247 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6248 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
6249 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
6250 {
6251 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
6252 {
6253#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6254 /*
6255 * If we are executing a nested-guest make sure that we should intercept subsequent
6256 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
6257 * the VM-exit instruction emulation happy.
6258 */
6259 if (pVmxTransient->fIsNestedGuest)
6260 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
6261#endif
6262
6263 /*
6264 * We've injected any pending events. This is really the point of no return (to ring-3).
6265 *
6266 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
6267 * returns from this function, so do -not- enable them here.
6268 */
6269 pVCpu->hm.s.Event.fPending = false;
6270 return VINF_SUCCESS;
6271 }
6272
6273 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
6274 rcStrict = VINF_EM_RAW_INTERRUPT; /* A host preemption is pending; don't start the guest. */
6275 }
6276 else
6277 {
6278 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6279 rcStrict = VINF_EM_RAW_TO_R3; /* One of the force-flags checked above is set. */
6280 }
6281
6282 ASMSetFlags(pVmxTransient->fEFlags); /* Restore the flags saved by ASMIntDisableFlags() above. */
6283 VMMRZCallRing3Enable(pVCpu); /* Not running the guest after all; undo the VMMRZCallRing3Disable() above. */
6284
6285 return rcStrict;
6286}
6287
6288
6289/**
6290 * Final preparations before executing guest code using hardware-assisted VMX.
6291 *
6292 * We can no longer get preempted to a different host CPU and there are no returns
6293 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
6294 * failures), this function is not intended to fail sans unrecoverable hardware
6295 * errors.
6296 *
6297 * @param pVCpu The cross context virtual CPU structure.
6298 * @param pVmxTransient The VMX-transient structure.
6299 *
6300 * @remarks Called with preemption disabled.
6301 * @remarks No-long-jump zone!!!
6302 */
6303static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6304{
6305 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6306 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6307 Assert(!pVCpu->hm.s.Event.fPending);
6308
6309 /*
6310 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
6311 */
6312 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6313 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
6314
6315 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6316 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6317 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
6318 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
6319
6320 if (!CPUMIsGuestFPUStateActive(pVCpu)) /* Load the guest FPU state if it is not already active. */
6321 {
6322 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6323 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
6324 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT; /* Host CR0 was modified; the host state is re-exported below. */
6325 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
6326 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
6327 }
6328
6329 /*
6330 * Re-export the host state bits as we may've been preempted (only happens when
6331 * thread-context hooks are used or when the VM start function changes) or if
6332 * the host CR0 is modified while loading the guest FPU state above.
6333 *
6334 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
6335 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
6336 * see @bugref{8432}.
6337 *
6338 * This may also happen when switching to/from a nested-guest VMCS without leaving
6339 * ring-0.
6340 */
6341 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
6342 {
6343 hmR0VmxExportHostState(pVCpu);
6344 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
6345 }
6346 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
6347
6348 /*
6349 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
6350 */
6351 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
6352 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
6353 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged)); /* No changed-state flags may remain set at this point. */
6354
6355 /*
6356 * Store status of the shared guest/host debug state at the time of VM-entry.
6357 */
6358 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
6359 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
6360
6361 /*
6362 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
6363 * more than one conditional check. The post-run side of our code shall determine
6364 * if it needs to sync. the virtual APIC TPR with the TPR-shadow.
6365 */
6366 if (pVmcsInfo->pbVirtApic)
6367 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
6368
6369 /*
6370 * Update the host MSRs values in the VM-exit MSR-load area.
6371 */
6372 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
6373 {
6374 if (pVmcsInfo->cExitMsrLoad > 0)
6375 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
6376 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
6377 }
6378
6379 /*
6380 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
6381 * VMX-preemption timer based on the next virtual sync clock deadline.
6382 */
6383 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
6384 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
6385 {
6386 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
6387 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
6388 }
6389
6390 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
6391 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
6392 if (!fIsRdtscIntercepted)
6393 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
6394 else
6395 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
6396
6397 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
6398 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
6399 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
6400 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
6401 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
6402 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
6403
6404 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
6405
6406 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
6407 as we're about to start executing the guest. */
6408
6409 /*
6410 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
6411 *
6412 * This is done this late as updating the TSC offsetting/preemption timer above
6413 * figures out if we can skip intercepting RDTSCP by calculating the number of
6414 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
6415 */
6416 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
6417 && !fIsRdtscIntercepted)
6418 {
6419 vmxHCImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
6420
6421 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
6422 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
6423 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
6424 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
6425 AssertRC(rc);
6426 Assert(!pVmxTransient->fRemoveTscAuxMsr);
6427 pVmxTransient->fRemoveTscAuxMsr = true; /* hmR0VmxPostRunGuest() removes the MSR again when it sees this flag. */
6428 }
6429
6430#ifdef VBOX_STRICT
6431 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
6432 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
6433 hmR0VmxCheckHostEferMsr(pVmcsInfo);
6434 AssertRC(vmxHCCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
6435#endif
6436
6437#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
6438 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
6439 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
6440 * see @bugref{9180#c54}. */
6441 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
6442 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
6443 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
6444#endif
6445}
6446
6447
6448/**
6449 * First C routine invoked after running guest code using hardware-assisted VMX.
6450 *
6451 * @param pVCpu The cross context virtual CPU structure.
6452 * @param pVmxTransient The VMX-transient structure.
6453 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
6454 *
6455 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
6456 *
6457 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
6458 * unconditionally when it is safe to do so.
6459 */
6460static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
6461{
6462 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
6463 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
6464 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 require saving the guest state. */
6465 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
6466 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
6467 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
6468
6469 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6470 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT)) /* RDTSC isn't intercepted: compute the guest TSC at VM-exit for TM. */
6471 {
6472 uint64_t uGstTsc;
6473 if (!pVmxTransient->fIsNestedGuest)
6474 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6475 else
6476 {
6477 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
6478 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc); /* Strip the nested-guest TSC offset to get the guest TSC. */
6479 }
6480 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
6481 }
6482
6483 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
6484 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
6485 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
6486
6487 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
6488 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
6489#ifdef VBOX_STRICT
6490 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
6491#endif
6492 Assert(!ASMIntAreEnabled());
6493 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts (restores the flags saved in pVmxTransient->fEFlags). */
6494 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6495
6496#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
6497 /*
6498 * Clean all the VMCS fields in the transient structure before reading
6499 * anything from the VMCS.
6500 */
6501 pVmxTransient->uExitReason = 0;
6502 pVmxTransient->uExitIntErrorCode = 0;
6503 pVmxTransient->uExitQual = 0;
6504 pVmxTransient->uGuestLinearAddr = 0;
6505 pVmxTransient->uExitIntInfo = 0;
6506 pVmxTransient->cbExitInstr = 0;
6507 pVmxTransient->ExitInstrInfo.u = 0;
6508 pVmxTransient->uEntryIntInfo = 0;
6509 pVmxTransient->uEntryXcptErrorCode = 0;
6510 pVmxTransient->cbEntryInstr = 0;
6511 pVmxTransient->uIdtVectoringInfo = 0;
6512 pVmxTransient->uIdtVectoringErrorCode = 0;
6513#endif
6514
6515 /*
6516 * Save the basic VM-exit reason and check if the VM-entry failed.
6517 * See Intel spec. 24.9.1 "Basic VM-exit Information".
6518 */
6519 uint32_t uExitReason;
6520 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
6521 AssertRC(rc);
6522 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
6523 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
6524
6525 /*
6526 * Log the VM-exit before logging anything else as otherwise it might be a
6527 * tad confusing what happens before and after the world-switch.
6528 */
6529 HMVMX_LOG_EXIT(pVCpu, uExitReason);
6530
6531 /*
6532 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
6533 * bitmap permissions, if it was added before VM-entry.
6534 */
6535 if (pVmxTransient->fRemoveTscAuxMsr)
6536 {
6537 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
6538 pVmxTransient->fRemoveTscAuxMsr = false;
6539 }
6540
6541 /*
6542 * Check if VMLAUNCH/VMRESUME succeeded.
6543 * If this failed, we cause a guru meditation and cease further execution.
6544 *
6545 * However, if we are executing a nested-guest we might fail if we use the
6546 * fast path rather than fully emulating VMLAUNCH/VMRESUME instruction in IEM.
6547 */
6548 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
6549 {
6550 /*
6551 * Update the VM-exit history array here even if the VM-entry failed due to:
6552 * - Invalid guest state.
6553 * - MSR loading.
6554 * - Machine-check event.
6555 *
6556 * In any of the above cases we will still have a "valid" VM-exit reason
6557 * despite @a fVMEntryFailed being true.
6558 *
6559 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
6560 *
6561 * Note! We don't have CS or RIP at this point. Will probably address that later
6562 * by amending the history entry added here.
6563 */
6564 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
6565 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
6566
6567 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
6568 {
6569 VMMRZCallRing3Enable(pVCpu);
6570 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6571
6572#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
6573 hmR0VmxReadAllRoFieldsVmcs(pVmxTransient);
6574#endif
6575
6576 /*
6577 * Always import the guest-interruptibility state as we need it while evaluating
6578 * injecting events on re-entry.
6579 *
6580 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
6581 * checking for real-mode while exporting the state because all bits that cause
6582 * mode changes wrt CR0 are intercepted.
6583 */
6584 uint64_t const fImportMask = CPUMCTX_EXTRN_INHIBIT_INT
6585 | CPUMCTX_EXTRN_INHIBIT_NMI
6586#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
6587 | HMVMX_CPUMCTX_EXTRN_ALL
6588#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
6589 | CPUMCTX_EXTRN_RFLAGS
6590#endif
6591 ;
6592 rc = vmxHCImportGuestState(pVCpu, pVmcsInfo, fImportMask);
6593 AssertRC(rc);
6594
6595 /*
6596 * Sync the TPR shadow with our APIC state.
6597 */
6598 if ( !pVmxTransient->fIsNestedGuest
6599 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
6600 {
6601 Assert(pVmcsInfo->pbVirtApic);
6602 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]) /* The TPR differs from the value cached in u8GuestTpr. */
6603 {
6604 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
6605 AssertRC(rc);
6606 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
6607 }
6608 }
6609
6610 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6611 Assert( pVmxTransient->fWasGuestDebugStateActive == false
6612 || pVmxTransient->fWasHyperDebugStateActive == false);
6613 return;
6614 }
6615 }
6616#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6617 else if (pVmxTransient->fIsNestedGuest)
6618 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
6619#endif
6620 else
6621 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
6622
6623 VMMRZCallRing3Enable(pVCpu); /* Failure paths: still re-enable longjmps, as the function remarks state. */
6624}
6625
6626
6627/**
6628 * Runs the guest code using hardware-assisted VMX the normal way.
6629 *
6630 * @returns VBox status code.
6631 * @param pVCpu The cross context virtual CPU structure.
6632 * @param pcLoops Pointer to the number of executed loops.
6633 */
6634static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
6635{
6636 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
6637 Assert(pcLoops);
6638 Assert(*pcLoops <= cMaxResumeLoops);
6639 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
6640
6641#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
6642 /*
6643 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
6644 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
6645 * guest VMCS while entering the VMX ring-0 session.
6646 */
6647 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
6648 {
6649 int rc = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
6650 if (RT_SUCCESS(rc))
6651 { /* likely */ }
6652 else
6653 {
6654 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
6655 return rc;
6656 }
6657 }
6658#endif
6659
6660 VMXTRANSIENT VmxTransient;
6661 RT_ZERO(VmxTransient);
6662 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6663
6664 /* Paranoia. */
6665 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
6666
6667 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
6668 for (;;)
6669 {
6670 Assert(!HMR0SuspendPending());
6671 HMVMX_ASSERT_CPU_SAFE(pVCpu);
6672 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
6673
6674 /*
6675 * Preparatory work for running nested-guest code, this may force us to
6676 * return to ring-3.
6677 *
6678 * Warning! This bugger disables interrupts on VINF_SUCCESS!
6679 */
6680 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
6681 if (rcStrict != VINF_SUCCESS)
6682 break;
6683
6684 /* Interrupts are disabled at this point! */
6685 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
6686 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
6687 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
6688 /* Interrupts are re-enabled at this point! */
6689
6690 /*
6691 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
6692 */
6693 if (RT_SUCCESS(rcRun))
6694 { /* very likely */ }
6695 else
6696 {
6697 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
6698 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
6699 return rcRun;
6700 }
6701
6702 /*
6703 * Profile the VM-exit.
6704 */
6705 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
6706 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
6707 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
6708 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
6709 HMVMX_START_EXIT_DISPATCH_PROF();
6710
6711 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
6712
6713 /*
6714 * Handle the VM-exit.
6715 */
6716#ifdef HMVMX_USE_FUNCTION_TABLE
6717 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
6718#else
6719 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
6720#endif
6721 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
6722 if (rcStrict == VINF_SUCCESS)
6723 {
6724 if (++(*pcLoops) <= cMaxResumeLoops)
6725 continue;
6726 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
6727 rcStrict = VINF_EM_RAW_INTERRUPT;
6728 }
6729 break;
6730 }
6731
6732 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
6733 return rcStrict;
6734}
6735
6736
#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
/**
 * Executes nested-guest code with hardware-assisted VMX.
 *
 * Same structure as the normal run loop, except that VM-exits go through the
 * nested-guest exit handler and that the loop is also left, with
 * VINF_VMX_VMEXIT, once the guest is no longer in VMX non-root mode after a
 * VM-exit was handled successfully.
 *
 * @returns VBox status code.
 * @param   pVCpu       The cross context virtual CPU structure.
 * @param   pcLoops     Pointer to the number of executed loops.  Incremented
 *                      each time a VM-exit was handled with VINF_SUCCESS while
 *                      remaining in VMX non-root mode.
 *
 * @sa      hmR0VmxRunGuestCodeNormal.
 */
static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
{
    uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
    Assert(pcLoops);
    Assert(*pcLoops <= cMaxResumeLoops);
    Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));

    /*
     * We may get here straight from executing the guest without having left
     * ring-0, in which case the guest VMCS is still the current one and we must
     * switch to the nested-guest VMCS.  When coming from ring-3 the nested-guest
     * VMCS was already loaded while entering the VMX ring-0 session.
     */
    if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
    {
        int const rcSwitch = vmxHCSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
        if (!RT_SUCCESS(rcSwitch))
        {
            LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rcSwitch));
            return rcSwitch;
        }
    }

    VMXTRANSIENT VmxTransient;
    RT_ZERO(VmxTransient);
    VmxTransient.pVmcsInfo      = hmGetVmxActiveVmcsInfo(pVCpu);
    VmxTransient.fIsNestedGuest = true;

    /* Paranoia: the active VMCS must be the nested-guest one. */
    Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);

    VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
    for (;;)
    {
        Assert(!HMR0SuspendPending());
        HMVMX_ASSERT_CPU_SAFE(pVCpu);
        STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);

        /*
         * Do the preparatory work for running nested-guest code.  This may tell
         * us to go back to ring-3 instead.
         *
         * Warning! Interrupts are disabled when this returns VINF_SUCCESS!
         */
        rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
        if (rcStrict != VINF_SUCCESS)
            break;

        /* --- Interrupts are disabled from here... --- */
        hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
        int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
        hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
        /* --- ...and re-enabled again here. --- */

        /*
         * Bail out if running the VM (VMLAUNCH/VMRESUME) failed.
         */
        if (!RT_SUCCESS(rcRun))
        {
            STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
            hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
            return rcRun;
        }

        /*
         * VM-exit statistics and profiling (nested-guest counters).
         */
        AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
        STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
        STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
        STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
        HMVMX_START_EXIT_DISPATCH_PROF();

        VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);

        /*
         * Dispatch the VM-exit to the nested-guest handler.
         */
        rcStrict = vmxHCHandleExitNested(pVCpu, &VmxTransient);
        STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);

        /* Anything but plain success ends the loop with that status. */
        if (rcStrict != VINF_SUCCESS)
        {
            Assert(rcStrict != VINF_VMX_VMEXIT);
            break;
        }

        /* No longer in VMX non-root mode: report the nested VM-exit to the caller. */
        if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
        {
            STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
            rcStrict = VINF_VMX_VMEXIT;
            break;
        }

        /* Go again, unless we have used up the resume loop budget. */
        *pcLoops += 1;
        if (*pcLoops > cMaxResumeLoops)
        {
            STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
            rcStrict = VINF_EM_RAW_INTERRUPT;
            break;
        }
    }

    STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
    return rcStrict;
}
#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
6854
6855
6856/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
6857 * probes.
6858 *
6859 * The following few functions and associated structure contain the bloat
6860 * necessary for providing detailed debug events and dtrace probes as well as
6861 * reliable host side single stepping. This works on the principle of
6862 * "subclassing" the normal execution loop and workers. We replace the loop
6863 * method completely and override selected helpers to add necessary adjustments
6864 * to their core operation.
6865 *
6866 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
6867 * any performance for debug and analysis features.
6868 *
6869 * @{
6870 */
6871
6872/**
6873 * Transient per-VCPU debug state of VMCS and related info. we save/restore in
6874 * the debug run loop.
6875 */
6876typedef struct VMXRUNDBGSTATE
6877{
6878 /** The RIP we started executing at. This is for detecting that we stepped. */
6879 uint64_t uRipStart;
6880 /** The CS we started executing with. */
6881 uint16_t uCsStart;
6882
6883 /** Whether we've actually modified the 1st execution control field. */
6884 bool fModifiedProcCtls : 1;
6885 /** Whether we've actually modified the 2nd execution control field. */
6886 bool fModifiedProcCtls2 : 1;
6887 /** Whether we've actually modified the exception bitmap. */
6888 bool fModifiedXcptBitmap : 1;
6889
6890 /** We desire the modified the CR0 mask to be cleared. */
6891 bool fClearCr0Mask : 1;
6892 /** We desire the modified the CR4 mask to be cleared. */
6893 bool fClearCr4Mask : 1;
6894 /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC. */
6895 uint32_t fCpe1Extra;
6896 /** Stuff we do not want in VMX_VMCS32_CTRL_PROC_EXEC. */
6897 uint32_t fCpe1Unwanted;
6898 /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC2. */
6899 uint32_t fCpe2Extra;
6900 /** Extra stuff we need in VMX_VMCS32_CTRL_EXCEPTION_BITMAP. */
6901 uint32_t bmXcptExtra;
6902 /** The sequence number of the Dtrace provider settings the state was
6903 * configured against. */
6904 uint32_t uDtraceSettingsSeqNo;
6905 /** VM-exits to check (one bit per VM-exit). */
6906 uint32_t bmExitsToCheck[3];
6907
6908 /** The initial VMX_VMCS32_CTRL_PROC_EXEC value (helps with restore). */
6909 uint32_t fProcCtlsInitial;
6910 /** The initial VMX_VMCS32_CTRL_PROC_EXEC2 value (helps with restore). */
6911 uint32_t fProcCtls2Initial;
6912 /** The initial VMX_VMCS32_CTRL_EXCEPTION_BITMAP value (helps with restore). */
6913 uint32_t bmXcptInitial;
6914} VMXRUNDBGSTATE;
6915AssertCompileMemberSize(VMXRUNDBGSTATE, bmExitsToCheck, (VMX_EXIT_MAX + 1 + 31) / 32 * 4);
6916typedef VMXRUNDBGSTATE *PVMXRUNDBGSTATE;
6917
6918
6919/**
6920 * Initializes the VMXRUNDBGSTATE structure.
6921 *
6922 * @param pVCpu The cross context virtual CPU structure of the
6923 * calling EMT.
6924 * @param pVmxTransient The VMX-transient structure.
6925 * @param pDbgState The debug state to initialize.
6926 */
6927static void hmR0VmxRunDebugStateInit(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
6928{
6929 pDbgState->uRipStart = pVCpu->cpum.GstCtx.rip;
6930 pDbgState->uCsStart = pVCpu->cpum.GstCtx.cs.Sel;
6931
6932 pDbgState->fModifiedProcCtls = false;
6933 pDbgState->fModifiedProcCtls2 = false;
6934 pDbgState->fModifiedXcptBitmap = false;
6935 pDbgState->fClearCr0Mask = false;
6936 pDbgState->fClearCr4Mask = false;
6937 pDbgState->fCpe1Extra = 0;
6938 pDbgState->fCpe1Unwanted = 0;
6939 pDbgState->fCpe2Extra = 0;
6940 pDbgState->bmXcptExtra = 0;
6941 pDbgState->fProcCtlsInitial = pVmxTransient->pVmcsInfo->u32ProcCtls;
6942 pDbgState->fProcCtls2Initial = pVmxTransient->pVmcsInfo->u32ProcCtls2;
6943 pDbgState->bmXcptInitial = pVmxTransient->pVmcsInfo->u32XcptBitmap;
6944}
6945
6946
6947/**
6948 * Updates the VMSC fields with changes requested by @a pDbgState.
6949 *
6950 * This is performed after hmR0VmxPreRunGuestDebugStateUpdate as well
6951 * immediately before executing guest code, i.e. when interrupts are disabled.
6952 * We don't check status codes here as we cannot easily assert or return in the
6953 * latter case.
6954 *
6955 * @param pVCpu The cross context virtual CPU structure.
6956 * @param pVmxTransient The VMX-transient structure.
6957 * @param pDbgState The debug state.
6958 */
6959static void hmR0VmxPreRunGuestDebugStateApply(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
6960{
6961 /*
6962 * Ensure desired flags in VMCS control fields are set.
6963 * (Ignoring write failure here, as we're committed and it's just debug extras.)
6964 *
6965 * Note! We load the shadow CR0 & CR4 bits when we flag the clearing, so
6966 * there should be no stale data in pCtx at this point.
6967 */
6968 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6969 if ( (pVmcsInfo->u32ProcCtls & pDbgState->fCpe1Extra) != pDbgState->fCpe1Extra
6970 || (pVmcsInfo->u32ProcCtls & pDbgState->fCpe1Unwanted))
6971 {
6972 pVmcsInfo->u32ProcCtls |= pDbgState->fCpe1Extra;
6973 pVmcsInfo->u32ProcCtls &= ~pDbgState->fCpe1Unwanted;
6974 VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
6975 Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC: %#RX32\n", pVmcsInfo->u32ProcCtls));
6976 pDbgState->fModifiedProcCtls = true;
6977 }
6978
6979 if ((pVmcsInfo->u32ProcCtls2 & pDbgState->fCpe2Extra) != pDbgState->fCpe2Extra)
6980 {
6981 pVmcsInfo->u32ProcCtls2 |= pDbgState->fCpe2Extra;
6982 VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pVmcsInfo->u32ProcCtls2);
6983 Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC2: %#RX32\n", pVmcsInfo->u32ProcCtls2));
6984 pDbgState->fModifiedProcCtls2 = true;
6985 }
6986
6987 if ((pVmcsInfo->u32XcptBitmap & pDbgState->bmXcptExtra) != pDbgState->bmXcptExtra)
6988 {
6989 pVmcsInfo->u32XcptBitmap |= pDbgState->bmXcptExtra;
6990 VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVmcsInfo->u32XcptBitmap);
6991 Log6Func(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP: %#RX32\n", pVmcsInfo->u32XcptBitmap));
6992 pDbgState->fModifiedXcptBitmap = true;
6993 }
6994
6995 if (pDbgState->fClearCr0Mask && pVmcsInfo->u64Cr0Mask != 0)
6996 {
6997 pVmcsInfo->u64Cr0Mask = 0;
6998 VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, 0);
6999 Log6Func(("VMX_VMCS_CTRL_CR0_MASK: 0\n"));
7000 }
7001
7002 if (pDbgState->fClearCr4Mask && pVmcsInfo->u64Cr4Mask != 0)
7003 {
7004 pVmcsInfo->u64Cr4Mask = 0;
7005 VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, 0);
7006 Log6Func(("VMX_VMCS_CTRL_CR4_MASK: 0\n"));
7007 }
7008
7009 NOREF(pVCpu);
7010}
7011
7012
7013/**
7014 * Restores VMCS fields that were changed by hmR0VmxPreRunGuestDebugStateApply for
7015 * re-entry next time around.
7016 *
7017 * @returns Strict VBox status code (i.e. informational status codes too).
7018 * @param pVCpu The cross context virtual CPU structure.
7019 * @param pVmxTransient The VMX-transient structure.
7020 * @param pDbgState The debug state.
7021 * @param rcStrict The return code from executing the guest using single
7022 * stepping.
7023 */
7024static VBOXSTRICTRC hmR0VmxRunDebugStateRevert(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState,
7025 VBOXSTRICTRC rcStrict)
7026{
7027 /*
7028 * Restore VM-exit control settings as we may not reenter this function the
7029 * next time around.
7030 */
7031 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
7032
7033 /* We reload the initial value, trigger what we can of recalculations the
7034 next time around. From the looks of things, that's all that's required atm. */
7035 if (pDbgState->fModifiedProcCtls)
7036 {
7037 if (!(pDbgState->fProcCtlsInitial & VMX_PROC_CTLS_MOV_DR_EXIT) && CPUMIsHyperDebugStateActive(pVCpu))
7038 pDbgState->fProcCtlsInitial |= VMX_PROC_CTLS_MOV_DR_EXIT; /* Avoid assertion in hmR0VmxLeave */
7039 int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pDbgState->fProcCtlsInitial);
7040 AssertRC(rc2);
7041 pVmcsInfo->u32ProcCtls = pDbgState->fProcCtlsInitial;
7042 }
7043
7044 /* We're currently the only ones messing with this one, so just restore the
7045 cached value and reload the field. */
7046 if ( pDbgState->fModifiedProcCtls2
7047 && pVmcsInfo->u32ProcCtls2 != pDbgState->fProcCtls2Initial)
7048 {
7049 int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pDbgState->fProcCtls2Initial);
7050 AssertRC(rc2);
7051 pVmcsInfo->u32ProcCtls2 = pDbgState->fProcCtls2Initial;
7052 }
7053
7054 /* If we've modified the exception bitmap, we restore it and trigger
7055 reloading and partial recalculation the next time around. */
7056 if (pDbgState->fModifiedXcptBitmap)
7057 pVmcsInfo->u32XcptBitmap = pDbgState->bmXcptInitial;
7058
7059 return rcStrict;
7060}
7061
7062
7063/**
7064 * Configures VM-exit controls for current DBGF and DTrace settings.
7065 *
7066 * This updates @a pDbgState and the VMCS execution control fields to reflect
7067 * the necessary VM-exits demanded by DBGF and DTrace.
7068 *
7069 * @param pVCpu The cross context virtual CPU structure.
7070 * @param pVmxTransient The VMX-transient structure. May update
7071 * fUpdatedTscOffsettingAndPreemptTimer.
7072 * @param pDbgState The debug state.
7073 */
7074static void hmR0VmxPreRunGuestDebugStateUpdate(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
7075{
7076 /*
7077 * Take down the dtrace serial number so we can spot changes.
7078 */
7079 pDbgState->uDtraceSettingsSeqNo = VBOXVMM_GET_SETTINGS_SEQ_NO();
7080 ASMCompilerBarrier();
7081
7082 /*
7083 * We'll rebuild most of the middle block of data members (holding the
7084 * current settings) as we go along here, so start by clearing it all.
7085 */
7086 pDbgState->bmXcptExtra = 0;
7087 pDbgState->fCpe1Extra = 0;
7088 pDbgState->fCpe1Unwanted = 0;
7089 pDbgState->fCpe2Extra = 0;
7090 for (unsigned i = 0; i < RT_ELEMENTS(pDbgState->bmExitsToCheck); i++)
7091 pDbgState->bmExitsToCheck[i] = 0;
7092
7093 /*
7094 * Software interrupts (INT XXh) - no idea how to trigger these...
7095 */
7096 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
7097 if ( DBGF_IS_EVENT_ENABLED(pVM, DBGFEVENT_INTERRUPT_SOFTWARE)
7098 || VBOXVMM_INT_SOFTWARE_ENABLED())
7099 {
7100 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI);
7101 }
7102
7103 /*
7104 * INT3 breakpoints - triggered by #BP exceptions.
7105 */
7106 if (pVM->dbgf.ro.cEnabledInt3Breakpoints > 0)
7107 pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP);
7108
7109 /*
7110 * Exception bitmap and XCPT events+probes.
7111 */
7112 for (int iXcpt = 0; iXcpt < (DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST + 1); iXcpt++)
7113 if (DBGF_IS_EVENT_ENABLED(pVM, (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + iXcpt)))
7114 pDbgState->bmXcptExtra |= RT_BIT_32(iXcpt);
7115
7116 if (VBOXVMM_XCPT_DE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DE);
7117 if (VBOXVMM_XCPT_DB_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DB);
7118 if (VBOXVMM_XCPT_BP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP);
7119 if (VBOXVMM_XCPT_OF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_OF);
7120 if (VBOXVMM_XCPT_BR_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BR);
7121 if (VBOXVMM_XCPT_UD_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_UD);
7122 if (VBOXVMM_XCPT_NM_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NM);
7123 if (VBOXVMM_XCPT_DF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DF);
7124 if (VBOXVMM_XCPT_TS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_TS);
7125 if (VBOXVMM_XCPT_NP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NP);
7126 if (VBOXVMM_XCPT_SS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SS);
7127 if (VBOXVMM_XCPT_GP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_GP);
7128 if (VBOXVMM_XCPT_PF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_PF);
7129 if (VBOXVMM_XCPT_MF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_MF);
7130 if (VBOXVMM_XCPT_AC_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_AC);
7131 if (VBOXVMM_XCPT_XF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_XF);
7132 if (VBOXVMM_XCPT_VE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_VE);
7133 if (VBOXVMM_XCPT_SX_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SX);
7134
7135 if (pDbgState->bmXcptExtra)
7136 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI);
7137
7138 /*
7139 * Process events and probes for VM-exits, making sure we get the wanted VM-exits.
7140 *
7141 * Note! This is the reverse of what hmR0VmxHandleExitDtraceEvents does.
7142 * So, when adding/changing/removing please don't forget to update it.
7143 *
7144 * Some of the macros are picking up local variables to save horizontal space,
7145 * (being able to see it in a table is the lesser evil here).
7146 */
7147#define IS_EITHER_ENABLED(a_pVM, a_EventSubName) \
7148 ( DBGF_IS_EVENT_ENABLED(a_pVM, RT_CONCAT(DBGFEVENT_, a_EventSubName)) \
7149 || RT_CONCAT3(VBOXVMM_, a_EventSubName, _ENABLED)() )
7150#define SET_ONLY_XBM_IF_EITHER_EN(a_EventSubName, a_uExit) \
7151 if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
7152 { AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
7153 ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
7154 } else do { } while (0)
7155#define SET_CPE1_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec) \
7156 if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
7157 { \
7158 (pDbgState)->fCpe1Extra |= (a_fCtrlProcExec); \
7159 AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
7160 ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
7161 } else do { } while (0)
7162#define SET_CPEU_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fUnwantedCtrlProcExec) \
7163 if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
7164 { \
7165 (pDbgState)->fCpe1Unwanted |= (a_fUnwantedCtrlProcExec); \
7166 AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
7167 ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
7168 } else do { } while (0)
7169#define SET_CPE2_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec2) \
7170 if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
7171 { \
7172 (pDbgState)->fCpe2Extra |= (a_fCtrlProcExec2); \
7173 AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
7174 ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
7175 } else do { } while (0)
7176
7177 SET_ONLY_XBM_IF_EITHER_EN(EXIT_TASK_SWITCH, VMX_EXIT_TASK_SWITCH); /* unconditional */
7178 SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_VIOLATION, VMX_EXIT_EPT_VIOLATION); /* unconditional */
7179 SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_MISCONFIG, VMX_EXIT_EPT_MISCONFIG); /* unconditional (unless #VE) */
7180 SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_ACCESS, VMX_EXIT_APIC_ACCESS); /* feature dependent, nothing to enable here */
7181 SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_WRITE, VMX_EXIT_APIC_WRITE); /* feature dependent, nothing to enable here */
7182
7183 SET_ONLY_XBM_IF_EITHER_EN(INSTR_CPUID, VMX_EXIT_CPUID); /* unconditional */
7184 SET_ONLY_XBM_IF_EITHER_EN( EXIT_CPUID, VMX_EXIT_CPUID);
7185 SET_ONLY_XBM_IF_EITHER_EN(INSTR_GETSEC, VMX_EXIT_GETSEC); /* unconditional */
7186 SET_ONLY_XBM_IF_EITHER_EN( EXIT_GETSEC, VMX_EXIT_GETSEC);
7187 SET_CPE1_XBM_IF_EITHER_EN(INSTR_HALT, VMX_EXIT_HLT, VMX_PROC_CTLS_HLT_EXIT); /* paranoia */
7188 SET_ONLY_XBM_IF_EITHER_EN( EXIT_HALT, VMX_EXIT_HLT);
7189 SET_ONLY_XBM_IF_EITHER_EN(INSTR_INVD, VMX_EXIT_INVD); /* unconditional */
7190 SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVD, VMX_EXIT_INVD);
7191 SET_CPE1_XBM_IF_EITHER_EN(INSTR_INVLPG, VMX_EXIT_INVLPG, VMX_PROC_CTLS_INVLPG_EXIT);
7192 SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVLPG, VMX_EXIT_INVLPG);
7193 SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDPMC, VMX_EXIT_RDPMC, VMX_PROC_CTLS_RDPMC_EXIT);
7194 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDPMC, VMX_EXIT_RDPMC);
7195 SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSC, VMX_EXIT_RDTSC, VMX_PROC_CTLS_RDTSC_EXIT);
7196 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSC, VMX_EXIT_RDTSC);
7197 SET_ONLY_XBM_IF_EITHER_EN(INSTR_RSM, VMX_EXIT_RSM); /* unconditional */
7198 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RSM, VMX_EXIT_RSM);
7199 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMM_CALL, VMX_EXIT_VMCALL); /* unconditional */
7200 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMM_CALL, VMX_EXIT_VMCALL);
7201 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMCLEAR, VMX_EXIT_VMCLEAR); /* unconditional */
7202 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMCLEAR, VMX_EXIT_VMCLEAR);
7203 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH); /* unconditional */
7204 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH);
7205 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRLD, VMX_EXIT_VMPTRLD); /* unconditional */
7206 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRLD, VMX_EXIT_VMPTRLD);
7207 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRST, VMX_EXIT_VMPTRST); /* unconditional */
7208 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRST, VMX_EXIT_VMPTRST);
7209 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMREAD, VMX_EXIT_VMREAD); /* unconditional */
7210 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMREAD, VMX_EXIT_VMREAD);
7211 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMRESUME, VMX_EXIT_VMRESUME); /* unconditional */
7212 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMRESUME, VMX_EXIT_VMRESUME);
7213 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMWRITE, VMX_EXIT_VMWRITE); /* unconditional */
7214 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMWRITE, VMX_EXIT_VMWRITE);
7215 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXOFF, VMX_EXIT_VMXOFF); /* unconditional */
7216 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXOFF, VMX_EXIT_VMXOFF);
7217 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXON, VMX_EXIT_VMXON); /* unconditional */
7218 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXON, VMX_EXIT_VMXON);
7219
7220 if ( IS_EITHER_ENABLED(pVM, INSTR_CRX_READ)
7221 || IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE))
7222 {
7223 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR4
7224 | CPUMCTX_EXTRN_APIC_TPR);
7225 AssertRC(rc);
7226
7227#if 0 /** @todo fix me */
7228 pDbgState->fClearCr0Mask = true;
7229 pDbgState->fClearCr4Mask = true;
7230#endif
7231 if (IS_EITHER_ENABLED(pVM, INSTR_CRX_READ))
7232 pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_STORE_EXIT | VMX_PROC_CTLS_CR8_STORE_EXIT;
7233 if (IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE))
7234 pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR8_LOAD_EXIT;
7235 pDbgState->fCpe1Unwanted |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* risky? */
7236 /* Note! We currently don't use VMX_VMCS32_CTRL_CR3_TARGET_COUNT. It would
7237 require clearing here and in the loop if we start using it. */
7238 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_CRX);
7239 }
7240 else
7241 {
7242 if (pDbgState->fClearCr0Mask)
7243 {
7244 pDbgState->fClearCr0Mask = false;
7245 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0);
7246 }
7247 if (pDbgState->fClearCr4Mask)
7248 {
7249 pDbgState->fClearCr4Mask = false;
7250 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR4);
7251 }
7252 }
7253 SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_READ, VMX_EXIT_MOV_CRX);
7254 SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_WRITE, VMX_EXIT_MOV_CRX);
7255
7256 if ( IS_EITHER_ENABLED(pVM, INSTR_DRX_READ)
7257 || IS_EITHER_ENABLED(pVM, INSTR_DRX_WRITE))
7258 {
7259 /** @todo later, need to fix handler as it assumes this won't usually happen. */
7260 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_DRX);
7261 }
7262 SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_READ, VMX_EXIT_MOV_DRX);
7263 SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_WRITE, VMX_EXIT_MOV_DRX);
7264
7265 SET_CPEU_XBM_IF_EITHER_EN(INSTR_RDMSR, VMX_EXIT_RDMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS); /* risky clearing this? */
7266 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDMSR, VMX_EXIT_RDMSR);
7267 SET_CPEU_XBM_IF_EITHER_EN(INSTR_WRMSR, VMX_EXIT_WRMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS);
7268 SET_ONLY_XBM_IF_EITHER_EN( EXIT_WRMSR, VMX_EXIT_WRMSR);
7269 SET_CPE1_XBM_IF_EITHER_EN(INSTR_MWAIT, VMX_EXIT_MWAIT, VMX_PROC_CTLS_MWAIT_EXIT); /* paranoia */
7270 SET_ONLY_XBM_IF_EITHER_EN( EXIT_MWAIT, VMX_EXIT_MWAIT);
7271 SET_CPE1_XBM_IF_EITHER_EN(INSTR_MONITOR, VMX_EXIT_MONITOR, VMX_PROC_CTLS_MONITOR_EXIT); /* paranoia */
7272 SET_ONLY_XBM_IF_EITHER_EN( EXIT_MONITOR, VMX_EXIT_MONITOR);
7273#if 0 /** @todo too slow, fix handler. */
7274 SET_CPE1_XBM_IF_EITHER_EN(INSTR_PAUSE, VMX_EXIT_PAUSE, VMX_PROC_CTLS_PAUSE_EXIT);
7275#endif
7276 SET_ONLY_XBM_IF_EITHER_EN( EXIT_PAUSE, VMX_EXIT_PAUSE);
7277
7278 if ( IS_EITHER_ENABLED(pVM, INSTR_SGDT)
7279 || IS_EITHER_ENABLED(pVM, INSTR_SIDT)
7280 || IS_EITHER_ENABLED(pVM, INSTR_LGDT)
7281 || IS_EITHER_ENABLED(pVM, INSTR_LIDT))
7282 {
7283 pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT;
7284 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_GDTR_IDTR_ACCESS);
7285 }
7286 SET_ONLY_XBM_IF_EITHER_EN( EXIT_SGDT, VMX_EXIT_GDTR_IDTR_ACCESS);
7287 SET_ONLY_XBM_IF_EITHER_EN( EXIT_SIDT, VMX_EXIT_GDTR_IDTR_ACCESS);
7288 SET_ONLY_XBM_IF_EITHER_EN( EXIT_LGDT, VMX_EXIT_GDTR_IDTR_ACCESS);
7289 SET_ONLY_XBM_IF_EITHER_EN( EXIT_LIDT, VMX_EXIT_GDTR_IDTR_ACCESS);
7290
7291 if ( IS_EITHER_ENABLED(pVM, INSTR_SLDT)
7292 || IS_EITHER_ENABLED(pVM, INSTR_STR)
7293 || IS_EITHER_ENABLED(pVM, INSTR_LLDT)
7294 || IS_EITHER_ENABLED(pVM, INSTR_LTR))
7295 {
7296 pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT;
7297 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_LDTR_TR_ACCESS);
7298 }
7299 SET_ONLY_XBM_IF_EITHER_EN( EXIT_SLDT, VMX_EXIT_LDTR_TR_ACCESS);
7300 SET_ONLY_XBM_IF_EITHER_EN( EXIT_STR, VMX_EXIT_LDTR_TR_ACCESS);
7301 SET_ONLY_XBM_IF_EITHER_EN( EXIT_LLDT, VMX_EXIT_LDTR_TR_ACCESS);
7302 SET_ONLY_XBM_IF_EITHER_EN( EXIT_LTR, VMX_EXIT_LDTR_TR_ACCESS);
7303
7304 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVEPT, VMX_EXIT_INVEPT); /* unconditional */
7305 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVEPT, VMX_EXIT_INVEPT);
7306 SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSCP, VMX_EXIT_RDTSCP, VMX_PROC_CTLS_RDTSC_EXIT);
7307 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSCP, VMX_EXIT_RDTSCP);
7308 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVVPID, VMX_EXIT_INVVPID); /* unconditional */
7309 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVVPID, VMX_EXIT_INVVPID);
7310 SET_CPE2_XBM_IF_EITHER_EN(INSTR_WBINVD, VMX_EXIT_WBINVD, VMX_PROC_CTLS2_WBINVD_EXIT);
7311 SET_ONLY_XBM_IF_EITHER_EN( EXIT_WBINVD, VMX_EXIT_WBINVD);
7312 SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSETBV, VMX_EXIT_XSETBV); /* unconditional */
7313 SET_ONLY_XBM_IF_EITHER_EN( EXIT_XSETBV, VMX_EXIT_XSETBV);
7314 SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDRAND, VMX_EXIT_RDRAND, VMX_PROC_CTLS2_RDRAND_EXIT);
7315 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDRAND, VMX_EXIT_RDRAND);
7316 SET_CPE1_XBM_IF_EITHER_EN(INSTR_VMX_INVPCID, VMX_EXIT_INVPCID, VMX_PROC_CTLS_INVLPG_EXIT);
7317 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVPCID, VMX_EXIT_INVPCID);
7318 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMFUNC, VMX_EXIT_VMFUNC); /* unconditional for the current setup */
7319 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMFUNC, VMX_EXIT_VMFUNC);
7320 SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDSEED, VMX_EXIT_RDSEED, VMX_PROC_CTLS2_RDSEED_EXIT);
7321 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDSEED, VMX_EXIT_RDSEED);
7322 SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSAVES, VMX_EXIT_XSAVES); /* unconditional (enabled by host, guest cfg) */
7323 SET_ONLY_XBM_IF_EITHER_EN(EXIT_XSAVES, VMX_EXIT_XSAVES);
7324 SET_ONLY_XBM_IF_EITHER_EN(INSTR_XRSTORS, VMX_EXIT_XRSTORS); /* unconditional (enabled by host, guest cfg) */
7325 SET_ONLY_XBM_IF_EITHER_EN( EXIT_XRSTORS, VMX_EXIT_XRSTORS);
7326
7327#undef IS_EITHER_ENABLED
7328#undef SET_ONLY_XBM_IF_EITHER_EN
7329#undef SET_CPE1_XBM_IF_EITHER_EN
7330#undef SET_CPEU_XBM_IF_EITHER_EN
7331#undef SET_CPE2_XBM_IF_EITHER_EN
7332
7333 /*
7334 * Sanitize the control stuff.
7335 */
7336 pDbgState->fCpe2Extra &= g_HmMsrs.u.vmx.ProcCtls2.n.allowed1;
7337 if (pDbgState->fCpe2Extra)
7338 pDbgState->fCpe1Extra |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
7339 pDbgState->fCpe1Extra &= g_HmMsrs.u.vmx.ProcCtls.n.allowed1;
7340 pDbgState->fCpe1Unwanted &= ~g_HmMsrs.u.vmx.ProcCtls.n.allowed0;
7341 if (pVCpu->hmr0.s.fDebugWantRdTscExit != RT_BOOL(pDbgState->fCpe1Extra & VMX_PROC_CTLS_RDTSC_EXIT))
7342 {
7343 pVCpu->hmr0.s.fDebugWantRdTscExit ^= true;
7344 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
7345 }
7346
7347 Log6(("HM: debug state: cpe1=%#RX32 cpeu=%#RX32 cpe2=%#RX32%s%s\n",
7348 pDbgState->fCpe1Extra, pDbgState->fCpe1Unwanted, pDbgState->fCpe2Extra,
7349 pDbgState->fClearCr0Mask ? " clr-cr0" : "",
7350 pDbgState->fClearCr4Mask ? " clr-cr4" : ""));
7351}
7352
7353
7354/**
7355 * Fires off DBGF events and dtrace probes for a VM-exit, when it's
7356 * appropriate.
7357 *
7358 * The caller has checked the VM-exit against the
7359 * VMXRUNDBGSTATE::bmExitsToCheck bitmap. The caller has checked for NMIs
7360 * already, so we don't have to do that either.
7361 *
7362 * @returns Strict VBox status code (i.e. informational status codes too).
7363 * @param pVCpu The cross context virtual CPU structure.
7364 * @param pVmxTransient The VMX-transient structure.
7365 * @param uExitReason The VM-exit reason.
7366 *
7367 * @remarks The name of this function is displayed by dtrace, so keep it short
7368 * and to the point. No longer than 33 chars long, please.
7369 */
7370static VBOXSTRICTRC hmR0VmxHandleExitDtraceEvents(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uExitReason)
7371{
7372 /*
7373 * Translate the event into a DBGF event (enmEvent + uEventArg) and at the
7374 * same time check whether any corresponding Dtrace event is enabled (fDtrace).
7375 *
7376 * Note! This is the reverse operation of what hmR0VmxPreRunGuestDebugStateUpdate
7377 * does. Must add/change/remove both places. Same ordering, please.
7378 *
7379 * Added/removed events must also be reflected in the next section
7380 * where we dispatch dtrace events.
7381 */
7382 bool fDtrace1 = false;
7383 bool fDtrace2 = false;
7384 DBGFEVENTTYPE enmEvent1 = DBGFEVENT_END;
7385 DBGFEVENTTYPE enmEvent2 = DBGFEVENT_END;
7386 uint32_t uEventArg = 0;
7387#define SET_EXIT(a_EventSubName) \
7388 do { \
7389 enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \
7390 fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \
7391 } while (0)
7392#define SET_BOTH(a_EventSubName) \
7393 do { \
7394 enmEvent1 = RT_CONCAT(DBGFEVENT_INSTR_, a_EventSubName); \
7395 enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \
7396 fDtrace1 = RT_CONCAT3(VBOXVMM_INSTR_, a_EventSubName, _ENABLED)(); \
7397 fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \
7398 } while (0)
7399 switch (uExitReason)
7400 {
7401 case VMX_EXIT_MTF:
7402 return vmxHCExitMtf(pVCpu, pVmxTransient);
7403
7404 case VMX_EXIT_XCPT_OR_NMI:
7405 {
7406 uint8_t const idxVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
7407 switch (VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo))
7408 {
7409 case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
7410 case VMX_EXIT_INT_INFO_TYPE_SW_XCPT:
7411 case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT:
7412 if (idxVector <= (unsigned)(DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST))
7413 {
7414 if (VMX_EXIT_INT_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uExitIntInfo))
7415 {
7416 vmxHCReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
7417 uEventArg = pVmxTransient->uExitIntErrorCode;
7418 }
7419 enmEvent1 = (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + idxVector);
7420 switch (enmEvent1)
7421 {
7422 case DBGFEVENT_XCPT_DE: fDtrace1 = VBOXVMM_XCPT_DE_ENABLED(); break;
7423 case DBGFEVENT_XCPT_DB: fDtrace1 = VBOXVMM_XCPT_DB_ENABLED(); break;
7424 case DBGFEVENT_XCPT_BP: fDtrace1 = VBOXVMM_XCPT_BP_ENABLED(); break;
7425 case DBGFEVENT_XCPT_OF: fDtrace1 = VBOXVMM_XCPT_OF_ENABLED(); break;
7426 case DBGFEVENT_XCPT_BR: fDtrace1 = VBOXVMM_XCPT_BR_ENABLED(); break;
7427 case DBGFEVENT_XCPT_UD: fDtrace1 = VBOXVMM_XCPT_UD_ENABLED(); break;
7428 case DBGFEVENT_XCPT_NM: fDtrace1 = VBOXVMM_XCPT_NM_ENABLED(); break;
7429 case DBGFEVENT_XCPT_DF: fDtrace1 = VBOXVMM_XCPT_DF_ENABLED(); break;
7430 case DBGFEVENT_XCPT_TS: fDtrace1 = VBOXVMM_XCPT_TS_ENABLED(); break;
7431 case DBGFEVENT_XCPT_NP: fDtrace1 = VBOXVMM_XCPT_NP_ENABLED(); break;
7432 case DBGFEVENT_XCPT_SS: fDtrace1 = VBOXVMM_XCPT_SS_ENABLED(); break;
7433 case DBGFEVENT_XCPT_GP: fDtrace1 = VBOXVMM_XCPT_GP_ENABLED(); break;
7434 case DBGFEVENT_XCPT_PF: fDtrace1 = VBOXVMM_XCPT_PF_ENABLED(); break;
7435 case DBGFEVENT_XCPT_MF: fDtrace1 = VBOXVMM_XCPT_MF_ENABLED(); break;
7436 case DBGFEVENT_XCPT_AC: fDtrace1 = VBOXVMM_XCPT_AC_ENABLED(); break;
7437 case DBGFEVENT_XCPT_XF: fDtrace1 = VBOXVMM_XCPT_XF_ENABLED(); break;
7438 case DBGFEVENT_XCPT_VE: fDtrace1 = VBOXVMM_XCPT_VE_ENABLED(); break;
7439 case DBGFEVENT_XCPT_SX: fDtrace1 = VBOXVMM_XCPT_SX_ENABLED(); break;
7440 default: break;
7441 }
7442 }
7443 else
7444 AssertFailed();
7445 break;
7446
7447 case VMX_EXIT_INT_INFO_TYPE_SW_INT:
7448 uEventArg = idxVector;
7449 enmEvent1 = DBGFEVENT_INTERRUPT_SOFTWARE;
7450 fDtrace1 = VBOXVMM_INT_SOFTWARE_ENABLED();
7451 break;
7452 }
7453 break;
7454 }
7455
7456 case VMX_EXIT_TRIPLE_FAULT:
7457 enmEvent1 = DBGFEVENT_TRIPLE_FAULT;
7458 //fDtrace1 = VBOXVMM_EXIT_TRIPLE_FAULT_ENABLED();
7459 break;
7460 case VMX_EXIT_TASK_SWITCH: SET_EXIT(TASK_SWITCH); break;
7461 case VMX_EXIT_EPT_VIOLATION: SET_EXIT(VMX_EPT_VIOLATION); break;
7462 case VMX_EXIT_EPT_MISCONFIG: SET_EXIT(VMX_EPT_MISCONFIG); break;
7463 case VMX_EXIT_APIC_ACCESS: SET_EXIT(VMX_VAPIC_ACCESS); break;
7464 case VMX_EXIT_APIC_WRITE: SET_EXIT(VMX_VAPIC_WRITE); break;
7465
7466 /* Instruction specific VM-exits: */
7467 case VMX_EXIT_CPUID: SET_BOTH(CPUID); break;
7468 case VMX_EXIT_GETSEC: SET_BOTH(GETSEC); break;
7469 case VMX_EXIT_HLT: SET_BOTH(HALT); break;
7470 case VMX_EXIT_INVD: SET_BOTH(INVD); break;
7471 case VMX_EXIT_INVLPG: SET_BOTH(INVLPG); break;
7472 case VMX_EXIT_RDPMC: SET_BOTH(RDPMC); break;
7473 case VMX_EXIT_RDTSC: SET_BOTH(RDTSC); break;
7474 case VMX_EXIT_RSM: SET_BOTH(RSM); break;
7475 case VMX_EXIT_VMCALL: SET_BOTH(VMM_CALL); break;
7476 case VMX_EXIT_VMCLEAR: SET_BOTH(VMX_VMCLEAR); break;
7477 case VMX_EXIT_VMLAUNCH: SET_BOTH(VMX_VMLAUNCH); break;
7478 case VMX_EXIT_VMPTRLD: SET_BOTH(VMX_VMPTRLD); break;
7479 case VMX_EXIT_VMPTRST: SET_BOTH(VMX_VMPTRST); break;
7480 case VMX_EXIT_VMREAD: SET_BOTH(VMX_VMREAD); break;
7481 case VMX_EXIT_VMRESUME: SET_BOTH(VMX_VMRESUME); break;
7482 case VMX_EXIT_VMWRITE: SET_BOTH(VMX_VMWRITE); break;
7483 case VMX_EXIT_VMXOFF: SET_BOTH(VMX_VMXOFF); break;
7484 case VMX_EXIT_VMXON: SET_BOTH(VMX_VMXON); break;
7485 case VMX_EXIT_MOV_CRX:
7486 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
7487 if (VMX_EXIT_QUAL_CRX_ACCESS(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_CRX_ACCESS_READ)
7488 SET_BOTH(CRX_READ);
7489 else
7490 SET_BOTH(CRX_WRITE);
7491 uEventArg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual);
7492 break;
7493 case VMX_EXIT_MOV_DRX:
7494 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
7495 if ( VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual)
7496 == VMX_EXIT_QUAL_DRX_DIRECTION_READ)
7497 SET_BOTH(DRX_READ);
7498 else
7499 SET_BOTH(DRX_WRITE);
7500 uEventArg = VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual);
7501 break;
7502 case VMX_EXIT_RDMSR: SET_BOTH(RDMSR); break;
7503 case VMX_EXIT_WRMSR: SET_BOTH(WRMSR); break;
7504 case VMX_EXIT_MWAIT: SET_BOTH(MWAIT); break;
7505 case VMX_EXIT_MONITOR: SET_BOTH(MONITOR); break;
7506 case VMX_EXIT_PAUSE: SET_BOTH(PAUSE); break;
7507 case VMX_EXIT_GDTR_IDTR_ACCESS:
7508 vmxHCReadExitInstrInfoVmcs(pVCpu, pVmxTransient);
7509 switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_XDTR_INSINFO_INSTR_ID))
7510 {
7511 case VMX_XDTR_INSINFO_II_SGDT: SET_BOTH(SGDT); break;
7512 case VMX_XDTR_INSINFO_II_SIDT: SET_BOTH(SIDT); break;
7513 case VMX_XDTR_INSINFO_II_LGDT: SET_BOTH(LGDT); break;
7514 case VMX_XDTR_INSINFO_II_LIDT: SET_BOTH(LIDT); break;
7515 }
7516 break;
7517
7518 case VMX_EXIT_LDTR_TR_ACCESS:
7519 vmxHCReadExitInstrInfoVmcs(pVCpu, pVmxTransient);
7520 switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_YYTR_INSINFO_INSTR_ID))
7521 {
7522 case VMX_YYTR_INSINFO_II_SLDT: SET_BOTH(SLDT); break;
7523 case VMX_YYTR_INSINFO_II_STR: SET_BOTH(STR); break;
7524 case VMX_YYTR_INSINFO_II_LLDT: SET_BOTH(LLDT); break;
7525 case VMX_YYTR_INSINFO_II_LTR: SET_BOTH(LTR); break;
7526 }
7527 break;
7528
7529 case VMX_EXIT_INVEPT: SET_BOTH(VMX_INVEPT); break;
7530 case VMX_EXIT_RDTSCP: SET_BOTH(RDTSCP); break;
7531 case VMX_EXIT_INVVPID: SET_BOTH(VMX_INVVPID); break;
7532 case VMX_EXIT_WBINVD: SET_BOTH(WBINVD); break;
7533 case VMX_EXIT_XSETBV: SET_BOTH(XSETBV); break;
7534 case VMX_EXIT_RDRAND: SET_BOTH(RDRAND); break;
7535 case VMX_EXIT_INVPCID: SET_BOTH(VMX_INVPCID); break;
7536 case VMX_EXIT_VMFUNC: SET_BOTH(VMX_VMFUNC); break;
7537 case VMX_EXIT_RDSEED: SET_BOTH(RDSEED); break;
7538 case VMX_EXIT_XSAVES: SET_BOTH(XSAVES); break;
7539 case VMX_EXIT_XRSTORS: SET_BOTH(XRSTORS); break;
7540
7541 /* Events that aren't relevant at this point. */
7542 case VMX_EXIT_EXT_INT:
7543 case VMX_EXIT_INT_WINDOW:
7544 case VMX_EXIT_NMI_WINDOW:
7545 case VMX_EXIT_TPR_BELOW_THRESHOLD:
7546 case VMX_EXIT_PREEMPT_TIMER:
7547 case VMX_EXIT_IO_INSTR:
7548 break;
7549
7550 /* Errors and unexpected events. */
7551 case VMX_EXIT_INIT_SIGNAL:
7552 case VMX_EXIT_SIPI:
7553 case VMX_EXIT_IO_SMI:
7554 case VMX_EXIT_SMI:
7555 case VMX_EXIT_ERR_INVALID_GUEST_STATE:
7556 case VMX_EXIT_ERR_MSR_LOAD:
7557 case VMX_EXIT_ERR_MACHINE_CHECK:
7558 case VMX_EXIT_PML_FULL:
7559 case VMX_EXIT_VIRTUALIZED_EOI:
7560 break;
7561
7562 default:
7563 AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason));
7564 break;
7565 }
7566#undef SET_BOTH
7567#undef SET_EXIT
7568
7569 /*
7570 * Dtrace tracepoints go first. We do them here at once so we don't
7571 * have to copy the guest state saving and stuff a few dozen times.
7572 * Down side is that we've got to repeat the switch, though this time
7573 * we use enmEvent since the probes are a subset of what DBGF does.
7574 */
7575 if (fDtrace1 || fDtrace2)
7576 {
7577 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
7578 vmxHCImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
7579 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7580 switch (enmEvent1)
7581 {
7582 /** @todo consider which extra parameters would be helpful for each probe. */
7583 case DBGFEVENT_END: break;
7584 case DBGFEVENT_XCPT_DE: VBOXVMM_XCPT_DE(pVCpu, pCtx); break;
7585 case DBGFEVENT_XCPT_DB: VBOXVMM_XCPT_DB(pVCpu, pCtx, pCtx->dr[6]); break;
7586 case DBGFEVENT_XCPT_BP: VBOXVMM_XCPT_BP(pVCpu, pCtx); break;
7587 case DBGFEVENT_XCPT_OF: VBOXVMM_XCPT_OF(pVCpu, pCtx); break;
7588 case DBGFEVENT_XCPT_BR: VBOXVMM_XCPT_BR(pVCpu, pCtx); break;
7589 case DBGFEVENT_XCPT_UD: VBOXVMM_XCPT_UD(pVCpu, pCtx); break;
7590 case DBGFEVENT_XCPT_NM: VBOXVMM_XCPT_NM(pVCpu, pCtx); break;
7591 case DBGFEVENT_XCPT_DF: VBOXVMM_XCPT_DF(pVCpu, pCtx); break;
7592 case DBGFEVENT_XCPT_TS: VBOXVMM_XCPT_TS(pVCpu, pCtx, uEventArg); break;
7593 case DBGFEVENT_XCPT_NP: VBOXVMM_XCPT_NP(pVCpu, pCtx, uEventArg); break;
7594 case DBGFEVENT_XCPT_SS: VBOXVMM_XCPT_SS(pVCpu, pCtx, uEventArg); break;
7595 case DBGFEVENT_XCPT_GP: VBOXVMM_XCPT_GP(pVCpu, pCtx, uEventArg); break;
7596 case DBGFEVENT_XCPT_PF: VBOXVMM_XCPT_PF(pVCpu, pCtx, uEventArg, pCtx->cr2); break;
7597 case DBGFEVENT_XCPT_MF: VBOXVMM_XCPT_MF(pVCpu, pCtx); break;
7598 case DBGFEVENT_XCPT_AC: VBOXVMM_XCPT_AC(pVCpu, pCtx); break;
7599 case DBGFEVENT_XCPT_XF: VBOXVMM_XCPT_XF(pVCpu, pCtx); break;
7600 case DBGFEVENT_XCPT_VE: VBOXVMM_XCPT_VE(pVCpu, pCtx); break;
7601 case DBGFEVENT_XCPT_SX: VBOXVMM_XCPT_SX(pVCpu, pCtx, uEventArg); break;
7602 case DBGFEVENT_INTERRUPT_SOFTWARE: VBOXVMM_INT_SOFTWARE(pVCpu, pCtx, (uint8_t)uEventArg); break;
7603 case DBGFEVENT_INSTR_CPUID: VBOXVMM_INSTR_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break;
7604 case DBGFEVENT_INSTR_GETSEC: VBOXVMM_INSTR_GETSEC(pVCpu, pCtx); break;
7605 case DBGFEVENT_INSTR_HALT: VBOXVMM_INSTR_HALT(pVCpu, pCtx); break;
7606 case DBGFEVENT_INSTR_INVD: VBOXVMM_INSTR_INVD(pVCpu, pCtx); break;
7607 case DBGFEVENT_INSTR_INVLPG: VBOXVMM_INSTR_INVLPG(pVCpu, pCtx); break;
7608 case DBGFEVENT_INSTR_RDPMC: VBOXVMM_INSTR_RDPMC(pVCpu, pCtx); break;
7609 case DBGFEVENT_INSTR_RDTSC: VBOXVMM_INSTR_RDTSC(pVCpu, pCtx); break;
7610 case DBGFEVENT_INSTR_RSM: VBOXVMM_INSTR_RSM(pVCpu, pCtx); break;
7611 case DBGFEVENT_INSTR_CRX_READ: VBOXVMM_INSTR_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
7612 case DBGFEVENT_INSTR_CRX_WRITE: VBOXVMM_INSTR_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
7613 case DBGFEVENT_INSTR_DRX_READ: VBOXVMM_INSTR_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
7614 case DBGFEVENT_INSTR_DRX_WRITE: VBOXVMM_INSTR_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
7615 case DBGFEVENT_INSTR_RDMSR: VBOXVMM_INSTR_RDMSR(pVCpu, pCtx, pCtx->ecx); break;
7616 case DBGFEVENT_INSTR_WRMSR: VBOXVMM_INSTR_WRMSR(pVCpu, pCtx, pCtx->ecx,
7617 RT_MAKE_U64(pCtx->eax, pCtx->edx)); break;
7618 case DBGFEVENT_INSTR_MWAIT: VBOXVMM_INSTR_MWAIT(pVCpu, pCtx); break;
7619 case DBGFEVENT_INSTR_MONITOR: VBOXVMM_INSTR_MONITOR(pVCpu, pCtx); break;
7620 case DBGFEVENT_INSTR_PAUSE: VBOXVMM_INSTR_PAUSE(pVCpu, pCtx); break;
7621 case DBGFEVENT_INSTR_SGDT: VBOXVMM_INSTR_SGDT(pVCpu, pCtx); break;
7622 case DBGFEVENT_INSTR_SIDT: VBOXVMM_INSTR_SIDT(pVCpu, pCtx); break;
7623 case DBGFEVENT_INSTR_LGDT: VBOXVMM_INSTR_LGDT(pVCpu, pCtx); break;
7624 case DBGFEVENT_INSTR_LIDT: VBOXVMM_INSTR_LIDT(pVCpu, pCtx); break;
7625 case DBGFEVENT_INSTR_SLDT: VBOXVMM_INSTR_SLDT(pVCpu, pCtx); break;
7626 case DBGFEVENT_INSTR_STR: VBOXVMM_INSTR_STR(pVCpu, pCtx); break;
7627 case DBGFEVENT_INSTR_LLDT: VBOXVMM_INSTR_LLDT(pVCpu, pCtx); break;
7628 case DBGFEVENT_INSTR_LTR: VBOXVMM_INSTR_LTR(pVCpu, pCtx); break;
7629 case DBGFEVENT_INSTR_RDTSCP: VBOXVMM_INSTR_RDTSCP(pVCpu, pCtx); break;
7630 case DBGFEVENT_INSTR_WBINVD: VBOXVMM_INSTR_WBINVD(pVCpu, pCtx); break;
7631 case DBGFEVENT_INSTR_XSETBV: VBOXVMM_INSTR_XSETBV(pVCpu, pCtx); break;
7632 case DBGFEVENT_INSTR_RDRAND: VBOXVMM_INSTR_RDRAND(pVCpu, pCtx); break;
7633 case DBGFEVENT_INSTR_RDSEED: VBOXVMM_INSTR_RDSEED(pVCpu, pCtx); break;
7634 case DBGFEVENT_INSTR_XSAVES: VBOXVMM_INSTR_XSAVES(pVCpu, pCtx); break;
7635 case DBGFEVENT_INSTR_XRSTORS: VBOXVMM_INSTR_XRSTORS(pVCpu, pCtx); break;
7636 case DBGFEVENT_INSTR_VMM_CALL: VBOXVMM_INSTR_VMM_CALL(pVCpu, pCtx); break;
7637 case DBGFEVENT_INSTR_VMX_VMCLEAR: VBOXVMM_INSTR_VMX_VMCLEAR(pVCpu, pCtx); break;
7638 case DBGFEVENT_INSTR_VMX_VMLAUNCH: VBOXVMM_INSTR_VMX_VMLAUNCH(pVCpu, pCtx); break;
7639 case DBGFEVENT_INSTR_VMX_VMPTRLD: VBOXVMM_INSTR_VMX_VMPTRLD(pVCpu, pCtx); break;
7640 case DBGFEVENT_INSTR_VMX_VMPTRST: VBOXVMM_INSTR_VMX_VMPTRST(pVCpu, pCtx); break;
7641 case DBGFEVENT_INSTR_VMX_VMREAD: VBOXVMM_INSTR_VMX_VMREAD(pVCpu, pCtx); break;
7642 case DBGFEVENT_INSTR_VMX_VMRESUME: VBOXVMM_INSTR_VMX_VMRESUME(pVCpu, pCtx); break;
7643 case DBGFEVENT_INSTR_VMX_VMWRITE: VBOXVMM_INSTR_VMX_VMWRITE(pVCpu, pCtx); break;
7644 case DBGFEVENT_INSTR_VMX_VMXOFF: VBOXVMM_INSTR_VMX_VMXOFF(pVCpu, pCtx); break;
7645 case DBGFEVENT_INSTR_VMX_VMXON: VBOXVMM_INSTR_VMX_VMXON(pVCpu, pCtx); break;
7646 case DBGFEVENT_INSTR_VMX_INVEPT: VBOXVMM_INSTR_VMX_INVEPT(pVCpu, pCtx); break;
7647 case DBGFEVENT_INSTR_VMX_INVVPID: VBOXVMM_INSTR_VMX_INVVPID(pVCpu, pCtx); break;
7648 case DBGFEVENT_INSTR_VMX_INVPCID: VBOXVMM_INSTR_VMX_INVPCID(pVCpu, pCtx); break;
7649 case DBGFEVENT_INSTR_VMX_VMFUNC: VBOXVMM_INSTR_VMX_VMFUNC(pVCpu, pCtx); break;
7650 default: AssertMsgFailed(("enmEvent1=%d uExitReason=%d\n", enmEvent1, uExitReason)); break;
7651 }
7652 switch (enmEvent2)
7653 {
7654 /** @todo consider which extra parameters would be helpful for each probe. */
7655 case DBGFEVENT_END: break;
7656 case DBGFEVENT_EXIT_TASK_SWITCH: VBOXVMM_EXIT_TASK_SWITCH(pVCpu, pCtx); break;
7657 case DBGFEVENT_EXIT_CPUID: VBOXVMM_EXIT_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break;
7658 case DBGFEVENT_EXIT_GETSEC: VBOXVMM_EXIT_GETSEC(pVCpu, pCtx); break;
7659 case DBGFEVENT_EXIT_HALT: VBOXVMM_EXIT_HALT(pVCpu, pCtx); break;
7660 case DBGFEVENT_EXIT_INVD: VBOXVMM_EXIT_INVD(pVCpu, pCtx); break;
7661 case DBGFEVENT_EXIT_INVLPG: VBOXVMM_EXIT_INVLPG(pVCpu, pCtx); break;
7662 case DBGFEVENT_EXIT_RDPMC: VBOXVMM_EXIT_RDPMC(pVCpu, pCtx); break;
7663 case DBGFEVENT_EXIT_RDTSC: VBOXVMM_EXIT_RDTSC(pVCpu, pCtx); break;
7664 case DBGFEVENT_EXIT_RSM: VBOXVMM_EXIT_RSM(pVCpu, pCtx); break;
7665 case DBGFEVENT_EXIT_CRX_READ: VBOXVMM_EXIT_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
7666 case DBGFEVENT_EXIT_CRX_WRITE: VBOXVMM_EXIT_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
7667 case DBGFEVENT_EXIT_DRX_READ: VBOXVMM_EXIT_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
7668 case DBGFEVENT_EXIT_DRX_WRITE: VBOXVMM_EXIT_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
7669 case DBGFEVENT_EXIT_RDMSR: VBOXVMM_EXIT_RDMSR(pVCpu, pCtx, pCtx->ecx); break;
7670 case DBGFEVENT_EXIT_WRMSR: VBOXVMM_EXIT_WRMSR(pVCpu, pCtx, pCtx->ecx,
7671 RT_MAKE_U64(pCtx->eax, pCtx->edx)); break;
7672 case DBGFEVENT_EXIT_MWAIT: VBOXVMM_EXIT_MWAIT(pVCpu, pCtx); break;
7673 case DBGFEVENT_EXIT_MONITOR: VBOXVMM_EXIT_MONITOR(pVCpu, pCtx); break;
7674 case DBGFEVENT_EXIT_PAUSE: VBOXVMM_EXIT_PAUSE(pVCpu, pCtx); break;
7675 case DBGFEVENT_EXIT_SGDT: VBOXVMM_EXIT_SGDT(pVCpu, pCtx); break;
7676 case DBGFEVENT_EXIT_SIDT: VBOXVMM_EXIT_SIDT(pVCpu, pCtx); break;
7677 case DBGFEVENT_EXIT_LGDT: VBOXVMM_EXIT_LGDT(pVCpu, pCtx); break;
7678 case DBGFEVENT_EXIT_LIDT: VBOXVMM_EXIT_LIDT(pVCpu, pCtx); break;
7679 case DBGFEVENT_EXIT_SLDT: VBOXVMM_EXIT_SLDT(pVCpu, pCtx); break;
7680 case DBGFEVENT_EXIT_STR: VBOXVMM_EXIT_STR(pVCpu, pCtx); break;
7681 case DBGFEVENT_EXIT_LLDT: VBOXVMM_EXIT_LLDT(pVCpu, pCtx); break;
7682 case DBGFEVENT_EXIT_LTR: VBOXVMM_EXIT_LTR(pVCpu, pCtx); break;
7683 case DBGFEVENT_EXIT_RDTSCP: VBOXVMM_EXIT_RDTSCP(pVCpu, pCtx); break;
7684 case DBGFEVENT_EXIT_WBINVD: VBOXVMM_EXIT_WBINVD(pVCpu, pCtx); break;
7685 case DBGFEVENT_EXIT_XSETBV: VBOXVMM_EXIT_XSETBV(pVCpu, pCtx); break;
7686 case DBGFEVENT_EXIT_RDRAND: VBOXVMM_EXIT_RDRAND(pVCpu, pCtx); break;
7687 case DBGFEVENT_EXIT_RDSEED: VBOXVMM_EXIT_RDSEED(pVCpu, pCtx); break;
7688 case DBGFEVENT_EXIT_XSAVES: VBOXVMM_EXIT_XSAVES(pVCpu, pCtx); break;
7689 case DBGFEVENT_EXIT_XRSTORS: VBOXVMM_EXIT_XRSTORS(pVCpu, pCtx); break;
7690 case DBGFEVENT_EXIT_VMM_CALL: VBOXVMM_EXIT_VMM_CALL(pVCpu, pCtx); break;
7691 case DBGFEVENT_EXIT_VMX_VMCLEAR: VBOXVMM_EXIT_VMX_VMCLEAR(pVCpu, pCtx); break;
7692 case DBGFEVENT_EXIT_VMX_VMLAUNCH: VBOXVMM_EXIT_VMX_VMLAUNCH(pVCpu, pCtx); break;
7693 case DBGFEVENT_EXIT_VMX_VMPTRLD: VBOXVMM_EXIT_VMX_VMPTRLD(pVCpu, pCtx); break;
7694 case DBGFEVENT_EXIT_VMX_VMPTRST: VBOXVMM_EXIT_VMX_VMPTRST(pVCpu, pCtx); break;
7695 case DBGFEVENT_EXIT_VMX_VMREAD: VBOXVMM_EXIT_VMX_VMREAD(pVCpu, pCtx); break;
7696 case DBGFEVENT_EXIT_VMX_VMRESUME: VBOXVMM_EXIT_VMX_VMRESUME(pVCpu, pCtx); break;
7697 case DBGFEVENT_EXIT_VMX_VMWRITE: VBOXVMM_EXIT_VMX_VMWRITE(pVCpu, pCtx); break;
7698 case DBGFEVENT_EXIT_VMX_VMXOFF: VBOXVMM_EXIT_VMX_VMXOFF(pVCpu, pCtx); break;
7699 case DBGFEVENT_EXIT_VMX_VMXON: VBOXVMM_EXIT_VMX_VMXON(pVCpu, pCtx); break;
7700 case DBGFEVENT_EXIT_VMX_INVEPT: VBOXVMM_EXIT_VMX_INVEPT(pVCpu, pCtx); break;
7701 case DBGFEVENT_EXIT_VMX_INVVPID: VBOXVMM_EXIT_VMX_INVVPID(pVCpu, pCtx); break;
7702 case DBGFEVENT_EXIT_VMX_INVPCID: VBOXVMM_EXIT_VMX_INVPCID(pVCpu, pCtx); break;
7703 case DBGFEVENT_EXIT_VMX_VMFUNC: VBOXVMM_EXIT_VMX_VMFUNC(pVCpu, pCtx); break;
7704 case DBGFEVENT_EXIT_VMX_EPT_MISCONFIG: VBOXVMM_EXIT_VMX_EPT_MISCONFIG(pVCpu, pCtx); break;
7705 case DBGFEVENT_EXIT_VMX_EPT_VIOLATION: VBOXVMM_EXIT_VMX_EPT_VIOLATION(pVCpu, pCtx); break;
7706 case DBGFEVENT_EXIT_VMX_VAPIC_ACCESS: VBOXVMM_EXIT_VMX_VAPIC_ACCESS(pVCpu, pCtx); break;
7707 case DBGFEVENT_EXIT_VMX_VAPIC_WRITE: VBOXVMM_EXIT_VMX_VAPIC_WRITE(pVCpu, pCtx); break;
7708 default: AssertMsgFailed(("enmEvent2=%d uExitReason=%d\n", enmEvent2, uExitReason)); break;
7709 }
7710 }
7711
7712 /*
7713 * Fire of the DBGF event, if enabled (our check here is just a quick one,
7714 * the DBGF call will do a full check).
7715 *
7716 * Note! DBGF sets DBGFEVENT_INTERRUPT_SOFTWARE in the bitmap.
7717 * Note! If we have to events, we prioritize the first, i.e. the instruction
7718 * one, in order to avoid event nesting.
7719 */
7720 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
7721 if ( enmEvent1 != DBGFEVENT_END
7722 && DBGF_IS_EVENT_ENABLED(pVM, enmEvent1))
7723 {
7724 vmxHCImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
7725 VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent1, DBGFEVENTCTX_HM, 1, uEventArg);
7726 if (rcStrict != VINF_SUCCESS)
7727 return rcStrict;
7728 }
7729 else if ( enmEvent2 != DBGFEVENT_END
7730 && DBGF_IS_EVENT_ENABLED(pVM, enmEvent2))
7731 {
7732 vmxHCImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
7733 VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent2, DBGFEVENTCTX_HM, 1, uEventArg);
7734 if (rcStrict != VINF_SUCCESS)
7735 return rcStrict;
7736 }
7737
7738 return VINF_SUCCESS;
7739}
7740
7741
7742/**
7743 * Single-stepping VM-exit filtering.
7744 *
7745 * This is preprocessing the VM-exits and deciding whether we've gotten far
7746 * enough to return VINF_EM_DBG_STEPPED already. If not, normal VM-exit
7747 * handling is performed.
7748 *
7749 * @returns Strict VBox status code (i.e. informational status codes too).
7750 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
7751 * @param pVmxTransient The VMX-transient structure.
7752 * @param pDbgState The debug state.
7753 */
7754DECLINLINE(VBOXSTRICTRC) hmR0VmxRunDebugHandleExit(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
7755{
7756 /*
7757 * Expensive (saves context) generic dtrace VM-exit probe.
7758 */
7759 uint32_t const uExitReason = pVmxTransient->uExitReason;
7760 if (!VBOXVMM_R0_HMVMX_VMEXIT_ENABLED())
7761 { /* more likely */ }
7762 else
7763 {
7764 vmxHCReadExitQualVmcs(pVCpu, pVmxTransient);
7765 int rc = vmxHCImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
7766 AssertRC(rc);
7767 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, &pVCpu->cpum.GstCtx, pVmxTransient->uExitReason, pVmxTransient->uExitQual);
7768 }
7769
7770 /*
7771 * Check for host NMI, just to get that out of the way.
7772 */
7773 if (uExitReason != VMX_EXIT_XCPT_OR_NMI)
7774 { /* normally likely */ }
7775 else
7776 {
7777 vmxHCReadExitIntInfoVmcs(pVCpu, pVmxTransient);
7778 uint32_t const uIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo);
7779 if (uIntType == VMX_EXIT_INT_INFO_TYPE_NMI)
7780 return hmR0VmxExitHostNmi(pVCpu, pVmxTransient->pVmcsInfo);
7781 }
7782
7783 /*
7784 * Check for single stepping event if we're stepping.
7785 */
7786 if (pVCpu->hm.s.fSingleInstruction)
7787 {
7788 switch (uExitReason)
7789 {
7790 case VMX_EXIT_MTF:
7791 return vmxHCExitMtf(pVCpu, pVmxTransient);
7792
7793 /* Various events: */
7794 case VMX_EXIT_XCPT_OR_NMI:
7795 case VMX_EXIT_EXT_INT:
7796 case VMX_EXIT_TRIPLE_FAULT:
7797 case VMX_EXIT_INT_WINDOW:
7798 case VMX_EXIT_NMI_WINDOW:
7799 case VMX_EXIT_TASK_SWITCH:
7800 case VMX_EXIT_TPR_BELOW_THRESHOLD:
7801 case VMX_EXIT_APIC_ACCESS:
7802 case VMX_EXIT_EPT_VIOLATION:
7803 case VMX_EXIT_EPT_MISCONFIG:
7804 case VMX_EXIT_PREEMPT_TIMER:
7805
7806 /* Instruction specific VM-exits: */
7807 case VMX_EXIT_CPUID:
7808 case VMX_EXIT_GETSEC:
7809 case VMX_EXIT_HLT:
7810 case VMX_EXIT_INVD:
7811 case VMX_EXIT_INVLPG:
7812 case VMX_EXIT_RDPMC:
7813 case VMX_EXIT_RDTSC:
7814 case VMX_EXIT_RSM:
7815 case VMX_EXIT_VMCALL:
7816 case VMX_EXIT_VMCLEAR:
7817 case VMX_EXIT_VMLAUNCH:
7818 case VMX_EXIT_VMPTRLD:
7819 case VMX_EXIT_VMPTRST:
7820 case VMX_EXIT_VMREAD:
7821 case VMX_EXIT_VMRESUME:
7822 case VMX_EXIT_VMWRITE:
7823 case VMX_EXIT_VMXOFF:
7824 case VMX_EXIT_VMXON:
7825 case VMX_EXIT_MOV_CRX:
7826 case VMX_EXIT_MOV_DRX:
7827 case VMX_EXIT_IO_INSTR:
7828 case VMX_EXIT_RDMSR:
7829 case VMX_EXIT_WRMSR:
7830 case VMX_EXIT_MWAIT:
7831 case VMX_EXIT_MONITOR:
7832 case VMX_EXIT_PAUSE:
7833 case VMX_EXIT_GDTR_IDTR_ACCESS:
7834 case VMX_EXIT_LDTR_TR_ACCESS:
7835 case VMX_EXIT_INVEPT:
7836 case VMX_EXIT_RDTSCP:
7837 case VMX_EXIT_INVVPID:
7838 case VMX_EXIT_WBINVD:
7839 case VMX_EXIT_XSETBV:
7840 case VMX_EXIT_RDRAND:
7841 case VMX_EXIT_INVPCID:
7842 case VMX_EXIT_VMFUNC:
7843 case VMX_EXIT_RDSEED:
7844 case VMX_EXIT_XSAVES:
7845 case VMX_EXIT_XRSTORS:
7846 {
7847 int rc = vmxHCImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
7848 AssertRCReturn(rc, rc);
7849 if ( pVCpu->cpum.GstCtx.rip != pDbgState->uRipStart
7850 || pVCpu->cpum.GstCtx.cs.Sel != pDbgState->uCsStart)
7851 return VINF_EM_DBG_STEPPED;
7852 break;
7853 }
7854
7855 /* Errors and unexpected events: */
7856 case VMX_EXIT_INIT_SIGNAL:
7857 case VMX_EXIT_SIPI:
7858 case VMX_EXIT_IO_SMI:
7859 case VMX_EXIT_SMI:
7860 case VMX_EXIT_ERR_INVALID_GUEST_STATE:
7861 case VMX_EXIT_ERR_MSR_LOAD:
7862 case VMX_EXIT_ERR_MACHINE_CHECK:
7863 case VMX_EXIT_PML_FULL:
7864 case VMX_EXIT_VIRTUALIZED_EOI:
7865 case VMX_EXIT_APIC_WRITE: /* Some talk about this being fault like, so I guess we must process it? */
7866 break;
7867
7868 default:
7869 AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason));
7870 break;
7871 }
7872 }
7873
7874 /*
7875 * Check for debugger event breakpoints and dtrace probes.
7876 */
7877 if ( uExitReason < RT_ELEMENTS(pDbgState->bmExitsToCheck) * 32U
7878 && ASMBitTest(pDbgState->bmExitsToCheck, uExitReason) )
7879 {
7880 VBOXSTRICTRC rcStrict = hmR0VmxHandleExitDtraceEvents(pVCpu, pVmxTransient, uExitReason);
7881 if (rcStrict != VINF_SUCCESS)
7882 return rcStrict;
7883 }
7884
7885 /*
7886 * Normal processing.
7887 */
7888#ifdef HMVMX_USE_FUNCTION_TABLE
7889 return g_aVMExitHandlers[uExitReason].pfn(pVCpu, pVmxTransient);
7890#else
7891 return vmxHCHandleExit(pVCpu, pVmxTransient, uExitReason);
7892#endif
7893}
7894
7895
7896/**
7897 * Single steps guest code using hardware-assisted VMX.
7898 *
7899 * This is -not- the same as the guest single-stepping itself (say using EFLAGS.TF)
7900 * but single-stepping through the hypervisor debugger.
7901 *
7902 * @returns Strict VBox status code (i.e. informational status codes too).
7903 * @param pVCpu The cross context virtual CPU structure.
7904 * @param pcLoops Pointer to the number of executed loops.
7905 *
7906 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
7907 */
7908static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
7909{
7910 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
7911 Assert(pcLoops);
7912 Assert(*pcLoops <= cMaxResumeLoops);
7913
7914 VMXTRANSIENT VmxTransient;
7915 RT_ZERO(VmxTransient);
7916 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
7917
7918 /* Set HMCPU indicators. */
7919 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
7920 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
7921 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
7922 pVCpu->hmr0.s.fUsingDebugLoop = true;
7923
7924 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
7925 VMXRUNDBGSTATE DbgState;
7926 hmR0VmxRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
7927 hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
7928
7929 /*
7930 * The loop.
7931 */
7932 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
7933 for (;;)
7934 {
7935 Assert(!HMR0SuspendPending());
7936 HMVMX_ASSERT_CPU_SAFE(pVCpu);
7937 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7938 bool fStepping = pVCpu->hm.s.fSingleInstruction;
7939
7940 /* Set up VM-execution controls the next two can respond to. */
7941 hmR0VmxPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7942
7943 /*
7944 * Preparatory work for running guest code, this may force us to
7945 * return to ring-3.
7946 *
7947 * Warning! This bugger disables interrupts on VINF_SUCCESS!
7948 */
7949 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
7950 if (rcStrict != VINF_SUCCESS)
7951 break;
7952
7953 /* Interrupts are disabled at this point! */
7954 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
7955
7956 /* Override any obnoxious code in the above two calls. */
7957 hmR0VmxPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
7958
7959 /*
7960 * Finally execute the guest.
7961 */
7962 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
7963
7964 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
7965 /* Interrupts are re-enabled at this point! */
7966
7967 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7968 if (RT_SUCCESS(rcRun))
7969 { /* very likely */ }
7970 else
7971 {
7972 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
7973 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
7974 return rcRun;
7975 }
7976
7977 /* Profile the VM-exit. */
7978 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7979 STAM_COUNTER_INC(&pVCpu->hm.s.StatDebugExitAll);
7980 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7981 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
7982 HMVMX_START_EXIT_DISPATCH_PROF();
7983
7984 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
7985
7986 /*
7987 * Handle the VM-exit - we quit earlier on certain VM-exits, see hmR0VmxHandleExitDebug().
7988 */
7989 rcStrict = hmR0VmxRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
7990 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
7991 if (rcStrict != VINF_SUCCESS)
7992 break;
7993 if (++(*pcLoops) > cMaxResumeLoops)
7994 {
7995 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
7996 rcStrict = VINF_EM_RAW_INTERRUPT;
7997 break;
7998 }
7999
8000 /*
8001 * Stepping: Did the RIP change, if so, consider it a single step.
8002 * Otherwise, make sure one of the TFs gets set.
8003 */
8004 if (fStepping)
8005 {
8006 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
8007 AssertRC(rc);
8008 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
8009 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
8010 {
8011 rcStrict = VINF_EM_DBG_STEPPED;
8012 break;
8013 }
8014 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
8015 }
8016
8017 /*
8018 * Update when dtrace settings changes (DBGF kicks us, so no need to check).
8019 */
8020 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
8021 hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
8022
8023 /* Restore all controls applied by hmR0VmxPreRunGuestDebugStateApply above. */
8024 rcStrict = hmR0VmxRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
8025 Assert(rcStrict == VINF_SUCCESS);
8026 }
8027
8028 /*
8029 * Clear the X86_EFL_TF if necessary.
8030 */
8031 if (pVCpu->hmr0.s.fClearTrapFlag)
8032 {
8033 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
8034 AssertRC(rc);
8035 pVCpu->hmr0.s.fClearTrapFlag = false;
8036 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
8037 }
8038 /** @todo there seems to be issues with the resume flag when the monitor trap
8039 * flag is pending without being used. Seen early in bios init when
8040 * accessing APIC page in protected mode. */
8041
8042/** @todo we need to do hmR0VmxRunDebugStateRevert here too, in case we broke
8043 * out of the above loop. */
8044
8045 /* Restore HMCPU indicators. */
8046 pVCpu->hmr0.s.fUsingDebugLoop = false;
8047 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
8048 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
8049
8050 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
8051 return rcStrict;
8052}
8053
8054
8055/** @} */
8056
8057
8058/**
8059 * Checks if any expensive dtrace probes are enabled and we should go to the
8060 * debug loop.
8061 *
8062 * @returns true if we should use debug loop, false if not.
8063 */
8064static bool hmR0VmxAnyExpensiveProbesEnabled(void)
8065{
8066 /* It's probably faster to OR the raw 32-bit counter variables together.
8067 Since the variables are in an array and the probes are next to one
8068 another (more or less), we have good locality. So, better read
8069 eight-nine cache lines ever time and only have one conditional, than
8070 128+ conditionals, right? */
8071 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
8072 | VBOXVMM_XCPT_DE_ENABLED_RAW()
8073 | VBOXVMM_XCPT_DB_ENABLED_RAW()
8074 | VBOXVMM_XCPT_BP_ENABLED_RAW()
8075 | VBOXVMM_XCPT_OF_ENABLED_RAW()
8076 | VBOXVMM_XCPT_BR_ENABLED_RAW()
8077 | VBOXVMM_XCPT_UD_ENABLED_RAW()
8078 | VBOXVMM_XCPT_NM_ENABLED_RAW()
8079 | VBOXVMM_XCPT_DF_ENABLED_RAW()
8080 | VBOXVMM_XCPT_TS_ENABLED_RAW()
8081 | VBOXVMM_XCPT_NP_ENABLED_RAW()
8082 | VBOXVMM_XCPT_SS_ENABLED_RAW()
8083 | VBOXVMM_XCPT_GP_ENABLED_RAW()
8084 | VBOXVMM_XCPT_PF_ENABLED_RAW()
8085 | VBOXVMM_XCPT_MF_ENABLED_RAW()
8086 | VBOXVMM_XCPT_AC_ENABLED_RAW()
8087 | VBOXVMM_XCPT_XF_ENABLED_RAW()
8088 | VBOXVMM_XCPT_VE_ENABLED_RAW()
8089 | VBOXVMM_XCPT_SX_ENABLED_RAW()
8090 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
8091 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
8092 ) != 0
8093 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
8094 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
8095 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
8096 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
8097 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
8098 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
8099 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
8100 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
8101 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
8102 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
8103 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
8104 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
8105 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
8106 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
8107 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
8108 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
8109 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
8110 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
8111 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
8112 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
8113 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
8114 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
8115 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
8116 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
8117 | VBOXVMM_INSTR_STR_ENABLED_RAW()
8118 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
8119 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
8120 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
8121 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
8122 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
8123 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
8124 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
8125 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
8126 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
8127 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
8128 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
8129 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
8130 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
8131 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
8132 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
8133 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
8134 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
8135 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
8136 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
8137 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
8138 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
8139 ) != 0
8140 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
8141 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
8142 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
8143 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
8144 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
8145 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
8146 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
8147 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
8148 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
8149 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
8150 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
8151 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
8152 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
8153 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
8154 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
8155 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
8156 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
8157 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
8158 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
8159 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
8160 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
8161 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
8162 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
8163 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
8164 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
8165 | VBOXVMM_EXIT_STR_ENABLED_RAW()
8166 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
8167 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
8168 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
8169 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
8170 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
8171 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
8172 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
8173 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
8174 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
8175 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
8176 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
8177 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
8178 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
8179 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
8180 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
8181 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
8182 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
8183 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
8184 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
8185 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
8186 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
8187 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
8188 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
8189 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
8190 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
8191 ) != 0;
8192}
8193
8194
8195/**
8196 * Runs the guest using hardware-assisted VMX.
8197 *
8198 * @returns Strict VBox status code (i.e. informational status codes too).
8199 * @param pVCpu The cross context virtual CPU structure.
8200 */
8201VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
8202{
8203 AssertPtr(pVCpu);
8204 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
8205 Assert(VMMRZCallRing3IsEnabled(pVCpu));
8206 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
8207 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
8208
8209 VBOXSTRICTRC rcStrict;
8210 uint32_t cLoops = 0;
8211 for (;;)
8212 {
8213#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
8214 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
8215#else
8216 NOREF(pCtx);
8217 bool const fInNestedGuestMode = false;
8218#endif
8219 if (!fInNestedGuestMode)
8220 {
8221 if ( !pVCpu->hm.s.fUseDebugLoop
8222 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
8223 && !DBGFIsStepping(pVCpu)
8224 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
8225 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
8226 else
8227 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
8228 }
8229#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
8230 else
8231 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
8232
8233 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
8234 {
8235 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
8236 continue;
8237 }
8238 if (rcStrict == VINF_VMX_VMEXIT)
8239 {
8240 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
8241 continue;
8242 }
8243#endif
8244 break;
8245 }
8246
8247 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
8248 switch (rcLoop)
8249 {
8250 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
8251 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
8252 }
8253
8254 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
8255 if (RT_FAILURE(rc2))
8256 {
8257 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
8258 rcStrict = rc2;
8259 }
8260 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
8261 Assert(!VMMR0AssertionIsNotificationSet(pVCpu));
8262 return rcStrict;
8263}
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette