
source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@92023

Last change on this file since 92023 was 92023, checked in by vboxsync, 3 years ago

VMM/HMVMXR0: Removed superfluous call to hmR0VmxImportGuestState in hmR0VmxExitMovToCrX.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 740.8 KB
1/* $Id: HMVMXR0.cpp 92023 2021-10-25 10:34:48Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#define VMCPU_INCL_CPUM_GST_CTX
24#include <iprt/x86.h>
25#include <iprt/asm-amd64-x86.h>
26#include <iprt/thread.h>
27#include <iprt/mem.h>
28#include <iprt/mp.h>
29
30#include <VBox/vmm/pdmapi.h>
31#include <VBox/vmm/dbgf.h>
32#include <VBox/vmm/iem.h>
33#include <VBox/vmm/iom.h>
34#include <VBox/vmm/tm.h>
35#include <VBox/vmm/em.h>
36#include <VBox/vmm/gim.h>
37#include <VBox/vmm/apic.h>
38#include "HMInternal.h"
39#include <VBox/vmm/vmcc.h>
40#include <VBox/vmm/hmvmxinline.h>
41#include "HMVMXR0.h"
42#include "dtrace/VBoxVMM.h"
43
44#ifdef DEBUG_ramshankar
45# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
46# define HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
47# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
48# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
49# define HMVMX_ALWAYS_CLEAN_TRANSIENT
50# define HMVMX_ALWAYS_CHECK_GUEST_STATE
51# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
52# define HMVMX_ALWAYS_TRAP_PF
53# define HMVMX_ALWAYS_FLUSH_TLB
54# define HMVMX_ALWAYS_SWAP_EFER
55#endif
56
57
58/*********************************************************************************************************************************
59* Defined Constants And Macros *
60*********************************************************************************************************************************/
61/** Use the function table. */
62#define HMVMX_USE_FUNCTION_TABLE
63
64/** Determine which tagged-TLB flush handler to use. */
65#define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0
66#define HMVMX_FLUSH_TAGGED_TLB_EPT 1
67#define HMVMX_FLUSH_TAGGED_TLB_VPID 2
68#define HMVMX_FLUSH_TAGGED_TLB_NONE 3
69
70/**
71 * Flags to skip redundant reads of some common VMCS fields that are not part of
72 * the guest-CPU or VCPU state but are needed while handling VM-exits.
73 */
74#define HMVMX_READ_IDT_VECTORING_INFO RT_BIT_32(0)
75#define HMVMX_READ_IDT_VECTORING_ERROR_CODE RT_BIT_32(1)
76#define HMVMX_READ_EXIT_QUALIFICATION RT_BIT_32(2)
77#define HMVMX_READ_EXIT_INSTR_LEN RT_BIT_32(3)
78#define HMVMX_READ_EXIT_INTERRUPTION_INFO RT_BIT_32(4)
79#define HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE RT_BIT_32(5)
80#define HMVMX_READ_EXIT_INSTR_INFO RT_BIT_32(6)
81#define HMVMX_READ_GUEST_LINEAR_ADDR RT_BIT_32(7)
82#define HMVMX_READ_GUEST_PHYSICAL_ADDR RT_BIT_32(8)
83#define HMVMX_READ_GUEST_PENDING_DBG_XCPTS RT_BIT_32(9)
84
85/** All the VMCS fields required for processing of exception/NMI VM-exits. */
86#define HMVMX_READ_XCPT_INFO ( HMVMX_READ_EXIT_INTERRUPTION_INFO \
87 | HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE \
88 | HMVMX_READ_EXIT_INSTR_LEN \
89 | HMVMX_READ_IDT_VECTORING_INFO \
90 | HMVMX_READ_IDT_VECTORING_ERROR_CODE)
91
92/** Assert that all the given fields have been read from the VMCS. */
93#ifdef VBOX_STRICT
94# define HMVMX_ASSERT_READ(a_pVmxTransient, a_fReadFields) \
95 do { \
96 uint32_t const fVmcsFieldRead = ASMAtomicUoReadU32(&pVmxTransient->fVmcsFieldsRead); \
97 Assert((fVmcsFieldRead & (a_fReadFields)) == (a_fReadFields)); \
98 } while (0)
99#else
100# define HMVMX_ASSERT_READ(a_pVmxTransient, a_fReadFields) do { } while (0)
101#endif
102
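/*
 * Illustrative usage (added for exposition, not from the original source): an exit
 * handler caches the VMCS fields it needs via the hmR0VmxReadXxxVmcs helpers defined
 * later in this file, then asserts that the corresponding HMVMX_READ_XXX bits are set.
 *
 * @code
 *     hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
 *     hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
 *     HMVMX_ASSERT_READ(pVmxTransient, HMVMX_READ_EXIT_INTERRUPTION_INFO | HMVMX_READ_EXIT_INSTR_LEN);
 * @endcode
 */
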
103/**
104 * Subset of the guest-CPU state that is kept by VMX R0 code while executing the
105 * guest using hardware-assisted VMX.
106 *
107 * This excludes state like GPRs (other than RSP) which are always swapped
108 * and restored across the world-switch, and also registers like the EFER
109 * MSR which cannot be modified by the guest without causing a VM-exit.
110 */
111#define HMVMX_CPUMCTX_EXTRN_ALL ( CPUMCTX_EXTRN_RIP \
112 | CPUMCTX_EXTRN_RFLAGS \
113 | CPUMCTX_EXTRN_RSP \
114 | CPUMCTX_EXTRN_SREG_MASK \
115 | CPUMCTX_EXTRN_TABLE_MASK \
116 | CPUMCTX_EXTRN_KERNEL_GS_BASE \
117 | CPUMCTX_EXTRN_SYSCALL_MSRS \
118 | CPUMCTX_EXTRN_SYSENTER_MSRS \
119 | CPUMCTX_EXTRN_TSC_AUX \
120 | CPUMCTX_EXTRN_OTHER_MSRS \
121 | CPUMCTX_EXTRN_CR0 \
122 | CPUMCTX_EXTRN_CR3 \
123 | CPUMCTX_EXTRN_CR4 \
124 | CPUMCTX_EXTRN_DR7 \
125 | CPUMCTX_EXTRN_HWVIRT \
126 | CPUMCTX_EXTRN_HM_VMX_MASK)
127
128/**
129 * Exception bitmap mask for real-mode guests (real-on-v86).
130 *
131 * We need to intercept all exceptions manually except:
132 * - \#AC and \#DB are always intercepted to prevent the CPU from deadlocking
133 * due to bugs in Intel CPUs.
134 * - \#PF need not be intercepted even in real-mode if we have nested paging
135 * support.
136 */
137#define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) /* always: | RT_BIT(X86_XCPT_DB) */ | RT_BIT(X86_XCPT_NMI) \
138 | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \
139 | RT_BIT(X86_XCPT_UD) | RT_BIT(X86_XCPT_NM) | RT_BIT(X86_XCPT_DF) \
140 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \
141 | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \
142 | RT_BIT(X86_XCPT_MF) /* always: | RT_BIT(X86_XCPT_AC) */ | RT_BIT(X86_XCPT_MC) \
143 | RT_BIT(X86_XCPT_XF))
144
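/*
 * Sketch of how this mask is meant to be consumed (illustrative; the real-on-v86
 * flag shown below is an assumption, not code from this file): for real-mode guests
 * the mask is OR'ed into the exception bitmap before the bitmap is committed.
 *
 * @code
 *     if (fRealOnV86Active)                                       // assumed flag
 *         uXcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK;
 *     int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
 *     AssertRC(rc);
 * @endcode
 */
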
145/** Maximum VM-instruction error number. */
146#define HMVMX_INSTR_ERROR_MAX 28
147
148/** Profiling macro. */
149#ifdef HM_PROFILE_EXIT_DISPATCH
150# define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed)
151# define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed)
152#else
153# define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0)
154# define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0)
155#endif
156
157/** Assert that preemption is disabled or covered by thread-context hooks. */
158#define HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu) Assert( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
159 || !RTThreadPreemptIsEnabled(NIL_RTTHREAD))
160
161/** Assert that we haven't migrated CPUs when thread-context hooks are not
162 * used. */
163#define HMVMX_ASSERT_CPU_SAFE(a_pVCpu) AssertMsg( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \
164 || (a_pVCpu)->hmr0.s.idEnteredCpu == RTMpCpuId(), \
165 ("Illegal migration! Entered on CPU %u Current %u\n", \
166 (a_pVCpu)->hmr0.s.idEnteredCpu, RTMpCpuId()))
167
168/** Asserts that the given CPUMCTX_EXTRN_XXX bits are present in the guest-CPU
169 * context. */
170#define HMVMX_CPUMCTX_ASSERT(a_pVCpu, a_fExtrnMbz) AssertMsg(!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnMbz)), \
171 ("fExtrn=%#RX64 fExtrnMbz=%#RX64\n", \
172 (a_pVCpu)->cpum.GstCtx.fExtrn, (a_fExtrnMbz)))
173
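/*
 * Illustrative usage (not quoted from this file): before reading a guest register
 * from the context, assert that its CPUMCTX_EXTRN_XXX bit has been imported, i.e.
 * that it is no longer marked external in fExtrn.
 *
 * @code
 *     HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS);
 *     uint64_t const uGuestCr0 = pVCpu->cpum.GstCtx.cr0;   // now safe to read
 * @endcode
 */
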
174/** Log the VM-exit reason with an easily visible marker to identify it in a
175 * potential sea of logging data. */
176#define HMVMX_LOG_EXIT(a_pVCpu, a_uExitReason) \
177 do { \
178 Log4(("VM-exit: vcpu[%RU32] %85s -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-\n", (a_pVCpu)->idCpu, \
179 HMGetVmxExitName(a_uExitReason))); \
180 } while (0) \
181
182
183/*********************************************************************************************************************************
184* Structures and Typedefs *
185*********************************************************************************************************************************/
186/**
187 * VMX per-VCPU transient state.
188 *
189 * A state structure for holding miscellaneous information across
190 * VMX non-root operation that is restored after the transition.
191 *
192 * Note: The members are ordered and aligned such that the most
193 * frequently used ones (in the guest execution loop) fall within
194 * the first cache line.
195 */
196typedef struct VMXTRANSIENT
197{
198 /** Mask of currently read VMCS fields; HMVMX_READ_XXX. */
199 uint32_t fVmcsFieldsRead;
200 /** The guest's TPR value used for TPR shadowing. */
201 uint8_t u8GuestTpr;
202 uint8_t abAlignment0[3];
203
204 /** Whether the VM-exit was caused by a page-fault during delivery of an
205 * external interrupt or NMI. */
206 bool fVectoringPF;
207 /** Whether the VM-exit was caused by a page-fault during delivery of a
208 * contributory exception or a page-fault. */
209 bool fVectoringDoublePF;
210 /** Whether the VM-entry failed or not. */
211 bool fVMEntryFailed;
212 /** Whether the TSC_AUX MSR needs to be removed from the auto-load/store MSR
213 * area after VM-exit. */
214 bool fRemoveTscAuxMsr;
215 /** Whether TSC-offsetting and VMX-preemption timer was updated before VM-entry. */
216 bool fUpdatedTscOffsettingAndPreemptTimer;
217 /** Whether we are currently executing a nested-guest. */
218 bool fIsNestedGuest;
219 /** Whether the guest debug state was active at the time of VM-exit. */
220 bool fWasGuestDebugStateActive;
221 /** Whether the hyper debug state was active at the time of VM-exit. */
222 bool fWasHyperDebugStateActive;
223
224 /** The basic VM-exit reason. */
225 uint32_t uExitReason;
226 /** The VM-exit interruption error code. */
227 uint32_t uExitIntErrorCode;
228
229 /** The host's rflags/eflags. */
230 RTCCUINTREG fEFlags;
231
232 /** The VM-exit exit code qualification. */
233 uint64_t uExitQual;
234
235 /** The VMCS info. object. */
236 PVMXVMCSINFO pVmcsInfo;
237
238 /** The VM-exit interruption-information field. */
239 uint32_t uExitIntInfo;
240 /** The VM-exit instruction-length field. */
241 uint32_t cbExitInstr;
242
243 /** The VM-exit instruction-information field. */
244 VMXEXITINSTRINFO ExitInstrInfo;
245 /** IDT-vectoring information field. */
246 uint32_t uIdtVectoringInfo;
247
248 /** IDT-vectoring error code. */
249 uint32_t uIdtVectoringErrorCode;
250 uint32_t u32Alignment0;
251
252 /** The Guest-linear address. */
253 uint64_t uGuestLinearAddr;
254
255 /** The Guest-physical address. */
256 uint64_t uGuestPhysicalAddr;
257
258 /** The Guest pending-debug exceptions. */
259 uint64_t uGuestPendingDbgXcpts;
260
261 /** The VM-entry interruption-information field. */
262 uint32_t uEntryIntInfo;
263 /** The VM-entry exception error code field. */
264 uint32_t uEntryXcptErrorCode;
265
266 /** The VM-entry instruction length field. */
267 uint32_t cbEntryInstr;
268} VMXTRANSIENT;
269AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t));
270AssertCompileMemberAlignment(VMXTRANSIENT, fVmcsFieldsRead, 8);
271AssertCompileMemberAlignment(VMXTRANSIENT, fVectoringPF, 8);
272AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, 8);
273AssertCompileMemberAlignment(VMXTRANSIENT, fEFlags, 8);
274AssertCompileMemberAlignment(VMXTRANSIENT, uExitQual, 8);
275AssertCompileMemberAlignment(VMXTRANSIENT, pVmcsInfo, 8);
276AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntInfo, 8);
277AssertCompileMemberAlignment(VMXTRANSIENT, ExitInstrInfo, 8);
278AssertCompileMemberAlignment(VMXTRANSIENT, uIdtVectoringErrorCode, 8);
279AssertCompileMemberAlignment(VMXTRANSIENT, uGuestLinearAddr, 8);
280AssertCompileMemberAlignment(VMXTRANSIENT, uGuestPhysicalAddr, 8);
281AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntInfo, 8);
282AssertCompileMemberAlignment(VMXTRANSIENT, cbEntryInstr, 8);
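/* Compile-time illustration of the cache-line note in the VMXTRANSIENT description
   above (added for exposition, not part of the original source): the hot members up
   to and including pVmcsInfo are expected to fit in the first 64-byte cache line. */
AssertCompile(RT_UOFFSETOF(VMXTRANSIENT, pVmcsInfo) < 64);
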
283/** Pointer to VMX transient state. */
284typedef VMXTRANSIENT *PVMXTRANSIENT;
285/** Pointer to a const VMX transient state. */
286typedef const VMXTRANSIENT *PCVMXTRANSIENT;
287
288/**
289 * VMX page allocation information.
290 */
291typedef struct
292{
293 uint32_t fValid; /**< Whether to allocate this page (e.g. based on a CPU feature). */
294 uint32_t uPadding0; /**< Padding to ensure arrays of this struct are aligned to a multiple of 8. */
295 PRTHCPHYS pHCPhys; /**< Where to store the host-physical address of the allocation. */
296 PRTR0PTR ppVirt; /**< Where to store the host-virtual address of the allocation. */
297} VMXPAGEALLOCINFO;
298/** Pointer to VMX page-allocation info. */
299typedef VMXPAGEALLOCINFO *PVMXPAGEALLOCINFO;
300/** Pointer to a const VMX page-allocation info. */
301typedef const VMXPAGEALLOCINFO *PCVMXPAGEALLOCINFO;
302AssertCompileSizeAlignment(VMXPAGEALLOCINFO, 8);
303
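/*
 * Hypothetical usage sketch (the variable names and helper below are assumptions,
 * added only to illustrate the structure): a caller describes the pages it needs in
 * an array of these descriptors and a common allocator walks it, honouring fValid.
 *
 * @code
 *     VMXPAGEALLOCINFO aAllocInfo[] =
 *     {   // fValid,        padding, where to store phys addr, where to store virt addr
 *         { true,           0,       &HCPhysVmcs,              &pvVmcs      },
 *         { fUseMsrBitmaps, 0,       &HCPhysMsrBitmap,         &pvMsrBitmap },
 *     };
 *     for (uint32_t i = 0; i < RT_ELEMENTS(aAllocInfo); i++)
 *         if (aAllocInfo[i].fValid)
 *             rc = hmR0VmxAllocPage(aAllocInfo[i].pHCPhys, aAllocInfo[i].ppVirt);  // assumed helper
 * @endcode
 */
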
304/**
305 * Memory operand read or write access.
306 */
307typedef enum VMXMEMACCESS
308{
309 VMXMEMACCESS_READ = 0,
310 VMXMEMACCESS_WRITE = 1
311} VMXMEMACCESS;
312
313/**
314 * VMX VM-exit handler.
315 *
316 * @returns Strict VBox status code (i.e. informational status codes too).
317 * @param pVCpu The cross context virtual CPU structure.
318 * @param pVmxTransient The VMX-transient structure.
319 */
320#ifndef HMVMX_USE_FUNCTION_TABLE
321typedef VBOXSTRICTRC FNVMXEXITHANDLER(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
322#else
323typedef DECLCALLBACKTYPE(VBOXSTRICTRC, FNVMXEXITHANDLER,(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient));
324/** Pointer to VM-exit handler. */
325typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER;
326#endif
327
328/**
329 * VMX VM-exit handler, non-strict status code.
330 *
331 * This is generally the same as FNVMXEXITHANDLER; the NSRC bit is just FYI.
332 *
333 * @returns VBox status code, no informational status code returned.
334 * @param pVCpu The cross context virtual CPU structure.
335 * @param pVmxTransient The VMX-transient structure.
336 *
337 * @remarks This is not used on anything returning VERR_EM_INTERPRETER as the
338 * use of that status code will be replaced with VINF_EM_SOMETHING
339 * later when switching over to IEM.
340 */
341#ifndef HMVMX_USE_FUNCTION_TABLE
342typedef int FNVMXEXITHANDLERNSRC(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
343#else
344typedef FNVMXEXITHANDLER FNVMXEXITHANDLERNSRC;
345#endif
346
347
348/*********************************************************************************************************************************
349* Internal Functions *
350*********************************************************************************************************************************/
351#ifndef HMVMX_USE_FUNCTION_TABLE
352DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExit(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
353# define HMVMX_EXIT_DECL DECLINLINE(VBOXSTRICTRC)
354# define HMVMX_EXIT_NSRC_DECL DECLINLINE(int)
355#else
356# define HMVMX_EXIT_DECL static DECLCALLBACK(VBOXSTRICTRC)
357# define HMVMX_EXIT_NSRC_DECL HMVMX_EXIT_DECL
358#endif
359#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
360DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExitNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient);
361#endif
362
363static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat);
364
365/** @name VM-exit handler prototypes.
366 * @{
367 */
368static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi;
369static FNVMXEXITHANDLER hmR0VmxExitExtInt;
370static FNVMXEXITHANDLER hmR0VmxExitTripleFault;
371static FNVMXEXITHANDLERNSRC hmR0VmxExitIntWindow;
372static FNVMXEXITHANDLERNSRC hmR0VmxExitNmiWindow;
373static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch;
374static FNVMXEXITHANDLER hmR0VmxExitCpuid;
375static FNVMXEXITHANDLER hmR0VmxExitGetsec;
376static FNVMXEXITHANDLER hmR0VmxExitHlt;
377static FNVMXEXITHANDLERNSRC hmR0VmxExitInvd;
378static FNVMXEXITHANDLER hmR0VmxExitInvlpg;
379static FNVMXEXITHANDLER hmR0VmxExitRdpmc;
380static FNVMXEXITHANDLER hmR0VmxExitVmcall;
381#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
382static FNVMXEXITHANDLER hmR0VmxExitVmclear;
383static FNVMXEXITHANDLER hmR0VmxExitVmlaunch;
384static FNVMXEXITHANDLER hmR0VmxExitVmptrld;
385static FNVMXEXITHANDLER hmR0VmxExitVmptrst;
386static FNVMXEXITHANDLER hmR0VmxExitVmread;
387static FNVMXEXITHANDLER hmR0VmxExitVmresume;
388static FNVMXEXITHANDLER hmR0VmxExitVmwrite;
389static FNVMXEXITHANDLER hmR0VmxExitVmxoff;
390static FNVMXEXITHANDLER hmR0VmxExitVmxon;
391static FNVMXEXITHANDLER hmR0VmxExitInvvpid;
392#endif
393static FNVMXEXITHANDLER hmR0VmxExitRdtsc;
394static FNVMXEXITHANDLER hmR0VmxExitMovCRx;
395static FNVMXEXITHANDLER hmR0VmxExitMovDRx;
396static FNVMXEXITHANDLER hmR0VmxExitIoInstr;
397static FNVMXEXITHANDLER hmR0VmxExitRdmsr;
398static FNVMXEXITHANDLER hmR0VmxExitWrmsr;
399static FNVMXEXITHANDLER hmR0VmxExitMwait;
400static FNVMXEXITHANDLER hmR0VmxExitMtf;
401static FNVMXEXITHANDLER hmR0VmxExitMonitor;
402static FNVMXEXITHANDLER hmR0VmxExitPause;
403static FNVMXEXITHANDLERNSRC hmR0VmxExitTprBelowThreshold;
404static FNVMXEXITHANDLER hmR0VmxExitApicAccess;
405static FNVMXEXITHANDLER hmR0VmxExitEptViolation;
406static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig;
407static FNVMXEXITHANDLER hmR0VmxExitRdtscp;
408static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer;
409static FNVMXEXITHANDLERNSRC hmR0VmxExitWbinvd;
410static FNVMXEXITHANDLER hmR0VmxExitXsetbv;
411static FNVMXEXITHANDLER hmR0VmxExitInvpcid;
412static FNVMXEXITHANDLERNSRC hmR0VmxExitSetPendingXcptUD;
413static FNVMXEXITHANDLERNSRC hmR0VmxExitErrInvalidGuestState;
414static FNVMXEXITHANDLERNSRC hmR0VmxExitErrUnexpected;
415/** @} */
416
417#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
418/** @name Nested-guest VM-exit handler prototypes.
419 * @{
420 */
421static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmiNested;
422static FNVMXEXITHANDLER hmR0VmxExitTripleFaultNested;
423static FNVMXEXITHANDLERNSRC hmR0VmxExitIntWindowNested;
424static FNVMXEXITHANDLERNSRC hmR0VmxExitNmiWindowNested;
425static FNVMXEXITHANDLER hmR0VmxExitTaskSwitchNested;
426static FNVMXEXITHANDLER hmR0VmxExitHltNested;
427static FNVMXEXITHANDLER hmR0VmxExitInvlpgNested;
428static FNVMXEXITHANDLER hmR0VmxExitRdpmcNested;
429static FNVMXEXITHANDLER hmR0VmxExitVmreadVmwriteNested;
430static FNVMXEXITHANDLER hmR0VmxExitRdtscNested;
431static FNVMXEXITHANDLER hmR0VmxExitMovCRxNested;
432static FNVMXEXITHANDLER hmR0VmxExitMovDRxNested;
433static FNVMXEXITHANDLER hmR0VmxExitIoInstrNested;
434static FNVMXEXITHANDLER hmR0VmxExitRdmsrNested;
435static FNVMXEXITHANDLER hmR0VmxExitWrmsrNested;
436static FNVMXEXITHANDLER hmR0VmxExitMwaitNested;
437static FNVMXEXITHANDLER hmR0VmxExitMtfNested;
438static FNVMXEXITHANDLER hmR0VmxExitMonitorNested;
439static FNVMXEXITHANDLER hmR0VmxExitPauseNested;
440static FNVMXEXITHANDLERNSRC hmR0VmxExitTprBelowThresholdNested;
441static FNVMXEXITHANDLER hmR0VmxExitApicAccessNested;
442static FNVMXEXITHANDLER hmR0VmxExitApicWriteNested;
443static FNVMXEXITHANDLER hmR0VmxExitVirtEoiNested;
444static FNVMXEXITHANDLER hmR0VmxExitRdtscpNested;
445static FNVMXEXITHANDLERNSRC hmR0VmxExitWbinvdNested;
446static FNVMXEXITHANDLER hmR0VmxExitInvpcidNested;
447static FNVMXEXITHANDLERNSRC hmR0VmxExitErrInvalidGuestStateNested;
448static FNVMXEXITHANDLER hmR0VmxExitInstrNested;
449static FNVMXEXITHANDLER hmR0VmxExitInstrWithInfoNested;
450/** @} */
451#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
452
453
454/*********************************************************************************************************************************
455* Global Variables *
456*********************************************************************************************************************************/
457#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
458/**
459 * Array of all VMCS fields.
460 * Any fields added to the VT-x spec. should be added here.
461 *
462 * Currently only used to derive shadow VMCS fields for hardware-assisted execution
463 * of nested-guests.
464 */
465static const uint32_t g_aVmcsFields[] =
466{
467 /* 16-bit control fields. */
468 VMX_VMCS16_VPID,
469 VMX_VMCS16_POSTED_INT_NOTIFY_VECTOR,
470 VMX_VMCS16_EPTP_INDEX,
471
472 /* 16-bit guest-state fields. */
473 VMX_VMCS16_GUEST_ES_SEL,
474 VMX_VMCS16_GUEST_CS_SEL,
475 VMX_VMCS16_GUEST_SS_SEL,
476 VMX_VMCS16_GUEST_DS_SEL,
477 VMX_VMCS16_GUEST_FS_SEL,
478 VMX_VMCS16_GUEST_GS_SEL,
479 VMX_VMCS16_GUEST_LDTR_SEL,
480 VMX_VMCS16_GUEST_TR_SEL,
481 VMX_VMCS16_GUEST_INTR_STATUS,
482 VMX_VMCS16_GUEST_PML_INDEX,
483
484 /* 16-bit host-state fields. */
485 VMX_VMCS16_HOST_ES_SEL,
486 VMX_VMCS16_HOST_CS_SEL,
487 VMX_VMCS16_HOST_SS_SEL,
488 VMX_VMCS16_HOST_DS_SEL,
489 VMX_VMCS16_HOST_FS_SEL,
490 VMX_VMCS16_HOST_GS_SEL,
491 VMX_VMCS16_HOST_TR_SEL,
492
493 /* 64-bit control fields. */
494 VMX_VMCS64_CTRL_IO_BITMAP_A_FULL,
495 VMX_VMCS64_CTRL_IO_BITMAP_A_HIGH,
496 VMX_VMCS64_CTRL_IO_BITMAP_B_FULL,
497 VMX_VMCS64_CTRL_IO_BITMAP_B_HIGH,
498 VMX_VMCS64_CTRL_MSR_BITMAP_FULL,
499 VMX_VMCS64_CTRL_MSR_BITMAP_HIGH,
500 VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL,
501 VMX_VMCS64_CTRL_EXIT_MSR_STORE_HIGH,
502 VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL,
503 VMX_VMCS64_CTRL_EXIT_MSR_LOAD_HIGH,
504 VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL,
505 VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_HIGH,
506 VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL,
507 VMX_VMCS64_CTRL_EXEC_VMCS_PTR_HIGH,
508 VMX_VMCS64_CTRL_EXEC_PML_ADDR_FULL,
509 VMX_VMCS64_CTRL_EXEC_PML_ADDR_HIGH,
510 VMX_VMCS64_CTRL_TSC_OFFSET_FULL,
511 VMX_VMCS64_CTRL_TSC_OFFSET_HIGH,
512 VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL,
513 VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_HIGH,
514 VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL,
515 VMX_VMCS64_CTRL_APIC_ACCESSADDR_HIGH,
516 VMX_VMCS64_CTRL_POSTED_INTR_DESC_FULL,
517 VMX_VMCS64_CTRL_POSTED_INTR_DESC_HIGH,
518 VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL,
519 VMX_VMCS64_CTRL_VMFUNC_CTRLS_HIGH,
520 VMX_VMCS64_CTRL_EPTP_FULL,
521 VMX_VMCS64_CTRL_EPTP_HIGH,
522 VMX_VMCS64_CTRL_EOI_BITMAP_0_FULL,
523 VMX_VMCS64_CTRL_EOI_BITMAP_0_HIGH,
524 VMX_VMCS64_CTRL_EOI_BITMAP_1_FULL,
525 VMX_VMCS64_CTRL_EOI_BITMAP_1_HIGH,
526 VMX_VMCS64_CTRL_EOI_BITMAP_2_FULL,
527 VMX_VMCS64_CTRL_EOI_BITMAP_2_HIGH,
528 VMX_VMCS64_CTRL_EOI_BITMAP_3_FULL,
529 VMX_VMCS64_CTRL_EOI_BITMAP_3_HIGH,
530 VMX_VMCS64_CTRL_EPTP_LIST_FULL,
531 VMX_VMCS64_CTRL_EPTP_LIST_HIGH,
532 VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL,
533 VMX_VMCS64_CTRL_VMREAD_BITMAP_HIGH,
534 VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL,
535 VMX_VMCS64_CTRL_VMWRITE_BITMAP_HIGH,
536 VMX_VMCS64_CTRL_VE_XCPT_INFO_ADDR_FULL,
537 VMX_VMCS64_CTRL_VE_XCPT_INFO_ADDR_HIGH,
538 VMX_VMCS64_CTRL_XSS_EXITING_BITMAP_FULL,
539 VMX_VMCS64_CTRL_XSS_EXITING_BITMAP_HIGH,
540 VMX_VMCS64_CTRL_ENCLS_EXITING_BITMAP_FULL,
541 VMX_VMCS64_CTRL_ENCLS_EXITING_BITMAP_HIGH,
542 VMX_VMCS64_CTRL_SPPTP_FULL,
543 VMX_VMCS64_CTRL_SPPTP_HIGH,
544 VMX_VMCS64_CTRL_TSC_MULTIPLIER_FULL,
545 VMX_VMCS64_CTRL_TSC_MULTIPLIER_HIGH,
546 VMX_VMCS64_CTRL_PROC_EXEC3_FULL,
547 VMX_VMCS64_CTRL_PROC_EXEC3_HIGH,
548 VMX_VMCS64_CTRL_ENCLV_EXITING_BITMAP_FULL,
549 VMX_VMCS64_CTRL_ENCLV_EXITING_BITMAP_HIGH,
550
551 /* 64-bit read-only data fields. */
552 VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL,
553 VMX_VMCS64_RO_GUEST_PHYS_ADDR_HIGH,
554
555 /* 64-bit guest-state fields. */
556 VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL,
557 VMX_VMCS64_GUEST_VMCS_LINK_PTR_HIGH,
558 VMX_VMCS64_GUEST_DEBUGCTL_FULL,
559 VMX_VMCS64_GUEST_DEBUGCTL_HIGH,
560 VMX_VMCS64_GUEST_PAT_FULL,
561 VMX_VMCS64_GUEST_PAT_HIGH,
562 VMX_VMCS64_GUEST_EFER_FULL,
563 VMX_VMCS64_GUEST_EFER_HIGH,
564 VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL,
565 VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_HIGH,
566 VMX_VMCS64_GUEST_PDPTE0_FULL,
567 VMX_VMCS64_GUEST_PDPTE0_HIGH,
568 VMX_VMCS64_GUEST_PDPTE1_FULL,
569 VMX_VMCS64_GUEST_PDPTE1_HIGH,
570 VMX_VMCS64_GUEST_PDPTE2_FULL,
571 VMX_VMCS64_GUEST_PDPTE2_HIGH,
572 VMX_VMCS64_GUEST_PDPTE3_FULL,
573 VMX_VMCS64_GUEST_PDPTE3_HIGH,
574 VMX_VMCS64_GUEST_BNDCFGS_FULL,
575 VMX_VMCS64_GUEST_BNDCFGS_HIGH,
576 VMX_VMCS64_GUEST_RTIT_CTL_FULL,
577 VMX_VMCS64_GUEST_RTIT_CTL_HIGH,
578 VMX_VMCS64_GUEST_PKRS_FULL,
579 VMX_VMCS64_GUEST_PKRS_HIGH,
580
581 /* 64-bit host-state fields. */
582 VMX_VMCS64_HOST_PAT_FULL,
583 VMX_VMCS64_HOST_PAT_HIGH,
584 VMX_VMCS64_HOST_EFER_FULL,
585 VMX_VMCS64_HOST_EFER_HIGH,
586 VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL,
587 VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_HIGH,
588 VMX_VMCS64_HOST_PKRS_FULL,
589 VMX_VMCS64_HOST_PKRS_HIGH,
590
591 /* 32-bit control fields. */
592 VMX_VMCS32_CTRL_PIN_EXEC,
593 VMX_VMCS32_CTRL_PROC_EXEC,
594 VMX_VMCS32_CTRL_EXCEPTION_BITMAP,
595 VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK,
596 VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH,
597 VMX_VMCS32_CTRL_CR3_TARGET_COUNT,
598 VMX_VMCS32_CTRL_EXIT,
599 VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT,
600 VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT,
601 VMX_VMCS32_CTRL_ENTRY,
602 VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT,
603 VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO,
604 VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE,
605 VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH,
606 VMX_VMCS32_CTRL_TPR_THRESHOLD,
607 VMX_VMCS32_CTRL_PROC_EXEC2,
608 VMX_VMCS32_CTRL_PLE_GAP,
609 VMX_VMCS32_CTRL_PLE_WINDOW,
610
611 /* 32-bit read-only fields. */
612 VMX_VMCS32_RO_VM_INSTR_ERROR,
613 VMX_VMCS32_RO_EXIT_REASON,
614 VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO,
615 VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE,
616 VMX_VMCS32_RO_IDT_VECTORING_INFO,
617 VMX_VMCS32_RO_IDT_VECTORING_ERROR_CODE,
618 VMX_VMCS32_RO_EXIT_INSTR_LENGTH,
619 VMX_VMCS32_RO_EXIT_INSTR_INFO,
620
621 /* 32-bit guest-state fields. */
622 VMX_VMCS32_GUEST_ES_LIMIT,
623 VMX_VMCS32_GUEST_CS_LIMIT,
624 VMX_VMCS32_GUEST_SS_LIMIT,
625 VMX_VMCS32_GUEST_DS_LIMIT,
626 VMX_VMCS32_GUEST_FS_LIMIT,
627 VMX_VMCS32_GUEST_GS_LIMIT,
628 VMX_VMCS32_GUEST_LDTR_LIMIT,
629 VMX_VMCS32_GUEST_TR_LIMIT,
630 VMX_VMCS32_GUEST_GDTR_LIMIT,
631 VMX_VMCS32_GUEST_IDTR_LIMIT,
632 VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS,
633 VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS,
634 VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS,
635 VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS,
636 VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS,
637 VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS,
638 VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS,
639 VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS,
640 VMX_VMCS32_GUEST_INT_STATE,
641 VMX_VMCS32_GUEST_ACTIVITY_STATE,
642 VMX_VMCS32_GUEST_SMBASE,
643 VMX_VMCS32_GUEST_SYSENTER_CS,
644 VMX_VMCS32_PREEMPT_TIMER_VALUE,
645
646 /* 32-bit host-state fields. */
647 VMX_VMCS32_HOST_SYSENTER_CS,
648
649 /* Natural-width control fields. */
650 VMX_VMCS_CTRL_CR0_MASK,
651 VMX_VMCS_CTRL_CR4_MASK,
652 VMX_VMCS_CTRL_CR0_READ_SHADOW,
653 VMX_VMCS_CTRL_CR4_READ_SHADOW,
654 VMX_VMCS_CTRL_CR3_TARGET_VAL0,
655 VMX_VMCS_CTRL_CR3_TARGET_VAL1,
656 VMX_VMCS_CTRL_CR3_TARGET_VAL2,
657 VMX_VMCS_CTRL_CR3_TARGET_VAL3,
658
659 /* Natural-width read-only data fields. */
660 VMX_VMCS_RO_EXIT_QUALIFICATION,
661 VMX_VMCS_RO_IO_RCX,
662 VMX_VMCS_RO_IO_RSI,
663 VMX_VMCS_RO_IO_RDI,
664 VMX_VMCS_RO_IO_RIP,
665 VMX_VMCS_RO_GUEST_LINEAR_ADDR,
666
667 /* Natural-width guest-state fields. */
668 VMX_VMCS_GUEST_CR0,
669 VMX_VMCS_GUEST_CR3,
670 VMX_VMCS_GUEST_CR4,
671 VMX_VMCS_GUEST_ES_BASE,
672 VMX_VMCS_GUEST_CS_BASE,
673 VMX_VMCS_GUEST_SS_BASE,
674 VMX_VMCS_GUEST_DS_BASE,
675 VMX_VMCS_GUEST_FS_BASE,
676 VMX_VMCS_GUEST_GS_BASE,
677 VMX_VMCS_GUEST_LDTR_BASE,
678 VMX_VMCS_GUEST_TR_BASE,
679 VMX_VMCS_GUEST_GDTR_BASE,
680 VMX_VMCS_GUEST_IDTR_BASE,
681 VMX_VMCS_GUEST_DR7,
682 VMX_VMCS_GUEST_RSP,
683 VMX_VMCS_GUEST_RIP,
684 VMX_VMCS_GUEST_RFLAGS,
685 VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS,
686 VMX_VMCS_GUEST_SYSENTER_ESP,
687 VMX_VMCS_GUEST_SYSENTER_EIP,
688 VMX_VMCS_GUEST_S_CET,
689 VMX_VMCS_GUEST_SSP,
690 VMX_VMCS_GUEST_INTR_SSP_TABLE_ADDR,
691
692 /* Natural-width host-state fields */
693 VMX_VMCS_HOST_CR0,
694 VMX_VMCS_HOST_CR3,
695 VMX_VMCS_HOST_CR4,
696 VMX_VMCS_HOST_FS_BASE,
697 VMX_VMCS_HOST_GS_BASE,
698 VMX_VMCS_HOST_TR_BASE,
699 VMX_VMCS_HOST_GDTR_BASE,
700 VMX_VMCS_HOST_IDTR_BASE,
701 VMX_VMCS_HOST_SYSENTER_ESP,
702 VMX_VMCS_HOST_SYSENTER_EIP,
703 VMX_VMCS_HOST_RSP,
704 VMX_VMCS_HOST_RIP,
705 VMX_VMCS_HOST_S_CET,
706 VMX_VMCS_HOST_SSP,
707 VMX_VMCS_HOST_INTR_SSP_TABLE_ADDR
708};
709#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
710
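/*
 * Sketch of the "derive shadow VMCS fields" use mentioned above (illustrative only;
 * the per-field filter and the destination array are assumptions, not code from this
 * file): walk g_aVmcsFields and keep the subset that is valid for the shadow VMCS.
 *
 * @code
 *     uint32_t cShadowFields = 0;
 *     for (uint32_t i = 0; i < RT_ELEMENTS(g_aVmcsFields); i++)
 *     {
 *         uint32_t const uVmcsField = g_aVmcsFields[i];
 *         if (fFieldSupportedByCpuAndExposedToNstGst)   // assumed predicate
 *             paShadowVmcsFields[cShadowFields++] = uVmcsField;
 *     }
 * @endcode
 */
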
711#ifdef VBOX_STRICT
712static const uint32_t g_aVmcsSegBase[] =
713{
714 VMX_VMCS_GUEST_ES_BASE,
715 VMX_VMCS_GUEST_CS_BASE,
716 VMX_VMCS_GUEST_SS_BASE,
717 VMX_VMCS_GUEST_DS_BASE,
718 VMX_VMCS_GUEST_FS_BASE,
719 VMX_VMCS_GUEST_GS_BASE
720};
721static const uint32_t g_aVmcsSegSel[] =
722{
723 VMX_VMCS16_GUEST_ES_SEL,
724 VMX_VMCS16_GUEST_CS_SEL,
725 VMX_VMCS16_GUEST_SS_SEL,
726 VMX_VMCS16_GUEST_DS_SEL,
727 VMX_VMCS16_GUEST_FS_SEL,
728 VMX_VMCS16_GUEST_GS_SEL
729};
730static const uint32_t g_aVmcsSegLimit[] =
731{
732 VMX_VMCS32_GUEST_ES_LIMIT,
733 VMX_VMCS32_GUEST_CS_LIMIT,
734 VMX_VMCS32_GUEST_SS_LIMIT,
735 VMX_VMCS32_GUEST_DS_LIMIT,
736 VMX_VMCS32_GUEST_FS_LIMIT,
737 VMX_VMCS32_GUEST_GS_LIMIT
738};
739static const uint32_t g_aVmcsSegAttr[] =
740{
741 VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS,
742 VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS,
743 VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS,
744 VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS,
745 VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS,
746 VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS
747};
748AssertCompile(RT_ELEMENTS(g_aVmcsSegSel) == X86_SREG_COUNT);
749AssertCompile(RT_ELEMENTS(g_aVmcsSegLimit) == X86_SREG_COUNT);
750AssertCompile(RT_ELEMENTS(g_aVmcsSegBase) == X86_SREG_COUNT);
751AssertCompile(RT_ELEMENTS(g_aVmcsSegAttr) == X86_SREG_COUNT);
752#endif /* VBOX_STRICT */
753
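/*
 * Illustrative use of the strict-build segment tables above (added for exposition):
 * the arrays are indexed by X86_SREG_XXX, so all four VMCS components of a segment
 * register can be read generically, e.g. for CS:
 *
 * @code
 *     uint64_t u64Base;
 *     uint16_t u16Sel;
 *     uint32_t u32Limit, u32Attr;
 *     VMXReadVmcsNw(g_aVmcsSegBase[X86_SREG_CS],  &u64Base);
 *     VMXReadVmcs16(g_aVmcsSegSel[X86_SREG_CS],   &u16Sel);
 *     VMXReadVmcs32(g_aVmcsSegLimit[X86_SREG_CS], &u32Limit);
 *     VMXReadVmcs32(g_aVmcsSegAttr[X86_SREG_CS],  &u32Attr);
 * @endcode
 */
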
754#ifdef HMVMX_USE_FUNCTION_TABLE
755/**
756 * VMX_EXIT dispatch table.
757 */
758static const struct CLANG11NOTHROWWEIRDNESS { PFNVMXEXITHANDLER pfn; } g_aVMExitHandlers[VMX_EXIT_MAX + 1] =
759{
760 /* 0 VMX_EXIT_XCPT_OR_NMI */ { hmR0VmxExitXcptOrNmi },
761 /* 1 VMX_EXIT_EXT_INT */ { hmR0VmxExitExtInt },
762 /* 2 VMX_EXIT_TRIPLE_FAULT */ { hmR0VmxExitTripleFault },
763 /* 3 VMX_EXIT_INIT_SIGNAL */ { hmR0VmxExitErrUnexpected },
764 /* 4 VMX_EXIT_SIPI */ { hmR0VmxExitErrUnexpected },
765 /* 5 VMX_EXIT_IO_SMI */ { hmR0VmxExitErrUnexpected },
766 /* 6 VMX_EXIT_SMI */ { hmR0VmxExitErrUnexpected },
767 /* 7 VMX_EXIT_INT_WINDOW */ { hmR0VmxExitIntWindow },
768 /* 8 VMX_EXIT_NMI_WINDOW */ { hmR0VmxExitNmiWindow },
769 /* 9 VMX_EXIT_TASK_SWITCH */ { hmR0VmxExitTaskSwitch },
770 /* 10 VMX_EXIT_CPUID */ { hmR0VmxExitCpuid },
771 /* 11 VMX_EXIT_GETSEC */ { hmR0VmxExitGetsec },
772 /* 12 VMX_EXIT_HLT */ { hmR0VmxExitHlt },
773 /* 13 VMX_EXIT_INVD */ { hmR0VmxExitInvd },
774 /* 14 VMX_EXIT_INVLPG */ { hmR0VmxExitInvlpg },
775 /* 15 VMX_EXIT_RDPMC */ { hmR0VmxExitRdpmc },
776 /* 16 VMX_EXIT_RDTSC */ { hmR0VmxExitRdtsc },
777 /* 17 VMX_EXIT_RSM */ { hmR0VmxExitErrUnexpected },
778 /* 18 VMX_EXIT_VMCALL */ { hmR0VmxExitVmcall },
779#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
780 /* 19 VMX_EXIT_VMCLEAR */ { hmR0VmxExitVmclear },
781 /* 20 VMX_EXIT_VMLAUNCH */ { hmR0VmxExitVmlaunch },
782 /* 21 VMX_EXIT_VMPTRLD */ { hmR0VmxExitVmptrld },
783 /* 22 VMX_EXIT_VMPTRST */ { hmR0VmxExitVmptrst },
784 /* 23 VMX_EXIT_VMREAD */ { hmR0VmxExitVmread },
785 /* 24 VMX_EXIT_VMRESUME */ { hmR0VmxExitVmresume },
786 /* 25 VMX_EXIT_VMWRITE */ { hmR0VmxExitVmwrite },
787 /* 26 VMX_EXIT_VMXOFF */ { hmR0VmxExitVmxoff },
788 /* 27 VMX_EXIT_VMXON */ { hmR0VmxExitVmxon },
789#else
790 /* 19 VMX_EXIT_VMCLEAR */ { hmR0VmxExitSetPendingXcptUD },
791 /* 20 VMX_EXIT_VMLAUNCH */ { hmR0VmxExitSetPendingXcptUD },
792 /* 21 VMX_EXIT_VMPTRLD */ { hmR0VmxExitSetPendingXcptUD },
793 /* 22 VMX_EXIT_VMPTRST */ { hmR0VmxExitSetPendingXcptUD },
794 /* 23 VMX_EXIT_VMREAD */ { hmR0VmxExitSetPendingXcptUD },
795 /* 24 VMX_EXIT_VMRESUME */ { hmR0VmxExitSetPendingXcptUD },
796 /* 25 VMX_EXIT_VMWRITE */ { hmR0VmxExitSetPendingXcptUD },
797 /* 26 VMX_EXIT_VMXOFF */ { hmR0VmxExitSetPendingXcptUD },
798 /* 27 VMX_EXIT_VMXON */ { hmR0VmxExitSetPendingXcptUD },
799#endif
800 /* 28 VMX_EXIT_MOV_CRX */ { hmR0VmxExitMovCRx },
801 /* 29 VMX_EXIT_MOV_DRX */ { hmR0VmxExitMovDRx },
802 /* 30 VMX_EXIT_IO_INSTR */ { hmR0VmxExitIoInstr },
803 /* 31 VMX_EXIT_RDMSR */ { hmR0VmxExitRdmsr },
804 /* 32 VMX_EXIT_WRMSR */ { hmR0VmxExitWrmsr },
805 /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ { hmR0VmxExitErrInvalidGuestState },
806 /* 34 VMX_EXIT_ERR_MSR_LOAD */ { hmR0VmxExitErrUnexpected },
807 /* 35 UNDEFINED */ { hmR0VmxExitErrUnexpected },
808 /* 36 VMX_EXIT_MWAIT */ { hmR0VmxExitMwait },
809 /* 37 VMX_EXIT_MTF */ { hmR0VmxExitMtf },
810 /* 38 UNDEFINED */ { hmR0VmxExitErrUnexpected },
811 /* 39 VMX_EXIT_MONITOR */ { hmR0VmxExitMonitor },
812 /* 40 VMX_EXIT_PAUSE */ { hmR0VmxExitPause },
813 /* 41 VMX_EXIT_ERR_MACHINE_CHECK */ { hmR0VmxExitErrUnexpected },
814 /* 42 UNDEFINED */ { hmR0VmxExitErrUnexpected },
815 /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ { hmR0VmxExitTprBelowThreshold },
816 /* 44 VMX_EXIT_APIC_ACCESS */ { hmR0VmxExitApicAccess },
817 /* 45 VMX_EXIT_VIRTUALIZED_EOI */ { hmR0VmxExitErrUnexpected },
818 /* 46 VMX_EXIT_GDTR_IDTR_ACCESS */ { hmR0VmxExitErrUnexpected },
819 /* 47 VMX_EXIT_LDTR_TR_ACCESS */ { hmR0VmxExitErrUnexpected },
820 /* 48 VMX_EXIT_EPT_VIOLATION */ { hmR0VmxExitEptViolation },
821 /* 49 VMX_EXIT_EPT_MISCONFIG */ { hmR0VmxExitEptMisconfig },
822 /* 50 VMX_EXIT_INVEPT */ { hmR0VmxExitSetPendingXcptUD },
823 /* 51 VMX_EXIT_RDTSCP */ { hmR0VmxExitRdtscp },
824 /* 52 VMX_EXIT_PREEMPT_TIMER */ { hmR0VmxExitPreemptTimer },
825#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
826 /* 53 VMX_EXIT_INVVPID */ { hmR0VmxExitInvvpid },
827#else
828 /* 53 VMX_EXIT_INVVPID */ { hmR0VmxExitSetPendingXcptUD },
829#endif
830 /* 54 VMX_EXIT_WBINVD */ { hmR0VmxExitWbinvd },
831 /* 55 VMX_EXIT_XSETBV */ { hmR0VmxExitXsetbv },
832 /* 56 VMX_EXIT_APIC_WRITE */ { hmR0VmxExitErrUnexpected },
833 /* 57 VMX_EXIT_RDRAND */ { hmR0VmxExitErrUnexpected },
834 /* 58 VMX_EXIT_INVPCID */ { hmR0VmxExitInvpcid },
835 /* 59 VMX_EXIT_VMFUNC */ { hmR0VmxExitErrUnexpected },
836 /* 60 VMX_EXIT_ENCLS */ { hmR0VmxExitErrUnexpected },
837 /* 61 VMX_EXIT_RDSEED */ { hmR0VmxExitErrUnexpected },
838 /* 62 VMX_EXIT_PML_FULL */ { hmR0VmxExitErrUnexpected },
839 /* 63 VMX_EXIT_XSAVES */ { hmR0VmxExitErrUnexpected },
840 /* 64 VMX_EXIT_XRSTORS */ { hmR0VmxExitErrUnexpected },
841 /* 65 UNDEFINED */ { hmR0VmxExitErrUnexpected },
842 /* 66 VMX_EXIT_SPP_EVENT */ { hmR0VmxExitErrUnexpected },
843 /* 67 VMX_EXIT_UMWAIT */ { hmR0VmxExitErrUnexpected },
844 /* 68 VMX_EXIT_TPAUSE */ { hmR0VmxExitErrUnexpected },
845 /* 69 VMX_EXIT_LOADIWKEY */ { hmR0VmxExitErrUnexpected },
846};
847#endif /* HMVMX_USE_FUNCTION_TABLE */
848
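/*
 * Illustrative dispatch sketch (the real dispatcher lives further down in this file;
 * the bounds handling shown here is an assumption): the basic exit reason indexes the
 * table directly, anything out of range goes to the "unexpected" handler.
 *
 * @code
 *     VBOXSTRICTRC rcStrict;
 *     if (RT_LIKELY(pVmxTransient->uExitReason <= VMX_EXIT_MAX))
 *         rcStrict = g_aVMExitHandlers[pVmxTransient->uExitReason].pfn(pVCpu, pVmxTransient);
 *     else
 *         rcStrict = hmR0VmxExitErrUnexpected(pVCpu, pVmxTransient);
 * @endcode
 */
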
849#if defined(VBOX_STRICT) && defined(LOG_ENABLED)
850static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] =
851{
852 /* 0 */ "(Not Used)",
853 /* 1 */ "VMCALL executed in VMX root operation.",
854 /* 2 */ "VMCLEAR with invalid physical address.",
855 /* 3 */ "VMCLEAR with VMXON pointer.",
856 /* 4 */ "VMLAUNCH with non-clear VMCS.",
857 /* 5 */ "VMRESUME with non-launched VMCS.",
858 /* 6 */ "VMRESUME after VMXOFF",
859 /* 7 */ "VM-entry with invalid control fields.",
860 /* 8 */ "VM-entry with invalid host state fields.",
861 /* 9 */ "VMPTRLD with invalid physical address.",
862 /* 10 */ "VMPTRLD with VMXON pointer.",
863 /* 11 */ "VMPTRLD with incorrect revision identifier.",
864 /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.",
865 /* 13 */ "VMWRITE to read-only VMCS component.",
866 /* 14 */ "(Not Used)",
867 /* 15 */ "VMXON executed in VMX root operation.",
868 /* 16 */ "VM-entry with invalid executive-VMCS pointer.",
869 /* 17 */ "VM-entry with non-launched executing VMCS.",
870 /* 18 */ "VM-entry with executive-VMCS pointer not VMXON pointer.",
871 /* 19 */ "VMCALL with non-clear VMCS.",
872 /* 20 */ "VMCALL with invalid VM-exit control fields.",
873 /* 21 */ "(Not Used)",
874 /* 22 */ "VMCALL with incorrect MSEG revision identifier.",
875 /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.",
876 /* 24 */ "VMCALL with invalid SMM-monitor features.",
877 /* 25 */ "VM-entry with invalid VM-execution control fields in executive VMCS.",
878 /* 26 */ "VM-entry with events blocked by MOV SS.",
879 /* 27 */ "(Not Used)",
880 /* 28 */ "Invalid operand to INVEPT/INVVPID."
881};
882#endif /* VBOX_STRICT && LOG_ENABLED */
883
884
885/**
886 * Checks if the given MSR is part of the lastbranch-from-IP MSR stack.
887 * @returns @c true if it's part of the LBR stack, @c false otherwise.
888 *
889 * @param pVM The cross context VM structure.
890 * @param idMsr The MSR.
891 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
892 * Optional, can be NULL.
893 *
894 * @remarks Must only be called when LBR is enabled.
895 */
896DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchFromMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
897{
898 Assert(pVM->hmr0.s.vmx.fLbr);
899 Assert(pVM->hmr0.s.vmx.idLbrFromIpMsrFirst);
900 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
901 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
902 if (idxMsr < cLbrStack)
903 {
904 if (pidxMsr)
905 *pidxMsr = idxMsr;
906 return true;
907 }
908 return false;
909}
910
911
912/**
913 * Checks if the given MSR is part of the lastbranch-to-IP MSR stack.
914 * @returns @c true if it's part of the LBR stack, @c false otherwise.
915 *
916 * @param pVM The cross context VM structure.
917 * @param idMsr The MSR.
918 * @param pidxMsr Where to store the index of the MSR in the LBR MSR array.
919 * Optional, can be NULL.
920 *
921 * @remarks Must only be called when LBR is enabled and when lastbranch-to-IP MSRs
922 * are supported by the CPU (see hmR0VmxSetupLbrMsrRange).
923 */
924DECL_FORCE_INLINE(bool) hmR0VmxIsLbrBranchToMsr(PCVMCC pVM, uint32_t idMsr, uint32_t *pidxMsr)
925{
926 Assert(pVM->hmr0.s.vmx.fLbr);
927 if (pVM->hmr0.s.vmx.idLbrToIpMsrFirst)
928 {
929 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrToIpMsrLast - pVM->hmr0.s.vmx.idLbrToIpMsrFirst + 1;
930 uint32_t const idxMsr = idMsr - pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
931 if (idxMsr < cLbrStack)
932 {
933 if (pidxMsr)
934 *pidxMsr = idxMsr;
935 return true;
936 }
937 }
938 return false;
939}
940
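/*
 * Illustrative caller pattern for the two LBR helpers above (the array name used in
 * the snippet is an assumption, not taken from this file): on an LBR MSR access the
 * MSR id is mapped onto an index into the saved last-branch stack.
 *
 * @code
 *     uint32_t idxLbr;
 *     if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbr))
 *         uValue = pau64LbrFromIpMsrs[idxLbr];   // assumed per-VCPU save area
 * @endcode
 */
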
941
942/**
943 * Gets the CR0 guest/host mask.
944 *
945 * These bits typically do not change through the lifetime of a VM. Any bit set in
946 * this mask is owned by the host/hypervisor and would cause a VM-exit when modified
947 * by the guest.
948 *
949 * @returns The CR0 guest/host mask.
950 * @param pVCpu The cross context virtual CPU structure.
951 */
952static uint64_t hmR0VmxGetFixedCr0Mask(PCVMCPUCC pVCpu)
953{
954 /*
955 * Modifications by the guest to CR0 bits that VT-x ignores saving/restoring (CD, ET, NW)
956 * and to CR0 bits that we require for shadow paging (PG) must cause VM-exits.
957 *
958 * Furthermore, modifications to any bits that are reserved/unspecified currently
959 * by the Intel spec. must also cause a VM-exit. This prevents unpredictable behavior
960 * when future CPUs specify and use currently reserved/unspecified bits.
961 */
962 /** @todo Avoid intercepting CR0.PE with unrestricted guest execution. Fix PGM
963 * enmGuestMode to be in-sync with the current mode. See @bugref{6398}
964 * and @bugref{6944}. */
965 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
966 return ( X86_CR0_PE
967 | X86_CR0_NE
968 | (pVM->hmr0.s.fNestedPaging ? 0 : X86_CR0_WP)
969 | X86_CR0_PG
970 | VMX_EXIT_HOST_CR0_IGNORE_MASK);
971}
972
973
974/**
975 * Gets the CR4 guest/host mask.
976 *
977 * These bits typically do not change through the lifetime of a VM. Any bit set in
978 * this mask is owned by the host/hypervisor and would cause a VM-exit when modified
979 * by the guest.
980 *
981 * @returns The CR4 guest/host mask.
982 * @param pVCpu The cross context virtual CPU structure.
983 */
984static uint64_t hmR0VmxGetFixedCr4Mask(PCVMCPUCC pVCpu)
985{
986 /*
987 * We construct a mask of all CR4 bits that the guest can modify without causing
988 * a VM-exit. Then invert this mask to obtain all CR4 bits that should cause
989 * a VM-exit when the guest attempts to modify them when executing using
990 * hardware-assisted VMX.
991 *
992 * When a feature is not exposed to the guest (and may be present on the host),
993 * we want to intercept guest modifications to the bit so we can emulate proper
994 * behavior (e.g., #GP).
995 *
996 * Furthermore, only modifications to those bits that don't require immediate
997 * emulation are allowed. For example, PCIDE is excluded because its behavior
998 * depends on CR3, which might not always be the guest value while executing
999 * using hardware-assisted VMX.
1000 */
1001 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
1002 bool const fFsGsBase = pVM->cpum.ro.GuestFeatures.fFsGsBase;
1003 bool const fXSaveRstor = pVM->cpum.ro.GuestFeatures.fXSaveRstor;
1004 bool const fFxSaveRstor = pVM->cpum.ro.GuestFeatures.fFxSaveRstor;
1005
1006 /*
1007 * Paranoia.
1008 * Ensure features exposed to the guest are present on the host.
1009 */
1010 Assert(!fFsGsBase || pVM->cpum.ro.HostFeatures.fFsGsBase);
1011 Assert(!fXSaveRstor || pVM->cpum.ro.HostFeatures.fXSaveRstor);
1012 Assert(!fFxSaveRstor || pVM->cpum.ro.HostFeatures.fFxSaveRstor);
1013
1014 uint64_t const fGstMask = ( X86_CR4_PVI
1015 | X86_CR4_TSD
1016 | X86_CR4_DE
1017 | X86_CR4_MCE
1018 | X86_CR4_PCE
1019 | X86_CR4_OSXMMEEXCPT
1020 | (fFsGsBase ? X86_CR4_FSGSBASE : 0)
1021 | (fXSaveRstor ? X86_CR4_OSXSAVE : 0)
1022 | (fFxSaveRstor ? X86_CR4_OSFXSR : 0));
1023 return ~fGstMask;
1024}
1025
1026
1027/**
1028 * Gets the active (in use) VMCS info. object for the specified VCPU.
1029 *
1030 * This is either the guest or nested-guest VMCS info. and need not
1031 * pertain to the "current" VMCS (in the VMX definition of the term). For instance,
1032 * if the VM-entry failed due to an invalid-guest state, we may have "cleared" the
1033 * current VMCS while returning to ring-3. However, the VMCS info. object for that
1034 * VMCS would still be active and returned here so that we could dump the VMCS
1035 * fields to ring-3 for diagnostics. This function is thus only used to
1036 * distinguish between the nested-guest and the guest VMCS.
1037 *
1038 * @returns The active VMCS information.
1039 * @param pVCpu The cross context virtual CPU structure.
1040 *
1041 * @thread EMT.
1042 * @remarks This function may be called with preemption or interrupts disabled!
1043 */
1044DECLINLINE(PVMXVMCSINFO) hmGetVmxActiveVmcsInfo(PVMCPUCC pVCpu)
1045{
1046 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
1047 return &pVCpu->hmr0.s.vmx.VmcsInfo;
1048 return &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
1049}
1050
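/*
 * Typical illustrative use (not a quote from this file): most code paths fetch the
 * active VMCS info. object once and then operate on its cached fields.
 *
 * @code
 *     PVMXVMCSINFO pVmcsInfo   = hmGetVmxActiveVmcsInfo(pVCpu);
 *     uint32_t const uProcCtls = pVmcsInfo->u32ProcCtls;
 * @endcode
 */
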
1051
1052/**
1053 * Returns whether the VM-exit MSR-store area differs from the VM-exit MSR-load
1054 * area.
1055 *
1056 * @returns @c true if it's different, @c false otherwise.
1057 * @param pVmcsInfo The VMCS info. object.
1058 */
1059DECL_FORCE_INLINE(bool) hmR0VmxIsSeparateExitMsrStoreAreaVmcs(PCVMXVMCSINFO pVmcsInfo)
1060{
1061 return RT_BOOL( pVmcsInfo->pvGuestMsrStore != pVmcsInfo->pvGuestMsrLoad
1062 && pVmcsInfo->pvGuestMsrStore);
1063}
1064
1065
1066/**
1067 * Sets the given Processor-based VM-execution controls.
1068 *
1069 * @param pVmxTransient The VMX-transient structure.
1070 * @param uProcCtls The Processor-based VM-execution controls to set.
1071 */
1072static void hmR0VmxSetProcCtlsVmcs(PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
1073{
1074 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1075 if ((pVmcsInfo->u32ProcCtls & uProcCtls) != uProcCtls)
1076 {
1077 pVmcsInfo->u32ProcCtls |= uProcCtls;
1078 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
1079 AssertRC(rc);
1080 }
1081}
1082
1083
1084/**
1085 * Removes the given Processor-based VM-execution controls.
1086 *
1087 * @param pVCpu The cross context virtual CPU structure.
1088 * @param pVmxTransient The VMX-transient structure.
1089 * @param uProcCtls The Processor-based VM-execution controls to remove.
1090 *
1091 * @remarks When executing a nested-guest, this will not remove any of the specified
1092 * controls if the nested hypervisor has set any one of them.
1093 */
1094static void hmR0VmxRemoveProcCtlsVmcs(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uProcCtls)
1095{
1096 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1097 if (pVmcsInfo->u32ProcCtls & uProcCtls)
1098 {
1099#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1100 if ( !pVmxTransient->fIsNestedGuest
1101 || !CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uProcCtls))
1102#else
1103 NOREF(pVCpu);
1104 if (!pVmxTransient->fIsNestedGuest)
1105#endif
1106 {
1107 pVmcsInfo->u32ProcCtls &= ~uProcCtls;
1108 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
1109 AssertRC(rc);
1110 }
1111 }
1112}
1113
1114
1115/**
1116 * Sets the TSC offset for the current VMCS.
1117 *
1118 * @param pVmcsInfo The VMCS info. object.
1119 * @param uTscOffset The TSC offset to set.
1120 */
1121static void hmR0VmxSetTscOffsetVmcs(PVMXVMCSINFO pVmcsInfo, uint64_t uTscOffset)
1122{
1123 if (pVmcsInfo->u64TscOffset != uTscOffset)
1124 {
1125 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset);
1126 AssertRC(rc);
1127 pVmcsInfo->u64TscOffset = uTscOffset;
1128 }
1129}
1130
1131
1132/**
1133 * Adds one or more exceptions to the exception bitmap and commits it to the current
1134 * VMCS.
1135 *
1136 * @param pVmxTransient The VMX-transient structure.
1137 * @param uXcptMask The exception(s) to add.
1138 */
1139static void hmR0VmxAddXcptInterceptMask(PCVMXTRANSIENT pVmxTransient, uint32_t uXcptMask)
1140{
1141 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1142 uint32_t uXcptBitmap = pVmcsInfo->u32XcptBitmap;
1143 if ((uXcptBitmap & uXcptMask) != uXcptMask)
1144 {
1145 uXcptBitmap |= uXcptMask;
1146 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
1147 AssertRC(rc);
1148 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
1149 }
1150}
1151
1152
1153/**
1154 * Adds an exception to the exception bitmap and commits it to the current VMCS.
1155 *
1156 * @param pVmxTransient The VMX-transient structure.
1157 * @param uXcpt The exception to add.
1158 */
1159static void hmR0VmxAddXcptIntercept(PCVMXTRANSIENT pVmxTransient, uint8_t uXcpt)
1160{
1161 Assert(uXcpt <= X86_XCPT_LAST);
1162 hmR0VmxAddXcptInterceptMask(pVmxTransient, RT_BIT_32(uXcpt));
1163}
1164
1165
1166/**
1167 * Removes one or more exceptions from the exception bitmap and commits it to the
1168 * current VMCS.
1169 *
1170 * This takes care of not removing the exception intercept if a nested-guest
1171 * requires the exception to be intercepted.
1172 *
1173 * @returns VBox status code.
1174 * @param pVCpu The cross context virtual CPU structure.
1175 * @param pVmxTransient The VMX-transient structure.
1176 * @param uXcptMask The exception(s) to remove.
1177 */
1178static int hmR0VmxRemoveXcptInterceptMask(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t uXcptMask)
1179{
1180 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
1181 uint32_t u32XcptBitmap = pVmcsInfo->u32XcptBitmap;
1182 if (u32XcptBitmap & uXcptMask)
1183 {
1184#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1185 if (!pVmxTransient->fIsNestedGuest)
1186 { /* likely */ }
1187 else
1188 uXcptMask &= ~pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs.u32XcptBitmap;
1189#endif
1190#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
1191 uXcptMask &= ~( RT_BIT(X86_XCPT_BP)
1192 | RT_BIT(X86_XCPT_DE)
1193 | RT_BIT(X86_XCPT_NM)
1194 | RT_BIT(X86_XCPT_TS)
1195 | RT_BIT(X86_XCPT_UD)
1196 | RT_BIT(X86_XCPT_NP)
1197 | RT_BIT(X86_XCPT_SS)
1198 | RT_BIT(X86_XCPT_GP)
1199 | RT_BIT(X86_XCPT_PF)
1200 | RT_BIT(X86_XCPT_MF));
1201#elif defined(HMVMX_ALWAYS_TRAP_PF)
1202 uXcptMask &= ~RT_BIT(X86_XCPT_PF);
1203#endif
1204 if (uXcptMask)
1205 {
1206 /* Validate we are not removing any essential exception intercepts. */
1207 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging || !(uXcptMask & RT_BIT(X86_XCPT_PF)));
1208 NOREF(pVCpu);
1209 Assert(!(uXcptMask & RT_BIT(X86_XCPT_DB)));
1210 Assert(!(uXcptMask & RT_BIT(X86_XCPT_AC)));
1211
1212 /* Remove it from the exception bitmap. */
1213 u32XcptBitmap &= ~uXcptMask;
1214
1215 /* Commit and update the cache if necessary. */
1216 if (pVmcsInfo->u32XcptBitmap != u32XcptBitmap)
1217 {
1218 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
1219 AssertRC(rc);
1220 pVmcsInfo->u32XcptBitmap = u32XcptBitmap;
1221 }
1222 }
1223 }
1224 return VINF_SUCCESS;
1225}
1226
1227
1228/**
1229 * Remove an exceptions from the exception bitmap and commits it to the current
1230 * VMCS.
1231 *
1232 * @returns VBox status code.
1233 * @param pVCpu The cross context virtual CPU structure.
1234 * @param pVmxTransient The VMX-transient structure.
1235 * @param uXcpt The exception to remove.
1236 */
1237static int hmR0VmxRemoveXcptIntercept(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint8_t uXcpt)
1238{
1239 return hmR0VmxRemoveXcptInterceptMask(pVCpu, pVmxTransient, RT_BIT(uXcpt));
1240}
1241
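/*
 * Illustrative calls (added for exposition; the choice of \#MF here is just an
 * example): intercepts are added and dropped as the guest state changes, using the
 * helpers above.
 *
 * @code
 *     hmR0VmxAddXcptIntercept(pVmxTransient, X86_XCPT_MF);
 *     // ... later, when the intercept is no longer needed ...
 *     int rc = hmR0VmxRemoveXcptIntercept(pVCpu, pVmxTransient, X86_XCPT_MF);
 *     AssertRC(rc);
 * @endcode
 */
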
1242
1243/**
1244 * Loads the VMCS specified by the VMCS info. object.
1245 *
1246 * @returns VBox status code.
1247 * @param pVmcsInfo The VMCS info. object.
1248 *
1249 * @remarks Can be called with interrupts disabled.
1250 */
1251static int hmR0VmxLoadVmcs(PVMXVMCSINFO pVmcsInfo)
1252{
1253 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
1254 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1255
1256 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysVmcs);
1257 if (RT_SUCCESS(rc))
1258 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
1259 return rc;
1260}
1261
1262
1263/**
1264 * Clears the VMCS specified by the VMCS info. object.
1265 *
1266 * @returns VBox status code.
1267 * @param pVmcsInfo The VMCS info. object.
1268 *
1269 * @remarks Can be called with interrupts disabled.
1270 */
1271static int hmR0VmxClearVmcs(PVMXVMCSINFO pVmcsInfo)
1272{
1273 Assert(pVmcsInfo->HCPhysVmcs != 0 && pVmcsInfo->HCPhysVmcs != NIL_RTHCPHYS);
1274 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1275
1276 int rc = VMXClearVmcs(pVmcsInfo->HCPhysVmcs);
1277 if (RT_SUCCESS(rc))
1278 pVmcsInfo->fVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
1279 return rc;
1280}
1281
1282
1283#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1284/**
1285 * Loads the shadow VMCS specified by the VMCS info. object.
1286 *
1287 * @returns VBox status code.
1288 * @param pVmcsInfo The VMCS info. object.
1289 *
1290 * @remarks Can be called with interrupts disabled.
1291 */
1292static int hmR0VmxLoadShadowVmcs(PVMXVMCSINFO pVmcsInfo)
1293{
1294 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1295 Assert(pVmcsInfo->HCPhysShadowVmcs != 0 && pVmcsInfo->HCPhysShadowVmcs != NIL_RTHCPHYS);
1296
1297 int rc = VMXLoadVmcs(pVmcsInfo->HCPhysShadowVmcs);
1298 if (RT_SUCCESS(rc))
1299 pVmcsInfo->fShadowVmcsState |= VMX_V_VMCS_LAUNCH_STATE_CURRENT;
1300 return rc;
1301}
1302
1303
1304/**
1305 * Clears the shadow VMCS specified by the VMCS info. object.
1306 *
1307 * @returns VBox status code.
1308 * @param pVmcsInfo The VMCS info. object.
1309 *
1310 * @remarks Can be called with interrupts disabled.
1311 */
1312static int hmR0VmxClearShadowVmcs(PVMXVMCSINFO pVmcsInfo)
1313{
1314 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1315 Assert(pVmcsInfo->HCPhysShadowVmcs != 0 && pVmcsInfo->HCPhysShadowVmcs != NIL_RTHCPHYS);
1316
1317 int rc = VMXClearVmcs(pVmcsInfo->HCPhysShadowVmcs);
1318 if (RT_SUCCESS(rc))
1319 pVmcsInfo->fShadowVmcsState = VMX_V_VMCS_LAUNCH_STATE_CLEAR;
1320 return rc;
1321}
1322
1323
1324/**
1325 * Switches from and to the specified VMCSes.
1326 *
1327 * @returns VBox status code.
1328 * @param pVmcsInfoFrom The VMCS info. object we are switching from.
1329 * @param pVmcsInfoTo The VMCS info. object we are switching to.
1330 *
1331 * @remarks Called with interrupts disabled.
1332 */
1333static int hmR0VmxSwitchVmcs(PVMXVMCSINFO pVmcsInfoFrom, PVMXVMCSINFO pVmcsInfoTo)
1334{
1335 /*
1336 * Clear the VMCS we are switching out if it has not already been cleared.
1337 * This will sync any CPU internal data back to the VMCS.
1338 */
1339 if (pVmcsInfoFrom->fVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
1340 {
1341 int rc = hmR0VmxClearVmcs(pVmcsInfoFrom);
1342 if (RT_SUCCESS(rc))
1343 {
1344 /*
1345 * The shadow VMCS, if any, would not be active at this point since we
1346 * would have cleared it while importing the virtual hardware-virtualization
1347 * state as part of the VMLAUNCH/VMRESUME VM-exit. Hence, there's no need to
1348 * clear the shadow VMCS here, just assert for safety.
1349 */
1350 Assert(!pVmcsInfoFrom->pvShadowVmcs || pVmcsInfoFrom->fShadowVmcsState == VMX_V_VMCS_LAUNCH_STATE_CLEAR);
1351 }
1352 else
1353 return rc;
1354 }
1355
1356 /*
1357 * Clear the VMCS we are switching to if it has not already been cleared.
1358 * This will initialize the VMCS launch state to "clear", which is required for loading it.
1359 *
1360 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
1361 */
1362 if (pVmcsInfoTo->fVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
1363 {
1364 int rc = hmR0VmxClearVmcs(pVmcsInfoTo);
1365 if (RT_SUCCESS(rc))
1366 { /* likely */ }
1367 else
1368 return rc;
1369 }
1370
1371 /*
1372 * Finally, load the VMCS we are switching to.
1373 */
1374 return hmR0VmxLoadVmcs(pVmcsInfoTo);
1375}
1376
1377
1378/**
1379 * Switches between the guest VMCS and the nested-guest VMCS as specified by the
1380 * caller.
1381 *
1382 * @returns VBox status code.
1383 * @param pVCpu The cross context virtual CPU structure.
1384 * @param fSwitchToNstGstVmcs Whether to switch to the nested-guest VMCS (pass
1385 * true) or guest VMCS (pass false).
1386 */
1387static int hmR0VmxSwitchToGstOrNstGstVmcs(PVMCPUCC pVCpu, bool fSwitchToNstGstVmcs)
1388{
1389 /* Ensure we have synced everything from the guest-CPU context to the VMCS before switching. */
1390 HMVMX_CPUMCTX_ASSERT(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL);
1391
1392 PVMXVMCSINFO pVmcsInfoFrom;
1393 PVMXVMCSINFO pVmcsInfoTo;
1394 if (fSwitchToNstGstVmcs)
1395 {
1396 pVmcsInfoFrom = &pVCpu->hmr0.s.vmx.VmcsInfo;
1397 pVmcsInfoTo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
1398 }
1399 else
1400 {
1401 pVmcsInfoFrom = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
1402 pVmcsInfoTo = &pVCpu->hmr0.s.vmx.VmcsInfo;
1403 }
1404
1405 /*
1406 * Disable interrupts to prevent being preempted while we switch the current VMCS as the
1407 * preemption hook code path acquires the current VMCS.
1408 */
1409 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
1410
1411 int rc = hmR0VmxSwitchVmcs(pVmcsInfoFrom, pVmcsInfoTo);
1412 if (RT_SUCCESS(rc))
1413 {
1414 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fSwitchToNstGstVmcs;
1415 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fSwitchToNstGstVmcs;
1416
1417 /*
1418 * If we are switching to a VMCS that was executed on a different host CPU or was
1419 * never executed before, flag that we need to export the host state before executing
1420 * guest/nested-guest code using hardware-assisted VMX.
1421 *
1422 * This could probably be done in a preemptible context since the preemption hook
1423 * will flag the necessary change in host context. However, since preemption is
1424 * already disabled and to avoid making assumptions about host specific code in
1425 * RTMpCpuId when called with preemption enabled, we'll do this while preemption is
1426 * disabled.
1427 */
1428 if (pVmcsInfoTo->idHostCpuState == RTMpCpuId())
1429 { /* likely */ }
1430 else
1431 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE);
1432
1433 ASMSetFlags(fEFlags);
1434
1435 /*
1436 * We use different VM-exit MSR-store areas for the guest and the nested-guest. Hence,
1437 * flag that we need to update the host MSR values there. Even if we decide in the
1438 * future to share the VM-exit MSR-store area page between the guest and nested-guest,
1439 * if its content differs, we would have to update the host MSRs anyway.
1440 */
1441 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
1442 }
1443 else
1444 ASMSetFlags(fEFlags);
1445 return rc;
1446}
1447#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
1448
1449
1450/**
1451 * Updates the VM's last error record.
1452 *
1453 * If there was a VMX instruction error, reads the error data from the VMCS and
1454 * updates the VCPU's last error record as well.
1455 *
1456 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
1457 * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or
1458 * VERR_VMX_INVALID_VMCS_FIELD.
1459 * @param rc The error code.
1460 */
1461static void hmR0VmxUpdateErrorRecord(PVMCPUCC pVCpu, int rc)
1462{
1463 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
1464 || rc == VERR_VMX_UNABLE_TO_START_VM)
1465 {
1466 AssertPtrReturnVoid(pVCpu);
1467 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
1468 }
1469 pVCpu->CTX_SUFF(pVM)->hm.s.ForR3.rcInit = rc;
1470}
1471
1472
1473#ifdef VBOX_STRICT
1474/**
1475 * Reads the VM-entry interruption-information field from the VMCS into the VMX
1476 * transient structure.
1477 *
1478 * @param pVmxTransient The VMX-transient structure.
1479 */
1480DECLINLINE(void) hmR0VmxReadEntryIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
1481{
1482 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntInfo);
1483 AssertRC(rc);
1484}
1485
1486
1487/**
1488 * Reads the VM-entry exception error code field from the VMCS into
1489 * the VMX transient structure.
1490 *
1491 * @param pVmxTransient The VMX-transient structure.
1492 */
1493DECLINLINE(void) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
1494{
1495 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode);
1496 AssertRC(rc);
1497}
1498
1499
1500/**
1501 * Reads the VM-entry instruction length field from the VMCS into
1502 * the VMX transient structure.
1503 *
1504 * @param pVmxTransient The VMX-transient structure.
1505 */
1506DECLINLINE(void) hmR0VmxReadEntryInstrLenVmcs(PVMXTRANSIENT pVmxTransient)
1507{
1508 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr);
1509 AssertRC(rc);
1510}
1511#endif /* VBOX_STRICT */
1512
1513
1514/**
1515 * Reads the VM-exit interruption-information field from the VMCS into the VMX
1516 * transient structure.
1517 *
1518 * @param pVmxTransient The VMX-transient structure.
1519 */
1520DECLINLINE(void) hmR0VmxReadExitIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
1521{
1522 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INTERRUPTION_INFO))
1523 {
1524 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo);
1525 AssertRC(rc);
1526 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INTERRUPTION_INFO;
1527 }
1528}
1529
1530
1531/**
1532 * Reads the VM-exit interruption error code from the VMCS into the VMX
1533 * transient structure.
1534 *
1535 * @param pVmxTransient The VMX-transient structure.
1536 */
1537DECLINLINE(void) hmR0VmxReadExitIntErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
1538{
1539 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE))
1540 {
1541 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode);
1542 AssertRC(rc);
1543 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE;
1544 }
1545}
1546
1547
1548/**
1549 * Reads the VM-exit instruction length field from the VMCS into the VMX
1550 * transient structure.
1551 *
1552 * @param pVmxTransient The VMX-transient structure.
1553 */
1554DECLINLINE(void) hmR0VmxReadExitInstrLenVmcs(PVMXTRANSIENT pVmxTransient)
1555{
1556 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_LEN))
1557 {
1558 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbExitInstr);
1559 AssertRC(rc);
1560 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_LEN;
1561 }
1562}
1563
1564
1565/**
1566 * Reads the VM-exit instruction-information field from the VMCS into
1567 * the VMX transient structure.
1568 *
1569 * @param pVmxTransient The VMX-transient structure.
1570 */
1571DECLINLINE(void) hmR0VmxReadExitInstrInfoVmcs(PVMXTRANSIENT pVmxTransient)
1572{
1573 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_INFO))
1574 {
1575 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u);
1576 AssertRC(rc);
1577 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_INFO;
1578 }
1579}
1580
1581
1582/**
1583 * Reads the Exit Qualification from the VMCS into the VMX transient structure.
1584 *
1585 * @param pVmxTransient The VMX-transient structure.
1586 */
1587DECLINLINE(void) hmR0VmxReadExitQualVmcs(PVMXTRANSIENT pVmxTransient)
1588{
1589 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_QUALIFICATION))
1590 {
1591 int rc = VMXReadVmcsNw(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQual);
1592 AssertRC(rc);
1593 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_QUALIFICATION;
1594 }
1595}
1596
1597
1598/**
1599 * Reads the Guest-linear address from the VMCS into the VMX transient structure.
1600 *
1601 * @param pVmxTransient The VMX-transient structure.
1602 */
1603DECLINLINE(void) hmR0VmxReadGuestLinearAddrVmcs(PVMXTRANSIENT pVmxTransient)
1604{
1605 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_GUEST_LINEAR_ADDR))
1606 {
1607 int rc = VMXReadVmcsNw(VMX_VMCS_RO_GUEST_LINEAR_ADDR, &pVmxTransient->uGuestLinearAddr);
1608 AssertRC(rc);
1609 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_GUEST_LINEAR_ADDR;
1610 }
1611}
1612
1613
1614/**
1615 * Reads the Guest-physical address from the VMCS into the VMX transient structure.
1616 *
1617 * @param pVmxTransient The VMX-transient structure.
1618 */
1619DECLINLINE(void) hmR0VmxReadGuestPhysicalAddrVmcs(PVMXTRANSIENT pVmxTransient)
1620{
1621 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_GUEST_PHYSICAL_ADDR))
1622 {
1623 int rc = VMXReadVmcs64(VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL, &pVmxTransient->uGuestPhysicalAddr);
1624 AssertRC(rc);
1625 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_GUEST_PHYSICAL_ADDR;
1626 }
1627}
1628
1629#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1630/**
1631 * Reads the Guest pending-debug exceptions from the VMCS into the VMX transient
1632 * structure.
1633 *
1634 * @param pVmxTransient The VMX-transient structure.
1635 */
1636DECLINLINE(void) hmR0VmxReadGuestPendingDbgXctps(PVMXTRANSIENT pVmxTransient)
1637{
1638 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_GUEST_PENDING_DBG_XCPTS))
1639 {
1640 int rc = VMXReadVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, &pVmxTransient->uGuestPendingDbgXcpts);
1641 AssertRC(rc);
1642 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_GUEST_PENDING_DBG_XCPTS;
1643 }
1644}
1645#endif
1646
1647/**
1648 * Reads the IDT-vectoring information field from the VMCS into the VMX
1649 * transient structure.
1650 *
1651 * @param pVmxTransient The VMX-transient structure.
1652 *
1653 * @remarks No-long-jump zone!!!
1654 */
1655DECLINLINE(void) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient)
1656{
1657 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_IDT_VECTORING_INFO))
1658 {
1659 int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_INFO, &pVmxTransient->uIdtVectoringInfo);
1660 AssertRC(rc);
1661 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_IDT_VECTORING_INFO;
1662 }
1663}
1664
1665
1666/**
1667 * Reads the IDT-vectoring error code from the VMCS into the VMX
1668 * transient structure.
1669 *
1670 * @param pVmxTransient The VMX-transient structure.
1671 */
1672DECLINLINE(void) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
1673{
1674 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_IDT_VECTORING_ERROR_CODE))
1675 {
1676 int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode);
1677 AssertRC(rc);
1678 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_IDT_VECTORING_ERROR_CODE;
1679 }
1680}
1681
1682#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
1683/**
1684 * Reads all relevant read-only VMCS fields into the VMX transient structure.
1685 *
1686 * @param pVmxTransient The VMX-transient structure.
1687 */
1688static void hmR0VmxReadAllRoFieldsVmcs(PVMXTRANSIENT pVmxTransient)
1689{
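    /* Note: ORing the individual status codes together is sufficient here because VINF_SUCCESS
       is 0; if any read fails the combined value is non-zero and caught by the AssertRC below. */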
1690 int rc = VMXReadVmcsNw(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQual);
1691 rc |= VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbExitInstr);
1692 rc |= VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u);
1693 rc |= VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_INFO, &pVmxTransient->uIdtVectoringInfo);
1694 rc |= VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode);
1695 rc |= VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo);
1696 rc |= VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode);
1697 rc |= VMXReadVmcsNw(VMX_VMCS_RO_GUEST_LINEAR_ADDR, &pVmxTransient->uGuestLinearAddr);
1698 rc |= VMXReadVmcs64(VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL, &pVmxTransient->uGuestPhysicalAddr);
1699 AssertRC(rc);
1700 pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_QUALIFICATION
1701 | HMVMX_READ_EXIT_INSTR_LEN
1702 | HMVMX_READ_EXIT_INSTR_INFO
1703 | HMVMX_READ_IDT_VECTORING_INFO
1704 | HMVMX_READ_IDT_VECTORING_ERROR_CODE
1705 | HMVMX_READ_EXIT_INTERRUPTION_INFO
1706 | HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE
1707 | HMVMX_READ_GUEST_LINEAR_ADDR
1708 | HMVMX_READ_GUEST_PHYSICAL_ADDR;
1709}
1710#endif
1711
1712/**
1713 * Enters VMX root mode operation on the current CPU.
1714 *
1715 * @returns VBox status code.
1716 * @param pHostCpu The HM physical-CPU structure.
1717 * @param pVM The cross context VM structure. Can be
1718 * NULL, after a resume.
1719 * @param HCPhysCpuPage Physical address of the VMXON region.
1720 * @param pvCpuPage Pointer to the VMXON region.
1721 */
1722static int hmR0VmxEnterRootMode(PHMPHYSCPU pHostCpu, PVMCC pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
1723{
1724 Assert(pHostCpu);
1725 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
1726 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
1727 Assert(pvCpuPage);
1728 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1729
1730 if (pVM)
1731 {
1732 /* Write the VMCS revision identifier to the VMXON region. */
1733 *(uint32_t *)pvCpuPage = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
1734 }
1735
1736 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
1737 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
1738
1739 /* Enable the VMX bit in CR4 if necessary. */
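    /* SUPR0ChangeCR4 ORs in X86_CR4_VMXE and ANDs with an all-ones mask (clearing nothing),
       returning the previous CR4 value which is checked below. */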
1740 RTCCUINTREG const uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX);
1741
1742     /* Record whether VMXE was already enabled prior to us enabling it above. */
1743 pHostCpu->fVmxeAlreadyEnabled = RT_BOOL(uOldCr4 & X86_CR4_VMXE);
1744
1745 /* Enter VMX root mode. */
1746 int rc = VMXEnable(HCPhysCpuPage);
1747 if (RT_FAILURE(rc))
1748 {
1749 /* Restore CR4.VMXE if it was not set prior to our attempt to set it above. */
1750 if (!pHostCpu->fVmxeAlreadyEnabled)
1751 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
1752
1753 if (pVM)
1754 pVM->hm.s.ForR3.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
1755 }
1756
1757 /* Restore interrupts. */
1758 ASMSetFlags(fEFlags);
1759 return rc;
1760}
1761
1762
1763/**
1764 * Exits VMX root mode operation on the current CPU.
1765 *
1766 * @returns VBox status code.
1767 * @param pHostCpu The HM physical-CPU structure.
1768 */
1769static int hmR0VmxLeaveRootMode(PHMPHYSCPU pHostCpu)
1770{
1771 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1772
1773     /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
1774 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
1775
1776 /* If we're for some reason not in VMX root mode, then don't leave it. */
1777 RTCCUINTREG const uHostCr4 = ASMGetCR4();
1778
1779 int rc;
1780 if (uHostCr4 & X86_CR4_VMXE)
1781 {
1782 /* Exit VMX root mode and clear the VMX bit in CR4. */
1783 VMXDisable();
1784
1785         /* Clear CR4.VMXE only if it was clear prior to us setting it. */
1786 if (!pHostCpu->fVmxeAlreadyEnabled)
1787 SUPR0ChangeCR4(0 /* fOrMask */, ~(uint64_t)X86_CR4_VMXE);
1788
1789 rc = VINF_SUCCESS;
1790 }
1791 else
1792 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
1793
1794 /* Restore interrupts. */
1795 ASMSetFlags(fEFlags);
1796 return rc;
1797}
1798
1799
1800/**
1801 * Allocates pages as specified by an array of VMX page-allocation info
1802 * objects.
1803 *
1804 * The pages' contents are zeroed after allocation.
1805 *
1806 * @returns VBox status code.
1807 * @param phMemObj Where to return the handle to the allocation.
1808 * @param paAllocInfo The pointer to the first element of the VMX
1809 * page-allocation info object array.
1810 * @param cEntries The number of elements in the @a paAllocInfo array.
1811 */
1812static int hmR0VmxPagesAllocZ(PRTR0MEMOBJ phMemObj, PVMXPAGEALLOCINFO paAllocInfo, uint32_t cEntries)
1813{
1814 *phMemObj = NIL_RTR0MEMOBJ;
1815
1816 /* Figure out how many pages to allocate. */
1817 uint32_t cPages = 0;
1818 for (uint32_t iPage = 0; iPage < cEntries; iPage++)
1819 cPages += !!paAllocInfo[iPage].fValid;
1820
1821 /* Allocate the pages. */
1822 if (cPages)
1823 {
1824 size_t const cbPages = cPages << PAGE_SHIFT;
1825 int rc = RTR0MemObjAllocPage(phMemObj, cbPages, false /* fExecutable */);
1826 if (RT_FAILURE(rc))
1827 return rc;
1828
1829 /* Zero the contents and assign each page to the corresponding VMX page-allocation entry. */
1830 void *pvFirstPage = RTR0MemObjAddress(*phMemObj);
1831 RT_BZERO(pvFirstPage, cbPages);
1832
1833 uint32_t iPage = 0;
1834 for (uint32_t i = 0; i < cEntries; i++)
1835 if (paAllocInfo[i].fValid)
1836 {
1837 RTHCPHYS const HCPhysPage = RTR0MemObjGetPagePhysAddr(*phMemObj, iPage);
1838 void *pvPage = (void *)((uintptr_t)pvFirstPage + (iPage << X86_PAGE_4K_SHIFT));
1839 Assert(HCPhysPage && HCPhysPage != NIL_RTHCPHYS);
1840 AssertPtr(pvPage);
1841
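                /* Note: the allocation-info array must be indexed by the entry index (i), while
                   iPage indexes the contiguously allocated pages; the two diverge once an invalid
                   (skipped) entry precedes a valid one. */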
1842                 Assert(paAllocInfo[i].pHCPhys);
1843                 Assert(paAllocInfo[i].ppVirt);
1844                 *paAllocInfo[i].pHCPhys = HCPhysPage;
1845                 *paAllocInfo[i].ppVirt  = pvPage;
1846
1847 /* Move to next page. */
1848 ++iPage;
1849 }
1850
1851 /* Make sure all valid (requested) pages have been assigned. */
1852 Assert(iPage == cPages);
1853 }
1854 return VINF_SUCCESS;
1855}
1856
1857
1858/**
1859 * Frees pages allocated using hmR0VmxPagesAllocZ.
1860 *
1861 * @param phMemObj Pointer to the memory object handle. Will be set to
1862 * NIL.
1863 */
1864DECL_FORCE_INLINE(void) hmR0VmxPagesFree(PRTR0MEMOBJ phMemObj)
1865{
1866 /* We can cleanup wholesale since it's all one allocation. */
1867 if (*phMemObj != NIL_RTR0MEMOBJ)
1868 {
1869 RTR0MemObjFree(*phMemObj, true /* fFreeMappings */);
1870 *phMemObj = NIL_RTR0MEMOBJ;
1871 }
1872}
1873
1874
1875/**
1876 * Initializes a VMCS info. object.
1877 *
1878 * @param pVmcsInfo The VMCS info. object.
1879 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
1880 */
1881static void hmR0VmxVmcsInfoInit(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
1882{
1883 RT_ZERO(*pVmcsInfo);
1884 RT_ZERO(*pVmcsInfoShared);
1885
1886 pVmcsInfo->pShared = pVmcsInfoShared;
1887 Assert(pVmcsInfo->hMemObj == NIL_RTR0MEMOBJ);
1888 pVmcsInfo->HCPhysVmcs = NIL_RTHCPHYS;
1889 pVmcsInfo->HCPhysShadowVmcs = NIL_RTHCPHYS;
1890 pVmcsInfo->HCPhysMsrBitmap = NIL_RTHCPHYS;
1891 pVmcsInfo->HCPhysGuestMsrLoad = NIL_RTHCPHYS;
1892 pVmcsInfo->HCPhysGuestMsrStore = NIL_RTHCPHYS;
1893 pVmcsInfo->HCPhysHostMsrLoad = NIL_RTHCPHYS;
1894 pVmcsInfo->HCPhysVirtApic = NIL_RTHCPHYS;
1895 pVmcsInfo->HCPhysEPTP = NIL_RTHCPHYS;
1896 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
1897 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
1898 pVmcsInfo->idHostCpuExec = NIL_RTCPUID;
1899}
1900
1901
1902/**
1903 * Frees the VT-x structures for a VMCS info. object.
1904 *
1905 * @param pVmcsInfo The VMCS info. object.
1906 * @param pVmcsInfoShared The VMCS info. object shared with ring-3.
1907 */
1908static void hmR0VmxVmcsInfoFree(PVMXVMCSINFO pVmcsInfo, PVMXVMCSINFOSHARED pVmcsInfoShared)
1909{
1910 hmR0VmxPagesFree(&pVmcsInfo->hMemObj);
1911 hmR0VmxVmcsInfoInit(pVmcsInfo, pVmcsInfoShared);
1912}
1913
1914
1915/**
1916 * Allocates the VT-x structures for a VMCS info. object.
1917 *
1918 * @returns VBox status code.
1919 * @param pVCpu The cross context virtual CPU structure.
1920 * @param pVmcsInfo The VMCS info. object.
1921 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
1922 *
1923 * @remarks The caller is expected to take care of any and all allocation failures.
1924 * This function will not perform any cleanup for failures half-way
1925 * through.
1926 */
1927static int hmR0VmxAllocVmcsInfo(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
1928{
1929 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
1930
1931 bool const fMsrBitmaps = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS);
1932 bool const fShadowVmcs = !fIsNstGstVmcs ? pVM->hmr0.s.vmx.fUseVmcsShadowing : pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing;
1933 Assert(!pVM->cpum.ro.GuestFeatures.fVmxVmcsShadowing); /* VMCS shadowing is not yet exposed to the guest. */
1934 VMXPAGEALLOCINFO aAllocInfo[] =
1935 {
1936 { true, 0 /* Unused */, &pVmcsInfo->HCPhysVmcs, &pVmcsInfo->pvVmcs },
1937 { true, 0 /* Unused */, &pVmcsInfo->HCPhysGuestMsrLoad, &pVmcsInfo->pvGuestMsrLoad },
1938 { true, 0 /* Unused */, &pVmcsInfo->HCPhysHostMsrLoad, &pVmcsInfo->pvHostMsrLoad },
1939 { fMsrBitmaps, 0 /* Unused */, &pVmcsInfo->HCPhysMsrBitmap, &pVmcsInfo->pvMsrBitmap },
1940 { fShadowVmcs, 0 /* Unused */, &pVmcsInfo->HCPhysShadowVmcs, &pVmcsInfo->pvShadowVmcs },
1941 };
1942
1943 int rc = hmR0VmxPagesAllocZ(&pVmcsInfo->hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
1944 if (RT_FAILURE(rc))
1945 return rc;
1946
1947 /*
1948     * We use the same page for the VM-entry MSR-load and VM-exit MSR-store areas, as they
1949     * contain a symmetric list of guest MSRs to load on VM-entry and store on VM-exit.
1950 */
1951 AssertCompile(RT_ELEMENTS(aAllocInfo) > 0);
1952 Assert(pVmcsInfo->HCPhysGuestMsrLoad != NIL_RTHCPHYS);
1953 pVmcsInfo->pvGuestMsrStore = pVmcsInfo->pvGuestMsrLoad;
1954 pVmcsInfo->HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrLoad;
1955
1956 /*
1957     * Get the virtual-APIC page rather than allocating it again.
1958 */
1959 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)
1960 {
1961 if (!fIsNstGstVmcs)
1962 {
1963 if (PDMHasApic(pVM))
1964 {
1965 rc = APICGetApicPageForCpu(pVCpu, &pVmcsInfo->HCPhysVirtApic, (PRTR0PTR)&pVmcsInfo->pbVirtApic, NULL /*pR3Ptr*/);
1966 if (RT_FAILURE(rc))
1967 return rc;
1968 Assert(pVmcsInfo->pbVirtApic);
1969 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
1970 }
1971 }
1972 else
1973 {
1974 pVmcsInfo->pbVirtApic = &pVCpu->cpum.GstCtx.hwvirt.vmx.abVirtApicPage[0];
1975 pVmcsInfo->HCPhysVirtApic = GVMMR0ConvertGVMPtr2HCPhys(pVM, pVmcsInfo->pbVirtApic);
1976 Assert(pVmcsInfo->HCPhysVirtApic && pVmcsInfo->HCPhysVirtApic != NIL_RTHCPHYS);
1977 }
1978 }
1979
1980 return VINF_SUCCESS;
1981}
1982
1983
1984/**
1985 * Free all VT-x structures for the VM.
1986 *
1987 * @returns IPRT status code.
1988 * @param pVM The cross context VM structure.
1989 */
1990static void hmR0VmxStructsFree(PVMCC pVM)
1991{
1992 hmR0VmxPagesFree(&pVM->hmr0.s.vmx.hMemObj);
1993#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
1994 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
1995 {
1996 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsFields);
1997 pVM->hmr0.s.vmx.paShadowVmcsFields = NULL;
1998 RTMemFree(pVM->hmr0.s.vmx.paShadowVmcsRoFields);
1999 pVM->hmr0.s.vmx.paShadowVmcsRoFields = NULL;
2000 }
2001#endif
2002
2003 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
2004 {
2005 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
2006 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
2007#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2008 if (pVM->cpum.ro.GuestFeatures.fVmx)
2009 hmR0VmxVmcsInfoFree(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
2010#endif
2011 }
2012}
2013
2014
2015/**
2016 * Allocate all VT-x structures for the VM.
2017 *
2018 * @returns IPRT status code.
2019 * @param pVM The cross context VM structure.
2020 *
2021 * @remarks This function will clean up on memory allocation failures.
2022 */
2023static int hmR0VmxStructsAlloc(PVMCC pVM)
2024{
2025 /*
2026 * Sanity check the VMCS size reported by the CPU as we assume 4KB allocations.
2027 * The VMCS size cannot be more than 4096 bytes.
2028 *
2029 * See Intel spec. Appendix A.1 "Basic VMX Information".
2030 */
2031 uint32_t const cbVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_SIZE);
2032 if (cbVmcs <= X86_PAGE_4K_SIZE)
2033 { /* likely */ }
2034 else
2035 {
2036 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE;
2037 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2038 }
2039
2040 /*
2041 * Allocate per-VM VT-x structures.
2042 */
2043 bool const fVirtApicAccess = RT_BOOL(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
2044 bool const fUseVmcsShadowing = pVM->hmr0.s.vmx.fUseVmcsShadowing;
2045 VMXPAGEALLOCINFO aAllocInfo[] =
2046 {
2047 { fVirtApicAccess, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysApicAccess, (PRTR0PTR)&pVM->hmr0.s.vmx.pbApicAccess },
2048 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmreadBitmap, &pVM->hmr0.s.vmx.pvVmreadBitmap },
2049 { fUseVmcsShadowing, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysVmwriteBitmap, &pVM->hmr0.s.vmx.pvVmwriteBitmap },
2050#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2051 { true, 0 /* Unused */, &pVM->hmr0.s.vmx.HCPhysScratch, (PRTR0PTR)&pVM->hmr0.s.vmx.pbScratch },
2052#endif
2053 };
2054
2055 int rc = hmR0VmxPagesAllocZ(&pVM->hmr0.s.vmx.hMemObj, &aAllocInfo[0], RT_ELEMENTS(aAllocInfo));
2056 if (RT_SUCCESS(rc))
2057 {
2058#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2059 /* Allocate the shadow VMCS-fields array. */
2060 if (fUseVmcsShadowing)
2061 {
2062 Assert(!pVM->hmr0.s.vmx.cShadowVmcsFields);
2063 Assert(!pVM->hmr0.s.vmx.cShadowVmcsRoFields);
2064 pVM->hmr0.s.vmx.paShadowVmcsFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
2065 pVM->hmr0.s.vmx.paShadowVmcsRoFields = (uint32_t *)RTMemAllocZ(sizeof(g_aVmcsFields));
2066 if (!pVM->hmr0.s.vmx.paShadowVmcsFields || !pVM->hmr0.s.vmx.paShadowVmcsRoFields)
2067 rc = VERR_NO_MEMORY;
2068 }
2069#endif
2070
2071 /*
2072 * Allocate per-VCPU VT-x structures.
2073 */
2074 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus && RT_SUCCESS(rc); idCpu++)
2075 {
2076 /* Allocate the guest VMCS structures. */
2077 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
2078 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
2079
2080#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2081 /* Allocate the nested-guest VMCS structures, when the VMX feature is exposed to the guest. */
2082 if (pVM->cpum.ro.GuestFeatures.fVmx && RT_SUCCESS(rc))
2083 rc = hmR0VmxAllocVmcsInfo(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
2084#endif
2085 }
2086 if (RT_SUCCESS(rc))
2087 return VINF_SUCCESS;
2088 }
2089 hmR0VmxStructsFree(pVM);
2090 return rc;
2091}
2092
2093
2094/**
2095 * Pre-initializes non-zero fields in VMX structures that will be allocated.
2096 *
2097 * @param pVM The cross context VM structure.
2098 */
2099static void hmR0VmxStructsInit(PVMCC pVM)
2100{
2101 /* Paranoia. */
2102 Assert(pVM->hmr0.s.vmx.pbApicAccess == NULL);
2103#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2104 Assert(pVM->hmr0.s.vmx.pbScratch == NULL);
2105#endif
2106
2107 /*
2108 * Initialize members up-front so we can cleanup en masse on allocation failures.
2109 */
2110#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2111 pVM->hmr0.s.vmx.HCPhysScratch = NIL_RTHCPHYS;
2112#endif
2113 pVM->hmr0.s.vmx.HCPhysApicAccess = NIL_RTHCPHYS;
2114 pVM->hmr0.s.vmx.HCPhysVmreadBitmap = NIL_RTHCPHYS;
2115 pVM->hmr0.s.vmx.HCPhysVmwriteBitmap = NIL_RTHCPHYS;
2116 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
2117 {
2118 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
2119 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfo, &pVCpu->hm.s.vmx.VmcsInfo);
2120 hmR0VmxVmcsInfoInit(&pVCpu->hmr0.s.vmx.VmcsInfoNstGst, &pVCpu->hm.s.vmx.VmcsInfoNstGst);
2121 }
2122}
2123
2124#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2125/**
2126 * Returns whether an MSR at the given MSR-bitmap offset is intercepted or not.
2127 *
2128 * @returns @c true if the MSR is intercepted, @c false otherwise.
2129 * @param pbMsrBitmap The MSR bitmap.
2130 * @param offMsr The MSR byte offset.
2131 * @param iBit The bit offset from the byte offset.
2132 */
2133DECLINLINE(bool) hmR0VmxIsMsrBitSet(uint8_t const *pbMsrBitmap, uint16_t offMsr, int32_t iBit)
2134{
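    /* The MSR bitmap is a single 4K page; offMsr selects one of its four 1KB quadrants
       (low/high MSR range, read/write) and iBit the bit within that quadrant. */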
2135 Assert(offMsr + (iBit >> 3) <= X86_PAGE_4K_SIZE);
2136 return ASMBitTest(pbMsrBitmap + offMsr, iBit);
2137}
2138#endif
2139
2140/**
2141 * Sets the permission bits for the specified MSR in the given MSR bitmap.
2142 *
2143 * If the passed VMCS is a nested-guest VMCS, this function ensures that the
2144 * read/write intercept is cleared from the MSR bitmap used for hardware-assisted
2145 * VMX execution of the nested-guest, but only if the nested-guest is also not
2146 * intercepting read/write access to this MSR.
2147 *
2148 * @param pVCpu The cross context virtual CPU structure.
2149 * @param pVmcsInfo The VMCS info. object.
2150 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2151 * @param idMsr The MSR value.
2152 * @param fMsrpm The MSR permissions (see VMXMSRPM_XXX). This must
2153 * include both a read -and- a write permission!
2154 *
2155 * @sa CPUMGetVmxMsrPermission.
2156 * @remarks Can be called with interrupts disabled.
2157 */
2158static void hmR0VmxSetMsrPermission(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs, uint32_t idMsr, uint32_t fMsrpm)
2159{
2160 uint8_t *pbMsrBitmap = (uint8_t *)pVmcsInfo->pvMsrBitmap;
2161 Assert(pbMsrBitmap);
2162 Assert(VMXMSRPM_IS_FLAG_VALID(fMsrpm));
2163
2164 /*
2165 * MSR-bitmap Layout:
2166 * Byte index MSR range Interpreted as
2167 * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits.
2168 * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits.
2169 * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits.
2170 * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits.
2171 *
2172 * A bit corresponding to an MSR within the above range causes a VM-exit
2173 * if the bit is 1 on executions of RDMSR/WRMSR. If an MSR falls out of
2174     * the MSR range, it always causes a VM-exit.
2175 *
2176 * See Intel spec. 24.6.9 "MSR-Bitmap Address".
2177 */
2178 uint16_t const offBitmapRead = 0;
2179 uint16_t const offBitmapWrite = 0x800;
2180 uint16_t offMsr;
2181 int32_t iBit;
2182 if (idMsr <= UINT32_C(0x00001fff))
2183 {
2184 offMsr = 0;
2185 iBit = idMsr;
2186 }
2187 else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff))
2188 {
2189 offMsr = 0x400;
2190 iBit = idMsr - UINT32_C(0xc0000000);
2191 }
2192 else
2193 AssertMsgFailedReturnVoid(("Invalid MSR %#RX32\n", idMsr));
2194
2195 /*
2196 * Set the MSR read permission.
2197 */
2198 uint16_t const offMsrRead = offBitmapRead + offMsr;
2199 Assert(offMsrRead + (iBit >> 3) < offBitmapWrite);
2200 if (fMsrpm & VMXMSRPM_ALLOW_RD)
2201 {
2202#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2203 bool const fClear = !fIsNstGstVmcs ? true
2204 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrRead, iBit);
2205#else
2206 RT_NOREF2(pVCpu, fIsNstGstVmcs);
2207 bool const fClear = true;
2208#endif
2209 if (fClear)
2210 ASMBitClear(pbMsrBitmap + offMsrRead, iBit);
2211 }
2212 else
2213 ASMBitSet(pbMsrBitmap + offMsrRead, iBit);
2214
2215 /*
2216 * Set the MSR write permission.
2217 */
2218 uint16_t const offMsrWrite = offBitmapWrite + offMsr;
2219 Assert(offMsrWrite + (iBit >> 3) < X86_PAGE_4K_SIZE);
2220 if (fMsrpm & VMXMSRPM_ALLOW_WR)
2221 {
2222#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
2223 bool const fClear = !fIsNstGstVmcs ? true
2224 : !hmR0VmxIsMsrBitSet(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, offMsrWrite, iBit);
2225#else
2226 RT_NOREF2(pVCpu, fIsNstGstVmcs);
2227 bool const fClear = true;
2228#endif
2229 if (fClear)
2230 ASMBitClear(pbMsrBitmap + offMsrWrite, iBit);
2231 }
2232 else
2233 ASMBitSet(pbMsrBitmap + offMsrWrite, iBit);
2234}
2235
2236
2237/**
2238 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
2239 * area.
2240 *
2241 * @returns VBox status code.
2242 * @param pVCpu The cross context virtual CPU structure.
2243 * @param pVmcsInfo The VMCS info. object.
2244 * @param cMsrs The number of MSRs.
2245 */
2246static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint32_t cMsrs)
2247{
2248 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
2249 uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc);
2250 if (RT_LIKELY(cMsrs < cMaxSupportedMsrs))
2251 {
2252 /* Commit the MSR counts to the VMCS and update the cache. */
2253 if (pVmcsInfo->cEntryMsrLoad != cMsrs)
2254 {
2255 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
2256 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRC(rc);
2257 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRC(rc);
2258 pVmcsInfo->cEntryMsrLoad = cMsrs;
2259 pVmcsInfo->cExitMsrStore = cMsrs;
2260 pVmcsInfo->cExitMsrLoad = cMsrs;
2261 }
2262 return VINF_SUCCESS;
2263 }
2264
2265 LogRel(("Auto-load/store MSR count exceeded! cMsrs=%u MaxSupported=%u\n", cMsrs, cMaxSupportedMsrs));
2266 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
2267 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2268}
2269
2270
2271/**
2272 * Adds a new (or updates the value of an existing) guest/host MSR
2273 * pair to be swapped during the world-switch as part of the
2274 * auto-load/store MSR area in the VMCS.
2275 *
2276 * @returns VBox status code.
2277 * @param pVCpu The cross context virtual CPU structure.
2278 * @param pVmxTransient The VMX-transient structure.
2279 * @param idMsr The MSR.
2280 * @param uGuestMsrValue Value of the guest MSR.
2281 * @param fSetReadWrite Whether to set the guest read/write access of this
2282 * MSR (thus not causing a VM-exit).
2283 * @param fUpdateHostMsr Whether to update the value of the host MSR if
2284 * necessary.
2285 */
2286static int hmR0VmxAddAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr, uint64_t uGuestMsrValue,
2287 bool fSetReadWrite, bool fUpdateHostMsr)
2288{
2289 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
2290 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
2291 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
2292 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
2293 uint32_t i;
2294
2295 /* Paranoia. */
2296 Assert(pGuestMsrLoad);
2297
2298#ifndef DEBUG_bird
2299 LogFlowFunc(("pVCpu=%p idMsr=%#RX32 uGuestMsrValue=%#RX64\n", pVCpu, idMsr, uGuestMsrValue));
2300#endif
2301
2302 /* Check if the MSR already exists in the VM-entry MSR-load area. */
2303 for (i = 0; i < cMsrs; i++)
2304 {
2305 if (pGuestMsrLoad[i].u32Msr == idMsr)
2306 break;
2307 }
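    /* At this point 'i' either indexes the existing entry for idMsr, or equals cMsrs when the
       MSR was not found and needs to be added. */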
2308
2309 bool fAdded = false;
2310 if (i == cMsrs)
2311 {
2312 /* The MSR does not exist, bump the MSR count to make room for the new MSR. */
2313 ++cMsrs;
2314 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
2315 AssertMsgRCReturn(rc, ("Insufficient space to add MSR to VM-entry MSR-load/store area %u\n", idMsr), rc);
2316
2317 /* Set the guest to read/write this MSR without causing VM-exits. */
2318 if ( fSetReadWrite
2319 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
2320 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_ALLOW_RD_WR);
2321
2322 Log4Func(("Added MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
2323 fAdded = true;
2324 }
2325
2326 /* Update the MSR value for the newly added or already existing MSR. */
2327 pGuestMsrLoad[i].u32Msr = idMsr;
2328 pGuestMsrLoad[i].u64Value = uGuestMsrValue;
2329
2330 /* Create the corresponding slot in the VM-exit MSR-store area if we use a different page. */
2331 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
2332 {
2333 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
2334 pGuestMsrStore[i].u32Msr = idMsr;
2335 pGuestMsrStore[i].u64Value = uGuestMsrValue;
2336 }
2337
2338 /* Update the corresponding slot in the host MSR area. */
2339 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
2340 Assert(pHostMsr != pVmcsInfo->pvGuestMsrLoad);
2341 Assert(pHostMsr != pVmcsInfo->pvGuestMsrStore);
2342 pHostMsr[i].u32Msr = idMsr;
2343
2344 /*
2345 * Only if the caller requests to update the host MSR value AND we've newly added the
2346 * MSR to the host MSR area do we actually update the value. Otherwise, it will be
2347 * updated by hmR0VmxUpdateAutoLoadHostMsrs().
2348 *
2349 * We do this for performance reasons since reading MSRs may be quite expensive.
2350 */
2351 if (fAdded)
2352 {
2353 if (fUpdateHostMsr)
2354 {
2355 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
2356 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2357 pHostMsr[i].u64Value = ASMRdMsr(idMsr);
2358 }
2359 else
2360 {
2361 /* Someone else can do the work. */
2362 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
2363 }
2364 }
2365 return VINF_SUCCESS;
2366}
2367
2368
2369/**
2370 * Removes a guest/host MSR pair to be swapped during the world-switch from the
2371 * auto-load/store MSR area in the VMCS.
2372 *
2373 * @returns VBox status code.
2374 * @param pVCpu The cross context virtual CPU structure.
2375 * @param pVmxTransient The VMX-transient structure.
2376 * @param idMsr The MSR.
2377 */
2378static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t idMsr)
2379{
2380 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
2381 bool const fIsNstGstVmcs = pVmxTransient->fIsNestedGuest;
2382 PVMXAUTOMSR pGuestMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
2383 uint32_t cMsrs = pVmcsInfo->cEntryMsrLoad;
2384
2385#ifndef DEBUG_bird
2386 LogFlowFunc(("pVCpu=%p idMsr=%#RX32\n", pVCpu, idMsr));
2387#endif
2388
2389 for (uint32_t i = 0; i < cMsrs; i++)
2390 {
2391 /* Find the MSR. */
2392 if (pGuestMsrLoad[i].u32Msr == idMsr)
2393 {
2394 /*
2395 * If it's the last MSR, we only need to reduce the MSR count.
2396 * If it's -not- the last MSR, copy the last MSR in place of it and reduce the MSR count.
2397 */
2398 if (i < cMsrs - 1)
2399 {
2400 /* Remove it from the VM-entry MSR-load area. */
2401 pGuestMsrLoad[i].u32Msr = pGuestMsrLoad[cMsrs - 1].u32Msr;
2402 pGuestMsrLoad[i].u64Value = pGuestMsrLoad[cMsrs - 1].u64Value;
2403
2404 /* Remove it from the VM-exit MSR-store area if it's in a different page. */
2405 if (hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo))
2406 {
2407 PVMXAUTOMSR pGuestMsrStore = (PVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
2408 Assert(pGuestMsrStore[i].u32Msr == idMsr);
2409 pGuestMsrStore[i].u32Msr = pGuestMsrStore[cMsrs - 1].u32Msr;
2410 pGuestMsrStore[i].u64Value = pGuestMsrStore[cMsrs - 1].u64Value;
2411 }
2412
2413 /* Remove it from the VM-exit MSR-load area. */
2414 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
2415 Assert(pHostMsr[i].u32Msr == idMsr);
2416 pHostMsr[i].u32Msr = pHostMsr[cMsrs - 1].u32Msr;
2417 pHostMsr[i].u64Value = pHostMsr[cMsrs - 1].u64Value;
2418 }
2419
2420 /* Reduce the count to reflect the removed MSR and bail. */
2421 --cMsrs;
2422 break;
2423 }
2424 }
2425
2426 /* Update the VMCS if the count changed (meaning the MSR was found and removed). */
2427 if (cMsrs != pVmcsInfo->cEntryMsrLoad)
2428 {
2429 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, pVmcsInfo, cMsrs);
2430 AssertRCReturn(rc, rc);
2431
2432 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
2433 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2434 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, fIsNstGstVmcs, idMsr, VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR);
2435
2436 Log4Func(("Removed MSR %#RX32, cMsrs=%u\n", idMsr, cMsrs));
2437 return VINF_SUCCESS;
2438 }
2439
2440 return VERR_NOT_FOUND;
2441}
2442
2443
2444/**
2445 * Checks if the specified guest MSR is part of the VM-entry MSR-load area.
2446 *
2447 * @returns @c true if found, @c false otherwise.
2448 * @param pVmcsInfo The VMCS info. object.
2449 * @param idMsr The MSR to find.
2450 */
2451static bool hmR0VmxIsAutoLoadGuestMsr(PCVMXVMCSINFO pVmcsInfo, uint32_t idMsr)
2452{
2453 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
2454 uint32_t const cMsrs = pVmcsInfo->cEntryMsrLoad;
2455 Assert(pMsrs);
2456 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
2457 for (uint32_t i = 0; i < cMsrs; i++)
2458 {
2459 if (pMsrs[i].u32Msr == idMsr)
2460 return true;
2461 }
2462 return false;
2463}
2464
2465
2466/**
2467 * Updates the value of all host MSRs in the VM-exit MSR-load area.
2468 *
2469 * @param pVCpu The cross context virtual CPU structure.
2470 * @param pVmcsInfo The VMCS info. object.
2471 *
2472 * @remarks No-long-jump zone!!!
2473 */
2474static void hmR0VmxUpdateAutoLoadHostMsrs(PCVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
2475{
2476 RT_NOREF(pVCpu);
2477 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2478
2479 PVMXAUTOMSR pHostMsrLoad = (PVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
2480 uint32_t const cMsrs = pVmcsInfo->cExitMsrLoad;
2481 Assert(pHostMsrLoad);
2482 Assert(sizeof(*pHostMsrLoad) * cMsrs <= X86_PAGE_4K_SIZE);
2483 LogFlowFunc(("pVCpu=%p cMsrs=%u\n", pVCpu, cMsrs));
2484 for (uint32_t i = 0; i < cMsrs; i++)
2485 {
2486 /*
2487 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
2488 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
2489 */
2490 if (pHostMsrLoad[i].u32Msr == MSR_K6_EFER)
2491 pHostMsrLoad[i].u64Value = g_uHmVmxHostMsrEfer;
2492 else
2493 pHostMsrLoad[i].u64Value = ASMRdMsr(pHostMsrLoad[i].u32Msr);
2494 }
2495}
2496
2497
2498/**
2499 * Saves a set of host MSRs to allow read/write passthru access to the guest and
2500 * perform lazy restoration of the host MSRs while leaving VT-x.
2501 *
2502 * @param pVCpu The cross context virtual CPU structure.
2503 *
2504 * @remarks No-long-jump zone!!!
2505 */
2506static void hmR0VmxLazySaveHostMsrs(PVMCPUCC pVCpu)
2507{
2508 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2509
2510 /*
2511 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap accesses in hmR0VmxSetupVmcsProcCtls().
2512 */
2513 if (!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST))
2514 {
2515 Assert(!(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */
2516 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
2517 {
2518 pVCpu->hmr0.s.vmx.u64HostMsrLStar = ASMRdMsr(MSR_K8_LSTAR);
2519 pVCpu->hmr0.s.vmx.u64HostMsrStar = ASMRdMsr(MSR_K6_STAR);
2520 pVCpu->hmr0.s.vmx.u64HostMsrSfMask = ASMRdMsr(MSR_K8_SF_MASK);
2521 pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
2522 }
2523 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
2524 }
2525}
2526
2527
2528/**
2529 * Checks whether the MSR belongs to the set of guest MSRs that we restore
2530 * lazily while leaving VT-x.
2531 *
2532 * @returns true if it does, false otherwise.
2533 * @param pVCpu The cross context virtual CPU structure.
2534 * @param idMsr The MSR to check.
2535 */
2536static bool hmR0VmxIsLazyGuestMsr(PCVMCPUCC pVCpu, uint32_t idMsr)
2537{
2538 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
2539 {
2540 switch (idMsr)
2541 {
2542 case MSR_K8_LSTAR:
2543 case MSR_K6_STAR:
2544 case MSR_K8_SF_MASK:
2545 case MSR_K8_KERNEL_GS_BASE:
2546 return true;
2547 }
2548 }
2549 return false;
2550}
2551
2552
2553/**
2554 * Loads a set of guest MSRs to allow read/write passthru access to the guest.
2555 *
2556 * The name of this function is slightly confusing. This function does NOT
2557 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
2558 * common prefix for functions dealing with "lazy restoration" of the shared
2559 * MSRs.
2560 *
2561 * @param pVCpu The cross context virtual CPU structure.
2562 *
2563 * @remarks No-long-jump zone!!!
2564 */
2565static void hmR0VmxLazyLoadGuestMsrs(PVMCPUCC pVCpu)
2566{
2567 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2568 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
2569
2570 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
2571 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
2572 {
2573 /*
2574 * If the guest MSRs are not loaded -and- if all the guest MSRs are identical
2575 * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then
2576 * we can skip a few MSR writes.
2577 *
2578 * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the
2579         * guest MSR values in the guest-CPU context might be different from what's currently
2580 * loaded in the CPU. In either case, we need to write the new guest MSR values to the
2581 * CPU, see @bugref{8728}.
2582 */
2583 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
2584 if ( !(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
2585 && pCtx->msrKERNELGSBASE == pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase
2586 && pCtx->msrLSTAR == pVCpu->hmr0.s.vmx.u64HostMsrLStar
2587 && pCtx->msrSTAR == pVCpu->hmr0.s.vmx.u64HostMsrStar
2588 && pCtx->msrSFMASK == pVCpu->hmr0.s.vmx.u64HostMsrSfMask)
2589 {
2590#ifdef VBOX_STRICT
2591 Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE);
2592 Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR);
2593 Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR);
2594 Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK);
2595#endif
2596 }
2597 else
2598 {
2599 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE);
2600 ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR);
2601 ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR);
2602 /* The system call flag mask register isn't as benign and accepting of all
2603 values as the above, so mask it to avoid #GP'ing on corrupted input. */
2604 Assert(!(pCtx->msrSFMASK & ~(uint64_t)UINT32_MAX));
2605 ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK & UINT32_MAX);
2606 }
2607 }
2608 pVCpu->hmr0.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
2609}
2610
2611
2612/**
2613 * Performs lazy restoration of the set of host MSRs if they were previously
2614 * loaded with guest MSR values.
2615 *
2616 * @param pVCpu The cross context virtual CPU structure.
2617 *
2618 * @remarks No-long-jump zone!!!
2619 * @remarks The guest MSRs should have been saved back into the guest-CPU
2620 * context by hmR0VmxImportGuestState()!!!
2621 */
2622static void hmR0VmxLazyRestoreHostMsrs(PVMCPUCC pVCpu)
2623{
2624 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2625 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
2626
2627 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
2628 {
2629 Assert(pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
2630 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fAllow64BitGuests)
2631 {
2632 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hmr0.s.vmx.u64HostMsrLStar);
2633 ASMWrMsr(MSR_K6_STAR, pVCpu->hmr0.s.vmx.u64HostMsrStar);
2634 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hmr0.s.vmx.u64HostMsrSfMask);
2635 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hmr0.s.vmx.u64HostMsrKernelGsBase);
2636 }
2637 }
2638 pVCpu->hmr0.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
2639}
2640
2641
2642/**
2643 * Verifies that our cached values of the VMCS fields are all consistent with
2644 * what's actually present in the VMCS.
2645 *
2646 * @returns VBox status code.
2647 * @retval VINF_SUCCESS if all our caches match their respective VMCS fields.
2648 * @retval VERR_VMX_VMCS_FIELD_CACHE_INVALID if a cache field doesn't match the
2649 * VMCS content. HMCPU error-field is
2650 * updated, see VMX_VCI_XXX.
2651 * @param pVCpu The cross context virtual CPU structure.
2652 * @param pVmcsInfo The VMCS info. object.
2653 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2654 */
2655static int hmR0VmxCheckCachedVmcsCtls(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2656{
2657 const char * const pcszVmcs = fIsNstGstVmcs ? "Nested-guest VMCS" : "VMCS";
2658
2659 uint32_t u32Val;
2660 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
2661 AssertRC(rc);
2662 AssertMsgReturnStmt(pVmcsInfo->u32EntryCtls == u32Val,
2663 ("%s entry controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32EntryCtls, u32Val),
2664 pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_ENTRY,
2665 VERR_VMX_VMCS_FIELD_CACHE_INVALID);
2666
2667 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val);
2668 AssertRC(rc);
2669 AssertMsgReturnStmt(pVmcsInfo->u32ExitCtls == u32Val,
2670 ("%s exit controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32ExitCtls, u32Val),
2671 pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_EXIT,
2672 VERR_VMX_VMCS_FIELD_CACHE_INVALID);
2673
2674 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val);
2675 AssertRC(rc);
2676 AssertMsgReturnStmt(pVmcsInfo->u32PinCtls == u32Val,
2677 ("%s pin controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32PinCtls, u32Val),
2678 pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PIN_EXEC,
2679 VERR_VMX_VMCS_FIELD_CACHE_INVALID);
2680
2681 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val);
2682 AssertRC(rc);
2683 AssertMsgReturnStmt(pVmcsInfo->u32ProcCtls == u32Val,
2684 ("%s proc controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32ProcCtls, u32Val),
2685 pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC,
2686 VERR_VMX_VMCS_FIELD_CACHE_INVALID);
2687
2688 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
2689 {
2690 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val);
2691 AssertRC(rc);
2692 AssertMsgReturnStmt(pVmcsInfo->u32ProcCtls2 == u32Val,
2693 ("%s proc2 controls mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32ProcCtls2, u32Val),
2694 pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC2,
2695 VERR_VMX_VMCS_FIELD_CACHE_INVALID);
2696 }
2697
2698 uint64_t u64Val;
2699 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TERTIARY_CTLS)
2700 {
2701 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_PROC_EXEC3_FULL, &u64Val);
2702 AssertRC(rc);
2703 AssertMsgReturnStmt(pVmcsInfo->u64ProcCtls3 == u64Val,
2704 ("%s proc3 controls mismatch: Cache=%#RX32 VMCS=%#RX64\n", pcszVmcs, pVmcsInfo->u64ProcCtls3, u64Val),
2705 pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC3,
2706 VERR_VMX_VMCS_FIELD_CACHE_INVALID);
2707 }
2708
2709 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val);
2710 AssertRC(rc);
2711 AssertMsgReturnStmt(pVmcsInfo->u32XcptBitmap == u32Val,
2712 ("%s exception bitmap mismatch: Cache=%#RX32 VMCS=%#RX32\n", pcszVmcs, pVmcsInfo->u32XcptBitmap, u32Val),
2713 pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_XCPT_BITMAP,
2714 VERR_VMX_VMCS_FIELD_CACHE_INVALID);
2715
2716 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, &u64Val);
2717 AssertRC(rc);
2718 AssertMsgReturnStmt(pVmcsInfo->u64TscOffset == u64Val,
2719 ("%s TSC offset mismatch: Cache=%#RX64 VMCS=%#RX64\n", pcszVmcs, pVmcsInfo->u64TscOffset, u64Val),
2720 pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_TSC_OFFSET,
2721 VERR_VMX_VMCS_FIELD_CACHE_INVALID);
2722
2723 NOREF(pcszVmcs);
2724 return VINF_SUCCESS;
2725}
2726
2727#ifdef VBOX_STRICT
2728
2729/**
2730 * Verifies that our cached host EFER MSR value has not changed since we cached it.
2731 *
2732 * @param pVmcsInfo The VMCS info. object.
2733 */
2734static void hmR0VmxCheckHostEferMsr(PCVMXVMCSINFO pVmcsInfo)
2735{
2736 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2737
2738 if (pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR)
2739 {
2740 uint64_t const uHostEferMsr = ASMRdMsr(MSR_K6_EFER);
2741 uint64_t const uHostEferMsrCache = g_uHmVmxHostMsrEfer;
2742 uint64_t uVmcsEferMsrVmcs;
2743 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &uVmcsEferMsrVmcs);
2744 AssertRC(rc);
2745
2746 AssertMsgReturnVoid(uHostEferMsr == uVmcsEferMsrVmcs,
2747 ("EFER Host/VMCS mismatch! host=%#RX64 vmcs=%#RX64\n", uHostEferMsr, uVmcsEferMsrVmcs));
2748 AssertMsgReturnVoid(uHostEferMsr == uHostEferMsrCache,
2749 ("EFER Host/Cache mismatch! host=%#RX64 cache=%#RX64\n", uHostEferMsr, uHostEferMsrCache));
2750 }
2751}
2752
2753
2754/**
2755 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
2756 * VMCS are correct.
2757 *
2758 * @param pVCpu The cross context virtual CPU structure.
2759 * @param pVmcsInfo The VMCS info. object.
2760 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
2761 */
2762static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
2763{
2764 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2765
2766 /* Read the various MSR-area counts from the VMCS. */
2767 uint32_t cEntryLoadMsrs;
2768 uint32_t cExitStoreMsrs;
2769 uint32_t cExitLoadMsrs;
2770 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cEntryLoadMsrs); AssertRC(rc);
2771 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cExitStoreMsrs); AssertRC(rc);
2772 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cExitLoadMsrs); AssertRC(rc);
2773
2774 /* Verify all the MSR counts are the same. */
2775 Assert(cEntryLoadMsrs == cExitStoreMsrs);
2776 Assert(cExitStoreMsrs == cExitLoadMsrs);
2777 uint32_t const cMsrs = cExitLoadMsrs;
2778
2779 /* Verify the MSR counts do not exceed the maximum count supported by the hardware. */
2780 Assert(cMsrs < VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
2781
2782 /* Verify the MSR counts are within the allocated page size. */
2783 Assert(sizeof(VMXAUTOMSR) * cMsrs <= X86_PAGE_4K_SIZE);
2784
2785 /* Verify the relevant contents of the MSR areas match. */
2786 PCVMXAUTOMSR pGuestMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrLoad;
2787 PCVMXAUTOMSR pGuestMsrStore = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
2788 PCVMXAUTOMSR pHostMsrLoad = (PCVMXAUTOMSR)pVmcsInfo->pvHostMsrLoad;
2789 bool const fSeparateExitMsrStorePage = hmR0VmxIsSeparateExitMsrStoreAreaVmcs(pVmcsInfo);
2790 for (uint32_t i = 0; i < cMsrs; i++)
2791 {
2792 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
2793 if (fSeparateExitMsrStorePage)
2794 {
2795 AssertMsgReturnVoid(pGuestMsrLoad->u32Msr == pGuestMsrStore->u32Msr,
2796 ("GuestMsrLoad=%#RX32 GuestMsrStore=%#RX32 cMsrs=%u\n",
2797 pGuestMsrLoad->u32Msr, pGuestMsrStore->u32Msr, cMsrs));
2798 }
2799
2800 AssertMsgReturnVoid(pHostMsrLoad->u32Msr == pGuestMsrLoad->u32Msr,
2801 ("HostMsrLoad=%#RX32 GuestMsrLoad=%#RX32 cMsrs=%u\n",
2802 pHostMsrLoad->u32Msr, pGuestMsrLoad->u32Msr, cMsrs));
2803
2804 uint64_t const u64HostMsr = ASMRdMsr(pHostMsrLoad->u32Msr);
2805 AssertMsgReturnVoid(pHostMsrLoad->u64Value == u64HostMsr,
2806 ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
2807 pHostMsrLoad->u32Msr, pHostMsrLoad->u64Value, u64HostMsr, cMsrs));
2808
2809 /* Verify that cached host EFER MSR matches what's loaded on the CPU. */
2810 bool const fIsEferMsr = RT_BOOL(pHostMsrLoad->u32Msr == MSR_K6_EFER);
2811 AssertMsgReturnVoid(!fIsEferMsr || u64HostMsr == g_uHmVmxHostMsrEfer,
2812 ("Cached=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", g_uHmVmxHostMsrEfer, u64HostMsr, cMsrs));
2813
2814 /* Verify that the accesses are as expected in the MSR bitmap for auto-load/store MSRs. */
2815 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
2816 {
2817 uint32_t const fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, pGuestMsrLoad->u32Msr);
2818 if (fIsEferMsr)
2819 {
2820 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_RD), ("Passthru read for EFER MSR!?\n"));
2821 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_EXIT_WR), ("Passthru write for EFER MSR!?\n"));
2822 }
2823 else
2824 {
2825 /* Verify LBR MSRs (used only for debugging) are intercepted. We don't passthru these MSRs to the guest yet. */
2826 PCVMCC pVM = pVCpu->CTX_SUFF(pVM);
2827 if ( pVM->hmr0.s.vmx.fLbr
2828 && ( hmR0VmxIsLbrBranchFromMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
2829 || hmR0VmxIsLbrBranchToMsr(pVM, pGuestMsrLoad->u32Msr, NULL /* pidxMsr */)
2830 || pGuestMsrLoad->u32Msr == pVM->hmr0.s.vmx.idLbrTosMsr))
2831 {
2832 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_EXIT_RD_WR,
2833 ("u32Msr=%#RX32 cMsrs=%u Passthru read/write for LBR MSRs!\n",
2834 pGuestMsrLoad->u32Msr, cMsrs));
2835 }
2836 else if (!fIsNstGstVmcs)
2837 {
2838 AssertMsgReturnVoid((fMsrpm & VMXMSRPM_MASK) == VMXMSRPM_ALLOW_RD_WR,
2839 ("u32Msr=%#RX32 cMsrs=%u No passthru read/write!\n", pGuestMsrLoad->u32Msr, cMsrs));
2840 }
2841 else
2842 {
2843 /*
2844 * A nested-guest VMCS must -also- allow read/write passthrough for the MSR for us to
2845 * execute a nested-guest with MSR passthrough.
2846 *
2847 * Check if the nested-guest MSR bitmap allows passthrough, and if so, assert that we
2848 * allow passthrough too.
2849 */
2850 void const *pvMsrBitmapNstGst = pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap;
2851 Assert(pvMsrBitmapNstGst);
2852 uint32_t const fMsrpmNstGst = CPUMGetVmxMsrPermission(pvMsrBitmapNstGst, pGuestMsrLoad->u32Msr);
2853 AssertMsgReturnVoid(fMsrpm == fMsrpmNstGst,
2854 ("u32Msr=%#RX32 cMsrs=%u Permission mismatch fMsrpm=%#x fMsrpmNstGst=%#x!\n",
2855 pGuestMsrLoad->u32Msr, cMsrs, fMsrpm, fMsrpmNstGst));
2856 }
2857 }
2858 }
2859
2860 /* Move to the next MSR. */
2861 pHostMsrLoad++;
2862 pGuestMsrLoad++;
2863 pGuestMsrStore++;
2864 }
2865}
2866
2867#endif /* VBOX_STRICT */
2868
2869/**
2870 * Flushes the TLB using EPT.
2871 *
2872 * @returns VBox status code.
2873 * @param pVCpu The cross context virtual CPU structure of the calling
2874 * EMT. Can be NULL depending on @a enmTlbFlush.
2875 * @param pVmcsInfo The VMCS info. object. Can be NULL depending on @a
2876 * enmTlbFlush.
2877 * @param enmTlbFlush Type of flush.
2878 *
2879 * @remarks Caller is responsible for making sure this function is called only
2880 * when NestedPaging is supported and providing @a enmTlbFlush that is
2881 * supported by the CPU.
2882 * @remarks Can be called with interrupts disabled.
2883 */
2884static void hmR0VmxFlushEpt(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, VMXTLBFLUSHEPT enmTlbFlush)
2885{
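    /* Build the INVEPT descriptor: the first quadword holds the EPT pointer (zeroed for an
       all-contexts flush), the second quadword is reserved and must be zero. */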
2886 uint64_t au64Descriptor[2];
2887 if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS)
2888 au64Descriptor[0] = 0;
2889 else
2890 {
2891 Assert(pVCpu);
2892 Assert(pVmcsInfo);
2893 au64Descriptor[0] = pVmcsInfo->HCPhysEPTP;
2894 }
2895 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
2896
2897 int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]);
2898 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %#RHp failed. rc=%Rrc\n", enmTlbFlush, au64Descriptor[0], rc));
2899
2900 if ( RT_SUCCESS(rc)
2901 && pVCpu)
2902 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
2903}
2904
2905
2906/**
2907 * Flushes the TLB using VPID.
2908 *
2909 * @returns VBox status code.
2910 * @param pVCpu The cross context virtual CPU structure of the calling
2911 * EMT. Can be NULL depending on @a enmTlbFlush.
2912 * @param enmTlbFlush Type of flush.
2913 * @param GCPtr Virtual address of the page to flush (can be 0 depending
2914 * on @a enmTlbFlush).
2915 *
2916 * @remarks Can be called with interrupts disabled.
2917 */
2918static void hmR0VmxFlushVpid(PVMCPUCC pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr)
2919{
2920 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid);
2921
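    /* Build the INVVPID descriptor: the low quadword carries the VPID (bits 15:0), the high
       quadword the linear address, which is only relevant for individual-address flushes. */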
2922 uint64_t au64Descriptor[2];
2923 if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS)
2924 {
2925 au64Descriptor[0] = 0;
2926 au64Descriptor[1] = 0;
2927 }
2928 else
2929 {
2930 AssertPtr(pVCpu);
2931 AssertMsg(pVCpu->hmr0.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
2932 AssertMsg(pVCpu->hmr0.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hmr0.s.uCurrentAsid));
2933 au64Descriptor[0] = pVCpu->hmr0.s.uCurrentAsid;
2934 au64Descriptor[1] = GCPtr;
2935 }
2936
2937 int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]);
2938 AssertMsg(rc == VINF_SUCCESS,
2939 ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hmr0.s.uCurrentAsid : 0, GCPtr, rc));
2940
2941 if ( RT_SUCCESS(rc)
2942 && pVCpu)
2943 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
2944 NOREF(rc);
2945}
2946
2947
2948/**
2949 * Invalidates a guest page by guest virtual address. Only relevant for EPT/VPID,
2950 * otherwise there is nothing really to invalidate.
2951 *
2952 * @returns VBox status code.
2953 * @param pVCpu The cross context virtual CPU structure.
2954 * @param GCVirt Guest virtual address of the page to invalidate.
2955 */
2956VMMR0DECL(int) VMXR0InvalidatePage(PVMCPUCC pVCpu, RTGCPTR GCVirt)
2957{
2958 AssertPtr(pVCpu);
2959 LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt));
2960
2961 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH))
2962 {
2963 /*
2964         * We must invalidate the guest TLB entry in either case; we cannot ignore it even for
2965 * the EPT case. See @bugref{6043} and @bugref{6177}.
2966 *
2967 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*()
2968         * as this function may be called in a loop with individual addresses.
2969 */
2970 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
2971 if (pVM->hmr0.s.vmx.fVpid)
2972 {
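            /* Prefer a targeted flush-by-address when the CPU supports individual-address
               INVVPID; otherwise request a full TLB flush before the next VM-entry. */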
2973 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2974 {
2975 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt);
2976 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
2977 }
2978 else
2979 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2980 }
2981 else if (pVM->hmr0.s.fNestedPaging)
2982 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2983 }
2984
2985 return VINF_SUCCESS;
2986}
2987
2988
2989/**
2990 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
2991 * case where neither EPT nor VPID is supported by the CPU.
2992 *
2993 * @param pHostCpu The HM physical-CPU structure.
2994 * @param pVCpu The cross context virtual CPU structure.
2995 *
2996 * @remarks Called with interrupts disabled.
2997 */
2998static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
2999{
3000 AssertPtr(pVCpu);
3001 AssertPtr(pHostCpu);
3002
3003 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
3004
3005 Assert(pHostCpu->idCpu != NIL_RTCPUID);
3006 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
3007 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
3008 pVCpu->hmr0.s.fForceTLBFlush = false;
3009 return;
3010}
3011
3012
3013/**
3014 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
3015 *
3016 * @param pHostCpu The HM physical-CPU structure.
3017 * @param pVCpu The cross context virtual CPU structure.
3018 * @param pVmcsInfo The VMCS info. object.
3019 *
3020 * @remarks All references to "ASID" in this function pertain to "VPID" in Intel's
3021 *          nomenclature. The reason is to avoid confusion in compare statements,
3022 *          since the host-CPU copies are named "ASID".
3023 *
3024 * @remarks Called with interrupts disabled.
3025 */
3026static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
3027{
3028#ifdef VBOX_WITH_STATISTICS
3029 bool fTlbFlushed = false;
3030# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
3031# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
3032 if (!fTlbFlushed) \
3033 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
3034 } while (0)
3035#else
3036# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
3037# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
3038#endif
3039
3040 AssertPtr(pVCpu);
3041 AssertPtr(pHostCpu);
3042 Assert(pHostCpu->idCpu != NIL_RTCPUID);
3043
3044 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3045 AssertMsg(pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid,
3046 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
3047 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hmr0.s.fNestedPaging, pVM->hmr0.s.vmx.fVpid));
3048
3049 /*
3050 * Force a TLB flush for the first world-switch if the current CPU differs from the one we
3051 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
3052 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
3053 * cannot reuse the current ASID anymore.
3054 */
3055 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
3056 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
3057 {
3058 ++pHostCpu->uCurrentAsid;
3059 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
3060 {
3061 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
3062 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
3063 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
3064 }
3065
3066 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
3067 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
3068 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
3069
3070 /*
3071 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
3072 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
3073 */
3074 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
3075 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
3076 HMVMX_SET_TAGGED_TLB_FLUSHED();
3077 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
3078 }
3079 else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */
3080 {
3081 /*
3082 * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU
3083         * creates guest-physical (i.e. only EPT-tagged) mappings while traversing the EPT
3084 * tables when EPT is in use. Flushing-by-VPID will only flush linear (only
3085 * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical
3086 * mappings, see @bugref{6568}.
3087 *
3088 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information".
3089 */
3090 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
3091 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
3092 HMVMX_SET_TAGGED_TLB_FLUSHED();
3093 }
3094 else if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
3095 {
3096 /*
3097 * The nested-guest specifies its own guest-physical address to use as the APIC-access
3098 * address which requires flushing the TLB of EPT cached structures.
3099 *
3100 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
3101 */
3102 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVM->hmr0.s.vmx.enmTlbFlushEpt);
3103 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
3104 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
3105 HMVMX_SET_TAGGED_TLB_FLUSHED();
3106 }
3107
3108
3109 pVCpu->hmr0.s.fForceTLBFlush = false;
3110 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
3111
3112 Assert(pVCpu->hmr0.s.idLastCpu == pHostCpu->idCpu);
3113 Assert(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes);
3114 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
3115 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
3116 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
3117 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
3118 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
3119 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
3120 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
3121
3122 /* Update VMCS with the VPID. */
3123 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
3124 AssertRC(rc);
3125
3126#undef HMVMX_SET_TAGGED_TLB_FLUSHED
3127}
3128
3129
3130/**
3131 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
3132 *
3133 * @param pHostCpu The HM physical-CPU structure.
3134 * @param pVCpu The cross context virtual CPU structure.
3135 * @param pVmcsInfo The VMCS info. object.
3136 *
3137 * @remarks Called with interrupts disabled.
3138 */
3139static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
3140{
3141 AssertPtr(pVCpu);
3142 AssertPtr(pHostCpu);
3143 Assert(pHostCpu->idCpu != NIL_RTCPUID);
3144 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging."));
3145 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID."));
3146
3147 /*
3148 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
3149 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
3150 */
3151 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
3152 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
3153 {
3154 pVCpu->hmr0.s.fForceTLBFlush = true;
3155 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
3156 }
3157
3158 /* Check for explicit TLB flushes. */
3159 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
3160 {
3161 pVCpu->hmr0.s.fForceTLBFlush = true;
3162 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
3163 }
3164
3165 /* Check for TLB flushes while switching to/from a nested-guest. */
3166 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
3167 {
3168 pVCpu->hmr0.s.fForceTLBFlush = true;
3169 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
3170 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
3171 }
3172
3173 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
3174 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
3175
3176 if (pVCpu->hmr0.s.fForceTLBFlush)
3177 {
3178 hmR0VmxFlushEpt(pVCpu, pVmcsInfo, pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.enmTlbFlushEpt);
3179 pVCpu->hmr0.s.fForceTLBFlush = false;
3180 }
3181}
3182
3183
3184/**
3185 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
3186 *
3187 * @param pHostCpu The HM physical-CPU structure.
3188 * @param pVCpu The cross context virtual CPU structure.
3189 *
3190 * @remarks Called with interrupts disabled.
3191 */
3192static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu)
3193{
3194 AssertPtr(pVCpu);
3195 AssertPtr(pHostCpu);
3196 Assert(pHostCpu->idCpu != NIL_RTCPUID);
3197 AssertMsg(pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID."));
3198 AssertMsg(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging"));
3199
3200 /*
3201 * Force a TLB flush for the first world switch if the current CPU differs from the one we
3202 * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID
3203 * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we
3204 * cannot reuse the current ASID anymore.
3205 */
3206 if ( pVCpu->hmr0.s.idLastCpu != pHostCpu->idCpu
3207 || pVCpu->hmr0.s.cTlbFlushes != pHostCpu->cTlbFlushes)
3208 {
3209 pVCpu->hmr0.s.fForceTLBFlush = true;
3210 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
3211 }
3212
3213 /* Check for explicit TLB flushes. */
3214 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
3215 {
3216 /*
3217 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see
3218 * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an
3219 * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to
3220         * include fExplicitFlush too) - an obscure corner case.
3221 */
3222 pVCpu->hmr0.s.fForceTLBFlush = true;
3223 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
3224 }
3225
3226 /* Check for TLB flushes while switching to/from a nested-guest. */
3227 if (pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb)
3228 {
3229 pVCpu->hmr0.s.fForceTLBFlush = true;
3230 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = false;
3231 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbNstGst);
3232 }
3233
3234 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3235 pVCpu->hmr0.s.idLastCpu = pHostCpu->idCpu;
3236 if (pVCpu->hmr0.s.fForceTLBFlush)
3237 {
3238 ++pHostCpu->uCurrentAsid;
3239 if (pHostCpu->uCurrentAsid >= g_uHmMaxAsid)
3240 {
3241 pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
3242 pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
3243 pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
3244 }
3245
3246 pVCpu->hmr0.s.fForceTLBFlush = false;
3247 pVCpu->hmr0.s.cTlbFlushes = pHostCpu->cTlbFlushes;
3248 pVCpu->hmr0.s.uCurrentAsid = pHostCpu->uCurrentAsid;
3249 if (pHostCpu->fFlushAsidBeforeUse)
3250 {
3251 if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT)
3252 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
3253 else if (pVM->hmr0.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS)
3254 {
3255 hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
3256 pHostCpu->fFlushAsidBeforeUse = false;
3257 }
3258 else
3259 {
3260 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
3261 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
3262 }
3263 }
3264 }
3265
3266 AssertMsg(pVCpu->hmr0.s.cTlbFlushes == pHostCpu->cTlbFlushes,
3267 ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hmr0.s.cTlbFlushes, pHostCpu->cTlbFlushes));
3268 AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < g_uHmMaxAsid,
3269 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu,
3270 pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hmr0.s.idLastCpu, pVCpu->hmr0.s.cTlbFlushes));
3271 AssertMsg(pVCpu->hmr0.s.uCurrentAsid >= 1 && pVCpu->hmr0.s.uCurrentAsid < g_uHmMaxAsid,
3272 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hmr0.s.uCurrentAsid));
3273
3274 int rc = VMXWriteVmcs16(VMX_VMCS16_VPID, pVCpu->hmr0.s.uCurrentAsid);
3275 AssertRC(rc);
3276}
3277
3278
3279/**
3280 * Flushes the guest TLB entry based on CPU capabilities.
3281 *
3282 * @param pHostCpu The HM physical-CPU structure.
3283 * @param pVCpu The cross context virtual CPU structure.
3284 * @param pVmcsInfo The VMCS info. object.
3285 *
3286 * @remarks Called with interrupts disabled.
3287 */
3288static void hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
3289{
3290#ifdef HMVMX_ALWAYS_FLUSH_TLB
3291 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
3292#endif
3293 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3294 switch (pVM->hmr0.s.vmx.enmTlbFlushType)
3295 {
3296 case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu, pVmcsInfo); break;
3297 case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu, pVmcsInfo); break;
3298 case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break;
3299 case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break;
3300 default:
3301 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
3302 break;
3303 }
3304 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
3305}
3306
3307
3308/**
3309 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
3310 * TLB entries from the host TLB before VM-entry.
3311 *
3312 * @returns VBox status code.
3313 * @param pVM The cross context VM structure.
3314 */
3315static int hmR0VmxSetupTaggedTlb(PVMCC pVM)
3316{
3317 /*
3318 * Determine optimal flush type for nested paging.
3319     * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up
3320 * unrestricted guest execution (see hmR3InitFinalizeR0()).
3321 */
3322 if (pVM->hmr0.s.fNestedPaging)
3323 {
3324 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
3325 {
3326 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
3327 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT;
3328 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
3329 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS;
3330 else
3331 {
3332             /* Shouldn't happen. EPT is supported but no suitable flush types are supported. */
3333 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
3334 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
3335 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3336 }
3337
3338 /* Make sure the write-back cacheable memory type for EPT is supported. */
3339 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_MEMTYPE_WB)))
3340 {
3341 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
3342 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
3343 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3344 }
3345
3346 /* EPT requires a page-walk length of 4. */
3347 if (RT_UNLIKELY(!(g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
3348 {
3349 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
3350 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
3351 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3352 }
3353 }
3354 else
3355 {
3356 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
3357 pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED;
3358 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
3359 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3360 }
3361 }
3362
3363 /*
3364 * Determine optimal flush type for VPID.
3365 */
3366 if (pVM->hmr0.s.vmx.fVpid)
3367 {
3368 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
3369 {
3370 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
3371 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT;
3372 else if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
3373 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS;
3374 else
3375 {
3376                 /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore the VPID capability. */
3377 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
3378 LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n"));
3379 if (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
3380 LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
3381 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
3382 pVM->hmr0.s.vmx.fVpid = false;
3383 }
3384 }
3385 else
3386 {
3387 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
3388             Log4Func(("VPID supported without INVVPID support. Ignoring VPID.\n"));
3389 pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED;
3390 pVM->hmr0.s.vmx.fVpid = false;
3391 }
3392 }
3393
3394 /*
3395 * Setup the handler for flushing tagged-TLBs.
3396 */
3397 if (pVM->hmr0.s.fNestedPaging && pVM->hmr0.s.vmx.fVpid)
3398 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID;
3399 else if (pVM->hmr0.s.fNestedPaging)
3400 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT;
3401 else if (pVM->hmr0.s.vmx.fVpid)
3402 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID;
3403 else
3404 pVM->hmr0.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE;
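    /* For instance, with both nested paging and VPID in use this selects VMXTLBFLUSHTYPE_EPT_VPID,
       so hmR0VmxFlushTaggedTlb() dispatches to hmR0VmxFlushTaggedTlbBoth() before VM-entry. */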
3405
3406
3407 /*
3408 * Copy out the result to ring-3.
3409 */
3410 pVM->hm.s.ForR3.vmx.fVpid = pVM->hmr0.s.vmx.fVpid;
3411 pVM->hm.s.ForR3.vmx.enmTlbFlushType = pVM->hmr0.s.vmx.enmTlbFlushType;
3412 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt;
3413 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid;
3414 return VINF_SUCCESS;
3415}
3416
3417
3418/**
3419 * Sets up the LBR MSR ranges based on the host CPU.
3420 *
3421 * @returns VBox status code.
3422 * @param pVM The cross context VM structure.
3423 */
3424static int hmR0VmxSetupLbrMsrRange(PVMCC pVM)
3425{
3426 Assert(pVM->hmr0.s.vmx.fLbr);
3427 uint32_t idLbrFromIpMsrFirst;
3428 uint32_t idLbrFromIpMsrLast;
3429 uint32_t idLbrToIpMsrFirst;
3430 uint32_t idLbrToIpMsrLast;
3431 uint32_t idLbrTosMsr;
3432
3433 /*
3434 * Determine the LBR MSRs supported for this host CPU family and model.
3435 *
3436 * See Intel spec. 17.4.8 "LBR Stack".
3437 * See Intel "Model-Specific Registers" spec.
3438 */
3439 uint32_t const uFamilyModel = (pVM->cpum.ro.HostFeatures.uFamily << 8)
3440 | pVM->cpum.ro.HostFeatures.uModel;
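    /* For example, a family 6, model 0x9E host CPU encodes as 0x069e and matches one of the
       cases below. */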
3441 switch (uFamilyModel)
3442 {
3443 case 0x0f01: case 0x0f02:
3444 idLbrFromIpMsrFirst = MSR_P4_LASTBRANCH_0;
3445 idLbrFromIpMsrLast = MSR_P4_LASTBRANCH_3;
3446 idLbrToIpMsrFirst = 0x0;
3447 idLbrToIpMsrLast = 0x0;
3448 idLbrTosMsr = MSR_P4_LASTBRANCH_TOS;
3449 break;
3450
3451 case 0x065c: case 0x065f: case 0x064e: case 0x065e: case 0x068e:
3452 case 0x069e: case 0x0655: case 0x0666: case 0x067a: case 0x0667:
3453 case 0x066a: case 0x066c: case 0x067d: case 0x067e:
3454 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
3455 idLbrFromIpMsrLast = MSR_LASTBRANCH_31_FROM_IP;
3456 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
3457 idLbrToIpMsrLast = MSR_LASTBRANCH_31_TO_IP;
3458 idLbrTosMsr = MSR_LASTBRANCH_TOS;
3459 break;
3460
3461 case 0x063d: case 0x0647: case 0x064f: case 0x0656: case 0x063c:
3462 case 0x0645: case 0x0646: case 0x063f: case 0x062a: case 0x062d:
3463 case 0x063a: case 0x063e: case 0x061a: case 0x061e: case 0x061f:
3464 case 0x062e: case 0x0625: case 0x062c: case 0x062f:
3465 idLbrFromIpMsrFirst = MSR_LASTBRANCH_0_FROM_IP;
3466 idLbrFromIpMsrLast = MSR_LASTBRANCH_15_FROM_IP;
3467 idLbrToIpMsrFirst = MSR_LASTBRANCH_0_TO_IP;
3468 idLbrToIpMsrLast = MSR_LASTBRANCH_15_TO_IP;
3469 idLbrTosMsr = MSR_LASTBRANCH_TOS;
3470 break;
3471
3472 case 0x0617: case 0x061d: case 0x060f:
3473 idLbrFromIpMsrFirst = MSR_CORE2_LASTBRANCH_0_FROM_IP;
3474 idLbrFromIpMsrLast = MSR_CORE2_LASTBRANCH_3_FROM_IP;
3475 idLbrToIpMsrFirst = MSR_CORE2_LASTBRANCH_0_TO_IP;
3476 idLbrToIpMsrLast = MSR_CORE2_LASTBRANCH_3_TO_IP;
3477 idLbrTosMsr = MSR_CORE2_LASTBRANCH_TOS;
3478 break;
3479
3480 /* Atom and related microarchitectures we don't care about:
3481 case 0x0637: case 0x064a: case 0x064c: case 0x064d: case 0x065a:
3482 case 0x065d: case 0x061c: case 0x0626: case 0x0627: case 0x0635:
3483 case 0x0636: */
3484 /* All other CPUs: */
3485 default:
3486 {
3487 LogRelFunc(("Could not determine LBR stack size for the CPU model %#x\n", uFamilyModel));
3488 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_UNKNOWN;
3489 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3490 }
3491 }
3492
3493 /*
3494 * Validate.
3495 */
3496 uint32_t const cLbrStack = idLbrFromIpMsrLast - idLbrFromIpMsrFirst + 1;
3497 PCVMCPU pVCpu0 = VMCC_GET_CPU_0(pVM);
3498 AssertCompile( RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr)
3499 == RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrToIpMsr));
3500 if (cLbrStack > RT_ELEMENTS(pVCpu0->hm.s.vmx.VmcsInfo.au64LbrFromIpMsr))
3501 {
3502 LogRelFunc(("LBR stack size of the CPU (%u) exceeds our buffer size\n", cLbrStack));
3503 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_LBR_STACK_SIZE_OVERFLOW;
3504 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3505 }
3506 NOREF(pVCpu0);
3507
3508 /*
3509     * Record the LBR info in the VM structure for later use.
3510 */
3511 pVM->hmr0.s.vmx.idLbrTosMsr = idLbrTosMsr;
3512
3513 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrFirst = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst = idLbrFromIpMsrFirst;
3514 pVM->hm.s.ForR3.vmx.idLbrFromIpMsrLast = pVM->hmr0.s.vmx.idLbrFromIpMsrLast = idLbrFromIpMsrLast;
3515
3516 pVM->hm.s.ForR3.vmx.idLbrToIpMsrFirst = pVM->hmr0.s.vmx.idLbrToIpMsrFirst = idLbrToIpMsrFirst;
3517 pVM->hm.s.ForR3.vmx.idLbrToIpMsrLast = pVM->hmr0.s.vmx.idLbrToIpMsrLast = idLbrToIpMsrLast;
3518 return VINF_SUCCESS;
3519}
3520
3521
3522#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3523/**
3524 * Sets up the shadow VMCS fields arrays.
3525 *
3526 * This function builds arrays of VMCS fields to sync the shadow VMCS later while
3527 * executing the guest.
3528 *
3529 * @returns VBox status code.
3530 * @param pVM The cross context VM structure.
3531 */
3532static int hmR0VmxSetupShadowVmcsFieldsArrays(PVMCC pVM)
3533{
3534 /*
3535 * Paranoia. Ensure we haven't exposed the VMWRITE-All VMX feature to the guest
3536 * when the host does not support it.
3537 */
3538 bool const fGstVmwriteAll = pVM->cpum.ro.GuestFeatures.fVmxVmwriteAll;
3539 if ( !fGstVmwriteAll
3540 || (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL))
3541 { /* likely. */ }
3542 else
3543 {
3544 LogRelFunc(("VMX VMWRITE-All feature exposed to the guest but host CPU does not support it!\n"));
3545 VMCC_GET_CPU_0(pVM)->hm.s.u32HMError = VMX_UFC_GST_HOST_VMWRITE_ALL;
3546 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3547 }
3548
3549 uint32_t const cVmcsFields = RT_ELEMENTS(g_aVmcsFields);
3550 uint32_t cRwFields = 0;
3551 uint32_t cRoFields = 0;
3552 for (uint32_t i = 0; i < cVmcsFields; i++)
3553 {
3554 VMXVMCSFIELD VmcsField;
3555 VmcsField.u = g_aVmcsFields[i];
3556
3557 /*
3558 * We will be writing "FULL" (64-bit) fields while syncing the shadow VMCS.
3559 * Therefore, "HIGH" (32-bit portion of 64-bit) fields must not be included
3560 * in the shadow VMCS fields array as they would be redundant.
3561 *
3562 * If the VMCS field depends on a CPU feature that is not exposed to the guest,
3563 * we must not include it in the shadow VMCS fields array. Guests attempting to
3564 * VMREAD/VMWRITE such VMCS fields would cause a VM-exit and we shall emulate
3565 * the required behavior.
3566 */
3567 if ( VmcsField.n.fAccessType == VMX_VMCSFIELD_ACCESS_FULL
3568 && CPUMIsGuestVmxVmcsFieldValid(pVM, VmcsField.u))
3569 {
3570 /*
3571 * Read-only fields are placed in a separate array so that while syncing shadow
3572 * VMCS fields later (which is more performance critical) we can avoid branches.
3573 *
3574 * However, if the guest can write to all fields (including read-only fields),
3575             * we treat it as a read/write field. Otherwise, writing to these fields would
3576 * cause a VMWRITE instruction error while syncing the shadow VMCS.
3577 */
3578 if ( fGstVmwriteAll
3579 || !VMXIsVmcsFieldReadOnly(VmcsField.u))
3580 pVM->hmr0.s.vmx.paShadowVmcsFields[cRwFields++] = VmcsField.u;
3581 else
3582 pVM->hmr0.s.vmx.paShadowVmcsRoFields[cRoFields++] = VmcsField.u;
3583 }
3584 }
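    /* At this point paShadowVmcsFields[] holds the fields we will sync via VMWRITE (read/write
       fields, plus read-only fields when VMWRITE-all is exposed) and paShadowVmcsRoFields[] holds
       the remaining read-only fields. */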
3585
3586 /* Update the counts. */
3587 pVM->hmr0.s.vmx.cShadowVmcsFields = cRwFields;
3588 pVM->hmr0.s.vmx.cShadowVmcsRoFields = cRoFields;
3589 return VINF_SUCCESS;
3590}
3591
3592
3593/**
3594 * Sets up the VMREAD and VMWRITE bitmaps.
3595 *
3596 * @param pVM The cross context VM structure.
3597 */
3598static void hmR0VmxSetupVmreadVmwriteBitmaps(PVMCC pVM)
3599{
3600 /*
3601 * By default, ensure guest attempts to access any VMCS fields cause VM-exits.
3602 */
3603 uint32_t const cbBitmap = X86_PAGE_4K_SIZE;
3604 uint8_t *pbVmreadBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmreadBitmap;
3605 uint8_t *pbVmwriteBitmap = (uint8_t *)pVM->hmr0.s.vmx.pvVmwriteBitmap;
3606 ASMMemFill32(pbVmreadBitmap, cbBitmap, UINT32_C(0xffffffff));
3607 ASMMemFill32(pbVmwriteBitmap, cbBitmap, UINT32_C(0xffffffff));
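    /* Note: bits 14:0 of a VMCS field encoding index a bit in the corresponding 4K bitmap
       (byte = encoding >> 3, bit = encoding & 7); a set bit makes the VMREAD/VMWRITE VM-exit. */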
3608
3609 /*
3610 * Skip intercepting VMREAD/VMWRITE to guest read/write fields in the
3611 * VMREAD and VMWRITE bitmaps.
3612 */
3613 {
3614 uint32_t const *paShadowVmcsFields = pVM->hmr0.s.vmx.paShadowVmcsFields;
3615 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
3616 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
3617 {
3618 uint32_t const uVmcsField = paShadowVmcsFields[i];
3619 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
3620 Assert(uVmcsField >> 3 < cbBitmap);
3621 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
3622 ASMBitClear(pbVmwriteBitmap + (uVmcsField >> 3), uVmcsField & 7);
3623 }
3624 }
3625
3626 /*
3627 * Skip intercepting VMREAD for guest read-only fields in the VMREAD bitmap
3628 * if the host supports VMWRITE to all supported VMCS fields.
3629 */
3630 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
3631 {
3632 uint32_t const *paShadowVmcsRoFields = pVM->hmr0.s.vmx.paShadowVmcsRoFields;
3633 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
3634 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
3635 {
3636 uint32_t const uVmcsField = paShadowVmcsRoFields[i];
3637 Assert(!(uVmcsField & VMX_VMCSFIELD_RSVD_MASK));
3638 Assert(uVmcsField >> 3 < cbBitmap);
3639 ASMBitClear(pbVmreadBitmap + (uVmcsField >> 3), uVmcsField & 7);
3640 }
3641 }
3642}
3643#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
3644
3645
3646/**
3647 * Sets up the virtual-APIC page address for the VMCS.
3648 *
3649 * @param pVmcsInfo The VMCS info. object.
3650 */
3651DECLINLINE(void) hmR0VmxSetupVmcsVirtApicAddr(PCVMXVMCSINFO pVmcsInfo)
3652{
3653 RTHCPHYS const HCPhysVirtApic = pVmcsInfo->HCPhysVirtApic;
3654 Assert(HCPhysVirtApic != NIL_RTHCPHYS);
3655 Assert(!(HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
3656 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
3657 AssertRC(rc);
3658}
3659
3660
3661/**
3662 * Sets up the MSR-bitmap address for the VMCS.
3663 *
3664 * @param pVmcsInfo The VMCS info. object.
3665 */
3666DECLINLINE(void) hmR0VmxSetupVmcsMsrBitmapAddr(PCVMXVMCSINFO pVmcsInfo)
3667{
3668 RTHCPHYS const HCPhysMsrBitmap = pVmcsInfo->HCPhysMsrBitmap;
3669 Assert(HCPhysMsrBitmap != NIL_RTHCPHYS);
3670 Assert(!(HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
3671 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, HCPhysMsrBitmap);
3672 AssertRC(rc);
3673}
3674
3675
3676/**
3677 * Sets up the APIC-access page address for the VMCS.
3678 *
3679 * @param pVCpu The cross context virtual CPU structure.
3680 */
3681DECLINLINE(void) hmR0VmxSetupVmcsApicAccessAddr(PVMCPUCC pVCpu)
3682{
3683 RTHCPHYS const HCPhysApicAccess = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysApicAccess;
3684 Assert(HCPhysApicAccess != NIL_RTHCPHYS);
3685 Assert(!(HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
3686 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
3687 AssertRC(rc);
3688}
3689
3690#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
3691
3692/**
3693 * Sets up the VMREAD bitmap address for the VMCS.
3694 *
3695 * @param pVCpu The cross context virtual CPU structure.
3696 */
3697DECLINLINE(void) hmR0VmxSetupVmcsVmreadBitmapAddr(PVMCPUCC pVCpu)
3698{
3699 RTHCPHYS const HCPhysVmreadBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmreadBitmap;
3700 Assert(HCPhysVmreadBitmap != NIL_RTHCPHYS);
3701 Assert(!(HCPhysVmreadBitmap & 0xfff)); /* Bits 11:0 MBZ. */
3702 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMREAD_BITMAP_FULL, HCPhysVmreadBitmap);
3703 AssertRC(rc);
3704}
3705
3706
3707/**
3708 * Sets up the VMWRITE bitmap address for the VMCS.
3709 *
3710 * @param pVCpu The cross context virtual CPU structure.
3711 */
3712DECLINLINE(void) hmR0VmxSetupVmcsVmwriteBitmapAddr(PVMCPUCC pVCpu)
3713{
3714 RTHCPHYS const HCPhysVmwriteBitmap = pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.HCPhysVmwriteBitmap;
3715 Assert(HCPhysVmwriteBitmap != NIL_RTHCPHYS);
3716 Assert(!(HCPhysVmwriteBitmap & 0xfff)); /* Bits 11:0 MBZ. */
3717 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_VMWRITE_BITMAP_FULL, HCPhysVmwriteBitmap);
3718 AssertRC(rc);
3719}
3720
3721#endif
3722
3723/**
3724 * Sets up the VM-entry MSR load, VM-exit MSR-store and VM-exit MSR-load addresses
3725 * in the VMCS.
3726 *
3727 * @returns VBox status code.
3728 * @param pVmcsInfo The VMCS info. object.
3729 */
3730DECLINLINE(int) hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(PVMXVMCSINFO pVmcsInfo)
3731{
3732 RTHCPHYS const HCPhysGuestMsrLoad = pVmcsInfo->HCPhysGuestMsrLoad;
3733 Assert(HCPhysGuestMsrLoad != NIL_RTHCPHYS);
3734 Assert(!(HCPhysGuestMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
3735
3736 RTHCPHYS const HCPhysGuestMsrStore = pVmcsInfo->HCPhysGuestMsrStore;
3737 Assert(HCPhysGuestMsrStore != NIL_RTHCPHYS);
3738 Assert(!(HCPhysGuestMsrStore & 0xf)); /* Bits 3:0 MBZ. */
3739
3740 RTHCPHYS const HCPhysHostMsrLoad = pVmcsInfo->HCPhysHostMsrLoad;
3741 Assert(HCPhysHostMsrLoad != NIL_RTHCPHYS);
3742 Assert(!(HCPhysHostMsrLoad & 0xf)); /* Bits 3:0 MBZ. */
3743
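    /* Note: each area is a table of 16-byte entries (32-bit MSR index, 32-bit reserved, 64-bit
       value), which is why the physical addresses must be 16-byte aligned (bits 3:0 zero). */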
3744 int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, HCPhysGuestMsrLoad); AssertRC(rc);
3745 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, HCPhysGuestMsrStore); AssertRC(rc);
3746 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, HCPhysHostMsrLoad); AssertRC(rc);
3747 return VINF_SUCCESS;
3748}
3749
3750
3751/**
3752 * Sets up MSR permissions in the MSR bitmap of a VMCS info. object.
3753 *
3754 * @param pVCpu The cross context virtual CPU structure.
3755 * @param pVmcsInfo The VMCS info. object.
3756 */
3757static void hmR0VmxSetupVmcsMsrPermissions(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
3758{
3759 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS);
3760
3761 /*
3762 * By default, ensure guest attempts to access any MSR cause VM-exits.
3763 * This shall later be relaxed for specific MSRs as necessary.
3764 *
3765 * Note: For nested-guests, the entire bitmap will be merged prior to
3766 * executing the nested-guest using hardware-assisted VMX and hence there
3767 * is no need to perform this operation. See hmR0VmxMergeMsrBitmapNested.
3768 */
3769 Assert(pVmcsInfo->pvMsrBitmap);
3770 ASMMemFill32(pVmcsInfo->pvMsrBitmap, X86_PAGE_4K_SIZE, UINT32_C(0xffffffff));
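    /*
     * Note: the 4K MSR bitmap is split into four 1K regions: read-low (MSRs 0x00000000..0x00001fff),
     * read-high (0xc0000000..0xc0001fff), write-low and write-high; a set bit intercepts the access.
     */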
3771
3772 /*
3773 * The guest can access the following MSRs (read, write) without causing
3774 * VM-exits; they are loaded/stored automatically using fields in the VMCS.
3775 */
3776 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3777 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_CS, VMXMSRPM_ALLOW_RD_WR);
3778 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_ESP, VMXMSRPM_ALLOW_RD_WR);
3779 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SYSENTER_EIP, VMXMSRPM_ALLOW_RD_WR);
3780 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
3781 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_FS_BASE, VMXMSRPM_ALLOW_RD_WR);
3782
3783 /*
3784     * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and have no state
3785     * associated with them. We never need to intercept access (writes need to be
3786 * executed without causing a VM-exit, reads will #GP fault anyway).
3787 *
3788 * The IA32_SPEC_CTRL MSR is read/write and has state. We allow the guest to
3789 * read/write them. We swap the guest/host MSR value using the
3790 * auto-load/store MSR area.
3791 */
3792 if (pVM->cpum.ro.GuestFeatures.fIbpb)
3793 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_PRED_CMD, VMXMSRPM_ALLOW_RD_WR);
3794 if (pVM->cpum.ro.GuestFeatures.fFlushCmd)
3795 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_FLUSH_CMD, VMXMSRPM_ALLOW_RD_WR);
3796 if (pVM->cpum.ro.GuestFeatures.fIbrs)
3797 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_IA32_SPEC_CTRL, VMXMSRPM_ALLOW_RD_WR);
3798
3799 /*
3800 * Allow full read/write access for the following MSRs (mandatory for VT-x)
3801 * required for 64-bit guests.
3802 */
3803 if (pVM->hmr0.s.fAllow64BitGuests)
3804 {
3805 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_LSTAR, VMXMSRPM_ALLOW_RD_WR);
3806 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K6_STAR, VMXMSRPM_ALLOW_RD_WR);
3807 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_SF_MASK, VMXMSRPM_ALLOW_RD_WR);
3808 hmR0VmxSetMsrPermission(pVCpu, pVmcsInfo, false, MSR_K8_KERNEL_GS_BASE, VMXMSRPM_ALLOW_RD_WR);
3809 }
3810
3811 /*
3812 * IA32_EFER MSR is always intercepted, see @bugref{9180#c37}.
3813 */
3814#ifdef VBOX_STRICT
3815 Assert(pVmcsInfo->pvMsrBitmap);
3816 uint32_t const fMsrpmEfer = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, MSR_K6_EFER);
3817 Assert(fMsrpmEfer == VMXMSRPM_EXIT_RD_WR);
3818#endif
3819}
3820
3821
3822/**
3823 * Sets up pin-based VM-execution controls in the VMCS.
3824 *
3825 * @returns VBox status code.
3826 * @param pVCpu The cross context virtual CPU structure.
3827 * @param pVmcsInfo The VMCS info. object.
3828 */
3829static int hmR0VmxSetupVmcsPinCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
3830{
3831 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3832 uint32_t fVal = g_HmMsrs.u.vmx.PinCtls.n.allowed0; /* Bits set here must always be set. */
3833 uint32_t const fZap = g_HmMsrs.u.vmx.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
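    /* Note: a bit set in allowed0 must be 1 in the control and a bit clear in allowed1 must be 0;
       the "(fVal & fZap) != fVal" check below therefore catches attempts to set a control the
       CPU does not support. */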
3834
3835 fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
3836 | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
3837
3838 if (g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI)
3839 fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
3840
3841 /* Enable the VMX-preemption timer. */
3842 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
3843 {
3844 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER);
3845 fVal |= VMX_PIN_CTLS_PREEMPT_TIMER;
3846 }
3847
3848#if 0
3849 /* Enable posted-interrupt processing. */
3850 if (pVM->hm.s.fPostedIntrs)
3851 {
3852 Assert(g_HmMsrs.u.vmx.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT);
3853 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT);
3854 fVal |= VMX_PIN_CTLS_POSTED_INT;
3855 }
3856#endif
3857
3858 if ((fVal & fZap) != fVal)
3859 {
3860 LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
3861 g_HmMsrs.u.vmx.PinCtls.n.allowed0, fVal, fZap));
3862 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
3863 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3864 }
3865
3866 /* Commit it to the VMCS and update our cache. */
3867 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal);
3868 AssertRC(rc);
3869 pVmcsInfo->u32PinCtls = fVal;
3870
3871 return VINF_SUCCESS;
3872}
3873
3874
3875/**
3876 * Sets up secondary processor-based VM-execution controls in the VMCS.
3877 *
3878 * @returns VBox status code.
3879 * @param pVCpu The cross context virtual CPU structure.
3880 * @param pVmcsInfo The VMCS info. object.
3881 */
3882static int hmR0VmxSetupVmcsProcCtls2(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
3883{
3884 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3885 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */
3886 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
3887
3888 /* WBINVD causes a VM-exit. */
3889 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT)
3890 fVal |= VMX_PROC_CTLS2_WBINVD_EXIT;
3891
3892 /* Enable EPT (aka nested-paging). */
3893 if (pVM->hmr0.s.fNestedPaging)
3894 fVal |= VMX_PROC_CTLS2_EPT;
3895
3896     /* Enable the INVPCID instruction if we expose it to the guest and it is supported
3897        by the hardware. Without this, a guest executing INVPCID would cause a #UD. */
3898 if ( pVM->cpum.ro.GuestFeatures.fInvpcid
3899 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID))
3900 fVal |= VMX_PROC_CTLS2_INVPCID;
3901
3902 /* Enable VPID. */
3903 if (pVM->hmr0.s.vmx.fVpid)
3904 fVal |= VMX_PROC_CTLS2_VPID;
3905
3906 /* Enable unrestricted guest execution. */
3907 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
3908 fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST;
3909
3910#if 0
3911 if (pVM->hm.s.fVirtApicRegs)
3912 {
3913 /* Enable APIC-register virtualization. */
3914 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT);
3915 fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT;
3916
3917 /* Enable virtual-interrupt delivery. */
3918 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY);
3919 fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY;
3920 }
3921#endif
3922
3923     /* Virtualize APIC accesses if supported by the CPU. The virtual-APIC page is
3924 where the TPR shadow resides. */
3925 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
3926 * done dynamically. */
3927 if (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
3928 {
3929 fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS;
3930 hmR0VmxSetupVmcsApicAccessAddr(pVCpu);
3931 }
3932
3933     /* Enable the RDTSCP instruction if we expose it to the guest and it is supported
3934        by the hardware. Without this, a guest executing RDTSCP would cause a #UD. */
3935 if ( pVM->cpum.ro.GuestFeatures.fRdTscP
3936 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP))
3937 fVal |= VMX_PROC_CTLS2_RDTSCP;
3938
3939 /* Enable Pause-Loop exiting. */
3940 if ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT)
3941 && pVM->hm.s.vmx.cPleGapTicks
3942 && pVM->hm.s.vmx.cPleWindowTicks)
3943 {
3944 fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT;
3945
3946 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); AssertRC(rc);
3947 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRC(rc);
3948 }
3949
3950 if ((fVal & fZap) != fVal)
3951 {
3952 LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
3953 g_HmMsrs.u.vmx.ProcCtls2.n.allowed0, fVal, fZap));
3954 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
3955 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3956 }
3957
3958 /* Commit it to the VMCS and update our cache. */
3959 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal);
3960 AssertRC(rc);
3961 pVmcsInfo->u32ProcCtls2 = fVal;
3962
3963 return VINF_SUCCESS;
3964}
3965
3966
3967/**
3968 * Sets up processor-based VM-execution controls in the VMCS.
3969 *
3970 * @returns VBox status code.
3971 * @param pVCpu The cross context virtual CPU structure.
3972 * @param pVmcsInfo The VMCS info. object.
3973 */
3974static int hmR0VmxSetupVmcsProcCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
3975{
3976 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
3977 uint32_t fVal = g_HmMsrs.u.vmx.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
3978 uint32_t const fZap = g_HmMsrs.u.vmx.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
3979
3980 fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */
3981 | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
3982 | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
3983 | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
3984 | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */
3985 | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */
3986 | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
3987
3988     /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later; check that it is not -always- required to be set or cleared. */
3989 if ( !(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT)
3990 || (g_HmMsrs.u.vmx.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT))
3991 {
3992 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
3993 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3994 }
3995
3996 /* Without nested paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
3997 if (!pVM->hmr0.s.fNestedPaging)
3998 {
3999 Assert(!pVM->hmr0.s.vmx.fUnrestrictedGuest);
4000 fVal |= VMX_PROC_CTLS_INVLPG_EXIT
4001 | VMX_PROC_CTLS_CR3_LOAD_EXIT
4002 | VMX_PROC_CTLS_CR3_STORE_EXIT;
4003 }
4004
4005 /* Use TPR shadowing if supported by the CPU. */
4006 if ( PDMHasApic(pVM)
4007 && (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW))
4008 {
4009 fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
4010 /* CR8 writes cause a VM-exit based on TPR threshold. */
4011 Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT));
4012 Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT));
4013 hmR0VmxSetupVmcsVirtApicAddr(pVmcsInfo);
4014 }
4015 else
4016 {
4017 /* Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is
4018 invalid on 32-bit Intel CPUs. Set this control only for 64-bit guests. */
4019 if (pVM->hmr0.s.fAllow64BitGuests)
4020 fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
4021 | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
4022 }
4023
4024 /* Use MSR-bitmaps if supported by the CPU. */
4025 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
4026 {
4027 fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS;
4028 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
4029 }
4030
4031 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
4032 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
4033 fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
4034
4035 if ((fVal & fZap) != fVal)
4036 {
4037 LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
4038 g_HmMsrs.u.vmx.ProcCtls.n.allowed0, fVal, fZap));
4039 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
4040 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
4041 }
4042
4043 /* Commit it to the VMCS and update our cache. */
4044 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal);
4045 AssertRC(rc);
4046 pVmcsInfo->u32ProcCtls = fVal;
4047
4048 /* Set up MSR permissions that don't change through the lifetime of the VM. */
4049 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
4050 hmR0VmxSetupVmcsMsrPermissions(pVCpu, pVmcsInfo);
4051
4052 /* Set up secondary processor-based VM-execution controls if the CPU supports it. */
4053 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS)
4054 return hmR0VmxSetupVmcsProcCtls2(pVCpu, pVmcsInfo);
4055
4056 /* Sanity check, should not really happen. */
4057 if (RT_LIKELY(!pVM->hmr0.s.vmx.fUnrestrictedGuest))
4058 { /* likely */ }
4059 else
4060 {
4061 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
4062 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
4063 }
4064
4065 /* Old CPUs without secondary processor-based VM-execution controls would end up here. */
4066 return VINF_SUCCESS;
4067}
4068
4069
4070/**
4071 * Sets up miscellaneous (everything other than Pin, Processor and secondary
4072 * Processor-based VM-execution) control fields in the VMCS.
4073 *
4074 * @returns VBox status code.
4075 * @param pVCpu The cross context virtual CPU structure.
4076 * @param pVmcsInfo The VMCS info. object.
4077 */
4078static int hmR0VmxSetupVmcsMiscCtls(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
4079{
4080#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4081 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
4082 {
4083 hmR0VmxSetupVmcsVmreadBitmapAddr(pVCpu);
4084 hmR0VmxSetupVmcsVmwriteBitmapAddr(pVCpu);
4085 }
4086#endif
4087
4088 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
4089 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
4090 AssertRC(rc);
4091
4092 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
4093 if (RT_SUCCESS(rc))
4094 {
4095 uint64_t const u64Cr0Mask = hmR0VmxGetFixedCr0Mask(pVCpu);
4096 uint64_t const u64Cr4Mask = hmR0VmxGetFixedCr4Mask(pVCpu);
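        /* Note: bits set in the CR0/CR4 guest/host masks are owned by the host; guest reads of
           those bits return the read-shadow values and guest attempts to modify them cause
           VM-exits. */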
4097
4098 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask); AssertRC(rc);
4099 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask); AssertRC(rc);
4100
4101 pVmcsInfo->u64Cr0Mask = u64Cr0Mask;
4102 pVmcsInfo->u64Cr4Mask = u64Cr4Mask;
4103
4104 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fLbr)
4105 {
4106 rc = VMXWriteVmcsNw(VMX_VMCS64_GUEST_DEBUGCTL_FULL, MSR_IA32_DEBUGCTL_LBR);
4107 AssertRC(rc);
4108 }
4109 return VINF_SUCCESS;
4110 }
4111 else
4112 LogRelFunc(("Failed to initialize VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
4113 return rc;
4114}
4115
4116
4117/**
4118 * Sets up the initial exception bitmap in the VMCS based on static conditions.
4119 *
4120 * We shall set up those exception intercepts that don't change during the
4121 * lifetime of the VM here. The rest are done dynamically while loading the
4122 * guest state.
4123 *
4124 * @param pVCpu The cross context virtual CPU structure.
4125 * @param pVmcsInfo The VMCS info. object.
4126 */
4127static void hmR0VmxSetupVmcsXcptBitmap(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
4128{
4129 /*
4130 * The following exceptions are always intercepted:
4131 *
4132 * #AC - To prevent the guest from hanging the CPU and for dealing with
4133 * split-lock detecting host configs.
4134 * #DB - To maintain the DR6 state even when intercepting DRx reads/writes and
4135 * recursive #DBs can cause a CPU hang.
4136 * #PF - To sync our shadow page tables when nested-paging is not used.
4137 */
4138 bool const fNestedPaging = pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging;
4139 uint32_t const uXcptBitmap = RT_BIT(X86_XCPT_AC)
4140 | RT_BIT(X86_XCPT_DB)
4141 | (fNestedPaging ? 0 : RT_BIT(X86_XCPT_PF));
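    /* Note: bit n of the exception bitmap intercepts exception vector n; e.g. with nested paging
       the #PF bit (14) stays clear so guest page faults do not cause VM-exits. */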
4142
4143 /* Commit it to the VMCS. */
4144 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
4145 AssertRC(rc);
4146
4147 /* Update our cache of the exception bitmap. */
4148 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
4149}
4150
4151
4152#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4153/**
4154 * Sets up the VMCS for executing a nested-guest using hardware-assisted VMX.
4155 *
4156 * @returns VBox status code.
4157 * @param pVmcsInfo The VMCS info. object.
4158 */
4159static int hmR0VmxSetupVmcsCtlsNested(PVMXVMCSINFO pVmcsInfo)
4160{
4161 Assert(pVmcsInfo->u64VmcsLinkPtr == NIL_RTHCPHYS);
4162 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS);
4163 AssertRC(rc);
4164
4165 rc = hmR0VmxSetupVmcsAutoLoadStoreMsrAddrs(pVmcsInfo);
4166 if (RT_SUCCESS(rc))
4167 {
4168 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS)
4169 hmR0VmxSetupVmcsMsrBitmapAddr(pVmcsInfo);
4170
4171 /* Paranoia - We've not yet initialized these, they shall be done while merging the VMCS. */
4172 Assert(!pVmcsInfo->u64Cr0Mask);
4173 Assert(!pVmcsInfo->u64Cr4Mask);
4174 return VINF_SUCCESS;
4175 }
4176     LogRelFunc(("Failed to set up the nested-guest VMCS auto-load/store MSR addresses. rc=%Rrc\n", rc));
4177 return rc;
4178}
4179#endif
4180
4181
4182/**
4183 * Sets pfnStartVm to the best suited variant.
4184 *
4185 * This must be called whenever anything changes relative to the hmR0VmxStartVm
4186 * variant selection:
4187 * - pVCpu->hm.s.fLoadSaveGuestXcr0
4188 * - HM_WSF_IBPB_ENTRY in pVCpu->hmr0.s.fWorldSwitcher
4189 * - HM_WSF_IBPB_EXIT in pVCpu->hmr0.s.fWorldSwitcher
4190 * - Perhaps: CPUMIsGuestFPUStateActive() (windows only)
4191 * - Perhaps: CPUMCTX.fXStateMask (windows only)
4192 *
4193 * We currently ASSUME that neither HM_WSF_IBPB_ENTRY nor HM_WSF_IBPB_EXIT
4194 * can be changed at runtime.
4195 */
4196static void hmR0VmxUpdateStartVmFunction(PVMCPUCC pVCpu)
4197{
4198 static const struct CLANGWORKAROUND { PFNHMVMXSTARTVM pfn; } s_aHmR0VmxStartVmFunctions[] =
4199 {
4200 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
4201 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
4202 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
4203 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit },
4204 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
4205 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
4206 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
4207 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_SansIbpbExit },
4208 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
4209 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
4210 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
4211 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_SansIbpbExit },
4212 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
4213 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
4214 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
4215 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_SansIbpbExit },
4216 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
4217 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
4218 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
4219 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_WithIbpbExit },
4220 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
4221 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
4222 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
4223 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_SansMdsEntry_WithIbpbExit },
4224 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
4225 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
4226 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
4227 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_WithMdsEntry_WithIbpbExit },
4228 { hmR0VmxStartVm_SansXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
4229 { hmR0VmxStartVm_WithXcr0_SansIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
4230 { hmR0VmxStartVm_SansXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
4231 { hmR0VmxStartVm_WithXcr0_WithIbpbEntry_WithL1dEntry_WithMdsEntry_WithIbpbExit },
4232 };
4233 uintptr_t const idx = (pVCpu->hmr0.s.fLoadSaveGuestXcr0 ? 1 : 0)
4234 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_ENTRY ? 2 : 0)
4235 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_ENTRY ? 4 : 0)
4236 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_ENTRY ? 8 : 0)
4237 | (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_IBPB_EXIT ? 16 : 0);
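    /* Example: fLoadSaveGuestXcr0 together with HM_WSF_IBPB_ENTRY (and nothing else) yields
       idx = 1 | 2 = 3, i.e. hmR0VmxStartVm_WithXcr0_WithIbpbEntry_SansL1dEntry_SansMdsEntry_SansIbpbExit. */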
4238 PFNHMVMXSTARTVM const pfnStartVm = s_aHmR0VmxStartVmFunctions[idx].pfn;
4239 if (pVCpu->hmr0.s.vmx.pfnStartVm != pfnStartVm)
4240 pVCpu->hmr0.s.vmx.pfnStartVm = pfnStartVm;
4241}
4242
4243
4244/**
4245 * Selector FNHMSVMVMRUN implementation.
4246 * Selector FNHMVMXSTARTVM implementation.
4247static DECLCALLBACK(int) hmR0VmxStartVmSelector(PVMXVMCSINFO pVmcsInfo, PVMCPUCC pVCpu, bool fResume)
4248{
4249 hmR0VmxUpdateStartVmFunction(pVCpu);
4250 return pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResume);
4251}
4252
4253
4254/**
4255 * Sets up the VMCS for executing a guest (or nested-guest) using hardware-assisted
4256 * VMX.
4257 *
4258 * @returns VBox status code.
4259 * @param pVCpu The cross context virtual CPU structure.
4260 * @param pVmcsInfo The VMCS info. object.
4261 * @param fIsNstGstVmcs Whether this is a nested-guest VMCS.
4262 */
4263static int hmR0VmxSetupVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, bool fIsNstGstVmcs)
4264{
4265 Assert(pVmcsInfo->pvVmcs);
4266 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4267
4268 /* Set the CPU specified revision identifier at the beginning of the VMCS structure. */
4269 *(uint32_t *)pVmcsInfo->pvVmcs = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
4270 const char * const pszVmcs = fIsNstGstVmcs ? "nested-guest VMCS" : "guest VMCS";
4271
4272 LogFlowFunc(("\n"));
4273
4274 /*
4275 * Initialize the VMCS using VMCLEAR before loading the VMCS.
4276 * See Intel spec. 31.6 "Preparation And Launching A Virtual Machine".
4277 */
4278 int rc = hmR0VmxClearVmcs(pVmcsInfo);
4279 if (RT_SUCCESS(rc))
4280 {
4281 rc = hmR0VmxLoadVmcs(pVmcsInfo);
4282 if (RT_SUCCESS(rc))
4283 {
4284 /*
4285 * Initialize the hardware-assisted VMX execution handler for guest and nested-guest VMCS.
4286 * The host is always 64-bit since we no longer support 32-bit hosts.
4287 * Currently we have just a single handler for all guest modes as well, see @bugref{6208#c73}.
4288 */
4289 if (!fIsNstGstVmcs)
4290 {
4291 rc = hmR0VmxSetupVmcsPinCtls(pVCpu, pVmcsInfo);
4292 if (RT_SUCCESS(rc))
4293 {
4294 rc = hmR0VmxSetupVmcsProcCtls(pVCpu, pVmcsInfo);
4295 if (RT_SUCCESS(rc))
4296 {
4297 rc = hmR0VmxSetupVmcsMiscCtls(pVCpu, pVmcsInfo);
4298 if (RT_SUCCESS(rc))
4299 {
4300 hmR0VmxSetupVmcsXcptBitmap(pVCpu, pVmcsInfo);
4301#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4302 /*
4303 * If a shadow VMCS is allocated for the VMCS info. object, initialize the
4304 * VMCS revision ID and shadow VMCS indicator bit. Also, clear the VMCS
4305 * making it fit for use when VMCS shadowing is later enabled.
4306 */
4307 if (pVmcsInfo->pvShadowVmcs)
4308 {
4309 VMXVMCSREVID VmcsRevId;
4310 VmcsRevId.u = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID);
4311 VmcsRevId.n.fIsShadowVmcs = 1;
4312 *(uint32_t *)pVmcsInfo->pvShadowVmcs = VmcsRevId.u;
4313 rc = hmR0VmxClearShadowVmcs(pVmcsInfo);
4314 if (RT_SUCCESS(rc))
4315 { /* likely */ }
4316 else
4317 LogRelFunc(("Failed to initialize shadow VMCS. rc=%Rrc\n", rc));
4318 }
4319#endif
4320 }
4321 else
4322 LogRelFunc(("Failed to setup miscellaneous controls. rc=%Rrc\n", rc));
4323 }
4324 else
4325 LogRelFunc(("Failed to setup processor-based VM-execution controls. rc=%Rrc\n", rc));
4326 }
4327 else
4328 LogRelFunc(("Failed to setup pin-based controls. rc=%Rrc\n", rc));
4329 }
4330 else
4331 {
4332#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4333 rc = hmR0VmxSetupVmcsCtlsNested(pVmcsInfo);
4334 if (RT_SUCCESS(rc))
4335 { /* likely */ }
4336 else
4337 LogRelFunc(("Failed to initialize nested-guest VMCS. rc=%Rrc\n", rc));
4338#else
4339 AssertFailed();
4340#endif
4341 }
4342 }
4343 else
4344             LogRelFunc(("Failed to load the %s. rc=%Rrc\n", pszVmcs, rc));
4345 }
4346 else
4347         LogRelFunc(("Failed to clear the %s. rc=%Rrc\n", pszVmcs, rc));
4348
4349 /* Sync any CPU internal VMCS data back into our VMCS in memory. */
4350 if (RT_SUCCESS(rc))
4351 {
4352 rc = hmR0VmxClearVmcs(pVmcsInfo);
4353 if (RT_SUCCESS(rc))
4354 { /* likely */ }
4355 else
4356             LogRelFunc(("Failed to clear the %s post setup. rc=%Rrc\n", pszVmcs, rc));
4357 }
4358
4359 /*
4360 * Update the last-error record both for failures and success, so we
4361 * can propagate the status code back to ring-3 for diagnostics.
4362 */
4363 hmR0VmxUpdateErrorRecord(pVCpu, rc);
4364 NOREF(pszVmcs);
4365 return rc;
4366}
4367
4368
4369/**
4370 * Does global VT-x initialization (called during module initialization).
4371 *
4372 * @returns VBox status code.
4373 */
4374VMMR0DECL(int) VMXR0GlobalInit(void)
4375{
4376#ifdef HMVMX_USE_FUNCTION_TABLE
4377 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_aVMExitHandlers));
4378# ifdef VBOX_STRICT
4379 for (unsigned i = 0; i < RT_ELEMENTS(g_aVMExitHandlers); i++)
4380 Assert(g_aVMExitHandlers[i].pfn);
4381# endif
4382#endif
4383 return VINF_SUCCESS;
4384}
4385
4386
4387/**
4388 * Does global VT-x termination (called during module termination).
4389 */
4390VMMR0DECL(void) VMXR0GlobalTerm()
4391{
4392 /* Nothing to do currently. */
4393}
4394
4395
4396/**
4397 * Sets up and activates VT-x on the current CPU.
4398 *
4399 * @returns VBox status code.
4400 * @param pHostCpu The HM physical-CPU structure.
4401 * @param pVM The cross context VM structure. Can be
4402 * NULL after a host resume operation.
4403 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
4404 * fEnabledByHost is @c true).
4405 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
4406 * @a fEnabledByHost is @c true).
4407 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
4408 * enable VT-x on the host.
4409 * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs.
4410 */
4411VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVMCC pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
4412 PCSUPHWVIRTMSRS pHwvirtMsrs)
4413{
4414 AssertPtr(pHostCpu);
4415 AssertPtr(pHwvirtMsrs);
4416 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4417
4418 /* Enable VT-x if it's not already enabled by the host. */
4419 if (!fEnabledByHost)
4420 {
4421 int rc = hmR0VmxEnterRootMode(pHostCpu, pVM, HCPhysCpuPage, pvCpuPage);
4422 if (RT_FAILURE(rc))
4423 return rc;
4424 }
4425
4426 /*
4427     * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been
4428 * using EPTPs) so we don't retain any stale guest-physical mappings which won't get
4429 * invalidated when flushing by VPID.
4430 */
4431 if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
4432 {
4433 hmR0VmxFlushEpt(NULL /* pVCpu */, NULL /* pVmcsInfo */, VMXTLBFLUSHEPT_ALL_CONTEXTS);
4434 pHostCpu->fFlushAsidBeforeUse = false;
4435 }
4436 else
4437 pHostCpu->fFlushAsidBeforeUse = true;
4438
4439 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
4440 ++pHostCpu->cTlbFlushes;
4441
4442 return VINF_SUCCESS;
4443}
4444
4445
4446/**
4447 * Deactivates VT-x on the current CPU.
4448 *
4449 * @returns VBox status code.
4450 * @param pHostCpu The HM physical-CPU structure.
4451 * @param pvCpuPage Pointer to the VMXON region.
4452 * @param HCPhysCpuPage Physical address of the VMXON region.
4453 *
4454 * @remarks This function should never be called when SUPR0EnableVTx() or
4455 * similar was used to enable VT-x on the host.
4456 */
4457VMMR0DECL(int) VMXR0DisableCpu(PHMPHYSCPU pHostCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
4458{
4459 RT_NOREF2(pvCpuPage, HCPhysCpuPage);
4460
4461 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4462 return hmR0VmxLeaveRootMode(pHostCpu);
4463}
4464
4465
4466/**
4467 * Does per-VM VT-x initialization.
4468 *
4469 * @returns VBox status code.
4470 * @param pVM The cross context VM structure.
4471 */
4472VMMR0DECL(int) VMXR0InitVM(PVMCC pVM)
4473{
4474 AssertPtr(pVM);
4475 LogFlowFunc(("pVM=%p\n", pVM));
4476
4477 hmR0VmxStructsInit(pVM);
4478 int rc = hmR0VmxStructsAlloc(pVM);
4479 if (RT_FAILURE(rc))
4480 {
4481        LogRelFunc(("Failed to allocate VMX structures. rc=%Rrc\n", rc));
4482 return rc;
4483 }
4484
4485 /* Setup the crash dump page. */
4486#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4487 strcpy((char *)pVM->hmr0.s.vmx.pbScratch, "SCRATCH Magic");
4488 *(uint64_t *)(pVM->hmr0.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
4489#endif
4490 return VINF_SUCCESS;
4491}
4492
4493
4494/**
4495 * Does per-VM VT-x termination.
4496 *
4497 * @returns VBox status code.
4498 * @param pVM The cross context VM structure.
4499 */
4500VMMR0DECL(int) VMXR0TermVM(PVMCC pVM)
4501{
4502 AssertPtr(pVM);
4503 LogFlowFunc(("pVM=%p\n", pVM));
4504
4505#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4506 if (pVM->hmr0.s.vmx.pbScratch)
4507 RT_BZERO(pVM->hmr0.s.vmx.pbScratch, X86_PAGE_4K_SIZE);
4508#endif
4509 hmR0VmxStructsFree(pVM);
4510 return VINF_SUCCESS;
4511}
4512
4513
4514/**
4515 * Sets up the VM for execution using hardware-assisted VMX.
4516 * This function is only called once per-VM during initialization.
4517 *
4518 * @returns VBox status code.
4519 * @param pVM The cross context VM structure.
4520 */
4521VMMR0DECL(int) VMXR0SetupVM(PVMCC pVM)
4522{
4523 AssertPtr(pVM);
4524 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
4525
4526 LogFlowFunc(("pVM=%p\n", pVM));
4527
4528 /*
4529 * At least verify if VMX is enabled, since we can't check if we're in VMX root mode or not
4530 * without causing a #GP.
4531 */
4532 RTCCUINTREG const uHostCr4 = ASMGetCR4();
4533 if (RT_LIKELY(uHostCr4 & X86_CR4_VMXE))
4534 { /* likely */ }
4535 else
4536 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
4537
4538 /*
4539 * Check that nested paging is supported if enabled and copy over the flag to the
4540 * ring-0 only structure.
4541 */
4542 bool const fNestedPaging = pVM->hm.s.fNestedPagingCfg;
4543 AssertReturn( !fNestedPaging
4544 || (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_EPT), /** @todo use a ring-0 copy of ProcCtls2.n.allowed1 */
4545 VERR_INCOMPATIBLE_CONFIG);
4546 pVM->hmr0.s.fNestedPaging = fNestedPaging;
4547 pVM->hmr0.s.fAllow64BitGuests = pVM->hm.s.fAllow64BitGuestsCfg;
4548
4549 /*
4550 * Without unrestricted guest execution, pRealModeTSS and pNonPagingModeEPTPageTable *must*
4551 * always be allocated. We no longer support the highly unlikely case of unrestricted guest
4552 * without pRealModeTSS, see hmR3InitFinalizeR0Intel().
4553 */
4554 bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuestCfg;
4555 AssertReturn( !fUnrestrictedGuest
4556 || ( (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_UNRESTRICTED_GUEST)
4557 && fNestedPaging),
4558 VERR_INCOMPATIBLE_CONFIG);
4559 if ( !fUnrestrictedGuest
4560 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
4561 || !pVM->hm.s.vmx.pRealModeTSS))
4562 {
4563 LogRelFunc(("Invalid real-on-v86 state.\n"));
4564 return VERR_INTERNAL_ERROR;
4565 }
4566 pVM->hmr0.s.vmx.fUnrestrictedGuest = fUnrestrictedGuest;
4567
4568 /* Initialize these always, see hmR3InitFinalizeR0().*/
4569 pVM->hm.s.ForR3.vmx.enmTlbFlushEpt = pVM->hmr0.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE;
4570 pVM->hm.s.ForR3.vmx.enmTlbFlushVpid = pVM->hmr0.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE;
4571
4572 /* Setup the tagged-TLB flush handlers. */
4573 int rc = hmR0VmxSetupTaggedTlb(pVM);
4574 if (RT_FAILURE(rc))
4575 {
4576 LogRelFunc(("Failed to setup tagged TLB. rc=%Rrc\n", rc));
4577 return rc;
4578 }
4579
4580 /* Determine LBR capabilities. */
4581 pVM->hmr0.s.vmx.fLbr = pVM->hm.s.vmx.fLbrCfg;
4582 if (pVM->hmr0.s.vmx.fLbr)
4583 {
4584 rc = hmR0VmxSetupLbrMsrRange(pVM);
4585 if (RT_FAILURE(rc))
4586 {
4587 LogRelFunc(("Failed to setup LBR MSR range. rc=%Rrc\n", rc));
4588 return rc;
4589 }
4590 }
4591
4592#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4593 /* Setup the shadow VMCS fields array and VMREAD/VMWRITE bitmaps. */
4594 if (pVM->hmr0.s.vmx.fUseVmcsShadowing)
4595 {
4596 rc = hmR0VmxSetupShadowVmcsFieldsArrays(pVM);
4597 if (RT_SUCCESS(rc))
4598 hmR0VmxSetupVmreadVmwriteBitmaps(pVM);
4599 else
4600 {
4601 LogRelFunc(("Failed to setup shadow VMCS fields arrays. rc=%Rrc\n", rc));
4602 return rc;
4603 }
4604 }
4605#endif
4606
4607 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
4608 {
4609 PVMCPUCC pVCpu = VMCC_GET_CPU(pVM, idCpu);
4610 Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
4611
4612 pVCpu->hmr0.s.vmx.pfnStartVm = hmR0VmxStartVmSelector;
4613
4614 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfo, false /* fIsNstGstVmcs */);
4615 if (RT_SUCCESS(rc))
4616 {
4617#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4618 if (pVM->cpum.ro.GuestFeatures.fVmx)
4619 {
4620 rc = hmR0VmxSetupVmcs(pVCpu, &pVCpu->hmr0.s.vmx.VmcsInfoNstGst, true /* fIsNstGstVmcs */);
4621 if (RT_SUCCESS(rc))
4622 { /* likely */ }
4623 else
4624 {
4625 LogRelFunc(("Nested-guest VMCS setup failed. rc=%Rrc\n", rc));
4626 return rc;
4627 }
4628 }
4629#endif
4630 }
4631 else
4632 {
4633 LogRelFunc(("VMCS setup failed. rc=%Rrc\n", rc));
4634 return rc;
4635 }
4636 }
4637
4638 return VINF_SUCCESS;
4639}
4640
4641
4642/**
4643 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
4644 * the VMCS.
4645 * @returns CR4 for passing along to hmR0VmxExportHostSegmentRegs.
4646 */
4647static uint64_t hmR0VmxExportHostControlRegs(void)
4648{
4649 int rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR0, ASMGetCR0()); AssertRC(rc);
4650 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR3, ASMGetCR3()); AssertRC(rc);
4651 uint64_t uHostCr4 = ASMGetCR4();
4652 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_CR4, uHostCr4); AssertRC(rc);
4653 return uHostCr4;
4654}
4655
4656
4657/**
4658 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
4659 * the host-state area in the VMCS.
4660 *
4661 * @returns VBox status code.
4662 * @param pVCpu The cross context virtual CPU structure.
4663 * @param uHostCr4 The host CR4 value.
4664 */
4665static int hmR0VmxExportHostSegmentRegs(PVMCPUCC pVCpu, uint64_t uHostCr4)
4666{
4667 /*
4668 * If we've executed guest code using hardware-assisted VMX, the host-state bits
4669 * will be messed up. We should -not- save the messed up state without restoring
4670 * the original host-state, see @bugref{7240}.
4671 *
4672 * This apparently can happen (most likely the FPU changes), deal with it rather than
4673 * asserting. Was observed booting Solaris 10u10 32-bit guest.
4674 */
4675 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
4676 {
4677 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags,
4678 pVCpu->idCpu));
4679 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
4680 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
4681 }
4682
4683 /*
4684 * Get all the host info.
4685 * ASSUME it is safe to use rdfsbase and friends if the CR4.FSGSBASE bit is set
4686 * without also checking the cpuid bit.
4687 */
4688 uint32_t fRestoreHostFlags;
4689#if RT_INLINE_ASM_EXTERNAL
4690 if (uHostCr4 & X86_CR4_FSGSBASE)
4691 {
4692 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, true /*fHaveFsGsBase*/);
4693 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
4694 }
4695 else
4696 {
4697 hmR0VmxExportHostSegmentRegsAsmHlp(&pVCpu->hmr0.s.vmx.RestoreHost, false /*fHaveFsGsBase*/);
4698 fRestoreHostFlags = 0;
4699 }
4700 RTSEL uSelES = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES;
4701 RTSEL uSelDS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS;
4702 RTSEL uSelFS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS;
4703 RTSEL uSelGS = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS;
4704#else
4705 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR = ASMGetTR();
4706 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS = ASMGetSS();
4707 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS = ASMGetCS();
4708 ASMGetGDTR((PRTGDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr);
4709 ASMGetIDTR((PRTIDTR)&pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr);
4710 if (uHostCr4 & X86_CR4_FSGSBASE)
4711 {
4712 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMGetFSBase();
4713 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMGetGSBase();
4714 fRestoreHostFlags = VMX_RESTORE_HOST_CAN_USE_WRFSBASE_AND_WRGSBASE;
4715 }
4716 else
4717 {
4718 pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase = ASMRdMsr(MSR_K8_FS_BASE);
4719 pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase = ASMRdMsr(MSR_K8_GS_BASE);
4720 fRestoreHostFlags = 0;
4721 }
4722 RTSEL uSelES, uSelDS, uSelFS, uSelGS;
4723 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelDS = uSelDS = ASMGetDS();
4724 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelES = uSelES = ASMGetES();
4725 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelFS = uSelFS = ASMGetFS();
4726 pVCpu->hmr0.s.vmx.RestoreHost.uHostSelGS = uSelGS = ASMGetGS();
4727#endif
4728
4729 /*
4730     * Determine if the host segment registers are suitable for VT-x. If they are not, load zero
4731     * into the VMCS so VM-entry succeeds and restore the real values before we get preempted.
4732 *
4733 * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
4734 */
4735 RTSEL const uSelAll = uSelFS | uSelGS | uSelES | uSelDS;
4736 if (uSelAll & (X86_SEL_RPL | X86_SEL_LDT))
4737 {
4738 if (!(uSelAll & X86_SEL_LDT))
4739 {
4740#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
4741 do { \
4742 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
4743 if ((a_uVmcsVar) & X86_SEL_RPL) \
4744 { \
4745 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
4746 (a_uVmcsVar) = 0; \
4747 } \
4748 } while (0)
4749 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
4750 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
4751 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
4752 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
4753#undef VMXLOCAL_ADJUST_HOST_SEG
4754 }
4755 else
4756 {
4757#define VMXLOCAL_ADJUST_HOST_SEG(a_Seg, a_uVmcsVar) \
4758 do { \
4759 (a_uVmcsVar) = pVCpu->hmr0.s.vmx.RestoreHost.uHostSel##a_Seg; \
4760 if ((a_uVmcsVar) & (X86_SEL_RPL | X86_SEL_LDT)) \
4761 { \
4762 if (!((a_uVmcsVar) & X86_SEL_LDT)) \
4763 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
4764 else \
4765 { \
4766 uint32_t const fAttr = ASMGetSegAttr(a_uVmcsVar); \
4767 if ((fAttr & X86_DESC_P) && fAttr != UINT32_MAX) \
4768 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##a_Seg; \
4769 } \
4770 (a_uVmcsVar) = 0; \
4771 } \
4772 } while (0)
4773 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
4774 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
4775 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
4776 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
4777#undef VMXLOCAL_ADJUST_HOST_SEG
4778 }
4779 }
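    /* Example with a hypothetical selector: a host DS of 0x002b has RPL=3, so the adjustment above
       flags VMX_RESTORE_HOST_SEL_DS and writes 0 as the VMCS host DS selector to satisfy the
       VM-entry checks; the saved value in RestoreHost is reloaded before we get preempted or
       return to ring-3. */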
4780
4781 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
4782 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR);
4783 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS & X86_SEL_LDT)); Assert(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS);
4784 Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_RPL)); Assert(!(pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS & X86_SEL_LDT));
4785 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
4786 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
4787 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
4788 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
4789
4790 /*
4791     * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps
4792 * them to the maximum limit (0xffff) on every VM-exit.
4793 */
4794 if (pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb != 0xffff)
4795 fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
4796
4797 /*
4798 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and
4799 * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit
4800 * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior.
4801 * However, several hosts either insists on 0xfff being the limit (Windows Patch Guard) or
4802     * However, several hosts either insist on 0xfff being the limit (Windows Patch Guard) or
4803     * use the limit for other purposes (darwin puts the CPU ID in there but botches sidt
4804 * at 0xffff on hosts where we are sure it won't cause trouble.
4805 */
4806#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
4807 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb < 0x0fff)
4808#else
4809 if (pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.cb != 0xffff)
4810#endif
4811 fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
4812
4813 /*
4814 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI
4815 * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and
4816 * RPL should be too in most cases.
4817 */
4818 RTSEL const uSelTR = pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR;
4819 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb,
4820 ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb),
4821 VERR_VMX_INVALID_HOST_STATE);
4822
4823 PCX86DESCHC pDesc = (PCX86DESCHC)(pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr + (uSelTR & X86_SEL_MASK));
4824 uintptr_t const uTRBase = X86DESC64_BASE(pDesc);
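    /* Example with a hypothetical selector: a TR of 0x0040 selects GDT entry 8, so pDesc points at
       HostGdtr.uAddr + 0x40 and X86DESC64_BASE() reassembles the 64-bit TSS base from the split
       base fields of that (16-byte, 64-bit mode) system descriptor. */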
4825
4826 /*
4827 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on
4828 * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual
4829 * restoration if the host has something else. Task switching is not supported in 64-bit
4830 * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the
4831 * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
4832 *
4833 * [1] See Intel spec. 3.5 "System Descriptor Types".
4834 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
4835 */
4836 Assert(pDesc->System.u4Type == 11);
4837 if ( pDesc->System.u16LimitLow != 0x67
4838 || pDesc->System.u4LimitHigh)
4839 {
4840 fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
4841
4842 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
4843 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
4844 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
4845 if (g_fHmHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE)
4846 {
4847 /* The GDT is read-only but the writable GDT is available. */
4848 fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE;
4849 pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.cb = pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.cb;
4850 int rc = SUPR0GetCurrentGdtRw(&pVCpu->hmr0.s.vmx.RestoreHost.HostGdtrRw.uAddr);
4851 AssertRCReturn(rc, rc);
4852 }
4853 }
4854
4855 pVCpu->hmr0.s.vmx.fRestoreHostFlags = fRestoreHostFlags;
4856
4857 /*
4858 * Do all the VMCS updates in one block to assist nested virtualization.
4859 */
4860 int rc;
4861 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_CS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelCS); AssertRC(rc);
4862 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_SS_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelSS); AssertRC(rc);
4863 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_DS_SEL, uSelDS); AssertRC(rc);
4864 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_ES_SEL, uSelES); AssertRC(rc);
4865 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_FS_SEL, uSelFS); AssertRC(rc);
4866 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_GS_SEL, uSelGS); AssertRC(rc);
4867 rc = VMXWriteVmcs16(VMX_VMCS16_HOST_TR_SEL, pVCpu->hmr0.s.vmx.RestoreHost.uHostSelTR); AssertRC(rc);
4868 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostGdtr.uAddr); AssertRC(rc);
4869 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_IDTR_BASE, pVCpu->hmr0.s.vmx.RestoreHost.HostIdtr.uAddr); AssertRC(rc);
4870 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRC(rc);
4871 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_FS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostFSBase); AssertRC(rc);
4872 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_GS_BASE, pVCpu->hmr0.s.vmx.RestoreHost.uHostGSBase); AssertRC(rc);
4873
4874 return VINF_SUCCESS;
4875}
4876
4877
4878/**
4879 * Exports certain host MSRs in the VM-exit MSR-load area and some in the
4880 * host-state area of the VMCS.
4881 *
4882 * These MSRs will be automatically restored on the host after every successful
4883 * VM-exit.
4884 *
4885 * @param pVCpu The cross context virtual CPU structure.
4886 *
4887 * @remarks No-long-jump zone!!!
4888 */
4889static void hmR0VmxExportHostMsrs(PVMCPUCC pVCpu)
4890{
4891 AssertPtr(pVCpu);
4892
4893 /*
4894 * Save MSRs that we restore lazily (due to preemption or transition to ring-3)
4895 * rather than swapping them on every VM-entry.
4896 */
4897 hmR0VmxLazySaveHostMsrs(pVCpu);
4898
4899 /*
4900 * Host Sysenter MSRs.
4901 */
4902 int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); AssertRC(rc);
4903 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); AssertRC(rc);
4904 rc = VMXWriteVmcsNw(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); AssertRC(rc);
4905
4906 /*
4907 * Host EFER MSR.
4908 *
4909 * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's
4910 * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs().
4911 */
4912 if (g_fHmVmxSupportsVmcsEfer)
4913 {
4914 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, g_uHmVmxHostMsrEfer);
4915 AssertRC(rc);
4916 }
4917
4918 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
4919 * hmR0VmxExportGuestEntryExitCtls(). */
4920}
4921
4922
4923/**
4924 * Figures out if we need to swap the EFER MSR which is particularly expensive.
4925 *
4926 * We check all relevant bits. For now, that's everything besides LMA/LME, as
4927 * these two bits are handled by VM-entry, see hmR0VmxExportGuestEntryExitCtls().
4928 *
4929 * @returns true if we need to load guest EFER, false otherwise.
4930 * @param pVCpu The cross context virtual CPU structure.
4931 * @param pVmxTransient The VMX-transient structure.
4932 *
4933 * @remarks Requires EFER, CR4.
4934 * @remarks No-long-jump zone!!!
4935 */
4936static bool hmR0VmxShouldSwapEferMsr(PCVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
4937{
4938#ifdef HMVMX_ALWAYS_SWAP_EFER
4939 RT_NOREF2(pVCpu, pVmxTransient);
4940 return true;
4941#else
4942 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
4943 uint64_t const u64HostEfer = g_uHmVmxHostMsrEfer;
4944 uint64_t const u64GuestEfer = pCtx->msrEFER;
4945
4946# ifdef VBOX_WITH_NESTED_HWVIRT_VMX
4947 /*
4948 * For nested-guests, we shall honor swapping the EFER MSR when requested by
4949 * the nested-guest.
4950 */
4951 if ( pVmxTransient->fIsNestedGuest
4952 && ( CPUMIsGuestVmxEntryCtlsSet(pCtx, VMX_ENTRY_CTLS_LOAD_EFER_MSR)
4953 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_SAVE_EFER_MSR)
4954 || CPUMIsGuestVmxExitCtlsSet(pCtx, VMX_EXIT_CTLS_LOAD_EFER_MSR)))
4955 return true;
4956# else
4957 RT_NOREF(pVmxTransient);
4958#endif
4959
4960 /*
4961 * For 64-bit guests, if EFER.SCE bit differs, we need to swap the EFER MSR
4962 * to ensure that the guest's SYSCALL behaviour isn't broken, see @bugref{7386}.
4963 */
4964 if ( CPUMIsGuestInLongModeEx(pCtx)
4965 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
4966 return true;
4967
4968 /*
4969 * If the guest uses PAE and EFER.NXE bit differs, we need to swap the EFER MSR
4970 * as it affects guest paging. 64-bit paging implies CR4.PAE as well.
4971 *
4972 * See Intel spec. 4.5 "IA-32e Paging".
4973 * See Intel spec. 4.1.1 "Three Paging Modes".
4974 *
4975 * Verify that we always intercept CR4.PAE and CR0.PG bits, so we don't need to
4976 * import CR4 and CR0 from the VMCS here as those bits are always up to date.
4977 */
4978 Assert(hmR0VmxGetFixedCr4Mask(pVCpu) & X86_CR4_PAE);
4979 Assert(hmR0VmxGetFixedCr0Mask(pVCpu) & X86_CR0_PG);
4980 if ( (pCtx->cr4 & X86_CR4_PAE)
4981 && (pCtx->cr0 & X86_CR0_PG))
4982 {
4983 /*
4984 * If nested paging is not used, verify that the guest paging mode matches the
4985 * shadow paging mode which is/will be placed in the VMCS (which is what will
4986 * actually be used while executing the guest and not the CR4 shadow value).
4987 */
4988 AssertMsg( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
4989 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE
4990 || pVCpu->hm.s.enmShadowMode == PGMMODE_PAE_NX
4991 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64
4992 || pVCpu->hm.s.enmShadowMode == PGMMODE_AMD64_NX,
4993 ("enmShadowMode=%u\n", pVCpu->hm.s.enmShadowMode));
4994 if ((u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
4995 {
4996 /* Verify that the host is NX capable. */
4997 Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute);
4998 return true;
4999 }
5000 }
5001
5002 return false;
5003#endif
5004}
5005
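/*
 * Minimal standalone restatement of the EFER-swap decision above, kept purely as an illustration
 * (hypothetical helper name, not used anywhere, not compiled). It ignores the nested-guest and
 * HMVMX_ALWAYS_SWAP_EFER cases: swap when SCE differs for a long-mode guest or when NXE differs
 * for a guest that uses PAE/long-mode paging; LMA/LME are left to the VM-entry controls.
 */
#if 0
static bool vmxSketchShouldSwapEfer(uint64_t uHostEfer, uint64_t uGuestEfer,
                                    bool fGuestLongMode, bool fGuestPaePaging)
{
    if (fGuestLongMode && ((uGuestEfer ^ uHostEfer) & MSR_K6_EFER_SCE))   /* SYSCALL enable differs. */
        return true;
    if (fGuestPaePaging && ((uGuestEfer ^ uHostEfer) & MSR_K6_EFER_NXE))  /* No-execute enable differs. */
        return true;
    return false;
}
#endif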
5006
5007/**
5008 * Exports the guest state with appropriate VM-entry and VM-exit controls in the
5009 * VMCS.
5010 *
5011 * This is typically required when the guest changes paging mode.
5012 *
5013 * @returns VBox status code.
5014 * @param pVCpu The cross context virtual CPU structure.
5015 * @param pVmxTransient The VMX-transient structure.
5016 *
5017 * @remarks Requires EFER.
5018 * @remarks No-long-jump zone!!!
5019 */
5020static int hmR0VmxExportGuestEntryExitCtls(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
5021{
5022 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS)
5023 {
5024 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5025 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
5026
5027 /*
5028 * VM-entry controls.
5029 */
5030 {
5031 uint32_t fVal = g_HmMsrs.u.vmx.EntryCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
5032 uint32_t const fZap = g_HmMsrs.u.vmx.EntryCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
5033
5034 /*
5035 * Load the guest debug controls (DR7 and IA32_DEBUGCTL MSR) on VM-entry.
5036 * The first VT-x capable CPUs only supported the 1-setting of this bit.
5037 *
5038 * For nested-guests, this is a mandatory VM-entry control. It's also
5039 * required because we do not want to leak host bits to the nested-guest.
5040 */
5041 fVal |= VMX_ENTRY_CTLS_LOAD_DEBUG;
5042
5043 /*
5044 * Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry.
5045 *
5046             * For nested-guests, we initialize the "IA-32e mode guest" control with what is
5047             * required to get the nested-guest working with hardware-assisted VMX execution.
5048 * It depends on the nested-guest's IA32_EFER.LMA bit. Remember, a nested hypervisor
5049 * can skip intercepting changes to the EFER MSR. This is why it needs to be done
5050 * here rather than while merging the guest VMCS controls.
5051 */
5052 if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx))
5053 {
5054 Assert(pVCpu->cpum.GstCtx.msrEFER & MSR_K6_EFER_LME);
5055 fVal |= VMX_ENTRY_CTLS_IA32E_MODE_GUEST;
5056 }
5057 else
5058 Assert(!(fVal & VMX_ENTRY_CTLS_IA32E_MODE_GUEST));
5059
5060 /*
5061 * If the CPU supports the newer VMCS controls for managing guest/host EFER, use it.
5062 *
5063 * For nested-guests, we use the "load IA32_EFER" if the hardware supports it,
5064 * regardless of whether the nested-guest VMCS specifies it because we are free to
5065 * load whatever MSRs we require and we do not need to modify the guest visible copy
5066 * of the VM-entry MSR load area.
5067 */
5068 if ( g_fHmVmxSupportsVmcsEfer
5069 && hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
5070 fVal |= VMX_ENTRY_CTLS_LOAD_EFER_MSR;
5071 else
5072 Assert(!(fVal & VMX_ENTRY_CTLS_LOAD_EFER_MSR));
5073
5074 /*
5075 * The following should -not- be set (since we're not in SMM mode):
5076 * - VMX_ENTRY_CTLS_ENTRY_TO_SMM
5077 * - VMX_ENTRY_CTLS_DEACTIVATE_DUAL_MON
5078 */
5079
5080 /** @todo VMX_ENTRY_CTLS_LOAD_PERF_MSR,
5081 * VMX_ENTRY_CTLS_LOAD_PAT_MSR. */
5082
5083 if ((fVal & fZap) == fVal)
5084 { /* likely */ }
5085 else
5086 {
5087 Log4Func(("Invalid VM-entry controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
5088 g_HmMsrs.u.vmx.EntryCtls.n.allowed0, fVal, fZap));
5089 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY;
5090 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
5091 }
5092
5093 /* Commit it to the VMCS. */
5094 if (pVmcsInfo->u32EntryCtls != fVal)
5095 {
5096 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, fVal);
5097 AssertRC(rc);
5098 pVmcsInfo->u32EntryCtls = fVal;
5099 }
5100 }
5101
5102 /*
5103 * VM-exit controls.
5104 */
5105 {
5106 uint32_t fVal = g_HmMsrs.u.vmx.ExitCtls.n.allowed0; /* Bits set here must be set in the VMCS. */
5107 uint32_t const fZap = g_HmMsrs.u.vmx.ExitCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
5108
5109 /*
5110 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only
5111 * supported the 1-setting of this bit.
5112 *
5113 * For nested-guests, we set the "save debug controls" as the converse
5114 * "load debug controls" is mandatory for nested-guests anyway.
5115 */
5116 fVal |= VMX_EXIT_CTLS_SAVE_DEBUG;
5117
5118 /*
5119 * Set the host long mode active (EFER.LMA) bit (which Intel calls
5120 * "Host address-space size") if necessary. On VM-exit, VT-x sets both the
5121 * host EFER.LMA and EFER.LME bit to this value. See assertion in
5122 * hmR0VmxExportHostMsrs().
5123 *
5124 * For nested-guests, we always set this bit as we do not support 32-bit
5125 * hosts.
5126 */
5127 fVal |= VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE;
5128
5129 /*
5130 * If the VMCS EFER MSR fields are supported by the hardware, we use it.
5131 *
5132 * For nested-guests, we should use the "save IA32_EFER" control if we also
5133 * used the "load IA32_EFER" control while exporting VM-entry controls.
5134 */
5135 if ( g_fHmVmxSupportsVmcsEfer
5136 && hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
5137 {
5138 fVal |= VMX_EXIT_CTLS_SAVE_EFER_MSR
5139 | VMX_EXIT_CTLS_LOAD_EFER_MSR;
5140 }
5141
5142 /*
5143 * Enable saving of the VMX-preemption timer value on VM-exit.
5144 * For nested-guests, currently not exposed/used.
5145 */
5146 /** @todo r=bird: Measure performance hit because of this vs. always rewriting
5147 * the timer value. */
5148 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
5149 {
5150 Assert(g_HmMsrs.u.vmx.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER);
5151 fVal |= VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER;
5152 }
5153
5154 /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */
5155 Assert(!(fVal & VMX_EXIT_CTLS_ACK_EXT_INT));
5156
5157 /** @todo VMX_EXIT_CTLS_LOAD_PERF_MSR,
5158 * VMX_EXIT_CTLS_SAVE_PAT_MSR,
5159 * VMX_EXIT_CTLS_LOAD_PAT_MSR. */
5160
5161 if ((fVal & fZap) == fVal)
5162 { /* likely */ }
5163 else
5164 {
5165                Log4Func(("Invalid VM-exit controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n",
5166 g_HmMsrs.u.vmx.ExitCtls.n.allowed0, fVal, fZap));
5167 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT;
5168 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
5169 }
5170
5171 /* Commit it to the VMCS. */
5172 if (pVmcsInfo->u32ExitCtls != fVal)
5173 {
5174 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, fVal);
5175 AssertRC(rc);
5176 pVmcsInfo->u32ExitCtls = fVal;
5177 }
5178 }
5179
5180 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_ENTRY_EXIT_CTLS);
5181 }
5182 return VINF_SUCCESS;
5183}
5184
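/*
 * Minimal standalone sketch of the allowed-0/allowed-1 pattern used above (hypothetical helper
 * name, not used anywhere, not compiled): allowed0 bits must be 1, allowed1 bits may be 1, so
 * after OR-ing in the desired controls the combination is only valid if no requested bit falls
 * outside allowed1, i.e. (fVal & fZap) == fVal. For instance, with a hypothetical allowed1 of
 * 0x0000f1ff, requesting bit 16 would fail the check.
 */
#if 0
static bool vmxSketchIsCtlsComboValid(uint32_t fDesired, uint32_t fAllowed0, uint32_t fAllowed1)
{
    uint32_t const fVal = fAllowed0 | fDesired;  /* Must-be-one bits plus what we want to set. */
    uint32_t const fZap = fAllowed1;             /* May-be-one bits. */
    return (fVal & fZap) == fVal;                /* Fails if a desired bit isn't allowed to be 1. */
}
#endif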
5185
5186/**
5187 * Sets the TPR threshold in the VMCS.
5188 *
5189 * @param pVmcsInfo The VMCS info. object.
5190 * @param u32TprThreshold The TPR threshold (task-priority class only).
5191 */
5192DECLINLINE(void) hmR0VmxApicSetTprThreshold(PVMXVMCSINFO pVmcsInfo, uint32_t u32TprThreshold)
5193{
5194 Assert(!(u32TprThreshold & ~VMX_TPR_THRESHOLD_MASK)); /* Bits 31:4 MBZ. */
5195 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW);
5196 RT_NOREF(pVmcsInfo);
5197 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
5198 AssertRC(rc);
5199}
5200
5201
5202/**
5203 * Exports the guest APIC TPR state into the VMCS.
5204 *
5205 * @param pVCpu The cross context virtual CPU structure.
5206 * @param pVmxTransient The VMX-transient structure.
5207 *
5208 * @remarks No-long-jump zone!!!
5209 */
5210static void hmR0VmxExportGuestApicTpr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
5211{
5212 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_APIC_TPR)
5213 {
5214 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_APIC_TPR);
5215
5216 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
5217 if (!pVmxTransient->fIsNestedGuest)
5218 {
5219 if ( PDMHasApic(pVCpu->CTX_SUFF(pVM))
5220 && APICIsEnabled(pVCpu))
5221 {
5222 /*
5223 * Setup TPR shadowing.
5224 */
5225 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
5226 {
5227 bool fPendingIntr = false;
5228 uint8_t u8Tpr = 0;
5229 uint8_t u8PendingIntr = 0;
5230 int rc = APICGetTpr(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr);
5231 AssertRC(rc);
5232
5233 /*
5234 * If there are interrupts pending but masked by the TPR, instruct VT-x to
5235 * cause a TPR-below-threshold VM-exit when the guest lowers its TPR below the
5236 * priority of the pending interrupt so we can deliver the interrupt. If there
5237 * are no interrupts pending, set threshold to 0 to not cause any
5238 * TPR-below-threshold VM-exits.
5239 */
5240 uint32_t u32TprThreshold = 0;
5241 if (fPendingIntr)
5242 {
5243 /* Bits 3:0 of the TPR threshold field correspond to bits 7:4 of the TPR
5244 (which is the Task-Priority Class). */
5245 const uint8_t u8PendingPriority = u8PendingIntr >> 4;
5246 const uint8_t u8TprPriority = u8Tpr >> 4;
5247 if (u8PendingPriority <= u8TprPriority)
5248 u32TprThreshold = u8PendingPriority;
5249 }
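                    /* Worked example (hypothetical values): u8Tpr=0x50 and u8PendingIntr=0x41 give
                       priority classes 5 and 4; since 4 <= 5 the threshold becomes 4, so VT-x raises
                       a TPR-below-threshold VM-exit once the guest drops its TPR class below 4 and
                       the pending interrupt becomes deliverable. */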
5250
5251 hmR0VmxApicSetTprThreshold(pVmcsInfo, u32TprThreshold);
5252 }
5253 }
5254 }
5255 /* else: the TPR threshold has already been updated while merging the nested-guest VMCS. */
5256 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_APIC_TPR);
5257 }
5258}
5259
5260
5261/**
5262 * Gets the guest interruptibility-state and updates related force-flags.
5263 *
5264 * @returns Guest's interruptibility-state.
5265 * @param pVCpu The cross context virtual CPU structure.
5266 *
5267 * @remarks No-long-jump zone!!!
5268 */
5269static uint32_t hmR0VmxGetGuestIntrStateAndUpdateFFs(PVMCPUCC pVCpu)
5270{
5271 /*
5272 * Check if we should inhibit interrupt delivery due to instructions like STI and MOV SS.
5273 */
5274 uint32_t fIntrState = 0;
5275 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
5276 {
5277 /* If inhibition is active, RIP and RFLAGS should've been imported from the VMCS already. */
5278 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
5279
5280 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
5281 if (pCtx->rip == EMGetInhibitInterruptsPC(pVCpu))
5282 {
5283 if (pCtx->eflags.Bits.u1IF)
5284 fIntrState = VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
5285 else
5286 fIntrState = VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS;
5287 }
5288 else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
5289 {
5290 /*
5291 * We can clear the inhibit force flag as even if we go back to the recompiler
5292 * without executing guest code in VT-x, the flag's condition to be cleared is
5293 * met and thus the cleared state is correct.
5294 */
5295 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
5296 }
5297 }
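    /* The IF-based choice above is a heuristic: STI always leaves IF set, so an interrupt shadow
       with IF clear can only stem from a MOV SS/POP SS; with IF set we report block-by-STI. */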
5298
5299 /*
5300 * Check if we should inhibit NMI delivery.
5301 */
5302 if (CPUMIsGuestNmiBlocking(pVCpu))
5303 fIntrState |= VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI;
5304
5305 /*
5306 * Validate.
5307 */
5308#ifdef VBOX_STRICT
5309 /* We don't support block-by-SMI yet.*/
5310 Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI));
5311
5312 /* Block-by-STI must not be set when interrupts are disabled. */
5313 if (fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
5314 {
5315 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS);
5316 Assert(pVCpu->cpum.GstCtx.eflags.u & X86_EFL_IF);
5317 }
5318#endif
5319
5320 return fIntrState;
5321}
5322
5323
5324/**
5325 * Exports the exception intercepts required for guest execution in the VMCS.
5326 *
5327 * @param pVCpu The cross context virtual CPU structure.
5328 * @param pVmxTransient The VMX-transient structure.
5329 *
5330 * @remarks No-long-jump zone!!!
5331 */
5332static void hmR0VmxExportGuestXcptIntercepts(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
5333{
5334 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_XCPT_INTERCEPTS)
5335 {
5336 /* When executing a nested-guest, we do not need to trap GIM hypercalls by intercepting #UD. */
5337 if ( !pVmxTransient->fIsNestedGuest
5338 && pVCpu->hm.s.fGIMTrapXcptUD)
5339 hmR0VmxAddXcptIntercept(pVmxTransient, X86_XCPT_UD);
5340 else
5341 hmR0VmxRemoveXcptIntercept(pVCpu, pVmxTransient, X86_XCPT_UD);
5342
5343 /* Other exception intercepts are handled elsewhere, e.g. while exporting guest CR0. */
5344 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_XCPT_INTERCEPTS);
5345 }
5346}
5347
5348
5349/**
5350 * Exports the guest's RIP into the guest-state area in the VMCS.
5351 *
5352 * @param pVCpu The cross context virtual CPU structure.
5353 *
5354 * @remarks No-long-jump zone!!!
5355 */
5356static void hmR0VmxExportGuestRip(PVMCPUCC pVCpu)
5357{
5358 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RIP)
5359 {
5360 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP);
5361
5362 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RIP, pVCpu->cpum.GstCtx.rip);
5363 AssertRC(rc);
5364
5365 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RIP);
5366 Log4Func(("rip=%#RX64\n", pVCpu->cpum.GstCtx.rip));
5367 }
5368}
5369
5370
5371/**
5372 * Exports the guest's RSP into the guest-state area in the VMCS.
5373 *
5374 * @param pVCpu The cross context virtual CPU structure.
5375 *
5376 * @remarks No-long-jump zone!!!
5377 */
5378static void hmR0VmxExportGuestRsp(PVMCPUCC pVCpu)
5379{
5380 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP)
5381 {
5382 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP);
5383
5384 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp);
5385 AssertRC(rc);
5386
5387 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP);
5388 Log4Func(("rsp=%#RX64\n", pVCpu->cpum.GstCtx.rsp));
5389 }
5390}
5391
5392
5393/**
5394 * Exports the guest's RFLAGS into the guest-state area in the VMCS.
5395 *
5396 * @param pVCpu The cross context virtual CPU structure.
5397 * @param pVmxTransient The VMX-transient structure.
5398 *
5399 * @remarks No-long-jump zone!!!
5400 */
5401static void hmR0VmxExportGuestRflags(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
5402{
5403 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RFLAGS)
5404 {
5405 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS);
5406
5407 /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ).
5408 Let us assert it as such and use 32-bit VMWRITE. */
5409 Assert(!RT_HI_U32(pVCpu->cpum.GstCtx.rflags.u64));
5410 X86EFLAGS fEFlags = pVCpu->cpum.GstCtx.eflags;
5411 Assert(fEFlags.u32 & X86_EFL_RA1_MASK);
5412 Assert(!(fEFlags.u32 & ~(X86_EFL_1 | X86_EFL_LIVE_MASK)));
5413
5414 /*
5415 * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so
5416 * we can restore them on VM-exit. Modify the real-mode guest's eflags so that VT-x
5417 * can run the real-mode guest code under Virtual 8086 mode.
5418 */
5419 PVMXVMCSINFOSHARED pVmcsInfo = pVmxTransient->pVmcsInfo->pShared;
5420 if (pVmcsInfo->RealMode.fRealOnV86Active)
5421 {
5422 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
5423 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
5424 Assert(!pVmxTransient->fIsNestedGuest);
5425 pVmcsInfo->RealMode.Eflags.u32 = fEFlags.u32; /* Save the original eflags of the real-mode guest. */
5426 fEFlags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */
5427 fEFlags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. */
5428 }
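        /* Example (hypothetical value): real-mode guest eflags of 0x00000202 are stashed in
           RealMode.Eflags and become 0x00020202 (VM=1, IOPL=0) for execution under virtual-8086
           mode; the original value is restored on VM-exit. */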
5429
5430 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_RFLAGS, fEFlags.u32);
5431 AssertRC(rc);
5432
5433 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RFLAGS);
5434 Log4Func(("eflags=%#RX32\n", fEFlags.u32));
5435 }
5436}
5437
5438
5439#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5440/**
5441 * Copies the nested-guest VMCS to the shadow VMCS.
5442 *
5443 * @returns VBox status code.
5444 * @param pVCpu The cross context virtual CPU structure.
5445 * @param pVmcsInfo The VMCS info. object.
5446 *
5447 * @remarks No-long-jump zone!!!
5448 */
5449static int hmR0VmxCopyNstGstToShadowVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
5450{
5451 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5452 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5453
5454 /*
5455 * Disable interrupts so we don't get preempted while the shadow VMCS is the
5456 * current VMCS, as we may try saving guest lazy MSRs.
5457 *
5458 * Strictly speaking the lazy MSRs are not in the VMCS, but I'd rather not risk
5459 * calling the import VMCS code which is currently performing the guest MSR reads
5460 * (on 64-bit hosts) and accessing the auto-load/store MSR area on 32-bit hosts
5461 * and the rest of the VMX leave session machinery.
5462 */
5463 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
5464
5465 int rc = hmR0VmxLoadShadowVmcs(pVmcsInfo);
5466 if (RT_SUCCESS(rc))
5467 {
5468 /*
5469 * Copy all guest read/write VMCS fields.
5470 *
5471 * We don't check for VMWRITE failures here for performance reasons and
5472 * because they are not expected to fail, barring irrecoverable conditions
5473 * like hardware errors.
5474 */
5475 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
5476 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
5477 {
5478 uint64_t u64Val;
5479 uint32_t const uVmcsField = pVM->hmr0.s.vmx.paShadowVmcsFields[i];
5480 IEMReadVmxVmcsField(pVmcsNstGst, uVmcsField, &u64Val);
5481 VMXWriteVmcs64(uVmcsField, u64Val);
5482 }
5483
5484 /*
5485 * If the host CPU supports writing all VMCS fields, copy the guest read-only
5486 * VMCS fields, so the guest can VMREAD them without causing a VM-exit.
5487 */
5488 if (g_HmMsrs.u.vmx.u64Misc & VMX_MISC_VMWRITE_ALL)
5489 {
5490 uint32_t const cShadowVmcsRoFields = pVM->hmr0.s.vmx.cShadowVmcsRoFields;
5491 for (uint32_t i = 0; i < cShadowVmcsRoFields; i++)
5492 {
5493 uint64_t u64Val;
5494 uint32_t const uVmcsField = pVM->hmr0.s.vmx.paShadowVmcsRoFields[i];
5495 IEMReadVmxVmcsField(pVmcsNstGst, uVmcsField, &u64Val);
5496 VMXWriteVmcs64(uVmcsField, u64Val);
5497 }
5498 }
5499
5500 rc = hmR0VmxClearShadowVmcs(pVmcsInfo);
5501 rc |= hmR0VmxLoadVmcs(pVmcsInfo);
5502 }
5503
5504 ASMSetFlags(fEFlags);
5505 return rc;
5506}
5507
5508
5509/**
5510 * Copies the shadow VMCS to the nested-guest VMCS.
5511 *
5512 * @returns VBox status code.
5513 * @param pVCpu The cross context virtual CPU structure.
5514 * @param pVmcsInfo The VMCS info. object.
5515 *
5516 * @remarks Called with interrupts disabled.
5517 */
5518static int hmR0VmxCopyShadowToNstGstVmcs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
5519{
5520 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
5521 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
5522 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
5523
5524 int rc = hmR0VmxLoadShadowVmcs(pVmcsInfo);
5525 if (RT_SUCCESS(rc))
5526 {
5527 /*
5528 * Copy guest read/write fields from the shadow VMCS.
5529 * Guest read-only fields cannot be modified, so no need to copy them.
5530 *
5531 * We don't check for VMREAD failures here for performance reasons and
5532 * because they are not expected to fail, barring irrecoverable conditions
5533 * like hardware errors.
5534 */
5535 uint32_t const cShadowVmcsFields = pVM->hmr0.s.vmx.cShadowVmcsFields;
5536 for (uint32_t i = 0; i < cShadowVmcsFields; i++)
5537 {
5538 uint64_t u64Val;
5539 uint32_t const uVmcsField = pVM->hmr0.s.vmx.paShadowVmcsFields[i];
5540 VMXReadVmcs64(uVmcsField, &u64Val);
5541 IEMWriteVmxVmcsField(pVmcsNstGst, uVmcsField, u64Val);
5542 }
5543
5544 rc = hmR0VmxClearShadowVmcs(pVmcsInfo);
5545 rc |= hmR0VmxLoadVmcs(pVmcsInfo);
5546 }
5547 return rc;
5548}
5549
5550
5551/**
5552 * Enables VMCS shadowing for the given VMCS info. object.
5553 *
5554 * @param pVmcsInfo The VMCS info. object.
5555 *
5556 * @remarks No-long-jump zone!!!
5557 */
5558static void hmR0VmxEnableVmcsShadowing(PVMXVMCSINFO pVmcsInfo)
5559{
5560 uint32_t uProcCtls2 = pVmcsInfo->u32ProcCtls2;
5561 if (!(uProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING))
5562 {
5563 Assert(pVmcsInfo->HCPhysShadowVmcs != 0 && pVmcsInfo->HCPhysShadowVmcs != NIL_RTHCPHYS);
5564 uProcCtls2 |= VMX_PROC_CTLS2_VMCS_SHADOWING;
5565 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, uProcCtls2); AssertRC(rc);
5566 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, pVmcsInfo->HCPhysShadowVmcs); AssertRC(rc);
5567 pVmcsInfo->u32ProcCtls2 = uProcCtls2;
5568 pVmcsInfo->u64VmcsLinkPtr = pVmcsInfo->HCPhysShadowVmcs;
5569 Log4Func(("Enabled\n"));
5570 }
5571}
5572
5573
5574/**
5575 * Disables VMCS shadowing for the given VMCS info. object.
5576 *
5577 * @param pVmcsInfo The VMCS info. object.
5578 *
5579 * @remarks No-long-jump zone!!!
5580 */
5581static void hmR0VmxDisableVmcsShadowing(PVMXVMCSINFO pVmcsInfo)
5582{
5583 /*
5584 * We want all VMREAD and VMWRITE instructions to cause VM-exits, so we clear the
5585 * VMCS shadowing control. However, VM-entry requires the shadow VMCS indicator bit
5586 * to match the VMCS shadowing control if the VMCS link pointer is not NIL_RTHCPHYS.
5587 * Hence, we must also reset the VMCS link pointer to ensure VM-entry does not fail.
5588 *
5589 * See Intel spec. 26.2.1.1 "VM-Execution Control Fields".
5590 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
5591 */
5592 uint32_t uProcCtls2 = pVmcsInfo->u32ProcCtls2;
5593 if (uProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
5594 {
5595 uProcCtls2 &= ~VMX_PROC_CTLS2_VMCS_SHADOWING;
5596 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, uProcCtls2); AssertRC(rc);
5597 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, NIL_RTHCPHYS); AssertRC(rc);
5598 pVmcsInfo->u32ProcCtls2 = uProcCtls2;
5599 pVmcsInfo->u64VmcsLinkPtr = NIL_RTHCPHYS;
5600 Log4Func(("Disabled\n"));
5601 }
5602}
5603#endif
5604
5605
5606/**
5607 * Exports the guest hardware-virtualization state.
5608 *
5609 * @returns VBox status code.
5610 * @param pVCpu The cross context virtual CPU structure.
5611 * @param pVmxTransient The VMX-transient structure.
5612 *
5613 * @remarks No-long-jump zone!!!
5614 */
5615static int hmR0VmxExportGuestHwvirtState(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
5616{
5617 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_HWVIRT)
5618 {
5619#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
5620 /*
5621 * Check if the VMX feature is exposed to the guest and if the host CPU supports
5622 * VMCS shadowing.
5623 */
5624 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUseVmcsShadowing)
5625 {
5626 /*
5627 * If the nested hypervisor has loaded a current VMCS and is in VMX root mode,
5628 * copy the nested hypervisor's current VMCS into the shadow VMCS and enable
5629 * VMCS shadowing to skip intercepting some or all VMREAD/VMWRITE VM-exits.
5630 *
5631 * We check for VMX root mode here in case the guest executes VMXOFF without
5632 * clearing the current VMCS pointer and our VMXOFF instruction emulation does
5633 * not clear the current VMCS pointer.
5634 */
5635 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
5636 if ( CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)
5637 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)
5638 && CPUMIsGuestVmxCurrentVmcsValid(&pVCpu->cpum.GstCtx))
5639 {
5640 /* Paranoia. */
5641 Assert(!pVmxTransient->fIsNestedGuest);
5642
5643 /*
5644 * For performance reasons, also check if the nested hypervisor's current VMCS
5645 * was newly loaded or modified before copying it to the shadow VMCS.
5646 */
5647 if (!pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs)
5648 {
5649 int rc = hmR0VmxCopyNstGstToShadowVmcs(pVCpu, pVmcsInfo);
5650 AssertRCReturn(rc, rc);
5651 pVCpu->hm.s.vmx.fCopiedNstGstToShadowVmcs = true;
5652 }
5653 hmR0VmxEnableVmcsShadowing(pVmcsInfo);
5654 }
5655 else
5656 hmR0VmxDisableVmcsShadowing(pVmcsInfo);
5657 }
5658#else
5659 NOREF(pVmxTransient);
5660#endif
5661 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_HWVIRT);
5662 }
5663 return VINF_SUCCESS;
5664}
5665
5666
5667/**
5668 * Exports the guest CR0 control register into the guest-state area in the VMCS.
5669 *
5670 * The guest FPU state is always pre-loaded hence we don't need to bother about
5671 * sharing FPU related CR0 bits between the guest and host.
5672 *
5673 * @returns VBox status code.
5674 * @param pVCpu The cross context virtual CPU structure.
5675 * @param pVmxTransient The VMX-transient structure.
5676 *
5677 * @remarks No-long-jump zone!!!
5678 */
5679static int hmR0VmxExportGuestCR0(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
5680{
5681 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR0)
5682 {
5683 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5684 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
5685
5686 uint64_t fSetCr0 = g_HmMsrs.u.vmx.u64Cr0Fixed0;
5687 uint64_t const fZapCr0 = g_HmMsrs.u.vmx.u64Cr0Fixed1;
5688 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
5689 fSetCr0 &= ~(uint64_t)(X86_CR0_PE | X86_CR0_PG);
5690 else
5691 Assert((fSetCr0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG));
5692
5693 if (!pVmxTransient->fIsNestedGuest)
5694 {
5695 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0);
5696 uint64_t u64GuestCr0 = pVCpu->cpum.GstCtx.cr0;
5697 uint64_t const u64ShadowCr0 = u64GuestCr0;
5698 Assert(!RT_HI_U32(u64GuestCr0));
5699
5700 /*
5701 * Setup VT-x's view of the guest CR0.
5702 */
5703 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
5704 if (pVM->hmr0.s.fNestedPaging)
5705 {
5706 if (CPUMIsGuestPagingEnabled(pVCpu))
5707 {
5708 /* The guest has paging enabled, let it access CR3 without causing a VM-exit if supported. */
5709 uProcCtls &= ~( VMX_PROC_CTLS_CR3_LOAD_EXIT
5710 | VMX_PROC_CTLS_CR3_STORE_EXIT);
5711 }
5712 else
5713 {
5714 /* The guest doesn't have paging enabled, make CR3 access cause a VM-exit to update our shadow. */
5715 uProcCtls |= VMX_PROC_CTLS_CR3_LOAD_EXIT
5716 | VMX_PROC_CTLS_CR3_STORE_EXIT;
5717 }
5718
5719 /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */
5720 if (pVM->hmr0.s.vmx.fUnrestrictedGuest)
5721 uProcCtls &= ~VMX_PROC_CTLS_CR3_STORE_EXIT;
5722 }
5723 else
5724 {
5725 /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */
5726 u64GuestCr0 |= X86_CR0_WP;
5727 }
5728
5729 /*
5730 * Guest FPU bits.
5731 *
5732             * Since we always pre-load the guest FPU before VM-entry, there is no need to track lazy
5733             * state using CR0.TS.
5734             *
5735             * Intel spec. 23.8 "Restrictions on VMX operation" mentions that the CR0.NE bit must always be
5736             * set on the first CPUs to support VT-x, but makes no mention of it w.r.t. UX (unrestricted guest) in the VM-entry checks.
5737 */
5738 u64GuestCr0 |= X86_CR0_NE;
5739
5740 /* If CR0.NE isn't set, we need to intercept #MF exceptions and report them to the guest differently. */
5741 bool const fInterceptMF = !(u64ShadowCr0 & X86_CR0_NE);
5742
5743 /*
5744 * Update exception intercepts.
5745 */
5746 uint32_t uXcptBitmap = pVmcsInfo->u32XcptBitmap;
5747 if (pVmcsInfo->pShared->RealMode.fRealOnV86Active)
5748 {
5749 Assert(PDMVmmDevHeapIsEnabled(pVM));
5750 Assert(pVM->hm.s.vmx.pRealModeTSS);
5751 uXcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK;
5752 }
5753 else
5754 {
5755 /* For now, cleared here as mode-switches can happen outside HM/VT-x. See @bugref{7626#c11}. */
5756 uXcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK;
5757 if (fInterceptMF)
5758 uXcptBitmap |= RT_BIT(X86_XCPT_MF);
5759 }
5760
5761 /* Additional intercepts for debugging, define these yourself explicitly. */
5762#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
5763 uXcptBitmap |= 0
5764 | RT_BIT(X86_XCPT_BP)
5765 | RT_BIT(X86_XCPT_DE)
5766 | RT_BIT(X86_XCPT_NM)
5767 | RT_BIT(X86_XCPT_TS)
5768 | RT_BIT(X86_XCPT_UD)
5769 | RT_BIT(X86_XCPT_NP)
5770 | RT_BIT(X86_XCPT_SS)
5771 | RT_BIT(X86_XCPT_GP)
5772 | RT_BIT(X86_XCPT_PF)
5773 | RT_BIT(X86_XCPT_MF)
5774 ;
5775#elif defined(HMVMX_ALWAYS_TRAP_PF)
5776 uXcptBitmap |= RT_BIT(X86_XCPT_PF);
5777#endif
5778 if (pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv)
5779 uXcptBitmap |= RT_BIT(X86_XCPT_GP);
5780 Assert(pVM->hmr0.s.fNestedPaging || (uXcptBitmap & RT_BIT(X86_XCPT_PF)));
5781
5782 /* Apply the hardware specified CR0 fixed bits and enable caching. */
5783 u64GuestCr0 |= fSetCr0;
5784 u64GuestCr0 &= fZapCr0;
5785 u64GuestCr0 &= ~(uint64_t)(X86_CR0_CD | X86_CR0_NW);
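            /* Worked example (hypothetical fixed MSRs): with Cr0Fixed0=0x80000021 (PG|NE|PE) and
               Cr0Fixed1=0xffffffff, a guest CR0 of 0x00000011 becomes 0x80000031 here; with
               unrestricted guest execution PE and PG were stripped from the must-be-one set above,
               so the guest may keep them clear. */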
5786
5787 /* Commit the CR0 and related fields to the guest VMCS. */
5788 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR0, u64GuestCr0); AssertRC(rc);
5789 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, u64ShadowCr0); AssertRC(rc);
5790 if (uProcCtls != pVmcsInfo->u32ProcCtls)
5791 {
5792 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
5793 AssertRC(rc);
5794 }
5795 if (uXcptBitmap != pVmcsInfo->u32XcptBitmap)
5796 {
5797 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap);
5798 AssertRC(rc);
5799 }
5800
5801 /* Update our caches. */
5802 pVmcsInfo->u32ProcCtls = uProcCtls;
5803 pVmcsInfo->u32XcptBitmap = uXcptBitmap;
5804
5805 Log4Func(("cr0=%#RX64 shadow=%#RX64 set=%#RX64 zap=%#RX64\n", u64GuestCr0, u64ShadowCr0, fSetCr0, fZapCr0));
5806 }
5807 else
5808 {
5809 /*
5810 * With nested-guests, we may have extended the guest/host mask here since we
5811 * merged in the outer guest's mask. Thus, the merged mask can include more bits
5812 * (to read from the nested-guest CR0 read-shadow) than the nested hypervisor
5813 * originally supplied. We must copy those bits from the nested-guest CR0 into
5814 * the nested-guest CR0 read-shadow.
5815 */
5816 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0);
5817 uint64_t u64GuestCr0 = pVCpu->cpum.GstCtx.cr0;
5818 uint64_t const u64ShadowCr0 = CPUMGetGuestVmxMaskedCr0(&pVCpu->cpum.GstCtx, pVmcsInfo->u64Cr0Mask);
5819 Assert(!RT_HI_U32(u64GuestCr0));
5820 Assert(u64GuestCr0 & X86_CR0_NE);
5821
5822 /* Apply the hardware specified CR0 fixed bits and enable caching. */
5823 u64GuestCr0 |= fSetCr0;
5824 u64GuestCr0 &= fZapCr0;
5825 u64GuestCr0 &= ~(uint64_t)(X86_CR0_CD | X86_CR0_NW);
5826
5827 /* Commit the CR0 and CR0 read-shadow to the nested-guest VMCS. */
5828 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR0, u64GuestCr0); AssertRC(rc);
5829 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, u64ShadowCr0); AssertRC(rc);
5830
5831 Log4Func(("cr0=%#RX64 shadow=%#RX64 (set=%#RX64 zap=%#RX64)\n", u64GuestCr0, u64ShadowCr0, fSetCr0, fZapCr0));
5832 }
5833
5834 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR0);
5835 }
5836
5837 return VINF_SUCCESS;
5838}
5839
5840
5841/**
5842 * Exports the guest control registers (CR3, CR4) into the guest-state area
5843 * in the VMCS.
5844 *
5845 * @returns VBox strict status code.
5846 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
5847 * without unrestricted guest access and the VMMDev is not presently
5848 * mapped (e.g. EFI32).
5849 *
5850 * @param pVCpu The cross context virtual CPU structure.
5851 * @param pVmxTransient The VMX-transient structure.
5852 *
5853 * @remarks No-long-jump zone!!!
5854 */
5855static VBOXSTRICTRC hmR0VmxExportGuestCR3AndCR4(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
5856{
5857 int rc = VINF_SUCCESS;
5858 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
5859
5860 /*
5861 * Guest CR2.
5862 * It's always loaded in the assembler code. Nothing to do here.
5863 */
5864
5865 /*
5866 * Guest CR3.
5867 */
5868 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR3)
5869 {
5870 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR3);
5871
5872 if (pVM->hmr0.s.fNestedPaging)
5873 {
5874 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
5875 pVmcsInfo->HCPhysEPTP = PGMGetHyperCR3(pVCpu);
5876
5877 /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */
5878 Assert(pVmcsInfo->HCPhysEPTP != NIL_RTHCPHYS);
5879 Assert(!(pVmcsInfo->HCPhysEPTP & UINT64_C(0xfff0000000000000)));
5880 Assert(!(pVmcsInfo->HCPhysEPTP & 0xfff));
5881
5882 /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */
5883 pVmcsInfo->HCPhysEPTP |= RT_BF_MAKE(VMX_BF_EPTP_MEMTYPE, VMX_EPTP_MEMTYPE_WB)
5884 | RT_BF_MAKE(VMX_BF_EPTP_PAGE_WALK_LENGTH, VMX_EPTP_PAGE_WALK_LENGTH_4);
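            /* Example (hypothetical EPT PML4 address): 0x0000000012345000 yields an EPTP of
               0x000000001234501e -- memory type WB (6) in bits 2:0 and a page-walk length of 4
               (encoded as 3) in bits 5:3. */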
5885
5886 /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */
5887 AssertMsg( ((pVmcsInfo->HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */
5888 && ((pVmcsInfo->HCPhysEPTP >> 7) & 0x1f) == 0, /* Bits 7:11 MBZ. */
5889 ("EPTP %#RX64\n", pVmcsInfo->HCPhysEPTP));
5890 AssertMsg( !((pVmcsInfo->HCPhysEPTP >> 6) & 0x01) /* Bit 6 (EPT accessed & dirty bit). */
5891 || (g_HmMsrs.u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_ACCESS_DIRTY),
5892 ("EPTP accessed/dirty bit not supported by CPU but set %#RX64\n", pVmcsInfo->HCPhysEPTP));
5893
5894 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVmcsInfo->HCPhysEPTP);
5895 AssertRC(rc);
5896
5897 uint64_t u64GuestCr3;
5898 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
5899 if ( pVM->hmr0.s.vmx.fUnrestrictedGuest
5900 || CPUMIsGuestPagingEnabledEx(pCtx))
5901 {
5902 /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. */
5903 if (CPUMIsGuestInPAEModeEx(pCtx))
5904 {
5905 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pCtx->aPaePdpes[0].u); AssertRC(rc);
5906 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pCtx->aPaePdpes[1].u); AssertRC(rc);
5907 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pCtx->aPaePdpes[2].u); AssertRC(rc);
5908 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pCtx->aPaePdpes[3].u); AssertRC(rc);
5909 }
5910
5911 /*
5912 * The guest's view of its CR3 is unblemished with nested paging when the
5913 * guest is using paging or we have unrestricted guest execution to handle
5914 * the guest when it's not using paging.
5915 */
5916 u64GuestCr3 = pCtx->cr3;
5917 }
5918 else
5919 {
5920 /*
5921 * The guest is not using paging, but the CPU (VT-x) has to. While the guest
5922 * thinks it accesses physical memory directly, we use our identity-mapped
5923 * page table to map guest-linear to guest-physical addresses. EPT takes care
5924 * of translating it to host-physical addresses.
5925 */
5926 RTGCPHYS GCPhys;
5927 Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable);
5928
5929 /* We obtain it here every time as the guest could have relocated this PCI region. */
5930 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
5931 if (RT_SUCCESS(rc))
5932 { /* likely */ }
5933 else if (rc == VERR_PDM_DEV_HEAP_R3_TO_GCPHYS)
5934 {
5935 Log4Func(("VERR_PDM_DEV_HEAP_R3_TO_GCPHYS -> VINF_EM_RESCHEDULE_REM\n"));
5936 return VINF_EM_RESCHEDULE_REM; /* We cannot execute now, switch to REM/IEM till the guest maps in VMMDev. */
5937 }
5938 else
5939 AssertMsgFailedReturn(("%Rrc\n", rc), rc);
5940
5941 u64GuestCr3 = GCPhys;
5942 }
5943
5944 Log4Func(("guest_cr3=%#RX64 (GstN)\n", u64GuestCr3));
5945 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR3, u64GuestCr3);
5946 AssertRC(rc);
5947 }
5948 else
5949 {
5950 Assert(!pVmxTransient->fIsNestedGuest);
5951 /* Non-nested paging case, just use the hypervisor's CR3. */
5952 RTHCPHYS const HCPhysGuestCr3 = PGMGetHyperCR3(pVCpu);
5953
5954 Log4Func(("guest_cr3=%#RX64 (HstN)\n", HCPhysGuestCr3));
5955 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR3, HCPhysGuestCr3);
5956 AssertRC(rc);
5957 }
5958
5959 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR3);
5960 }
5961
5962 /*
5963 * Guest CR4.
5964 * ASSUMES this is done every time we get in from ring-3! (XCR0)
5965 */
5966 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR4)
5967 {
5968 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
5969 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
5970
5971 uint64_t const fSetCr4 = g_HmMsrs.u.vmx.u64Cr4Fixed0;
5972 uint64_t const fZapCr4 = g_HmMsrs.u.vmx.u64Cr4Fixed1;
5973
5974 /*
5975 * With nested-guests, we may have extended the guest/host mask here (since we
5976 * merged in the outer guest's mask, see hmR0VmxMergeVmcsNested). This means the
5977 * mask can include more bits (to read from the nested-guest CR4 read-shadow) than
5978 * the nested hypervisor originally supplied. Thus, we should, in essence, copy
5979 * those bits from the nested-guest CR4 into the nested-guest CR4 read-shadow.
5980 */
5981 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR4);
5982 uint64_t u64GuestCr4 = pCtx->cr4;
5983 uint64_t const u64ShadowCr4 = !pVmxTransient->fIsNestedGuest
5984 ? pCtx->cr4
5985 : CPUMGetGuestVmxMaskedCr4(pCtx, pVmcsInfo->u64Cr4Mask);
5986 Assert(!RT_HI_U32(u64GuestCr4));
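 /*
  * Reminder on the read-shadow mechanics: for every bit set in the CR4 guest/host mask,
  * guest reads of CR4 return the corresponding bit from the CR4 read-shadow instead of
  * the VMCS guest CR4, and a guest write that would make such a bit differ from the
  * read-shadow causes a VM-exit. Hence the shadow above is derived from the guest CR4
  * (masked appropriately for nested-guests).
  */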
5987
5988 /*
5989 * Setup VT-x's view of the guest CR4.
5990 *
5991 * If we're emulating real-mode using virtual-8086 mode, we want to redirect software
5992 * interrupts to the 8086 program interrupt handler. Clear the VME bit (the interrupt
5993 * redirection bitmap is already all 0, see hmR3InitFinalizeR0())
5994 *
5995 * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode".
5996 */
5997 if (pVmcsInfo->pShared->RealMode.fRealOnV86Active)
5998 {
5999 Assert(pVM->hm.s.vmx.pRealModeTSS);
6000 Assert(PDMVmmDevHeapIsEnabled(pVM));
6001 u64GuestCr4 &= ~(uint64_t)X86_CR4_VME;
6002 }
6003
6004 if (pVM->hmr0.s.fNestedPaging)
6005 {
6006 if ( !CPUMIsGuestPagingEnabledEx(pCtx)
6007 && !pVM->hmr0.s.vmx.fUnrestrictedGuest)
6008 {
6009 /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */
6010 u64GuestCr4 |= X86_CR4_PSE;
6011 /* Our identity mapping is a 32-bit page directory. */
6012 u64GuestCr4 &= ~(uint64_t)X86_CR4_PAE;
6013 }
6014 /* else use guest CR4.*/
6015 }
6016 else
6017 {
6018 Assert(!pVmxTransient->fIsNestedGuest);
6019
6020 /*
6021 * The shadow paging modes and guest paging modes are different. The shadow is in accordance with the host
6022 * paging mode and thus we need to adjust VT-x's view of CR4 depending on our shadow page tables.
6023 */
6024 switch (pVCpu->hm.s.enmShadowMode)
6025 {
6026 case PGMMODE_REAL: /* Real-mode. */
6027 case PGMMODE_PROTECTED: /* Protected mode without paging. */
6028 case PGMMODE_32_BIT: /* 32-bit paging. */
6029 {
6030 u64GuestCr4 &= ~(uint64_t)X86_CR4_PAE;
6031 break;
6032 }
6033
6034 case PGMMODE_PAE: /* PAE paging. */
6035 case PGMMODE_PAE_NX: /* PAE paging with NX. */
6036 {
6037 u64GuestCr4 |= X86_CR4_PAE;
6038 break;
6039 }
6040
6041 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
6042 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
6043 {
6044#ifdef VBOX_WITH_64_BITS_GUESTS
6045 /* For our assumption in hmR0VmxShouldSwapEferMsr. */
6046 Assert(u64GuestCr4 & X86_CR4_PAE);
6047 break;
6048#endif
6049 }
6050 default:
6051 AssertFailed();
6052 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
6053 }
6054 }
6055
6056 /* Apply the hardware specified CR4 fixed bits (mainly CR4.VMXE). */
6057 u64GuestCr4 |= fSetCr4;
6058 u64GuestCr4 &= fZapCr4;
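 /*
  * I.e. cr4 = (cr4 | IA32_VMX_CR4_FIXED0) & IA32_VMX_CR4_FIXED1: bits that are 1 in
  * FIXED0 must be 1 while in VMX operation, and bits that are 0 in FIXED1 must be 0.
  * See Intel spec. Appendix A.8 "VMX-Fixed Bits in CR4".
  */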
6059
6060 /* Commit the CR4 and CR4 read-shadow to the guest VMCS. */
6061 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_CR4, u64GuestCr4); AssertRC(rc);
6062 rc = VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, u64ShadowCr4); AssertRC(rc);
6063
6064 /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+guest XCR0. */
6065 bool const fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
6066 if (fLoadSaveGuestXcr0 != pVCpu->hmr0.s.fLoadSaveGuestXcr0)
6067 {
6068 pVCpu->hmr0.s.fLoadSaveGuestXcr0 = fLoadSaveGuestXcr0;
6069 hmR0VmxUpdateStartVmFunction(pVCpu);
6070 }
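 /*
  * Note: XCR0 is not a VMCS field. When fLoadSaveGuestXcr0 is set, the start-VM function
  * variant selected above is presumably the one that XSETBVs the guest XCR0 just before
  * VM-entry and restores the host XCR0 right after VM-exit.
  */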
6071
6072 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR4);
6073
6074 Log4Func(("cr4=%#RX64 shadow=%#RX64 (set=%#RX64 zap=%#RX64)\n", u64GuestCr4, u64ShadowCr4, fSetCr4, fZapCr4));
6075 }
6076 return rc;
6077}
6078
6079
6080/**
6081 * Exports the guest debug registers into the guest-state area in the VMCS.
6082 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
6083 *
6084 * This also sets up whether \#DB and MOV DRx accesses cause VM-exits.
6085 *
6086 * @returns VBox status code.
6087 * @param pVCpu The cross context virtual CPU structure.
6088 * @param pVmxTransient The VMX-transient structure.
6089 *
6090 * @remarks No-long-jump zone!!!
6091 */
6092static int hmR0VmxExportSharedDebugState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
6093{
6094 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6095
6096 /** @todo NSTVMX: Figure out what we want to do with nested-guest instruction
6097 * stepping. */
6098 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6099 if (pVmxTransient->fIsNestedGuest)
6100 {
6101 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, CPUMGetGuestDR7(pVCpu));
6102 AssertRC(rc);
6103
6104 /*
6105 * We don't want to always intercept MOV DRx for nested-guests as it causes
6106 * problems when the nested hypervisor isn't intercepting them, see @bugref{10080}.
6107 * Instead, they are strictly only requested when the nested hypervisor intercepts
6108 * them -- handled while merging VMCS controls.
6109 *
6110 * If neither the outer nor the nested-hypervisor is intercepting MOV DRx,
6111 * then the nested-guest debug state should be actively loaded on the host so that
6112 * the nested-guest reads its own debug registers without causing VM-exits.
6113 */
6114 if ( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT)
6115 && !CPUMIsGuestDebugStateActive(pVCpu))
6116 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
6117 return VINF_SUCCESS;
6118 }
6119
6120#ifdef VBOX_STRICT
6121 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
6122 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
6123 {
6124 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
6125 Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0);
6126 Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK);
6127 }
6128#endif
6129
6130 bool fSteppingDB = false;
6131 bool fInterceptMovDRx = false;
6132 uint32_t uProcCtls = pVmcsInfo->u32ProcCtls;
6133 if (pVCpu->hm.s.fSingleInstruction)
6134 {
6135 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
6136 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG)
6137 {
6138 uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
6139 Assert(fSteppingDB == false);
6140 }
6141 else
6142 {
6143 pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF;
6144 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS;
6145 pVCpu->hmr0.s.fClearTrapFlag = true;
6146 fSteppingDB = true;
6147 }
6148 }
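 /*
  * Design note: the monitor trap flag causes a VM-exit after a single guest instruction
  * without touching guest-visible state, which is why it is preferred. The EFLAGS.TF
  * fallback is guest-visible, so fClearTrapFlag is set to undo it once the step completes.
  */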
6149
6150 uint64_t u64GuestDr7;
6151 if ( fSteppingDB
6152 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
6153 {
6154 /*
6155 * Use the combined guest and host DRx values found in the hypervisor register set
6156 * because the hypervisor debugger has breakpoints active or someone is single stepping
6157 * on the host side without a monitor trap flag.
6158 *
6159 * Note! DBGF expects a clean DR6 state before executing guest code.
6160 */
6161 if (!CPUMIsHyperDebugStateActive(pVCpu))
6162 {
6163 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
6164 Assert(CPUMIsHyperDebugStateActive(pVCpu));
6165 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
6166 }
6167
6168 /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */
6169 u64GuestDr7 = CPUMGetHyperDR7(pVCpu);
6170 pVCpu->hmr0.s.fUsingHyperDR7 = true;
6171 fInterceptMovDRx = true;
6172 }
6173 else
6174 {
6175 /*
6176 * If the guest has enabled debug registers, we need to load them prior to
6177 * executing guest code so they'll trigger at the right time.
6178 */
6179 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DR7);
6180 if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
6181 {
6182 if (!CPUMIsGuestDebugStateActive(pVCpu))
6183 {
6184 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
6185 Assert(CPUMIsGuestDebugStateActive(pVCpu));
6186 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
6187 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
6188 }
6189 Assert(!fInterceptMovDRx);
6190 }
6191 else if (!CPUMIsGuestDebugStateActive(pVCpu))
6192 {
6193 /*
6194 * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we
6195 * must intercept #DB in order to maintain a correct DR6 guest value, and
6196 * because we need to intercept it to prevent nested #DBs from hanging the
6197 * CPU, we end up always having to intercept it. See hmR0VmxSetupVmcsXcptBitmap().
6198 */
6199 fInterceptMovDRx = true;
6200 }
6201
6202 /* Update DR7 with the actual guest value. */
6203 u64GuestDr7 = pVCpu->cpum.GstCtx.dr[7];
6204 pVCpu->hmr0.s.fUsingHyperDR7 = false;
6205 }
6206
6207 if (fInterceptMovDRx)
6208 uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT;
6209 else
6210 uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
6211
6212 /*
6213 * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the
6214 * monitor-trap flag and update our cache.
6215 */
6216 if (uProcCtls != pVmcsInfo->u32ProcCtls)
6217 {
6218 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls);
6219 AssertRC(rc);
6220 pVmcsInfo->u32ProcCtls = uProcCtls;
6221 }
6222
6223 /*
6224 * Update guest DR7.
6225 */
6226 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, u64GuestDr7);
6227 AssertRC(rc);
6228
6229 /*
6230 * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger,
6231 * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure.
6232 *
6233 * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State".
6234 */
6235 if (fSteppingDB)
6236 {
6237 Assert(pVCpu->hm.s.fSingleInstruction);
6238 Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF);
6239
6240 uint32_t fIntrState = 0;
6241 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
6242 AssertRC(rc);
6243
6244 if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
6245 {
6246 fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
6247 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
6248 AssertRC(rc);
6249 }
6250 }
6251
6252 return VINF_SUCCESS;
6253}
6254
6255
6256#ifdef VBOX_STRICT
6257/**
6258 * Strict function to validate segment registers.
6259 *
6260 * @param pVCpu The cross context virtual CPU structure.
6261 * @param pVmcsInfo The VMCS info. object.
6262 *
6263 * @remarks Will import guest CR0 on strict builds during validation of
6264 * segments.
6265 */
6266static void hmR0VmxValidateSegmentRegs(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo)
6267{
6268 /*
6269 * Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
6270 *
6271 * The reason we check for attribute value 0 in this function and not just the unusable bit is
6272 * because hmR0VmxExportGuestSegReg() only updates the VMCS' copy of the value with the
6273 * unusable bit and doesn't change the guest-context value.
6274 */
6275 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6276 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
6277 hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_CR0);
6278 if ( !pVM->hmr0.s.vmx.fUnrestrictedGuest
6279 && ( !CPUMIsGuestInRealModeEx(pCtx)
6280 && !CPUMIsGuestInV86ModeEx(pCtx)))
6281 {
6282 /* Protected mode checks */
6283 /* CS */
6284 Assert(pCtx->cs.Attr.n.u1Present);
6285 Assert(!(pCtx->cs.Attr.u & 0xf00));
6286 Assert(!(pCtx->cs.Attr.u & 0xfffe0000));
6287 Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff
6288 || !(pCtx->cs.Attr.n.u1Granularity));
6289 Assert( !(pCtx->cs.u32Limit & 0xfff00000)
6290 || (pCtx->cs.Attr.n.u1Granularity));
6291 /* CS cannot be loaded with NULL in protected mode. */
6292 Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS? */
6293 if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
6294 Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl);
6295 else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
6296 Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl);
6297 else
6298 AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u2Dpl));
6299 /* SS */
6300 Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
6301 Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL));
6302 if ( !(pCtx->cr0 & X86_CR0_PE)
6303 || pCtx->cs.Attr.n.u4Type == 3)
6304 {
6305 Assert(!pCtx->ss.Attr.n.u2Dpl);
6306 }
6307 if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
6308 {
6309 Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
6310 Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7);
6311 Assert(pCtx->ss.Attr.n.u1Present);
6312 Assert(!(pCtx->ss.Attr.u & 0xf00));
6313 Assert(!(pCtx->ss.Attr.u & 0xfffe0000));
6314 Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff
6315 || !(pCtx->ss.Attr.n.u1Granularity));
6316 Assert( !(pCtx->ss.u32Limit & 0xfff00000)
6317 || (pCtx->ss.Attr.n.u1Granularity));
6318 }
6319 /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxExportGuestSegReg(). */
6320 if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
6321 {
6322 Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
6323 Assert(pCtx->ds.Attr.n.u1Present);
6324 Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL));
6325 Assert(!(pCtx->ds.Attr.u & 0xf00));
6326 Assert(!(pCtx->ds.Attr.u & 0xfffe0000));
6327 Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff
6328 || !(pCtx->ds.Attr.n.u1Granularity));
6329 Assert( !(pCtx->ds.u32Limit & 0xfff00000)
6330 || (pCtx->ds.Attr.n.u1Granularity));
6331 Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
6332 || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ));
6333 }
6334 if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
6335 {
6336 Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
6337 Assert(pCtx->es.Attr.n.u1Present);
6338 Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL));
6339 Assert(!(pCtx->es.Attr.u & 0xf00));
6340 Assert(!(pCtx->es.Attr.u & 0xfffe0000));
6341 Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff
6342 || !(pCtx->es.Attr.n.u1Granularity));
6343 Assert( !(pCtx->es.u32Limit & 0xfff00000)
6344 || (pCtx->es.Attr.n.u1Granularity));
6345 Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
6346 || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ));
6347 }
6348 if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
6349 {
6350 Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
6351 Assert(pCtx->fs.Attr.n.u1Present);
6352 Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL));
6353 Assert(!(pCtx->fs.Attr.u & 0xf00));
6354 Assert(!(pCtx->fs.Attr.u & 0xfffe0000));
6355 Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff
6356 || !(pCtx->fs.Attr.n.u1Granularity));
6357 Assert( !(pCtx->fs.u32Limit & 0xfff00000)
6358 || (pCtx->fs.Attr.n.u1Granularity));
6359 Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
6360 || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ));
6361 }
6362 if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
6363 {
6364 Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
6365 Assert(pCtx->gs.Attr.n.u1Present);
6366 Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL));
6367 Assert(!(pCtx->gs.Attr.u & 0xf00));
6368 Assert(!(pCtx->gs.Attr.u & 0xfffe0000));
6369 Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff
6370 || !(pCtx->gs.Attr.n.u1Granularity));
6371 Assert( !(pCtx->gs.u32Limit & 0xfff00000)
6372 || (pCtx->gs.Attr.n.u1Granularity));
6373 Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
6374 || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ));
6375 }
6376 /* 64-bit capable CPUs. */
6377 Assert(!RT_HI_U32(pCtx->cs.u64Base));
6378 Assert(!pCtx->ss.Attr.u || !RT_HI_U32(pCtx->ss.u64Base));
6379 Assert(!pCtx->ds.Attr.u || !RT_HI_U32(pCtx->ds.u64Base));
6380 Assert(!pCtx->es.Attr.u || !RT_HI_U32(pCtx->es.u64Base));
6381 }
6382 else if ( CPUMIsGuestInV86ModeEx(pCtx)
6383 || ( CPUMIsGuestInRealModeEx(pCtx)
6384 && !pVM->hmr0.s.vmx.fUnrestrictedGuest))
6385 {
6386 /* Real and v86 mode checks. */
6387 /* hmR0VmxExportGuestSegReg() writes the modified attribute values into the VMCS. We want what we're feeding to VT-x. */
6388 uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
6389 if (pVmcsInfo->pShared->RealMode.fRealOnV86Active)
6390 {
6391 u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3;
6392 u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3;
6393 }
6394 else
6395 {
6396 u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u;
6397 u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
6398 }
6399
6400 /* CS */
6401 AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel));
6402 Assert(pCtx->cs.u32Limit == 0xffff);
6403 Assert(u32CSAttr == 0xf3);
6404 /* SS */
6405 Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4);
6406 Assert(pCtx->ss.u32Limit == 0xffff);
6407 Assert(u32SSAttr == 0xf3);
6408 /* DS */
6409 Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4);
6410 Assert(pCtx->ds.u32Limit == 0xffff);
6411 Assert(u32DSAttr == 0xf3);
6412 /* ES */
6413 Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4);
6414 Assert(pCtx->es.u32Limit == 0xffff);
6415 Assert(u32ESAttr == 0xf3);
6416 /* FS */
6417 Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4);
6418 Assert(pCtx->fs.u32Limit == 0xffff);
6419 Assert(u32FSAttr == 0xf3);
6420 /* GS */
6421 Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4);
6422 Assert(pCtx->gs.u32Limit == 0xffff);
6423 Assert(u32GSAttr == 0xf3);
6424 /* 64-bit capable CPUs. */
6425 Assert(!RT_HI_U32(pCtx->cs.u64Base));
6426 Assert(!u32SSAttr || !RT_HI_U32(pCtx->ss.u64Base));
6427 Assert(!u32DSAttr || !RT_HI_U32(pCtx->ds.u64Base));
6428 Assert(!u32ESAttr || !RT_HI_U32(pCtx->es.u64Base));
6429 }
6430}
6431#endif /* VBOX_STRICT */
6432
6433
6434/**
6435 * Exports a guest segment register into the guest-state area in the VMCS.
6436 *
6437 * @returns VBox status code.
6438 * @param pVCpu The cross context virtual CPU structure.
6439 * @param pVmcsInfo The VMCS info. object.
6440 * @param iSegReg The segment register number (X86_SREG_XXX).
6441 * @param pSelReg Pointer to the segment selector.
6442 *
6443 * @remarks No-long-jump zone!!!
6444 */
6445static int hmR0VmxExportGuestSegReg(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo, uint32_t iSegReg, PCCPUMSELREG pSelReg)
6446{
6447 Assert(iSegReg < X86_SREG_COUNT);
6448
6449 uint32_t u32Access = pSelReg->Attr.u;
6450 if (!pVmcsInfo->pShared->RealMode.fRealOnV86Active)
6451 {
6452 /*
6453 * The way to differentiate whether this is really a null selector or just a selector
6454 * loaded with 0 in real-mode is by using the segment attributes. A selector loaded in
6455 * real-mode with the value 0 is valid and usable in protected-mode and we should -not-
6456 * mark it as an unusable segment. Both the recompiler & VT-x ensure that NULL selectors
6457 * loaded in protected-mode have their attributes set to 0.
6458 */
6459 if (u32Access)
6460 { }
6461 else
6462 u32Access = X86DESCATTR_UNUSABLE;
6463 }
6464 else
6465 {
6466 /* VT-x requires our real-using-v86 mode hack to override the segment access-right bits. */
6467 u32Access = 0xf3;
6468 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
6469 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
6470 RT_NOREF_PV(pVCpu);
6471 }
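 /*
  * For reference: 0xf3 in the VMCS access-rights encoding means P=1, DPL=3, S=1 and
  * type=3 (accessed read/write data segment), i.e. what a virtual-8086 mode segment
  * looks like. The unusable flag used above for null selectors is bit 16
  * (X86DESCATTR_UNUSABLE).
  */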
6472
6473 /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */
6474 AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED),
6475 ("Access bit not set for usable segment. %.2s sel=%#x attr %#x\n", "ESCSSSDSFSGS" + iSegReg * 2, pSelReg, pSelReg->Attr.u));
6476
6477 /*
6478 * Commit it to the VMCS.
6479 */
6480 Assert((uint32_t)VMX_VMCS16_GUEST_SEG_SEL(iSegReg) == g_aVmcsSegSel[iSegReg]);
6481 Assert((uint32_t)VMX_VMCS32_GUEST_SEG_LIMIT(iSegReg) == g_aVmcsSegLimit[iSegReg]);
6482 Assert((uint32_t)VMX_VMCS32_GUEST_SEG_ACCESS_RIGHTS(iSegReg) == g_aVmcsSegAttr[iSegReg]);
6483 Assert((uint32_t)VMX_VMCS_GUEST_SEG_BASE(iSegReg) == g_aVmcsSegBase[iSegReg]);
6484 int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_SEG_SEL(iSegReg), pSelReg->Sel); AssertRC(rc);
6485 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SEG_LIMIT(iSegReg), pSelReg->u32Limit); AssertRC(rc);
6486 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SEG_BASE(iSegReg), pSelReg->u64Base); AssertRC(rc);
6487 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SEG_ACCESS_RIGHTS(iSegReg), u32Access); AssertRC(rc);
6488 return VINF_SUCCESS;
6489}
6490
6491
6492/**
6493 * Exports the guest segment registers, GDTR, IDTR, LDTR, TR into the guest-state
6494 * area in the VMCS.
6495 *
6496 * @returns VBox status code.
6497 * @param pVCpu The cross context virtual CPU structure.
6498 * @param pVmxTransient The VMX-transient structure.
6499 *
6500 * @remarks Will import guest CR0 on strict builds during validation of
6501 * segments.
6502 * @remarks No-long-jump zone!!!
6503 */
6504static int hmR0VmxExportGuestSegRegsXdtr(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
6505{
6506 int rc = VERR_INTERNAL_ERROR_5;
6507 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6508 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
6509 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6510 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
6511
6512 /*
6513 * Guest Segment registers: CS, SS, DS, ES, FS, GS.
6514 */
6515 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SREG_MASK)
6516 {
6517 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CS)
6518 {
6519 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS);
6520 if (pVmcsInfoShared->RealMode.fRealOnV86Active)
6521 pVmcsInfoShared->RealMode.AttrCS.u = pCtx->cs.Attr.u;
6522 rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_CS, &pCtx->cs);
6523 AssertRC(rc);
6524 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CS);
6525 }
6526
6527 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SS)
6528 {
6529 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SS);
6530 if (pVmcsInfoShared->RealMode.fRealOnV86Active)
6531 pVmcsInfoShared->RealMode.AttrSS.u = pCtx->ss.Attr.u;
6532 rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_SS, &pCtx->ss);
6533 AssertRC(rc);
6534 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SS);
6535 }
6536
6537 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_DS)
6538 {
6539 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DS);
6540 if (pVmcsInfoShared->RealMode.fRealOnV86Active)
6541 pVmcsInfoShared->RealMode.AttrDS.u = pCtx->ds.Attr.u;
6542 rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_DS, &pCtx->ds);
6543 AssertRC(rc);
6544 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_DS);
6545 }
6546
6547 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_ES)
6548 {
6549 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_ES);
6550 if (pVmcsInfoShared->RealMode.fRealOnV86Active)
6551 pVmcsInfoShared->RealMode.AttrES.u = pCtx->es.Attr.u;
6552 rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_ES, &pCtx->es);
6553 AssertRC(rc);
6554 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_ES);
6555 }
6556
6557 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_FS)
6558 {
6559 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_FS);
6560 if (pVmcsInfoShared->RealMode.fRealOnV86Active)
6561 pVmcsInfoShared->RealMode.AttrFS.u = pCtx->fs.Attr.u;
6562 rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_FS, &pCtx->fs);
6563 AssertRC(rc);
6564 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_FS);
6565 }
6566
6567 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_GS)
6568 {
6569 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GS);
6570 if (pVmcsInfoShared->RealMode.fRealOnV86Active)
6571 pVmcsInfoShared->RealMode.AttrGS.u = pCtx->gs.Attr.u;
6572 rc = hmR0VmxExportGuestSegReg(pVCpu, pVmcsInfo, X86_SREG_GS, &pCtx->gs);
6573 AssertRC(rc);
6574 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_GS);
6575 }
6576
6577#ifdef VBOX_STRICT
6578 hmR0VmxValidateSegmentRegs(pVCpu, pVmcsInfo);
6579#endif
6580 Log4Func(("cs={%#04x base=%#RX64 limit=%#RX32 attr=%#RX32}\n", pCtx->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit,
6581 pCtx->cs.Attr.u));
6582 }
6583
6584 /*
6585 * Guest TR.
6586 */
6587 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_TR)
6588 {
6589 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_TR);
6590
6591 /*
6592 * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is
6593 * achieved using the interrupt redirection bitmap (all bits cleared to let the guest
6594 * handle INT-n's) in the TSS. See hmR3InitFinalizeR0() to see how pRealModeTSS is setup.
6595 */
6596 uint16_t u16Sel;
6597 uint32_t u32Limit;
6598 uint64_t u64Base;
6599 uint32_t u32AccessRights;
6600 if (!pVmcsInfoShared->RealMode.fRealOnV86Active)
6601 {
6602 u16Sel = pCtx->tr.Sel;
6603 u32Limit = pCtx->tr.u32Limit;
6604 u64Base = pCtx->tr.u64Base;
6605 u32AccessRights = pCtx->tr.Attr.u;
6606 }
6607 else
6608 {
6609 Assert(!pVmxTransient->fIsNestedGuest);
6610 Assert(pVM->hm.s.vmx.pRealModeTSS);
6611 Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMCanExecuteGuest() -XXX- what about inner loop changes? */
6612
6613 /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */
6614 RTGCPHYS GCPhys;
6615 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
6616 AssertRCReturn(rc, rc);
6617
6618 X86DESCATTR DescAttr;
6619 DescAttr.u = 0;
6620 DescAttr.n.u1Present = 1;
6621 DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
6622
6623 u16Sel = 0;
6624 u32Limit = HM_VTX_TSS_SIZE;
6625 u64Base = GCPhys;
6626 u32AccessRights = DescAttr.u;
6627 }
6628
6629 /* Validate. */
6630 Assert(!(u16Sel & RT_BIT(2)));
6631 AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY
6632 || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights));
6633 AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights));
6634 Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/
6635 Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/
6636 Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. */
6637 Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */
6638 Assert( (u32Limit & 0xfff) == 0xfff
6639 || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */
6640 Assert( !(pCtx->tr.u32Limit & 0xfff00000)
6641 || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */
6642
6643 rc = VMXWriteVmcs16(VMX_VMCS16_GUEST_TR_SEL, u16Sel); AssertRC(rc);
6644 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit); AssertRC(rc);
6645 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights); AssertRC(rc);
6646 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_TR_BASE, u64Base); AssertRC(rc);
6647
6648 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_TR);
6649 Log4Func(("tr base=%#RX64 limit=%#RX32\n", pCtx->tr.u64Base, pCtx->tr.u32Limit));
6650 }
6651
6652 /*
6653 * Guest GDTR.
6654 */
6655 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_GDTR)
6656 {
6657 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GDTR);
6658
6659 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt); AssertRC(rc);
6660 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt); AssertRC(rc);
6661
6662 /* Validate. */
6663 Assert(!(pCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */
6664
6665 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_GDTR);
6666 Log4Func(("gdtr base=%#RX64 limit=%#RX32\n", pCtx->gdtr.pGdt, pCtx->gdtr.cbGdt));
6667 }
6668
6669 /*
6670 * Guest LDTR.
6671 */
6672 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_LDTR)
6673 {
6674 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_LDTR);
6675
6676 /* The unusable bit is specific to VT-x; if it's a null selector, mark it as an unusable segment. */
6677 uint32_t u32Access;
6678 if ( !pVmxTransient->fIsNestedGuest
6679 && !pCtx->ldtr.Attr.u)
6680 u32Access = X86DESCATTR_UNUSABLE;
6681 else
6682 u32Access = pCtx->ldtr.Attr.u;
6683
6684 rc = VMXWriteVmcs16(VMX_VMCS16_GUEST_LDTR_SEL, pCtx->ldtr.Sel); AssertRC(rc);
6685 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit); AssertRC(rc);
6686 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access); AssertRC(rc);
6687 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtr.u64Base); AssertRC(rc);
6688
6689 /* Validate. */
6690 if (!(u32Access & X86DESCATTR_UNUSABLE))
6691 {
6692 Assert(!(pCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */
6693 Assert(pCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */
6694 Assert(!pCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */
6695 Assert(pCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */
6696 Assert(!pCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */
6697 Assert(!(pCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */
6698 Assert( (pCtx->ldtr.u32Limit & 0xfff) == 0xfff
6699 || !pCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */
6700 Assert( !(pCtx->ldtr.u32Limit & 0xfff00000)
6701 || pCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */
6702 }
6703
6704 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_LDTR);
6705 Log4Func(("ldtr base=%#RX64 limit=%#RX32\n", pCtx->ldtr.u64Base, pCtx->ldtr.u32Limit));
6706 }
6707
6708 /*
6709 * Guest IDTR.
6710 */
6711 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_IDTR)
6712 {
6713 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_IDTR);
6714
6715 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt); AssertRC(rc);
6716 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt); AssertRC(rc);
6717
6718 /* Validate. */
6719 Assert(!(pCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */
6720
6721 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_IDTR);
6722 Log4Func(("idtr base=%#RX64 limit=%#RX32\n", pCtx->idtr.pIdt, pCtx->idtr.cbIdt));
6723 }
6724
6725 return VINF_SUCCESS;
6726}
6727
6728
6729/**
6730 * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
6731 * areas.
6732 *
6733 * These MSRs will automatically be loaded to the host CPU on every successful
6734 * VM-entry and stored from the host CPU on every successful VM-exit.
6735 *
6736 * We create/update MSR slots for the host MSRs in the VM-exit MSR-load area. The
6737 * actual host MSR values are not updated here for performance reasons. See
6738 * hmR0VmxExportHostMsrs().
6739 *
6740 * We also export the guest sysenter MSRs into the guest-state area in the VMCS.
6741 *
6742 * @returns VBox status code.
6743 * @param pVCpu The cross context virtual CPU structure.
6744 * @param pVmxTransient The VMX-transient structure.
6745 *
6746 * @remarks No-long-jump zone!!!
6747 */
6748static int hmR0VmxExportGuestMsrs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
6749{
6750 AssertPtr(pVCpu);
6751 AssertPtr(pVmxTransient);
6752
6753 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
6754 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
6755
6756 /*
6757 * MSRs that we use the auto-load/store MSR area in the VMCS.
6758 * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(),
6759 * nothing to do here. The host MSR values are updated when it's safe in
6760 * hmR0VmxLazySaveHostMsrs().
6761 *
6762 * For nested-guests, the guest's MSRs from the VM-entry MSR-load area are already
6763 * loaded (into the guest-CPU context) by the VMLAUNCH/VMRESUME instruction
6764 * emulation. The merged MSR permission bitmap will ensure that we get VM-exits
6765 * for any MSRs that are not part of the lazy MSRs, so we do not need to place
6766 * those MSRs into the auto-load/store MSR area. Nothing to do here.
6767 */
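 /*
  * (The lazily swapped MSRs on 64-bit hosts are the syscall/segment-base ones, i.e.
  * LSTAR, STAR, SF_MASK and KERNEL_GS_BASE -- see hmR0VmxLazyLoadGuestMsrs for the
  * authoritative list.)
  */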
6768 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
6769 {
6770 /* No auto-load/store MSRs currently. */
6771 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS);
6772 }
6773
6774 /*
6775 * Guest Sysenter MSRs.
6776 */
6777 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK)
6778 {
6779 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS);
6780
6781 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
6782 {
6783 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
6784 AssertRC(rc);
6785 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR);
6786 }
6787
6788 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
6789 {
6790 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
6791 AssertRC(rc);
6792 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
6793 }
6794
6795 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
6796 {
6797 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
6798 AssertRC(rc);
6799 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
6800 }
6801 }
6802
6803 /*
6804 * Guest/host EFER MSR.
6805 */
6806 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR)
6807 {
6808 /* Whether we are using the VMCS to swap the EFER MSR must have been
6809 determined earlier while exporting VM-entry/VM-exit controls. */
6810 Assert(!(ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_EXIT_CTLS));
6811 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
6812
6813 if (hmR0VmxShouldSwapEferMsr(pVCpu, pVmxTransient))
6814 {
6815 /*
6816 * EFER.LME is written by software, while EFER.LMA is set by the CPU to (CR0.PG & EFER.LME).
6817 * This means a guest can set EFER.LME=1 while CR0.PG=0 and EFER.LMA can remain 0.
6818 * VT-x requires that "IA-32e mode guest" VM-entry control must be identical to EFER.LMA
6819 * and to CR0.PG. Without unrestricted execution, CR0.PG (used for VT-x, not the shadow)
6820 * must always be 1. This forces us to effectively clear both EFER.LMA and EFER.LME until
6821 * the guest has also set CR0.PG=1. Otherwise, we would run into an invalid-guest state
6822 * during VM-entry.
6823 */
6824 uint64_t uGuestEferMsr = pCtx->msrEFER;
6825 if (!pVM->hmr0.s.vmx.fUnrestrictedGuest)
6826 {
6827 if (!(pCtx->msrEFER & MSR_K6_EFER_LMA))
6828 uGuestEferMsr &= ~MSR_K6_EFER_LME;
6829 else
6830 Assert((pCtx->msrEFER & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
6831 }
6832
6833 /*
6834 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
6835 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
6836 */
6837 if (g_fHmVmxSupportsVmcsEfer)
6838 {
6839 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, uGuestEferMsr);
6840 AssertRC(rc);
6841 }
6842 else
6843 {
6844 /*
6845 * We shall use the auto-load/store MSR area only for loading the EFER MSR but we must
6846 * continue to intercept guest read and write accesses to it, see @bugref{7386#c16}.
6847 */
6848 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER, uGuestEferMsr,
6849 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
6850 AssertRCReturn(rc, rc);
6851 }
6852
6853 Log4Func(("efer=%#RX64 shadow=%#RX64\n", uGuestEferMsr, pCtx->msrEFER));
6854 }
6855 else if (!g_fHmVmxSupportsVmcsEfer)
6856 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K6_EFER);
6857
6858 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR);
6859 }
6860
6861 /*
6862 * Other MSRs.
6863 */
6864 if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_OTHER_MSRS)
6865 {
6866 /* Speculation Control (R/W). */
6867 HMVMX_CPUMCTX_ASSERT(pVCpu, HM_CHANGED_GUEST_OTHER_MSRS);
6868 if (pVM->cpum.ro.GuestFeatures.fIbrs)
6869 {
6870 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu),
6871 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
6872 AssertRCReturn(rc, rc);
6873 }
6874
6875 /* Last Branch Record. */
6876 if (pVM->hmr0.s.vmx.fLbr)
6877 {
6878 PVMXVMCSINFOSHARED const pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
6879 uint32_t const idFromIpMsrStart = pVM->hmr0.s.vmx.idLbrFromIpMsrFirst;
6880 uint32_t const idToIpMsrStart = pVM->hmr0.s.vmx.idLbrToIpMsrFirst;
6881 uint32_t const cLbrStack = pVM->hmr0.s.vmx.idLbrFromIpMsrLast - pVM->hmr0.s.vmx.idLbrFromIpMsrFirst + 1;
6882 Assert(cLbrStack <= 32);
6883 for (uint32_t i = 0; i < cLbrStack; i++)
6884 {
6885 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idFromIpMsrStart + i,
6886 pVmcsInfoShared->au64LbrFromIpMsr[i],
6887 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
6888 AssertRCReturn(rc, rc);
6889
6890 /* Some CPUs don't have a Branch-To-IP MSR (P4 and related Xeons). */
6891 if (idToIpMsrStart != 0)
6892 {
6893 rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, idToIpMsrStart + i,
6894 pVmcsInfoShared->au64LbrToIpMsr[i],
6895 false /* fSetReadWrite */, false /* fUpdateHostMsr */);
6896 AssertRCReturn(rc, rc);
6897 }
6898 }
6899
6900 /* Add LBR top-of-stack MSR (which contains the index to the most recent record). */
6901 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, pVM->hmr0.s.vmx.idLbrTosMsr,
6902 pVmcsInfoShared->u64LbrTosMsr, false /* fSetReadWrite */,
6903 false /* fUpdateHostMsr */);
6904 AssertRCReturn(rc, rc);
6905 }
6906
6907 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_OTHER_MSRS);
6908 }
6909
6910 return VINF_SUCCESS;
6911}
6912
6913
6914/**
6915 * Wrapper for running the guest code in VT-x.
6916 *
6917 * @returns VBox status code, no informational status codes.
6918 * @param pVCpu The cross context virtual CPU structure.
6919 * @param pVmxTransient The VMX-transient structure.
6920 *
6921 * @remarks No-long-jump zone!!!
6922 */
6923DECLINLINE(int) hmR0VmxRunGuest(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient)
6924{
6925 /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */
6926 pVCpu->cpum.GstCtx.fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM;
6927
6928 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
6929 bool const fResumeVM = RT_BOOL(pVmcsInfo->fVmcsState & VMX_V_VMCS_LAUNCH_STATE_LAUNCHED);
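 /* VMLAUNCH is only valid while the VMCS launch state is "clear"; once the VMCS has been
    launched, the CPU requires VMRESUME instead -- which is what this flag selects. */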
6930#ifdef VBOX_WITH_STATISTICS
6931 if (fResumeVM)
6932 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmResume);
6933 else
6934 STAM_COUNTER_INC(&pVCpu->hm.s.StatVmxVmLaunch);
6935#endif
6936 int rc = pVCpu->hmr0.s.vmx.pfnStartVm(pVmcsInfo, pVCpu, fResumeVM);
6937 AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc));
6938 return rc;
6939}
6940
6941
6942/**
6943 * Reports world-switch error and dumps some useful debug info.
6944 *
6945 * @param pVCpu The cross context virtual CPU structure.
6946 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
6947 * @param pVmxTransient The VMX-transient structure (only
6948 * exitReason updated).
6949 */
6950static void hmR0VmxReportWorldSwitchError(PVMCPUCC pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient)
6951{
6952 Assert(pVCpu);
6953 Assert(pVmxTransient);
6954 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
6955
6956 Log4Func(("VM-entry failure: %Rrc\n", rcVMRun));
6957 switch (rcVMRun)
6958 {
6959 case VERR_VMX_INVALID_VMXON_PTR:
6960 AssertFailed();
6961 break;
6962 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
6963 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
6964 {
6965 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
6966 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
6967 AssertRC(rc);
6968 hmR0VmxReadExitQualVmcs(pVmxTransient);
6969
6970 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
6971 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
6972 Cannot do it here as we may have been long preempted. */
6973
6974#ifdef VBOX_STRICT
6975 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
6976 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
6977 pVmxTransient->uExitReason));
6978 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual));
6979 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
6980 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
6981 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
6982 else
6983 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
6984 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
6985 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
6986
6987 static struct
6988 {
6989 /** Name of the field to log. */
6990 const char *pszName;
6991 /** The VMCS field. */
6992 uint32_t uVmcsField;
6993 /** Whether host support of this field needs to be checked. */
6994 bool fCheckSupport;
6995 } const s_aVmcsFields[] =
6996 {
6997 { "VMX_VMCS32_CTRL_PIN_EXEC", VMX_VMCS32_CTRL_PIN_EXEC, false },
6998 { "VMX_VMCS32_CTRL_PROC_EXEC", VMX_VMCS32_CTRL_PROC_EXEC, false },
6999 { "VMX_VMCS32_CTRL_PROC_EXEC2", VMX_VMCS32_CTRL_PROC_EXEC2, true },
7000 { "VMX_VMCS32_CTRL_ENTRY", VMX_VMCS32_CTRL_ENTRY, false },
7001 { "VMX_VMCS32_CTRL_EXIT", VMX_VMCS32_CTRL_EXIT, false },
7002 { "VMX_VMCS32_CTRL_CR3_TARGET_COUNT", VMX_VMCS32_CTRL_CR3_TARGET_COUNT, false },
7003 { "VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO", VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, false },
7004 { "VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE", VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, false },
7005 { "VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH", VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, false },
7006 { "VMX_VMCS32_CTRL_TPR_THRESHOLD", VMX_VMCS32_CTRL_TPR_THRESHOLD, false },
7007 { "VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, false },
7008 { "VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, false },
7009 { "VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT", VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, false },
7010 { "VMX_VMCS32_CTRL_EXCEPTION_BITMAP", VMX_VMCS32_CTRL_EXCEPTION_BITMAP, false },
7011 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, false },
7012 { "VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH", VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, false },
7013 { "VMX_VMCS_CTRL_CR0_MASK", VMX_VMCS_CTRL_CR0_MASK, false },
7014 { "VMX_VMCS_CTRL_CR0_READ_SHADOW", VMX_VMCS_CTRL_CR0_READ_SHADOW, false },
7015 { "VMX_VMCS_CTRL_CR4_MASK", VMX_VMCS_CTRL_CR4_MASK, false },
7016 { "VMX_VMCS_CTRL_CR4_READ_SHADOW", VMX_VMCS_CTRL_CR4_READ_SHADOW, false },
7017 { "VMX_VMCS64_CTRL_EPTP_FULL", VMX_VMCS64_CTRL_EPTP_FULL, true },
7018 { "VMX_VMCS_GUEST_RIP", VMX_VMCS_GUEST_RIP, false },
7019 { "VMX_VMCS_GUEST_RSP", VMX_VMCS_GUEST_RSP, false },
7020 { "VMX_VMCS_GUEST_RFLAGS", VMX_VMCS_GUEST_RFLAGS, false },
7021 { "VMX_VMCS16_VPID", VMX_VMCS16_VPID, true, },
7022 { "VMX_VMCS_HOST_CR0", VMX_VMCS_HOST_CR0, false },
7023 { "VMX_VMCS_HOST_CR3", VMX_VMCS_HOST_CR3, false },
7024 { "VMX_VMCS_HOST_CR4", VMX_VMCS_HOST_CR4, false },
7025 /* The order of selector fields below are fixed! */
7026 { "VMX_VMCS16_HOST_ES_SEL", VMX_VMCS16_HOST_ES_SEL, false },
7027 { "VMX_VMCS16_HOST_CS_SEL", VMX_VMCS16_HOST_CS_SEL, false },
7028 { "VMX_VMCS16_HOST_SS_SEL", VMX_VMCS16_HOST_SS_SEL, false },
7029 { "VMX_VMCS16_HOST_DS_SEL", VMX_VMCS16_HOST_DS_SEL, false },
7030 { "VMX_VMCS16_HOST_FS_SEL", VMX_VMCS16_HOST_FS_SEL, false },
7031 { "VMX_VMCS16_HOST_GS_SEL", VMX_VMCS16_HOST_GS_SEL, false },
7032 { "VMX_VMCS16_HOST_TR_SEL", VMX_VMCS16_HOST_TR_SEL, false },
7033 /* End of ordered selector fields. */
7034 { "VMX_VMCS_HOST_TR_BASE", VMX_VMCS_HOST_TR_BASE, false },
7035 { "VMX_VMCS_HOST_GDTR_BASE", VMX_VMCS_HOST_GDTR_BASE, false },
7036 { "VMX_VMCS_HOST_IDTR_BASE", VMX_VMCS_HOST_IDTR_BASE, false },
7037 { "VMX_VMCS32_HOST_SYSENTER_CS", VMX_VMCS32_HOST_SYSENTER_CS, false },
7038 { "VMX_VMCS_HOST_SYSENTER_EIP", VMX_VMCS_HOST_SYSENTER_EIP, false },
7039 { "VMX_VMCS_HOST_SYSENTER_ESP", VMX_VMCS_HOST_SYSENTER_ESP, false },
7040 { "VMX_VMCS_HOST_RSP", VMX_VMCS_HOST_RSP, false },
7041 { "VMX_VMCS_HOST_RIP", VMX_VMCS_HOST_RIP, false }
7042 };
7043
7044 RTGDTR HostGdtr;
7045 ASMGetGDTR(&HostGdtr);
7046
7047 uint32_t const cVmcsFields = RT_ELEMENTS(s_aVmcsFields);
7048 for (uint32_t i = 0; i < cVmcsFields; i++)
7049 {
7050 uint32_t const uVmcsField = s_aVmcsFields[i].uVmcsField;
7051
7052 bool fSupported;
7053 if (!s_aVmcsFields[i].fCheckSupport)
7054 fSupported = true;
7055 else
7056 {
7057 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
7058 switch (uVmcsField)
7059 {
7060 case VMX_VMCS64_CTRL_EPTP_FULL: fSupported = pVM->hmr0.s.fNestedPaging; break;
7061 case VMX_VMCS16_VPID: fSupported = pVM->hmr0.s.vmx.fVpid; break;
7062 case VMX_VMCS32_CTRL_PROC_EXEC2:
7063 fSupported = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
7064 break;
7065 default:
7066 AssertMsgFailedReturnVoid(("Failed to provide VMCS field support for %#RX32\n", uVmcsField));
7067 }
7068 }
7069
7070 if (fSupported)
7071 {
7072 uint8_t const uWidth = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_WIDTH);
7073 switch (uWidth)
7074 {
7075 case VMX_VMCSFIELD_WIDTH_16BIT:
7076 {
7077 uint16_t u16Val;
7078 rc = VMXReadVmcs16(uVmcsField, &u16Val);
7079 AssertRC(rc);
7080 Log4(("%-40s = %#RX16\n", s_aVmcsFields[i].pszName, u16Val));
7081
7082 if ( uVmcsField >= VMX_VMCS16_HOST_ES_SEL
7083 && uVmcsField <= VMX_VMCS16_HOST_TR_SEL)
7084 {
7085 if (u16Val < HostGdtr.cbGdt)
7086 {
7087 /* Order of selectors in s_apszSel is fixed and matches the order in s_aVmcsFields. */
7088 static const char * const s_apszSel[] = { "Host ES", "Host CS", "Host SS", "Host DS",
7089 "Host FS", "Host GS", "Host TR" };
7090 uint8_t const idxSel = RT_BF_GET(uVmcsField, VMX_BF_VMCSFIELD_INDEX);
7091 Assert(idxSel < RT_ELEMENTS(s_apszSel));
7092 PCX86DESCHC pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u16Val & X86_SEL_MASK));
7093 hmR0DumpDescriptor(pDesc, u16Val, s_apszSel[idxSel]);
7094 }
7095 else
7096 Log4((" Selector value exceeds GDT limit!\n"));
7097 }
7098 break;
7099 }
7100
7101 case VMX_VMCSFIELD_WIDTH_32BIT:
7102 {
7103 uint32_t u32Val;
7104 rc = VMXReadVmcs32(uVmcsField, &u32Val);
7105 AssertRC(rc);
7106 Log4(("%-40s = %#RX32\n", s_aVmcsFields[i].pszName, u32Val));
7107 break;
7108 }
7109
7110 case VMX_VMCSFIELD_WIDTH_64BIT:
7111 case VMX_VMCSFIELD_WIDTH_NATURAL:
7112 {
7113 uint64_t u64Val;
7114 rc = VMXReadVmcs64(uVmcsField, &u64Val);
7115 AssertRC(rc);
7116 Log4(("%-40s = %#RX64\n", s_aVmcsFields[i].pszName, u64Val));
7117 break;
7118 }
7119 }
7120 }
7121 }
7122
7123 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
7124 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
7125 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
7126 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
7127 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
7128 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
7129#endif /* VBOX_STRICT */
7130 break;
7131 }
7132
7133 default:
7134 /* Impossible */
7135 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
7136 break;
7137 }
7138}
7139
7140
7141/**
7142 * Sets up the usage of TSC-offsetting and updates the VMCS.
7143 *
7144 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
7145 * VMX-preemption timer.
7146 *
7147 * @returns VBox status code.
7148 * @param pVCpu The cross context virtual CPU structure.
7149 * @param pVmxTransient The VMX-transient structure.
7150 * @param idCurrentCpu The current CPU number.
7151 *
7152 * @remarks No-long-jump zone!!!
7153 */
7154static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, RTCPUID idCurrentCpu)
7155{
7156 bool fOffsettedTsc;
7157 bool fParavirtTsc;
7158 uint64_t uTscOffset;
7159 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
7160 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
7161
7162 if (pVM->hmr0.s.vmx.fUsePreemptTimer)
7163 {
7164 /* The TMCpuTickGetDeadlineAndTscOffset function is expensive; calling it on
7165 every entry slowed down the bs2-test1 CPUID testcase by ~33% (on a 10980xe). */
7166 uint64_t cTicksToDeadline;
7167 if ( idCurrentCpu == pVCpu->hmr0.s.idLastCpu
7168 && TMVirtualSyncIsCurrentDeadlineVersion(pVM, pVCpu->hmr0.s.vmx.uTscDeadlineVersion))
7169 {
7170 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadline);
7171 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
7172 cTicksToDeadline = pVCpu->hmr0.s.vmx.uTscDeadline - SUPReadTsc();
7173 if ((int64_t)cTicksToDeadline > 0)
7174 { /* hopefully */ }
7175 else
7176 {
7177 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionReusingDeadlineExpired);
7178 cTicksToDeadline = 0;
7179 }
7180 }
7181 else
7182 {
7183 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadline);
7184 cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc,
7185 &pVCpu->hmr0.s.vmx.uTscDeadline,
7186 &pVCpu->hmr0.s.vmx.uTscDeadlineVersion);
7187 pVCpu->hmr0.s.vmx.uTscDeadline += cTicksToDeadline;
7188 if (cTicksToDeadline >= 128)
7189 { /* hopefully */ }
7190 else
7191 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatVmxPreemptionRecalcingDeadlineExpired);
7192 }
7193
7194 /* Make sure the returned values have sane upper and lower boundaries. */
7195 uint64_t const u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
7196 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second, 15.625ms. */ /** @todo r=bird: Once real+virtual timers move to separate thread, we can raise the upper limit (16ms isn't much). ASSUMES working poke cpu function. */
7197 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 32768); /* 1/32768th of a second, ~30us. */
7198 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
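 /*
  * The VMX-preemption timer counts down at a rate of TSC >> N, where N is reported in
  * bits 4:0 of the IA32_VMX_MISC MSR (presumably what cPreemptTimerShift caches), hence
  * the conversion from TSC ticks above.
  */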
7199
7200 /** @todo r=ramshankar: We need to find a way to integrate nested-guest
7201 * preemption timers here. We probably need to clamp the preemption timer,
7202 * after converting the timer value to the host. */
7203 uint32_t const cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
7204 int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount);
7205 AssertRC(rc);
7206 }
7207 else
7208 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc);
7209
7210 if (fParavirtTsc)
7211 {
7212 /* Currently neither Hyper-V nor KVM needs to update its paravirt. TSC
7213 information before every VM-entry, hence this is disabled for performance reasons. */
7214#if 0
7215 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
7216 AssertRC(rc);
7217#endif
7218 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
7219 }
7220
7221 if ( fOffsettedTsc
7222 && RT_LIKELY(!pVCpu->hmr0.s.fDebugWantRdTscExit))
7223 {
7224 if (pVmxTransient->fIsNestedGuest)
7225 uTscOffset = CPUMApplyNestedGuestTscOffset(pVCpu, uTscOffset);
7226 hmR0VmxSetTscOffsetVmcs(pVmcsInfo, uTscOffset);
7227 hmR0VmxRemoveProcCtlsVmcs(pVCpu, pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
7228 }
7229 else
7230 {
7231 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
7232 hmR0VmxSetProcCtlsVmcs(pVmxTransient, VMX_PROC_CTLS_RDTSC_EXIT);
7233 }
7234}
7235
7236
7237/**
7238 * Gets the IEM exception flags for the specified vector and IDT vectoring /
7239 * VM-exit interruption info type.
7240 *
7241 * @returns The IEM exception flags.
7242 * @param uVector The event vector.
7243 * @param uVmxEventType The VMX event type.
7244 *
7245 * @remarks This function currently only constructs flags required for
7246 * IEMEvaluateRecursiveXcpt and not the complete flags (e.g, error-code
7247 * and CR2 aspects of an exception are not included).
7248 */
7249static uint32_t hmR0VmxGetIemXcptFlags(uint8_t uVector, uint32_t uVmxEventType)
7250{
7251 uint32_t fIemXcptFlags;
7252 switch (uVmxEventType)
7253 {
7254 case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT:
7255 case VMX_IDT_VECTORING_INFO_TYPE_NMI:
7256 fIemXcptFlags = IEM_XCPT_FLAGS_T_CPU_XCPT;
7257 break;
7258
7259 case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT:
7260 fIemXcptFlags = IEM_XCPT_FLAGS_T_EXT_INT;
7261 break;
7262
7263 case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
7264 fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_ICEBP_INSTR;
7265 break;
7266
7267 case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT:
7268 {
7269 fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT;
7270 if (uVector == X86_XCPT_BP)
7271 fIemXcptFlags |= IEM_XCPT_FLAGS_BP_INSTR;
7272 else if (uVector == X86_XCPT_OF)
7273 fIemXcptFlags |= IEM_XCPT_FLAGS_OF_INSTR;
7274 else
7275 {
7276 fIemXcptFlags = 0;
7277 AssertMsgFailed(("Unexpected vector for software exception. uVector=%#x", uVector));
7278 }
7279 break;
7280 }
7281
7282 case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
7283 fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT;
7284 break;
7285
7286 default:
7287 fIemXcptFlags = 0;
7288 AssertMsgFailed(("Unexpected vector type! uVmxEventType=%#x uVector=%#x", uVmxEventType, uVector));
7289 break;
7290 }
7291 return fIemXcptFlags;
7292}
7293
7294
7295/**
7296 * Sets an event as a pending event to be injected into the guest.
7297 *
7298 * @param pVCpu The cross context virtual CPU structure.
7299 * @param u32IntInfo The VM-entry interruption-information field.
7300 * @param cbInstr The VM-entry instruction length in bytes (for
7301 * software interrupts, exceptions and privileged
7302 * software exceptions).
7303 * @param u32ErrCode The VM-entry exception error code.
7304 * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
7305 * page-fault.
7306 */
7307DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPUCC pVCpu, uint32_t u32IntInfo, uint32_t cbInstr, uint32_t u32ErrCode,
7308 RTGCUINTPTR GCPtrFaultAddress)
7309{
7310 Assert(!pVCpu->hm.s.Event.fPending);
7311 pVCpu->hm.s.Event.fPending = true;
7312 pVCpu->hm.s.Event.u64IntInfo = u32IntInfo;
7313 pVCpu->hm.s.Event.u32ErrCode = u32ErrCode;
7314 pVCpu->hm.s.Event.cbInstr = cbInstr;
7315 pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
7316}
7317
7318
7319/**
7320 * Sets an external interrupt as pending-for-injection into the VM.
7321 *
7322 * @param pVCpu The cross context virtual CPU structure.
7323 * @param u8Interrupt The external interrupt vector.
7324 */
7325DECLINLINE(void) hmR0VmxSetPendingExtInt(PVMCPUCC pVCpu, uint8_t u8Interrupt)
7326{
7327 uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_EXIT_INT_INFO_VECTOR, u8Interrupt)
7328 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_ENTRY_INT_INFO_TYPE_EXT_INT)
7329 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
7330 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
7331 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7332}
7333
7334
7335/**
7336 * Sets an NMI (\#NMI) exception as pending-for-injection into the VM.
7337 *
7338 * @param pVCpu The cross context virtual CPU structure.
7339 */
7340DECLINLINE(void) hmR0VmxSetPendingXcptNmi(PVMCPUCC pVCpu)
7341{
7342 uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_NMI)
7343 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_ENTRY_INT_INFO_TYPE_NMI)
7344 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
7345 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
7346 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7347}
7348
7349
7350/**
7351 * Sets a double-fault (\#DF) exception as pending-for-injection into the VM.
7352 *
7353 * @param pVCpu The cross context virtual CPU structure.
7354 */
7355DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPUCC pVCpu)
7356{
7357 uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DF)
7358 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
7359 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
7360 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
7361 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7362}
7363
7364
7365/**
7366 * Sets an invalid-opcode (\#UD) exception as pending-for-injection into the VM.
7367 *
7368 * @param pVCpu The cross context virtual CPU structure.
7369 */
7370DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPUCC pVCpu)
7371{
7372 uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_UD)
7373 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
7374 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
7375 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
7376 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7377}
7378
7379
7380/**
7381 * Sets a debug (\#DB) exception as pending-for-injection into the VM.
7382 *
7383 * @param pVCpu The cross context virtual CPU structure.
7384 */
7385DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPUCC pVCpu)
7386{
7387 uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DB)
7388 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
7389 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
7390 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
7391 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7392}
7393
7394
7395#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7396/**
7397 * Sets a general-protection (\#GP) exception as pending-for-injection into the VM.
7398 *
7399 * @param pVCpu The cross context virtual CPU structure.
7400 * @param u32ErrCode The error code for the general-protection exception.
7401 */
7402DECLINLINE(void) hmR0VmxSetPendingXcptGP(PVMCPUCC pVCpu, uint32_t u32ErrCode)
7403{
7404 uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_GP)
7405 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
7406 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
7407 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
7408 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */);
7409}
7410
7411
7412/**
7413 * Sets a stack (\#SS) exception as pending-for-injection into the VM.
7414 *
7415 * @param pVCpu The cross context virtual CPU structure.
7416 * @param u32ErrCode The error code for the stack exception.
7417 */
7418DECLINLINE(void) hmR0VmxSetPendingXcptSS(PVMCPUCC pVCpu, uint32_t u32ErrCode)
7419{
7420 uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_SS)
7421 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT)
7422 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1)
7423 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
7424 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */);
7425}
7426#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
7427
7428
7429/**
7430 * Fixes up attributes for the specified segment register.
7431 *
7432 * @param pVCpu The cross context virtual CPU structure.
7433 * @param pSelReg The segment register that needs fixing.
7434 * @param pszRegName The register name (for logging and assertions).
7435 */
7436static void hmR0VmxFixUnusableSegRegAttr(PVMCPUCC pVCpu, PCPUMSELREG pSelReg, const char *pszRegName)
7437{
7438 Assert(pSelReg->Attr.u & X86DESCATTR_UNUSABLE);
7439
7440 /*
7441 * If VT-x marks the segment as unusable, most other bits remain undefined:
7442 * - For CS the L, D and G bits have meaning.
7443 * - For SS the DPL has meaning (it -is- the CPL for Intel and VBox).
7444 * - For the remaining data segments no bits are defined.
7445 *
7446 * The present bit and the unusable bit have been observed to be set at the
7447 * same time (the selector was supposed to be invalid as we started executing
7448 * a V8086 interrupt in ring-0).
7449 *
7450 * What matters for the rest of the VBox code is that the P bit is
7451 * cleared. Some of the other VBox code recognizes the unusable bit, but
7452 * AMD-V certainly doesn't, and REM doesn't really either. So, to be on the
7453 * safe side here, we'll strip off P and other bits we don't care about. If
7454 * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed.
7455 *
7456 * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers".
7457 */
7458#ifdef VBOX_STRICT
7459 uint32_t const uAttr = pSelReg->Attr.u;
7460#endif
7461
7462 /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */
7463 pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G
7464 | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT;
7465
7466#ifdef VBOX_STRICT
7467 VMMRZCallRing3Disable(pVCpu);
7468 Log4Func(("Unusable %s: sel=%#x attr=%#x -> %#x\n", pszRegName, pSelReg->Sel, uAttr, pSelReg->Attr.u));
7469# ifdef DEBUG_bird
7470 AssertMsg((uAttr & ~X86DESCATTR_P) == pSelReg->Attr.u,
7471 ("%s: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n",
7472 pszRegName, uAttr, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit));
7473# endif
7474 VMMRZCallRing3Enable(pVCpu);
7475 NOREF(uAttr);
7476#endif
7477 RT_NOREF2(pVCpu, pszRegName);
7478}
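
/*
 * A standalone sketch of the attribute stripping performed above. The ATTR_* constants
 * mirror the VMX access-rights layout (type in bits 3:0, S in bit 4, DPL in bits 6:5, P in
 * bit 7, AVL/L/D/G in bits 12..15, unusable in bit 16) and stand in for the X86DESCATTR_*
 * masks used by this file; the sample value is made up.
 */
#if 0 /* Illustrative only; build as a separate host program. */
#include <stdint.h>
#include <stdio.h>

#define ATTR_TYPE      UINT32_C(0x0000000f)  /* bits 3:0  - segment type */
#define ATTR_DT        UINT32_C(0x00000010)  /* bit  4    - descriptor type (S) */
#define ATTR_DPL       UINT32_C(0x00000060)  /* bits 6:5  - DPL */
#define ATTR_P         UINT32_C(0x00000080)  /* bit  7    - present */
#define ATTR_L         UINT32_C(0x00002000)  /* bit  13   - 64-bit code segment */
#define ATTR_D         UINT32_C(0x00004000)  /* bit  14   - default operand size */
#define ATTR_G         UINT32_C(0x00008000)  /* bit  15   - granularity */
#define ATTR_UNUSABLE  UINT32_C(0x00010000)  /* bit  16   - segment unusable */

int main(void)
{
    /* An access-rights value as VT-x might save it: unusable, yet with P and stale type bits set. */
    uint32_t const uAttr  = ATTR_UNUSABLE | ATTR_P | ATTR_G | 0x3;

    /* Same masking as hmR0VmxFixUnusableSegRegAttr: keep only the bits that still have meaning. */
    uint32_t const uFixed = uAttr & (ATTR_UNUSABLE | ATTR_L | ATTR_D | ATTR_G | ATTR_DPL | ATTR_TYPE | ATTR_DT);

    printf("before=%#x after=%#x P-cleared=%s\n", (unsigned)uAttr, (unsigned)uFixed, (uFixed & ATTR_P) ? "no" : "yes");
    return 0;
}
#endif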
7479
7480
7481/**
7482 * Imports a guest segment register from the current VMCS into the guest-CPU
7483 * context.
7484 *
7485 * @param pVCpu The cross context virtual CPU structure.
7486 * @param iSegReg The segment register number (X86_SREG_XXX).
7487 *
7488 * @remarks Called with interrupts and/or preemption disabled.
7489 */
7490static void hmR0VmxImportGuestSegReg(PVMCPUCC pVCpu, uint32_t iSegReg)
7491{
7492 Assert(iSegReg < X86_SREG_COUNT);
7493 Assert((uint32_t)VMX_VMCS16_GUEST_SEG_SEL(iSegReg) == g_aVmcsSegSel[iSegReg]);
7494 Assert((uint32_t)VMX_VMCS32_GUEST_SEG_LIMIT(iSegReg) == g_aVmcsSegLimit[iSegReg]);
7495 Assert((uint32_t)VMX_VMCS32_GUEST_SEG_ACCESS_RIGHTS(iSegReg) == g_aVmcsSegAttr[iSegReg]);
7496 Assert((uint32_t)VMX_VMCS_GUEST_SEG_BASE(iSegReg) == g_aVmcsSegBase[iSegReg]);
7497
7498 PCPUMSELREG pSelReg = &pVCpu->cpum.GstCtx.aSRegs[iSegReg];
7499
7500 uint16_t u16Sel;
7501 int rc = VMXReadVmcs16(VMX_VMCS16_GUEST_SEG_SEL(iSegReg), &u16Sel); AssertRC(rc);
7502 pSelReg->Sel = u16Sel;
7503 pSelReg->ValidSel = u16Sel;
7504
7505 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SEG_LIMIT(iSegReg), &pSelReg->u32Limit); AssertRC(rc);
7506 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SEG_BASE(iSegReg), &pSelReg->u64Base); AssertRC(rc);
7507
7508 uint32_t u32Attr;
7509 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SEG_ACCESS_RIGHTS(iSegReg), &u32Attr); AssertRC(rc);
7510 pSelReg->Attr.u = u32Attr;
7511 if (u32Attr & X86DESCATTR_UNUSABLE)
7512 hmR0VmxFixUnusableSegRegAttr(pVCpu, pSelReg, "ES\0CS\0SS\0DS\0FS\0GS" + iSegReg * 3);
7513
7514 pSelReg->fFlags = CPUMSELREG_FLAGS_VALID;
7515}
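
/*
 * The call above indexes a single packed string literal ("ES\0CS\0SS\0DS\0FS\0GS") by
 * iSegReg * 3, so one literal doubles as a table of six NUL-terminated register names.
 * A standalone sketch of the same trick; the ES..GS ordering matches the usual
 * X86_SREG_ES(0)..X86_SREG_GS(5) numbering that the call assumes.
 */
#if 0 /* Illustrative only; build as a separate host program. */
#include <stdio.h>

int main(void)
{
    /* Six 2-character names back to back; each entry occupies 3 bytes including its NUL. */
    static const char s_szSegNames[] = "ES\0CS\0SS\0DS\0FS\0GS";
    for (unsigned iSegReg = 0; iSegReg < 6; iSegReg++)
        printf("seg %u -> %s\n", iSegReg, s_szSegNames + iSegReg * 3);
    return 0;
}
#endif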
7516
7517
7518/**
7519 * Imports the guest LDTR from the current VMCS into the guest-CPU context.
7520 *
7521 * @param pVCpu The cross context virtual CPU structure.
7522 *
7523 * @remarks Called with interrupts and/or preemption disabled.
7524 */
7525static void hmR0VmxImportGuestLdtr(PVMCPUCC pVCpu)
7526{
7527 uint16_t u16Sel;
7528 uint64_t u64Base;
7529 uint32_t u32Limit, u32Attr;
7530 int rc = VMXReadVmcs16(VMX_VMCS16_GUEST_LDTR_SEL, &u16Sel); AssertRC(rc);
7531 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, &u32Limit); AssertRC(rc);
7532 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, &u32Attr); AssertRC(rc);
7533 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_LDTR_BASE, &u64Base); AssertRC(rc);
7534
7535 pVCpu->cpum.GstCtx.ldtr.Sel = u16Sel;
7536 pVCpu->cpum.GstCtx.ldtr.ValidSel = u16Sel;
7537 pVCpu->cpum.GstCtx.ldtr.fFlags = CPUMSELREG_FLAGS_VALID;
7538 pVCpu->cpum.GstCtx.ldtr.u32Limit = u32Limit;
7539 pVCpu->cpum.GstCtx.ldtr.u64Base = u64Base;
7540 pVCpu->cpum.GstCtx.ldtr.Attr.u = u32Attr;
7541 if (u32Attr & X86DESCATTR_UNUSABLE)
7542 hmR0VmxFixUnusableSegRegAttr(pVCpu, &pVCpu->cpum.GstCtx.ldtr, "LDTR");
7543}
7544
7545
7546/**
7547 * Imports the guest TR from the current VMCS into the guest-CPU context.
7548 *
7549 * @param pVCpu The cross context virtual CPU structure.
7550 *
7551 * @remarks Called with interrupts and/or preemption disabled.
7552 */
7553static void hmR0VmxImportGuestTr(PVMCPUCC pVCpu)
7554{
7555 uint16_t u16Sel;
7556 uint64_t u64Base;
7557 uint32_t u32Limit, u32Attr;
7558 int rc = VMXReadVmcs16(VMX_VMCS16_GUEST_TR_SEL, &u16Sel); AssertRC(rc);
7559 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, &u32Limit); AssertRC(rc);
7560 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, &u32Attr); AssertRC(rc);
7561 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_TR_BASE, &u64Base); AssertRC(rc);
7562
7563 pVCpu->cpum.GstCtx.tr.Sel = u16Sel;
7564 pVCpu->cpum.GstCtx.tr.ValidSel = u16Sel;
7565 pVCpu->cpum.GstCtx.tr.fFlags = CPUMSELREG_FLAGS_VALID;
7566 pVCpu->cpum.GstCtx.tr.u32Limit = u32Limit;
7567 pVCpu->cpum.GstCtx.tr.u64Base = u64Base;
7568 pVCpu->cpum.GstCtx.tr.Attr.u = u32Attr;
7569 /* TR is the only selector that can never be unusable. */
7570 Assert(!(u32Attr & X86DESCATTR_UNUSABLE));
7571}
7572
7573
7574/**
7575 * Imports the guest RIP from the VMCS back into the guest-CPU context.
7576 *
7577 * @param pVCpu The cross context virtual CPU structure.
7578 *
7579 * @remarks Called with interrupts and/or preemption disabled, should not assert!
7580 * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
7581 * instead!!!
7582 */
7583static void hmR0VmxImportGuestRip(PVMCPUCC pVCpu)
7584{
7585 uint64_t u64Val;
7586 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7587 if (pCtx->fExtrn & CPUMCTX_EXTRN_RIP)
7588 {
7589 int rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RIP, &u64Val);
7590 AssertRC(rc);
7591
7592 pCtx->rip = u64Val;
7593 EMR0HistoryUpdatePC(pVCpu, pCtx->rip, false);
7594 pCtx->fExtrn &= ~CPUMCTX_EXTRN_RIP;
7595 }
7596}
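
/*
 * A standalone sketch of the import-on-demand pattern used by the importer functions in
 * this file: every set CPUMCTX_EXTRN_* style bit means the value still lives only in the
 * VMCS, the first import reads it and clears the bit, and later imports become no-ops.
 * GUESTCTX, EXTRN_RIP and ReadVmcsField() are hypothetical stand-ins, not VBox types.
 */
#if 0 /* Illustrative only; build as a separate host program. */
#include <stdint.h>
#include <stdio.h>

#define EXTRN_RIP UINT64_C(0x0001)      /* RIP has not been imported from the VMCS yet */

typedef struct GUESTCTX
{
    uint64_t rip;
    uint64_t fExtrn;                    /* set bits = state still externalized in the VMCS */
} GUESTCTX;

/* Pretend VMCS read; a real implementation would issue a VMREAD. */
static uint64_t ReadVmcsField(uint64_t uPretendValue) { return uPretendValue; }

static void ImportRip(GUESTCTX *pCtx)
{
    if (pCtx->fExtrn & EXTRN_RIP)       /* only touch the VMCS if the cached value is stale */
    {
        pCtx->rip     = ReadVmcsField(UINT64_C(0x401000));
        pCtx->fExtrn &= ~EXTRN_RIP;     /* RIP is now up to date in the context */
    }
}

int main(void)
{
    GUESTCTX Ctx = { 0, EXTRN_RIP };
    ImportRip(&Ctx);
    ImportRip(&Ctx);                    /* second call is a no-op, the bit is already clear */
    printf("rip=%#llx fExtrn=%#llx\n", (unsigned long long)Ctx.rip, (unsigned long long)Ctx.fExtrn);
    return 0;
}
#endif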
7597
7598
7599/**
7600 * Imports the guest RFLAGS from the VMCS back into the guest-CPU context.
7601 *
7602 * @param pVCpu The cross context virtual CPU structure.
7603 * @param pVmcsInfo The VMCS info. object.
7604 *
7605 * @remarks Called with interrupts and/or preemption disabled, should not assert!
7606 * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
7607 * instead!!!
7608 */
7609static void hmR0VmxImportGuestRFlags(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
7610{
7611 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7612 if (pCtx->fExtrn & CPUMCTX_EXTRN_RFLAGS)
7613 {
7614 uint64_t u64Val;
7615 int rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RFLAGS, &u64Val);
7616 AssertRC(rc);
7617
7618 pCtx->rflags.u64 = u64Val;
7619 PCVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
7620 if (pVmcsInfoShared->RealMode.fRealOnV86Active)
7621 {
7622 pCtx->eflags.Bits.u1VM = 0;
7623 pCtx->eflags.Bits.u2IOPL = pVmcsInfoShared->RealMode.Eflags.Bits.u2IOPL;
7624 }
7625 pCtx->fExtrn &= ~CPUMCTX_EXTRN_RFLAGS;
7626 }
7627}
7628
7629
7630/**
7631 * Imports the guest interruptibility-state from the VMCS back into the guest-CPU
7632 * context.
7633 *
7634 * @param pVCpu The cross context virtual CPU structure.
7635 * @param pVmcsInfo The VMCS info. object.
7636 *
7637 * @remarks Called with interrupts and/or preemption disabled, try not to assert and
7638 * do not log!
7639 * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState()
7640 * instead!!!
7641 */
7642static void hmR0VmxImportGuestIntrState(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
7643{
7644 uint32_t u32Val;
7645 int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &u32Val); AssertRC(rc);
7646 if (!u32Val)
7647 {
7648 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
7649 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
7650 CPUMSetGuestNmiBlocking(pVCpu, false);
7651 }
7652 else
7653 {
7654 /*
7655 * We must import RIP here to set our EM interrupt-inhibited state.
7656 * We also import RFLAGS as our code that evaluates pending interrupts
7657 * before VM-entry requires it.
7658 */
7659 hmR0VmxImportGuestRip(pVCpu);
7660 hmR0VmxImportGuestRFlags(pVCpu, pVmcsInfo);
7661
7662 if (u32Val & (VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS | VMX_VMCS_GUEST_INT_STATE_BLOCK_STI))
7663 EMSetInhibitInterruptsPC(pVCpu, pVCpu->cpum.GstCtx.rip);
7664 else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
7665 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
7666
7667 bool const fNmiBlocking = RT_BOOL(u32Val & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI);
7668 CPUMSetGuestNmiBlocking(pVCpu, fNmiBlocking);
7669 }
7670}
7671
7672
7673/**
7674 * Worker for VMXR0ImportStateOnDemand.
7675 *
7676 * @returns VBox status code.
7677 * @param pVCpu The cross context virtual CPU structure.
7678 * @param pVmcsInfo The VMCS info. object.
7679 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
7680 */
7681static int hmR0VmxImportGuestState(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint64_t fWhat)
7682{
7683 int rc = VINF_SUCCESS;
7684 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
7685 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
7686 uint32_t u32Val;
7687
7688 /*
7689 * Note! This is a hack to work around a mysterious BSOD observed with release builds
7690 * on Windows 10 64-bit hosts. Profile and debug builds are not affected and
7691 * neither are other host platforms.
7692 *
7693 * Committing this temporarily as it prevents BSOD.
7694 *
7695 * Update: This is very likely a compiler optimization bug, see @bugref{9180}.
7696 */
7697#ifdef RT_OS_WINDOWS
7698 if (pVM == 0 || pVM == (void *)(uintptr_t)-1)
7699 return VERR_HM_IPE_1;
7700#endif
7701
7702 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x);
7703
7704 /*
7705 * We disable interrupts to make the updating of the state and in particular
7706 * the fExtrn modification atomic with respect to preemption hooks.
7707 */
7708 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
7709
7710 fWhat &= pCtx->fExtrn;
7711 if (fWhat)
7712 {
7713 do
7714 {
7715 if (fWhat & CPUMCTX_EXTRN_RIP)
7716 hmR0VmxImportGuestRip(pVCpu);
7717
7718 if (fWhat & CPUMCTX_EXTRN_RFLAGS)
7719 hmR0VmxImportGuestRFlags(pVCpu, pVmcsInfo);
7720
7721 if (fWhat & CPUMCTX_EXTRN_HM_VMX_INT_STATE)
7722 hmR0VmxImportGuestIntrState(pVCpu, pVmcsInfo);
7723
7724 if (fWhat & CPUMCTX_EXTRN_RSP)
7725 {
7726 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RSP, &pCtx->rsp);
7727 AssertRC(rc);
7728 }
7729
7730 if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
7731 {
7732 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
7733 bool const fRealOnV86Active = pVmcsInfoShared->RealMode.fRealOnV86Active;
7734 if (fWhat & CPUMCTX_EXTRN_CS)
7735 {
7736 hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_CS);
7737 hmR0VmxImportGuestRip(pVCpu);
7738 if (fRealOnV86Active)
7739 pCtx->cs.Attr.u = pVmcsInfoShared->RealMode.AttrCS.u;
7740 EMR0HistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true /* fFlattened */);
7741 }
7742 if (fWhat & CPUMCTX_EXTRN_SS)
7743 {
7744 hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_SS);
7745 if (fRealOnV86Active)
7746 pCtx->ss.Attr.u = pVmcsInfoShared->RealMode.AttrSS.u;
7747 }
7748 if (fWhat & CPUMCTX_EXTRN_DS)
7749 {
7750 hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_DS);
7751 if (fRealOnV86Active)
7752 pCtx->ds.Attr.u = pVmcsInfoShared->RealMode.AttrDS.u;
7753 }
7754 if (fWhat & CPUMCTX_EXTRN_ES)
7755 {
7756 hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_ES);
7757 if (fRealOnV86Active)
7758 pCtx->es.Attr.u = pVmcsInfoShared->RealMode.AttrES.u;
7759 }
7760 if (fWhat & CPUMCTX_EXTRN_FS)
7761 {
7762 hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_FS);
7763 if (fRealOnV86Active)
7764 pCtx->fs.Attr.u = pVmcsInfoShared->RealMode.AttrFS.u;
7765 }
7766 if (fWhat & CPUMCTX_EXTRN_GS)
7767 {
7768 hmR0VmxImportGuestSegReg(pVCpu, X86_SREG_GS);
7769 if (fRealOnV86Active)
7770 pCtx->gs.Attr.u = pVmcsInfoShared->RealMode.AttrGS.u;
7771 }
7772 }
7773
7774 if (fWhat & CPUMCTX_EXTRN_TABLE_MASK)
7775 {
7776 if (fWhat & CPUMCTX_EXTRN_LDTR)
7777 hmR0VmxImportGuestLdtr(pVCpu);
7778
7779 if (fWhat & CPUMCTX_EXTRN_GDTR)
7780 {
7781 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &pCtx->gdtr.pGdt); AssertRC(rc);
7782 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRC(rc);
7783 pCtx->gdtr.cbGdt = u32Val;
7784 }
7785
7786 /* Guest IDTR. */
7787 if (fWhat & CPUMCTX_EXTRN_IDTR)
7788 {
7789 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &pCtx->idtr.pIdt); AssertRC(rc);
7790 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRC(rc);
7791 pCtx->idtr.cbIdt = u32Val;
7792 }
7793
7794 /* Guest TR. */
7795 if (fWhat & CPUMCTX_EXTRN_TR)
7796 {
7797 /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR,
7798 so we don't need to import that one. */
7799 if (!pVmcsInfo->pShared->RealMode.fRealOnV86Active)
7800 hmR0VmxImportGuestTr(pVCpu);
7801 }
7802 }
7803
7804 if (fWhat & CPUMCTX_EXTRN_DR7)
7805 {
7806 if (!pVCpu->hmr0.s.fUsingHyperDR7)
7807 {
7808 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_DR7, &pCtx->dr[7]);
7809 AssertRC(rc);
7810 }
7811 }
7812
7813 if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS)
7814 {
7815 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); AssertRC(rc);
7816 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); AssertRC(rc);
7817 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRC(rc);
7818 pCtx->SysEnter.cs = u32Val;
7819 }
7820
7821 if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE)
7822 {
7823 if ( pVM->hmr0.s.fAllow64BitGuests
7824 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
7825 pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
7826 }
7827
7828 if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS)
7829 {
7830 if ( pVM->hmr0.s.fAllow64BitGuests
7831 && (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
7832 {
7833 pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
7834 pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
7835 pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
7836 }
7837 }
7838
7839 if (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS))
7840 {
7841 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
7842 PCVMXAUTOMSR pMsrs = (PCVMXAUTOMSR)pVmcsInfo->pvGuestMsrStore;
7843 uint32_t const cMsrs = pVmcsInfo->cExitMsrStore;
7844 Assert(pMsrs);
7845 Assert(cMsrs <= VMX_MISC_MAX_MSRS(g_HmMsrs.u.vmx.u64Misc));
7846 Assert(sizeof(*pMsrs) * cMsrs <= X86_PAGE_4K_SIZE);
7847 for (uint32_t i = 0; i < cMsrs; i++)
7848 {
7849 uint32_t const idMsr = pMsrs[i].u32Msr;
7850 switch (idMsr)
7851 {
7852 case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsrs[i].u64Value); break;
7853 case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsrs[i].u64Value); break;
7854 case MSR_K6_EFER: /* Can't be changed without causing a VM-exit */ break;
7855 default:
7856 {
7857 uint32_t idxLbrMsr;
7858 if (pVM->hmr0.s.vmx.fLbr)
7859 {
7860 if (hmR0VmxIsLbrBranchFromMsr(pVM, idMsr, &idxLbrMsr))
7861 {
7862 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrFromIpMsr));
7863 pVmcsInfoShared->au64LbrFromIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
7864 break;
7865 }
7866 if (hmR0VmxIsLbrBranchToMsr(pVM, idMsr, &idxLbrMsr))
7867 {
7868 Assert(idxLbrMsr < RT_ELEMENTS(pVmcsInfoShared->au64LbrToIpMsr));
7869 pVmcsInfoShared->au64LbrToIpMsr[idxLbrMsr] = pMsrs[i].u64Value;
7870 break;
7871 }
7872 if (idMsr == pVM->hmr0.s.vmx.idLbrTosMsr)
7873 {
7874 pVmcsInfoShared->u64LbrTosMsr = pMsrs[i].u64Value;
7875 break;
7876 }
7877 /* Fallthru (no break) */
7878 }
7879 pCtx->fExtrn = 0;
7880 pVCpu->hm.s.u32HMError = idMsr;
7881 ASMSetFlags(fEFlags);
7882 AssertMsgFailed(("Unexpected MSR in auto-load/store area. idMsr=%#RX32 cMsrs=%u\n", idMsr, cMsrs));
7883 return VERR_HM_UNEXPECTED_LD_ST_MSR;
7884 }
7885 }
7886 }
7887 }
7888
7889 if (fWhat & CPUMCTX_EXTRN_CR_MASK)
7890 {
7891 if (fWhat & CPUMCTX_EXTRN_CR0)
7892 {
7893 uint64_t u64Cr0;
7894 uint64_t u64Shadow;
7895 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Cr0); AssertRC(rc);
7896 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Shadow); AssertRC(rc);
7897#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
7898 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
7899 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
7900#else
7901 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
7902 {
7903 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
7904 | (u64Shadow & pVmcsInfo->u64Cr0Mask);
7905 }
7906 else
7907 {
7908 /*
7909 * We've merged the guest and nested-guest's CR0 guest/host mask while executing
7910 * the nested-guest using hardware-assisted VMX. Accordingly we need to
7911 * re-construct CR0. See @bugref{9180#c95} for details.
7912 */
7913 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
7914 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
7915 u64Cr0 = (u64Cr0 & ~pVmcsInfo->u64Cr0Mask)
7916 | (pVmcsNstGst->u64GuestCr0.u & pVmcsNstGst->u64Cr0Mask.u)
7917 | (u64Shadow & (pVmcsInfoGst->u64Cr0Mask & ~pVmcsNstGst->u64Cr0Mask.u));
7918 }
7919#endif
7920 VMMRZCallRing3Disable(pVCpu); /* May call into PGM which has Log statements. */
7921 CPUMSetGuestCR0(pVCpu, u64Cr0);
7922 VMMRZCallRing3Enable(pVCpu);
7923 }
7924
7925 if (fWhat & CPUMCTX_EXTRN_CR4)
7926 {
7927 uint64_t u64Cr4;
7928 uint64_t u64Shadow;
7929 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64Cr4); AssertRC(rc);
7930 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Shadow); AssertRC(rc);
7931#ifndef VBOX_WITH_NESTED_HWVIRT_VMX
7932 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
7933 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
7934#else
7935 if (!CPUMIsGuestInVmxNonRootMode(pCtx))
7936 {
7937 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
7938 | (u64Shadow & pVmcsInfo->u64Cr4Mask);
7939 }
7940 else
7941 {
7942 /*
7943 * We've merged the guest and nested-guest's CR4 guest/host mask while executing
7944 * the nested-guest using hardware-assisted VMX. Accordingly we need to
7945 * re-construct CR4. See @bugref{9180#c95} for details.
7946 */
7947 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
7948 PVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
7949 u64Cr4 = (u64Cr4 & ~pVmcsInfo->u64Cr4Mask)
7950 | (pVmcsNstGst->u64GuestCr4.u & pVmcsNstGst->u64Cr4Mask.u)
7951 | (u64Shadow & (pVmcsInfoGst->u64Cr4Mask & ~pVmcsNstGst->u64Cr4Mask.u));
7952 }
7953#endif
7954 pCtx->cr4 = u64Cr4;
7955 }
7956
7957 if (fWhat & CPUMCTX_EXTRN_CR3)
7958 {
7959 /* CR0.PG bit changes are always intercepted, so it's up to date. */
7960 if ( pVM->hmr0.s.vmx.fUnrestrictedGuest
7961 || ( pVM->hmr0.s.fNestedPaging
7962 && CPUMIsGuestPagingEnabledEx(pCtx)))
7963 {
7964 uint64_t u64Cr3;
7965 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR3, &u64Cr3); AssertRC(rc);
7966 if (pCtx->cr3 != u64Cr3)
7967 {
7968 pCtx->cr3 = u64Cr3;
7969 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
7970 }
7971
7972 /*
7973 * If the guest is in PAE mode, sync back the PDPE's into the guest state.
7974 * CR4.PAE, CR0.PG, EFER MSR changes are always intercepted, so they're up to date.
7975 */
7976 if (CPUMIsGuestInPAEModeEx(pCtx))
7977 {
7978 X86PDPE aPaePdpes[4];
7979 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPaePdpes[0].u); AssertRC(rc);
7980 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPaePdpes[1].u); AssertRC(rc);
7981 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPaePdpes[2].u); AssertRC(rc);
7982 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPaePdpes[3].u); AssertRC(rc);
7983 if (memcmp(&aPaePdpes[0], &pCtx->aPaePdpes[0], sizeof(aPaePdpes)))
7984 {
7985 memcpy(&pCtx->aPaePdpes[0], &aPaePdpes[0], sizeof(aPaePdpes));
7986 /* PGM now updates PAE PDPTEs while updating CR3. */
7987 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
7988 }
7989 }
7990 }
7991 }
7992 }
7993
7994#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
7995 if (fWhat & CPUMCTX_EXTRN_HWVIRT)
7996 {
7997 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING)
7998 && !CPUMIsGuestInVmxNonRootMode(pCtx))
7999 {
8000 Assert(CPUMIsGuestInVmxRootMode(pCtx));
8001 rc = hmR0VmxCopyShadowToNstGstVmcs(pVCpu, pVmcsInfo);
8002 if (RT_SUCCESS(rc))
8003 { /* likely */ }
8004 else
8005 break;
8006 }
8007 }
8008#endif
8009 } while (0);
8010
8011 if (RT_SUCCESS(rc))
8012 {
8013 /* Update fExtrn. */
8014 pCtx->fExtrn &= ~fWhat;
8015
8016 /* If everything has been imported, clear the HM keeper bit. */
8017 if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL))
8018 {
8019 pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM;
8020 Assert(!pCtx->fExtrn);
8021 }
8022 }
8023 }
8024 else
8025 AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn));
8026
8027 /*
8028 * Restore interrupts.
8029 */
8030 ASMSetFlags(fEFlags);
8031
8032 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x);
8033
8034 if (RT_SUCCESS(rc))
8035 { /* likely */ }
8036 else
8037 return rc;
8038
8039 /*
8040 * Honor any pending CR3 updates.
8041 *
8042 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> VMXR0CallRing3Callback()
8043 * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
8044 * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are.
8045 *
8046 * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
8047 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
8048 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
8049 * -NOT- check if CPUMCTX_EXTRN_CR3 is set!
8050 *
8051 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
8052 *
8053 * The force-flag is checked first as it's cheaper for potential superfluous calls to this function.
8054 */
8055 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)
8056 && VMMRZCallRing3IsEnabled(pVCpu))
8057 {
8058 Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3));
8059 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu), false /* fPdpesMapped */);
8060 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
8061 }
8062
8063 return VINF_SUCCESS;
8064}
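
/*
 * A standalone sketch of the non-nested CR0 reconstruction done in the importer above:
 * for bits set in the CR0 guest/host mask the guest reads its CR0 from the read shadow,
 * so the effective guest value is reassembled by taking guest-owned bits from the VMCS
 * guest CR0 and host-owned bits from the shadow. The sample mask and values are made up.
 */
#if 0 /* Illustrative only; build as a separate host program. */
#include <stdint.h>
#include <stdio.h>

static uint64_t ReconstructCr0(uint64_t uVmcsGuestCr0, uint64_t uReadShadow, uint64_t uCr0Mask)
{
    return (uVmcsGuestCr0 & ~uCr0Mask)   /* guest-owned bits: the value VT-x really runs with */
         | (uReadShadow   &  uCr0Mask);  /* host-owned bits: what the guest believes it wrote */
}

int main(void)
{
    uint64_t const uCr0Mask   = UINT64_C(0x80000021);  /* pretend the host owns PG, NE and PE */
    uint64_t const uGuestCr0  = UINT64_C(0x80050033);  /* value in the VMCS guest-state area */
    uint64_t const uShadowCr0 = UINT64_C(0x00000011);  /* CR0 read shadow */
    printf("effective CR0 = %#llx\n", (unsigned long long)ReconstructCr0(uGuestCr0, uShadowCr0, uCr0Mask));
    return 0;
}
#endif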
8065
8066
8067/**
8068 * Saves the guest state from the VMCS into the guest-CPU context.
8069 *
8070 * @returns VBox status code.
8071 * @param pVCpu The cross context virtual CPU structure.
8072 * @param fWhat What to import, CPUMCTX_EXTRN_XXX.
8073 */
8074VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPUCC pVCpu, uint64_t fWhat)
8075{
8076 AssertPtr(pVCpu);
8077 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
8078 return hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fWhat);
8079}
8080
8081
8082/**
8083 * Check per-VM and per-VCPU force flag actions that require us to go back to
8084 * ring-3 for one reason or another.
8085 *
8086 * @returns Strict VBox status code (i.e. informational status codes too)
8087 * @retval VINF_SUCCESS if we don't have any actions that require going back to
8088 * ring-3.
8089 * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
8090 * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
8091 * interrupts)
8092 * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
8093 * all EMTs to be in ring-3.
8094 * @retval VINF_EM_RAW_TO_R3 if there are pending DMA requests.
8095 * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return
8096 * to the EM loop.
8097 *
8098 * @param pVCpu The cross context virtual CPU structure.
8099 * @param pVmxTransient The VMX-transient structure.
8100 * @param fStepping Whether we are single-stepping the guest using the
8101 * hypervisor debugger.
8102 *
8103 * @remarks This might cause nested-guest VM-exits, caller must check if the guest
8104 * is no longer in VMX non-root mode.
8105 */
8106static VBOXSTRICTRC hmR0VmxCheckForceFlags(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, bool fStepping)
8107{
8108 Assert(VMMRZCallRing3IsEnabled(pVCpu));
8109
8110 /*
8111 * Update pending interrupts into the APIC's IRR.
8112 */
8113 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
8114 APICUpdatePendingInterrupts(pVCpu);
8115
8116 /*
8117 * Anything pending? Should be more likely than not if we're doing a good job.
8118 */
8119 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
8120 if ( !fStepping
8121 ? !VM_FF_IS_ANY_SET(pVM, VM_FF_HP_R0_PRE_HM_MASK)
8122 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HP_R0_PRE_HM_MASK)
8123 : !VM_FF_IS_ANY_SET(pVM, VM_FF_HP_R0_PRE_HM_STEP_MASK)
8124 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
8125 return VINF_SUCCESS;
8126
8127 /* Pending PGM CR3 sync. */
8128 if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
8129 {
8130 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
8131 Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & (CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4)));
8132 VBOXSTRICTRC rcStrict = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4,
8133 VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
8134 if (rcStrict != VINF_SUCCESS)
8135 {
8136 AssertRC(VBOXSTRICTRC_VAL(rcStrict));
8137 Log4Func(("PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", VBOXSTRICTRC_VAL(rcStrict)));
8138 return rcStrict;
8139 }
8140 }
8141
8142 /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
8143 if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HM_TO_R3_MASK)
8144 || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
8145 {
8146 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
8147 int rc = RT_LIKELY(!VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_RAW_TO_R3 : VINF_EM_NO_MEMORY;
8148 Log4Func(("HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc));
8149 return rc;
8150 }
8151
8152 /* Pending VM request packets, such as hardware interrupts. */
8153 if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST)
8154 || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST))
8155 {
8156 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchVmReq);
8157 Log4Func(("Pending VM request forcing us back to ring-3\n"));
8158 return VINF_EM_PENDING_REQUEST;
8159 }
8160
8161 /* Pending PGM pool flushes. */
8162 if (VM_FF_IS_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
8163 {
8164 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPgmPoolFlush);
8165 Log4Func(("PGM pool flush pending forcing us back to ring-3\n"));
8166 return VINF_PGM_POOL_FLUSH_PENDING;
8167 }
8168
8169 /* Pending DMA requests. */
8170 if (VM_FF_IS_SET(pVM, VM_FF_PDM_DMA))
8171 {
8172 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchDma);
8173 Log4Func(("Pending DMA request forcing us back to ring-3\n"));
8174 return VINF_EM_RAW_TO_R3;
8175 }
8176
8177#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
8178 /*
8179 * Pending nested-guest events.
8180 *
8181 * Please note that the priority of these events is specified and important.
8182 * See Intel spec. 29.4.3.2 "APIC-Write Emulation".
8183 * See Intel spec. 6.9 "Priority Among Simultaneous Exceptions And Interrupts".
8184 */
8185 if (pVmxTransient->fIsNestedGuest)
8186 {
8187 /* Pending nested-guest APIC-write. */
8188 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_APIC_WRITE))
8189 {
8190 Log4Func(("Pending nested-guest APIC-write\n"));
8191 VBOXSTRICTRC rcStrict = IEMExecVmxVmexitApicWrite(pVCpu);
8192 Assert(rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE);
8193 return rcStrict;
8194 }
8195
8196 /* Pending nested-guest monitor-trap flag (MTF). */
8197 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_MTF))
8198 {
8199 Log4Func(("Pending nested-guest MTF\n"));
8200 VBOXSTRICTRC rcStrict = IEMExecVmxVmexit(pVCpu, VMX_EXIT_MTF, 0 /* uExitQual */);
8201 Assert(rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE);
8202 return rcStrict;
8203 }
8204
8205 /* Pending nested-guest VMX-preemption timer expired. */
8206 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_VMX_PREEMPT_TIMER))
8207 {
8208 Log4Func(("Pending nested-guest preempt timer\n"));
8209 VBOXSTRICTRC rcStrict = IEMExecVmxVmexitPreemptTimer(pVCpu);
8210 Assert(rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE);
8211 return rcStrict;
8212 }
8213 }
8214#else
8215 NOREF(pVmxTransient);
8216#endif
8217
8218 return VINF_SUCCESS;
8219}
8220
8221
8222/**
8223 * Converts any TRPM trap into a pending HM event. This is typically used when
8224 * entering from ring-3 (not longjmp returns).
8225 *
8226 * @param pVCpu The cross context virtual CPU structure.
8227 */
8228static void hmR0VmxTrpmTrapToPendingEvent(PVMCPUCC pVCpu)
8229{
8230 Assert(TRPMHasTrap(pVCpu));
8231 Assert(!pVCpu->hm.s.Event.fPending);
8232
8233 uint8_t uVector;
8234 TRPMEVENT enmTrpmEvent;
8235 uint32_t uErrCode;
8236 RTGCUINTPTR GCPtrFaultAddress;
8237 uint8_t cbInstr;
8238 bool fIcebp;
8239
8240 int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr, &fIcebp);
8241 AssertRC(rc);
8242
8243 uint32_t u32IntInfo;
8244 u32IntInfo = uVector | VMX_IDT_VECTORING_INFO_VALID;
8245 u32IntInfo |= HMTrpmEventTypeToVmxEventType(uVector, enmTrpmEvent, fIcebp);
8246
8247 rc = TRPMResetTrap(pVCpu);
8248 AssertRC(rc);
8249 Log4(("TRPM->HM event: u32IntInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n",
8250 u32IntInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress));
8251
8252 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, uErrCode, GCPtrFaultAddress);
8253}
8254
8255
8256/**
8257 * Converts the pending HM event into a TRPM trap.
8258 *
8259 * @param pVCpu The cross context virtual CPU structure.
8260 */
8261static void hmR0VmxPendingEventToTrpmTrap(PVMCPUCC pVCpu)
8262{
8263 Assert(pVCpu->hm.s.Event.fPending);
8264
8265 /* If a trap was already pending, we did something wrong! */
8266 Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
8267
8268 uint32_t const u32IntInfo = pVCpu->hm.s.Event.u64IntInfo;
8269 uint32_t const uVector = VMX_IDT_VECTORING_INFO_VECTOR(u32IntInfo);
8270 TRPMEVENT const enmTrapType = HMVmxEventTypeToTrpmEventType(u32IntInfo);
8271
8272 Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType));
8273
8274 int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
8275 AssertRC(rc);
8276
8277 if (VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(u32IntInfo))
8278 TRPMSetErrorCode(pVCpu, pVCpu->hm.s.Event.u32ErrCode);
8279
8280 if (VMX_IDT_VECTORING_INFO_IS_XCPT_PF(u32IntInfo))
8281 TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
8282 else
8283 {
8284 uint8_t const uVectorType = VMX_IDT_VECTORING_INFO_TYPE(u32IntInfo);
8285 switch (uVectorType)
8286 {
8287 case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
8288 TRPMSetTrapDueToIcebp(pVCpu);
8289 RT_FALL_THRU();
8290 case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
8291 case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT:
8292 {
8293 AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
8294 || ( uVector == X86_XCPT_BP /* INT3 */
8295 || uVector == X86_XCPT_OF /* INTO */
8296 || uVector == X86_XCPT_DB /* INT1 (ICEBP) */),
8297 ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType));
8298 TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
8299 break;
8300 }
8301 }
8302 }
8303
8304 /* We're now done converting the pending event. */
8305 pVCpu->hm.s.Event.fPending = false;
8306}
8307
8308
8309/**
8310 * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to
8311 * cause a VM-exit as soon as the guest is in a state to receive interrupts.
8312 *
8313 * @param pVmcsInfo The VMCS info. object.
8314 */
8315static void hmR0VmxSetIntWindowExitVmcs(PVMXVMCSINFO pVmcsInfo)
8316{
8317 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_INT_WINDOW_EXIT)
8318 {
8319 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT))
8320 {
8321 pVmcsInfo->u32ProcCtls |= VMX_PROC_CTLS_INT_WINDOW_EXIT;
8322 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
8323 AssertRC(rc);
8324 }
8325 } /* else we will deliver interrupts whenever the guest VM-exits next and is in a state to receive the interrupt. */
8326}
8327
8328
8329/**
8330 * Clears the interrupt-window exiting control in the VMCS.
8331 *
8332 * @param pVmcsInfo The VMCS info. object.
8333 */
8334DECLINLINE(void) hmR0VmxClearIntWindowExitVmcs(PVMXVMCSINFO pVmcsInfo)
8335{
8336 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT)
8337 {
8338 pVmcsInfo->u32ProcCtls &= ~VMX_PROC_CTLS_INT_WINDOW_EXIT;
8339 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
8340 AssertRC(rc);
8341 }
8342}
8343
8344
8345/**
8346 * Sets the NMI-window exiting control in the VMCS which instructs VT-x to
8347 * cause a VM-exit as soon as the guest is in a state to receive NMIs.
8348 *
8349 * @param pVmcsInfo The VMCS info. object.
8350 */
8351static void hmR0VmxSetNmiWindowExitVmcs(PVMXVMCSINFO pVmcsInfo)
8352{
8353 if (g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_NMI_WINDOW_EXIT)
8354 {
8355 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT))
8356 {
8357 pVmcsInfo->u32ProcCtls |= VMX_PROC_CTLS_NMI_WINDOW_EXIT;
8358 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
8359 AssertRC(rc);
8360 Log4Func(("Setup NMI-window exiting\n"));
8361 }
8362 } /* else we will deliver NMIs whenever we VM-exit next, even possibly nesting NMIs. Can't be helped on ancient CPUs. */
8363}
8364
8365
8366/**
8367 * Clears the NMI-window exiting control in the VMCS.
8368 *
8369 * @param pVmcsInfo The VMCS info. object.
8370 */
8371DECLINLINE(void) hmR0VmxClearNmiWindowExitVmcs(PVMXVMCSINFO pVmcsInfo)
8372{
8373 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT)
8374 {
8375 pVmcsInfo->u32ProcCtls &= ~VMX_PROC_CTLS_NMI_WINDOW_EXIT;
8376 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
8377 AssertRC(rc);
8378 }
8379}
8380
8381
8382/**
8383 * Does the necessary state syncing before returning to ring-3 for any reason
8384 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
8385 *
8386 * @returns VBox status code.
8387 * @param pVCpu The cross context virtual CPU structure.
8388 * @param fImportState Whether to import the guest state from the VMCS back
8389 * to the guest-CPU context.
8390 *
8391 * @remarks No-long-jmp zone!!!
8392 */
8393static int hmR0VmxLeave(PVMCPUCC pVCpu, bool fImportState)
8394{
8395 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
8396 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
8397
8398 RTCPUID const idCpu = RTMpCpuId();
8399 Log4Func(("HostCpuId=%u\n", idCpu));
8400
8401 /*
8402 * !!! IMPORTANT !!!
8403 * If you modify code here, check whether VMXR0CallRing3Callback() needs to be updated too.
8404 */
8405
8406 /* Save the guest state if necessary. */
8407 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
8408 if (fImportState)
8409 {
8410 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
8411 AssertRCReturn(rc, rc);
8412 }
8413
8414 /* Restore host FPU state if necessary. We will resync on next R0 reentry. */
8415 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
8416 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
8417
8418 /* Restore host debug registers if necessary. We will resync on next R0 reentry. */
8419#ifdef VBOX_STRICT
8420 if (CPUMIsHyperDebugStateActive(pVCpu))
8421 Assert(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT);
8422#endif
8423 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
8424 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
8425 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
8426
8427 /* Restore host-state bits that VT-x only restores partially. */
8428 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
8429 {
8430 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hmr0.s.vmx.fRestoreHostFlags, idCpu));
8431 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
8432 }
8433 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
8434
8435 /* Restore the lazy host MSRs as we're leaving VT-x context. */
8436 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
8437 {
8438 /* We shouldn't restore the host MSRs without saving the guest MSRs first. */
8439 if (!fImportState)
8440 {
8441 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS);
8442 AssertRCReturn(rc, rc);
8443 }
8444 hmR0VmxLazyRestoreHostMsrs(pVCpu);
8445 Assert(!pVCpu->hmr0.s.vmx.fLazyMsrs);
8446 }
8447 else
8448 pVCpu->hmr0.s.vmx.fLazyMsrs = 0;
8449
8450 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
8451 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
8452
8453 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
8454 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState);
8455 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState);
8456 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit);
8457 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling);
8458 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
8459 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
8460 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
8461 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitVmentry);
8462 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
8463
8464 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
8465
8466 /** @todo This partially defeats the purpose of having preemption hooks.
8467 * The problem is, deregistering the hooks should be moved to a place that
8468 * lasts until the EMT is about to be destroyed, not every time we leave HM
8469 * context.
8470 */
8471 int rc = hmR0VmxClearVmcs(pVmcsInfo);
8472 AssertRCReturn(rc, rc);
8473
8474#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
8475 /*
8476 * A valid shadow VMCS is made active as part of VM-entry. It is necessary to
8477 * clear a shadow VMCS before allowing that VMCS to become active on another
8478 * logical processor. We may or may not be importing guest state which clears
8479 * it, so cover for it here.
8480 *
8481 * See Intel spec. 24.11.1 "Software Use of Virtual-Machine Control Structures".
8482 */
8483 if ( pVmcsInfo->pvShadowVmcs
8484 && pVmcsInfo->fShadowVmcsState != VMX_V_VMCS_LAUNCH_STATE_CLEAR)
8485 {
8486 rc = hmR0VmxClearShadowVmcs(pVmcsInfo);
8487 AssertRCReturn(rc, rc);
8488 }
8489
8490 /*
8491 * Flag that we need to re-export the host state if we switch to this VMCS before
8492 * executing guest or nested-guest code.
8493 */
8494 pVmcsInfo->idHostCpuState = NIL_RTCPUID;
8495#endif
8496
8497 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
8498 NOREF(idCpu);
8499 return VINF_SUCCESS;
8500}
8501
8502
8503/**
8504 * Leaves the VT-x session.
8505 *
8506 * @returns VBox status code.
8507 * @param pVCpu The cross context virtual CPU structure.
8508 *
8509 * @remarks No-long-jmp zone!!!
8510 */
8511static int hmR0VmxLeaveSession(PVMCPUCC pVCpu)
8512{
8513 HM_DISABLE_PREEMPT(pVCpu);
8514 HMVMX_ASSERT_CPU_SAFE(pVCpu);
8515 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
8516 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
8517
8518 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
8519 and done this from the VMXR0ThreadCtxCallback(). */
8520 if (!pVCpu->hmr0.s.fLeaveDone)
8521 {
8522 int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */);
8523 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
8524 pVCpu->hmr0.s.fLeaveDone = true;
8525 }
8526 Assert(!pVCpu->cpum.GstCtx.fExtrn);
8527
8528 /*
8529 * !!! IMPORTANT !!!
8530 * If you modify code here, make sure to check whether VMXR0CallRing3Callback() needs to be updated too.
8531 */
8532
8533 /* Deregister hook now that we've left HM context before re-enabling preemption. */
8534 /** @todo Deregistering here means we need to VMCLEAR always
8535 * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need
8536 * for calling VMMR0ThreadCtxHookDisable here! */
8537 VMMR0ThreadCtxHookDisable(pVCpu);
8538
8539 /* Leave HM context. This takes care of local init (term) and deregistering the longjmp-to-ring-3 callback. */
8540 int rc = HMR0LeaveCpu(pVCpu);
8541 HM_RESTORE_PREEMPT();
8542 return rc;
8543}
8544
8545
8546/**
8547 * Does the necessary state syncing before doing a longjmp to ring-3.
8548 *
8549 * @returns VBox status code.
8550 * @param pVCpu The cross context virtual CPU structure.
8551 *
8552 * @remarks No-long-jmp zone!!!
8553 */
8554DECLINLINE(int) hmR0VmxLongJmpToRing3(PVMCPUCC pVCpu)
8555{
8556 return hmR0VmxLeaveSession(pVCpu);
8557}
8558
8559
8560/**
8561 * Take necessary actions before going back to ring-3.
8562 *
8563 * An action requires us to go back to ring-3. This function does the necessary
8564 * steps before we can safely return to ring-3. This is not the same as longjmps
8565 * to ring-3, this is voluntary and prepares the guest so it may continue
8566 * executing outside HM (recompiler/IEM).
8567 *
8568 * @returns VBox status code.
8569 * @param pVCpu The cross context virtual CPU structure.
8570 * @param rcExit The reason for exiting to ring-3. Can be
8571 * VINF_VMM_UNKNOWN_RING3_CALL.
8572 */
8573static int hmR0VmxExitToRing3(PVMCPUCC pVCpu, VBOXSTRICTRC rcExit)
8574{
8575 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
8576
8577 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
8578 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
8579 {
8580 VMXGetCurrentVmcs(&pVCpu->hm.s.vmx.LastError.HCPhysCurrentVmcs);
8581 pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVmcsInfo->pvVmcs;
8582 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hmr0.s.idEnteredCpu;
8583 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
8584 }
8585
8586 /* Please, no longjumps here (a log flush could jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
8587 VMMRZCallRing3Disable(pVCpu);
8588 Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit)));
8589
8590 /*
8591 * Convert any pending HM events back to TRPM due to premature exits to ring-3.
8592 * We need to do this only on returns to ring-3 and not for longjmps to ring-3.
8593 *
8594 * This is because execution may continue from ring-3 and we would need to inject
8595 * the event from there (hence place it back in TRPM).
8596 */
8597 if (pVCpu->hm.s.Event.fPending)
8598 {
8599 hmR0VmxPendingEventToTrpmTrap(pVCpu);
8600 Assert(!pVCpu->hm.s.Event.fPending);
8601
8602 /* Clear the events from the VMCS. */
8603 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
8604 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRC(rc);
8605 }
8606#ifdef VBOX_STRICT
8607 /*
8608 * We check for rcExit here since for errors like VERR_VMX_UNABLE_TO_START_VM (which are
8609 * fatal), we don't care about verifying duplicate injection of events. Errors like
8610 * VERR_EM_INTERPRET are converted to their VINF_* counterparts -prior- to calling this
8611 * function so those should and will be checked below.
8612 */
8613 else if (RT_SUCCESS(rcExit))
8614 {
8615 /*
8616 * Ensure we don't accidentally clear a pending HM event without clearing the VMCS.
8617 * This can be pretty hard to debug otherwise, interrupts might get injected twice
8618 * occasionally, see @bugref{9180#c42}.
8619 *
8620 * However, if the VM-entry failed, any VM entry-interruption info. field would
8621 * be left unmodified as the event would not have been injected to the guest. In
8622 * such cases, don't assert, we're not going to continue guest execution anyway.
8623 */
8624 uint32_t uExitReason;
8625 uint32_t uEntryIntInfo;
8626 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
8627 rc |= VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &uEntryIntInfo);
8628 AssertRC(rc);
8629 AssertMsg(VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason) || !VMX_ENTRY_INT_INFO_IS_VALID(uEntryIntInfo),
8630 ("uExitReason=%#RX32 uEntryIntInfo=%#RX32 rcExit=%d\n", uExitReason, uEntryIntInfo, VBOXSTRICTRC_VAL(rcExit)));
8631 }
8632#endif
8633
8634 /*
8635 * Clear the interrupt-window and NMI-window VMCS controls as we could have got
8636 * a VM-exit with higher priority than interrupt-window or NMI-window VM-exits
8637 * (e.g. TPR below threshold).
8638 */
8639 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
8640 {
8641 hmR0VmxClearIntWindowExitVmcs(pVmcsInfo);
8642 hmR0VmxClearNmiWindowExitVmcs(pVmcsInfo);
8643 }
8644
8645 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
8646 and if we're injecting an event we should have a TRPM trap pending. */
8647 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
8648#ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */
8649 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit)));
8650#endif
8651
8652 /* Save guest state and restore host state bits. */
8653 int rc = hmR0VmxLeaveSession(pVCpu);
8654 AssertRCReturn(rc, rc);
8655 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
8656
8657 /* Thread-context hooks are unregistered at this point!!! */
8658 /* Ring-3 callback notifications are unregistered at this point!!! */
8659
8660 /* Sync recompiler state. */
8661 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
8662 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
8663 | CPUM_CHANGED_LDTR
8664 | CPUM_CHANGED_GDTR
8665 | CPUM_CHANGED_IDTR
8666 | CPUM_CHANGED_TR
8667 | CPUM_CHANGED_HIDDEN_SEL_REGS);
8668 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging
8669 && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx))
8670 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
8671
8672 Assert(!pVCpu->hmr0.s.fClearTrapFlag);
8673
8674 /* Update the exit-to-ring-3 reason. */
8675 pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit);
8676
8677 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
8678 if ( rcExit != VINF_EM_RAW_INTERRUPT
8679 || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
8680 {
8681 Assert(!(pVCpu->cpum.GstCtx.fExtrn & HMVMX_CPUMCTX_EXTRN_ALL));
8682 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
8683 }
8684
8685 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
8686 VMMRZCallRing3Enable(pVCpu);
8687 return rc;
8688}
8689
8690
8691/**
8692 * VMMRZCallRing3() callback wrapper which saves the guest state before we
8693 * longjump to ring-3 and possibly get preempted.
8694 *
8695 * @returns VBox status code.
8696 * @param pVCpu The cross context virtual CPU structure.
8697 * @param enmOperation The operation causing the ring-3 longjump.
8698 */
8699VMMR0DECL(int) VMXR0CallRing3Callback(PVMCPUCC pVCpu, VMMCALLRING3 enmOperation)
8700{
8701 if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION)
8702 {
8703 /*
8704 * !!! IMPORTANT !!!
8705 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
8706 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
8707 */
8708 VMMRZCallRing3RemoveNotification(pVCpu);
8709 VMMRZCallRing3Disable(pVCpu);
8710 HM_DISABLE_PREEMPT(pVCpu);
8711
8712 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
8713 hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
8714 CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu);
8715 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
8716
8717 /* Restore host-state bits that VT-x only restores partially. */
8718 if (pVCpu->hmr0.s.vmx.fRestoreHostFlags > VMX_RESTORE_HOST_REQUIRED)
8719 VMXRestoreHostState(pVCpu->hmr0.s.vmx.fRestoreHostFlags, &pVCpu->hmr0.s.vmx.RestoreHost);
8720 pVCpu->hmr0.s.vmx.fRestoreHostFlags = 0;
8721
8722 /* Restore the lazy host MSRs as we're leaving VT-x context. */
8723 if (pVCpu->hmr0.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
8724 hmR0VmxLazyRestoreHostMsrs(pVCpu);
8725
8726 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
8727 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = false;
8728 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
8729
8730 /* Clear the current VMCS data back to memory (the shadow VMCS, if any, would have been
8731 cleared as part of importing the guest state above). */
8732 hmR0VmxClearVmcs(pVmcsInfo);
8733
8734 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
8735 VMMR0ThreadCtxHookDisable(pVCpu);
8736
8737 /* Leave HM context. This takes care of local init (term). */
8738 HMR0LeaveCpu(pVCpu);
8739 HM_RESTORE_PREEMPT();
8740 return VINF_SUCCESS;
8741 }
8742
8743 Assert(pVCpu);
8744 Assert(VMMRZCallRing3IsEnabled(pVCpu));
8745 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
8746
8747 VMMRZCallRing3Disable(pVCpu);
8748
8749 Log4Func(("-> hmR0VmxLongJmpToRing3 enmOperation=%d\n", enmOperation));
8750
8751 int rc = hmR0VmxLongJmpToRing3(pVCpu);
8752 AssertRCReturn(rc, rc);
8753
8754 VMMRZCallRing3Enable(pVCpu);
8755 return VINF_SUCCESS;
8756}
8757
8758
8759/**
8760 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
8761 * stack.
8762 *
8763 * @returns Strict VBox status code (i.e. informational status codes too).
8764 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
8765 * @param pVCpu The cross context virtual CPU structure.
8766 * @param uValue The value to push to the guest stack.
8767 */
8768static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPUCC pVCpu, uint16_t uValue)
8769{
8770 /*
8771 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
8772 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
8773 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
8774 */
8775 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
8776 if (pCtx->sp == 1)
8777 return VINF_EM_RESET;
8778 pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
8779 int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t));
8780 AssertRC(rc);
8781 return rc;
8782}
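
/*
 * A standalone sketch of the 16-bit push semantics relied on above: SP is decremented
 * modulo 64 KiB and the word lands at SS.base + SP, with SP == 1 rejected the same way
 * the function above returns VINF_EM_RESET. The 64 KiB buffer stands in for the guest
 * segment and a plain memcpy replaces PGMPhysSimpleWriteGCPhys.
 */
#if 0 /* Illustrative only; build as a separate host program. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t g_abStack[0x10000];      /* stands in for the 64 KiB segment at SS.base */

static int RealModePush16(uint16_t *puSp, uint16_t uValue)
{
    if (*puSp == 1)
        return -1;                      /* the condition the function above maps to VINF_EM_RESET */
    *puSp -= (uint16_t)sizeof(uint16_t);/* may wrap from 0 to 0xfffe, which is expected */
    memcpy(&g_abStack[*puSp], &uValue, sizeof(uValue));
    return 0;
}

int main(void)
{
    uint16_t uSp = 0;                   /* an "empty" stack: the first push wraps to 0xfffe */
    RealModePush16(&uSp, 0x1234);
    printf("sp=%#x word=%#x\n", uSp, (unsigned)(g_abStack[uSp] | (g_abStack[uSp + 1] << 8)));
    return 0;
}
#endif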
8783
8784
8785/**
8786 * Injects an event into the guest upon VM-entry by updating the relevant fields
8787 * in the VM-entry area in the VMCS.
8788 *
8789 * @returns Strict VBox status code (i.e. informational status codes too).
8790 * @retval VINF_SUCCESS if the event is successfully injected into the VMCS.
8791 * @retval VINF_EM_RESET if event injection resulted in a triple-fault.
8792 *
8793 * @param pVCpu The cross context virtual CPU structure.
8794 * @param pVmxTransient The VMX-transient structure.
8795 * @param pEvent The event being injected.
8796 * @param pfIntrState Pointer to the VT-x guest-interruptibility-state. This
8797 * will be updated if necessary. This cannot be NULL.
8798 * @param fStepping Whether we're single-stepping guest execution and should
8799 * return VINF_EM_DBG_STEPPED if the event is injected
8800 * directly (registers modified by us, not by hardware on
8801 * VM-entry).
8802 */
8803static VBOXSTRICTRC hmR0VmxInjectEventVmcs(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, PCHMEVENT pEvent, bool fStepping,
8804 uint32_t *pfIntrState)
8805{
8806 /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */
8807 AssertMsg(!RT_HI_U32(pEvent->u64IntInfo), ("%#RX64\n", pEvent->u64IntInfo));
8808 Assert(pfIntrState);
8809
8810 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
8811 uint32_t u32IntInfo = pEvent->u64IntInfo;
8812 uint32_t const u32ErrCode = pEvent->u32ErrCode;
8813 uint32_t const cbInstr = pEvent->cbInstr;
8814 RTGCUINTPTR const GCPtrFault = pEvent->GCPtrFaultAddress;
8815 uint8_t const uVector = VMX_ENTRY_INT_INFO_VECTOR(u32IntInfo);
8816 uint32_t const uIntType = VMX_ENTRY_INT_INFO_TYPE(u32IntInfo);
8817
8818#ifdef VBOX_STRICT
8819 /*
8820 * Validate the error-code-valid bit for hardware exceptions.
8821 * No error codes for exceptions in real-mode.
8822 *
8823 * See Intel spec. 20.1.4 "Interrupt and Exception Handling"
8824 */
8825 if ( uIntType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT
8826 && !CPUMIsGuestInRealModeEx(pCtx))
8827 {
8828 switch (uVector)
8829 {
8830 case X86_XCPT_PF:
8831 case X86_XCPT_DF:
8832 case X86_XCPT_TS:
8833 case X86_XCPT_NP:
8834 case X86_XCPT_SS:
8835 case X86_XCPT_GP:
8836 case X86_XCPT_AC:
8837 AssertMsg(VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(u32IntInfo),
8838 ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector));
8839 RT_FALL_THRU();
8840 default:
8841 break;
8842 }
8843 }
8844
8845 /* Cannot inject an NMI when block-by-MOV SS is in effect. */
8846 Assert( uIntType != VMX_EXIT_INT_INFO_TYPE_NMI
8847 || !(*pfIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
8848#endif
8849
8850 if ( uIntType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT
8851 || uIntType == VMX_EXIT_INT_INFO_TYPE_NMI
8852 || uIntType == VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT
8853 || uIntType == VMX_EXIT_INT_INFO_TYPE_SW_XCPT)
8854 {
8855 Assert(uVector <= X86_XCPT_LAST);
8856 Assert(uIntType != VMX_EXIT_INT_INFO_TYPE_NMI || uVector == X86_XCPT_NMI);
8857 Assert(uIntType != VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT || uVector == X86_XCPT_DB);
8858 STAM_COUNTER_INC(&pVCpu->hm.s.aStatInjectedXcpts[uVector]);
8859 }
8860 else
8861 STAM_COUNTER_INC(&pVCpu->hm.s.aStatInjectedIrqs[uVector & MASK_INJECT_IRQ_STAT]);
8862
8863 /*
8864 * Hardware interrupts & exceptions cannot be delivered through the software interrupt
8865 * redirection bitmap to the real mode task in virtual-8086 mode. We must jump to the
8866 * interrupt handler in the (real-mode) guest.
8867 *
8868 * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode".
8869 * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling.
8870 */
8871 if (CPUMIsGuestInRealModeEx(pCtx)) /* CR0.PE bit changes are always intercepted, so it's up to date. */
8872 {
8873 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest)
8874 {
8875 /*
8876 * For CPUs with unrestricted guest execution enabled and with the guest
8877 * in real-mode, we must not set the deliver-error-code bit.
8878 *
8879 * See Intel spec. 26.2.1.3 "VM-Entry Control Fields".
8880 */
8881 u32IntInfo &= ~VMX_ENTRY_INT_INFO_ERROR_CODE_VALID;
8882 }
8883 else
8884 {
8885 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
8886 Assert(PDMVmmDevHeapIsEnabled(pVM));
8887 Assert(pVM->hm.s.vmx.pRealModeTSS);
8888 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
8889
8890 /* We require RIP, RSP, RFLAGS, CS, IDTR, import them. */
8891 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
8892 int rc2 = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_TABLE_MASK
8893 | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_RFLAGS);
8894 AssertRCReturn(rc2, rc2);
8895
8896 /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */
8897 size_t const cbIdtEntry = sizeof(X86IDTR16);
8898 if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pCtx->idtr.cbIdt)
8899 {
8900 /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */
8901 if (uVector == X86_XCPT_DF)
8902 return VINF_EM_RESET;
8903
8904 /* If we're injecting a #GP with no valid IDT entry, inject a double-fault.
8905 No error codes for exceptions in real-mode. */
8906 if (uVector == X86_XCPT_GP)
8907 {
8908 uint32_t const uXcptDfInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DF)
8909 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_ENTRY_INT_INFO_TYPE_HW_XCPT)
8910 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
8911 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
8912 HMEVENT EventXcptDf;
8913 RT_ZERO(EventXcptDf);
8914 EventXcptDf.u64IntInfo = uXcptDfInfo;
8915 return hmR0VmxInjectEventVmcs(pVCpu, pVmxTransient, &EventXcptDf, fStepping, pfIntrState);
8916 }
8917
8918 /*
8919 * If we're injecting an event with no valid IDT entry, inject a #GP.
8920 * No error codes for exceptions in real-mode.
8921 *
8922 * See Intel spec. 20.1.4 "Interrupt and Exception Handling"
8923 */
8924 uint32_t const uXcptGpInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_GP)
8925 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_ENTRY_INT_INFO_TYPE_HW_XCPT)
8926 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0)
8927 | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1);
8928 HMEVENT EventXcptGp;
8929 RT_ZERO(EventXcptGp);
8930 EventXcptGp.u64IntInfo = uXcptGpInfo;
8931 return hmR0VmxInjectEventVmcs(pVCpu, pVmxTransient, &EventXcptGp, fStepping, pfIntrState);
8932 }
8933
8934 /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */
8935 uint16_t uGuestIp = pCtx->ip;
8936 if (uIntType == VMX_ENTRY_INT_INFO_TYPE_SW_XCPT)
8937 {
8938 Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF);
8939 /* #BP and #OF are both benign traps, we need to resume the next instruction. */
8940 uGuestIp = pCtx->ip + (uint16_t)cbInstr;
8941 }
8942 else if (uIntType == VMX_ENTRY_INT_INFO_TYPE_SW_INT)
8943 uGuestIp = pCtx->ip + (uint16_t)cbInstr;
8944
8945 /* Get the code segment selector and offset from the IDT entry for the interrupt handler. */
8946 X86IDTR16 IdtEntry;
8947 RTGCPHYS const GCPhysIdtEntry = (RTGCPHYS)pCtx->idtr.pIdt + uVector * cbIdtEntry;
8948 rc2 = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry);
8949 AssertRCReturn(rc2, rc2);
8950
8951 /* Construct the stack frame for the interrupt/exception handler. */
8952 VBOXSTRICTRC rcStrict;
8953 rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, pCtx->eflags.u32);
8954 if (rcStrict == VINF_SUCCESS)
8955 {
8956 rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, pCtx->cs.Sel);
8957 if (rcStrict == VINF_SUCCESS)
8958 rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, uGuestIp);
8959 }
8960
8961 /* Clear the required eflag bits and jump to the interrupt/exception handler. */
8962 if (rcStrict == VINF_SUCCESS)
8963 {
8964 pCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
8965 pCtx->rip = IdtEntry.offSel;
8966 pCtx->cs.Sel = IdtEntry.uSel;
8967 pCtx->cs.ValidSel = IdtEntry.uSel;
8968 pCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry;
8969 if ( uIntType == VMX_ENTRY_INT_INFO_TYPE_HW_XCPT
8970 && uVector == X86_XCPT_PF)
8971 pCtx->cr2 = GCPtrFault;
8972
8973 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CS | HM_CHANGED_GUEST_CR2
8974 | HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS
8975 | HM_CHANGED_GUEST_RSP);
8976
8977 /*
8978 * If we delivered a hardware exception (other than an NMI) and if there was
8979 * block-by-STI in effect, we should clear it.
8980 */
8981 if (*pfIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
8982 {
8983 Assert( uIntType != VMX_ENTRY_INT_INFO_TYPE_NMI
8984 && uIntType != VMX_ENTRY_INT_INFO_TYPE_EXT_INT);
8985 Log4Func(("Clearing inhibition due to STI\n"));
8986 *pfIntrState &= ~VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
8987 }
8988
8989 Log4(("Injected real-mode: u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x Eflags=%#x CS:EIP=%04x:%04x\n",
8990 u32IntInfo, u32ErrCode, cbInstr, pCtx->eflags.u, pCtx->cs.Sel, pCtx->eip));
8991
8992 /*
8993 * The event has been truly dispatched to the guest. Mark it as no longer pending so
8994 * we don't attempt to undo it if we are returning to ring-3 before executing guest code.
8995 */
8996 pVCpu->hm.s.Event.fPending = false;
8997
8998 /*
8999 * If we eventually support nested-guest execution without unrestricted guest execution,
9000 * we should set fInterceptEvents here.
9001 */
9002 Assert(!pVmxTransient->fIsNestedGuest);
9003
9004 /* If we're stepping and we've changed cs:rip above, bail out of the VMX R0 execution loop. */
9005 if (fStepping)
9006 rcStrict = VINF_EM_DBG_STEPPED;
9007 }
9008 AssertMsg(rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
9009 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
9010 return rcStrict;
9011 }
9012 }
9013
9014 /*
9015 * Validate.
9016 */
9017 Assert(VMX_ENTRY_INT_INFO_IS_VALID(u32IntInfo)); /* Bit 31 (Valid bit) must be set by caller. */
9018 Assert(!(u32IntInfo & VMX_BF_ENTRY_INT_INFO_RSVD_12_30_MASK)); /* Bits 30:12 MBZ. */
9019
9020 /*
9021 * Inject the event into the VMCS.
9022 */
9023 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntInfo);
9024 if (VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(u32IntInfo))
9025 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode);
9026 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
9027 AssertRC(rc);
9028
9029 /*
9030 * Update guest CR2 if this is a page-fault.
9031 */
9032 if (VMX_ENTRY_INT_INFO_IS_XCPT_PF(u32IntInfo))
9033 pCtx->cr2 = GCPtrFault;
9034
9035 Log4(("Injecting u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x CR2=%#RX64\n", u32IntInfo, u32ErrCode, cbInstr, pCtx->cr2));
9036 return VINF_SUCCESS;
9037}
9038
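/*
 * Illustrative sketch, not part of the original source: composing a VM-entry
 * interruption-information value with plain shifts, mirroring what the
 * RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_*) expressions above produce. Layout per
 * Intel spec. 24.8.3: bits 7:0 vector, bits 10:8 type, bit 11 deliver-error-code,
 * bit 31 valid. The helper name is hypothetical.
 */
static uint32_t vmxSketchMakeEntryIntInfo(uint8_t uVector, uint8_t uType, int fErrCodeValid)
{
    return (uint32_t)uVector                            /* Bits  7:0  - vector. */
         | ((uint32_t)(uType & 0x7) << 8)               /* Bits 10:8  - interruption type. */
         | ((uint32_t)(fErrCodeValid ? 1 : 0) << 11)    /* Bit  11    - deliver error code. */
         | UINT32_C(0x80000000);                        /* Bit  31    - valid. */
}
/* Example: vmxSketchMakeEntryIntInfo(8, 3, 0), i.e. a #DF hardware exception (type 3) with no
   error code, yields the same value as the uXcptDfInfo composed in hmR0VmxInjectEventVmcs() above. */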
9039
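/*
 * Illustrative sketch, not part of the original source: how a real-mode IVT entry
 * (the 4-byte X86IDTR16 layout read above: 16-bit offset followed by the 16-bit
 * code-segment selector) is turned into the linear address of the handler that
 * CS:IP ends up pointing at. The limit check above accepts the entry only when
 * uVector * 4 + 3 is within the IDT limit. Names here are hypothetical.
 */
typedef struct SKETCHIVTENTRY
{
    uint16_t offHandler;    /* IP of the handler. */
    uint16_t uSelCs;        /* CS of the handler. */
} SKETCHIVTENTRY;

static uint32_t sketchRealModeHandlerLinearAddr(uint32_t uIvtBase, uint8_t uVector,
                                                const uint8_t *pbGuestMem)
{
    /* Entry i lives at IVT base + i * 4. */
    uint32_t const offEntry = uIvtBase + (uint32_t)uVector * sizeof(SKETCHIVTENTRY);
    SKETCHIVTENTRY Entry;
    Entry.offHandler = (uint16_t)(pbGuestMem[offEntry]     | (pbGuestMem[offEntry + 1] << 8));
    Entry.uSelCs     = (uint16_t)(pbGuestMem[offEntry + 2] | (pbGuestMem[offEntry + 3] << 8));
    /* Real-mode segmentation: linear = selector * 16 + offset. */
    return (uint32_t)Entry.uSelCs * 16 + Entry.offHandler;
}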
9040/**
9041 * Evaluates the event to be delivered to the guest and sets it as the pending
9042 * event.
9043 *
9044 * Toggling of interrupt force-flags here is safe since we update TRPM on premature
9045 * exits to ring-3 before executing guest code, see hmR0VmxExitToRing3(). We must
9046 * NOT restore these force-flags.
9047 *
9048 * @returns Strict VBox status code (i.e. informational status codes too).
9049 * @param pVCpu The cross context virtual CPU structure.
9050 * @param pVmxTransient The VMX-transient structure.
9051 * @param pfIntrState Where to store the VT-x guest-interruptibility state.
9052 */
9053static VBOXSTRICTRC hmR0VmxEvaluatePendingEvent(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t *pfIntrState)
9054{
9055 Assert(pfIntrState);
9056 Assert(!TRPMHasTrap(pVCpu));
9057
9058 /*
9059 * Compute/update guest-interruptibility state related FFs.
9060 * The FFs will be used below while evaluating events to be injected.
9061 */
9062 *pfIntrState = hmR0VmxGetGuestIntrStateAndUpdateFFs(pVCpu);
9063
9064 /*
9065 * Evaluate if a new event needs to be injected.
9066 * An event that's already pending has already performed all necessary checks.
9067 */
9068 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
9069 bool const fIsNestedGuest = pVmxTransient->fIsNestedGuest;
9070 if ( !pVCpu->hm.s.Event.fPending
9071 && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
9072 {
9073 /** @todo SMI. SMIs take priority over NMIs. */
9074
9075 /*
9076 * NMIs.
9077 * NMIs take priority over external interrupts.
9078 */
9079 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
9080 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI))
9081 {
9082 /*
9083 * For a guest, the FF always indicates the guest's ability to receive an NMI.
9084 *
9085 * For a nested-guest, the FF always indicates the outer guest's ability to
9086 * receive an NMI while the guest-interruptibility state bit depends on whether
9087 * the nested-hypervisor is using virtual-NMIs.
9088 */
9089 if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
9090 {
9091#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
9092 if ( fIsNestedGuest
9093 && CPUMIsGuestVmxPinCtlsSet(pCtx, VMX_PIN_CTLS_NMI_EXIT))
9094 return IEMExecVmxVmexitXcptNmi(pVCpu);
9095#endif
9096 hmR0VmxSetPendingXcptNmi(pVCpu);
9097 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
9098 Log4Func(("NMI pending injection\n"));
9099
9100 /* We've injected the NMI, bail. */
9101 return VINF_SUCCESS;
9102 }
9103 else if (!fIsNestedGuest)
9104 hmR0VmxSetNmiWindowExitVmcs(pVmcsInfo);
9105 }
9106
9107 /*
9108 * External interrupts (PIC/APIC).
9109 * Once PDMGetInterrupt() returns a valid interrupt we -must- deliver it.
9110 * We cannot re-request the interrupt from the controller again.
9111 */
9112 if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
9113 && !pVCpu->hm.s.fSingleInstruction)
9114 {
9115 Assert(!DBGFIsStepping(pVCpu));
9116 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
9117 AssertRC(rc);
9118
9119 /*
9120 * We must not check EFLAGS directly when executing a nested-guest, use
9121 * CPUMIsGuestPhysIntrEnabled() instead as EFLAGS.IF does not control the blocking of
9122 * external interrupts when "External interrupt exiting" is set. This fixes a nasty
9123 * SMP hang while executing nested-guest VCPUs on spinlocks which aren't rescued by
9124 * other VM-exits (like a preemption timer), see @bugref{9562#c18}.
9125 *
9126 * See Intel spec. 25.4.1 "Event Blocking".
9127 */
9128 if (CPUMIsGuestPhysIntrEnabled(pVCpu))
9129 {
9130#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
9131 if ( fIsNestedGuest
9132 && CPUMIsGuestVmxPinCtlsSet(pCtx, VMX_PIN_CTLS_EXT_INT_EXIT))
9133 {
9134 VBOXSTRICTRC rcStrict = IEMExecVmxVmexitExtInt(pVCpu, 0 /* uVector */, true /* fIntPending */);
9135 if (rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE)
9136 return rcStrict;
9137 }
9138#endif
9139 uint8_t u8Interrupt;
9140 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
9141 if (RT_SUCCESS(rc))
9142 {
9143#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
9144 if ( fIsNestedGuest
9145 && CPUMIsGuestVmxPinCtlsSet(pCtx, VMX_PIN_CTLS_EXT_INT_EXIT))
9146 {
9147 VBOXSTRICTRC rcStrict = IEMExecVmxVmexitExtInt(pVCpu, u8Interrupt, false /* fIntPending */);
9148 Assert(rcStrict != VINF_VMX_INTERCEPT_NOT_ACTIVE);
9149 return rcStrict;
9150 }
9151#endif
9152 hmR0VmxSetPendingExtInt(pVCpu, u8Interrupt);
9153 Log4Func(("External interrupt (%#x) pending injection\n", u8Interrupt));
9154 }
9155 else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
9156 {
9157 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchTprMaskedIrq);
9158
9159 if ( !fIsNestedGuest
9160 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
9161 hmR0VmxApicSetTprThreshold(pVmcsInfo, u8Interrupt >> 4);
9162 /* else: for nested-guests, TPR threshold is picked up while merging VMCS controls. */
9163
9164 /*
9165 * If the CPU doesn't have TPR shadowing, we will always get a VM-exit on TPR changes and
9166 * APICSetTpr() will end up setting the VMCPU_FF_INTERRUPT_APIC if required, so there is no
9167 * need to re-set this force-flag here.
9168 */
9169 }
9170 else
9171 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
9172
9173 /* We've injected the interrupt or taken necessary action, bail. */
9174 return VINF_SUCCESS;
9175 }
9176 if (!fIsNestedGuest)
9177 hmR0VmxSetIntWindowExitVmcs(pVmcsInfo);
9178 }
9179 }
9180 else if (!fIsNestedGuest)
9181 {
9182 /*
9183 * An event is being injected or we are in an interrupt shadow. Check if another event is
9184 * pending. If so, instruct VT-x to cause a VM-exit as soon as the guest is ready to accept
9185 * the pending event.
9186 */
9187 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI))
9188 hmR0VmxSetNmiWindowExitVmcs(pVmcsInfo);
9189 else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)
9190 && !pVCpu->hm.s.fSingleInstruction)
9191 hmR0VmxSetIntWindowExitVmcs(pVmcsInfo);
9192 }
9193 /* else: for nested-guests, NMI/interrupt-window exiting will be picked up when merging VMCS controls. */
9194
9195 return VINF_SUCCESS;
9196}
9197
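/*
 * Illustrative sketch, not part of the original source: the APIC priority rule
 * behind the TPR threshold that hmR0VmxEvaluatePendingEvent() above programs with
 * u8Interrupt >> 4. Vectors are grouped into 16 priority classes (vector >> 4) and
 * a fixed interrupt stays masked while its class is not above the TPR's class.
 * The helper name is hypothetical.
 */
static int apicSketchIsVectorMaskedByTpr(uint8_t uVector, uint8_t uTpr)
{
    uint8_t const uVectorClass = uVector >> 4;      /* Priority class of the pending vector. */
    uint8_t const uTprClass    = uTpr   >> 4;       /* Priority class held in the TPR. */
    return uVectorClass <= uTprClass;               /* Masked until the TPR class drops below it. */
}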
9198
9199/**
9200 * Injects any pending events into the guest if the guest is in a state to
9201 * receive them.
9202 *
9203 * @returns Strict VBox status code (i.e. informational status codes too).
9204 * @param pVCpu The cross context virtual CPU structure.
9205 * @param pVmxTransient The VMX-transient structure.
9206 * @param fIntrState The VT-x guest-interruptibility state.
9207 * @param fStepping Whether we are single-stepping the guest using the
9208 * hypervisor debugger and should return
9209 * VINF_EM_DBG_STEPPED if the event was dispatched
9210 * directly.
9211 */
9212static VBOXSTRICTRC hmR0VmxInjectPendingEvent(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, uint32_t fIntrState, bool fStepping)
9213{
9214 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
9215 Assert(VMMRZCallRing3IsEnabled(pVCpu));
9216
9217#ifdef VBOX_STRICT
9218 /*
9219 * Verify guest-interruptibility state.
9220 *
9221 * We put this in a scoped block so we do not accidentally use fBlockSti or fBlockMovSS,
9222 * since injecting an event may modify the interruptibility state and we must thus always
9223 * use fIntrState.
9224 */
9225 {
9226 bool const fBlockMovSS = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS);
9227 bool const fBlockSti = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI);
9228 Assert(!fBlockSti || !(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_RFLAGS));
9229 Assert(!fBlockSti || pVCpu->cpum.GstCtx.eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
9230 Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI)); /* We don't support block-by-SMI yet.*/
9231 Assert(!TRPMHasTrap(pVCpu));
9232 NOREF(fBlockMovSS); NOREF(fBlockSti);
9233 }
9234#endif
9235
9236 VBOXSTRICTRC rcStrict = VINF_SUCCESS;
9237 if (pVCpu->hm.s.Event.fPending)
9238 {
9239 /*
9240 * Do -not- clear any interrupt-window exiting control here. We might have an interrupt
9241 * pending even while injecting an event and in this case, we want a VM-exit as soon as
9242 * the guest is ready for the next interrupt, see @bugref{6208#c45}.
9243 *
9244 * See Intel spec. 26.6.5 "Interrupt-Window Exiting and Virtual-Interrupt Delivery".
9245 */
9246 uint32_t const uIntType = VMX_ENTRY_INT_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo);
9247#ifdef VBOX_STRICT
9248 if (uIntType == VMX_ENTRY_INT_INFO_TYPE_EXT_INT)
9249 {
9250 Assert(pVCpu->cpum.GstCtx.eflags.u32 & X86_EFL_IF);
9251 Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI));
9252 Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
9253 }
9254 else if (uIntType == VMX_ENTRY_INT_INFO_TYPE_NMI)
9255 {
9256 Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI));
9257 Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI));
9258 Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
9259 }
9260#endif
9261 Log4(("Injecting pending event vcpu[%RU32] u64IntInfo=%#RX64 Type=%#RX32\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntInfo,
9262 uIntType));
9263
9264 /*
9265 * Inject the event and get any changes to the guest-interruptibility state.
9266 *
9267 * The guest-interruptibility state may need to be updated if we inject the event
9268 * into the guest IDT ourselves (for real-on-v86 guest injecting software interrupts).
9269 */
9270 rcStrict = hmR0VmxInjectEventVmcs(pVCpu, pVmxTransient, &pVCpu->hm.s.Event, fStepping, &fIntrState);
9271 AssertRCReturn(VBOXSTRICTRC_VAL(rcStrict), rcStrict);
9272
9273 if (uIntType == VMX_ENTRY_INT_INFO_TYPE_EXT_INT)
9274 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt);
9275 else
9276 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt);
9277 }
9278
9279 /*
9280 * Deliver any pending debug exceptions if the guest is single-stepping using EFLAGS.TF and
9281     * is in an interrupt shadow (block-by-STI or block-by-MOV SS).
9282 */
9283 if ( (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
9284 && !pVmxTransient->fIsNestedGuest)
9285 {
9286 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS);
9287
9288 if (!pVCpu->hm.s.fSingleInstruction)
9289 {
9290 /*
9291 * Set or clear the BS bit depending on whether the trap flag is active or not. We need
9292 * to do both since we clear the BS bit from the VMCS while exiting to ring-3.
9293 */
9294 Assert(!DBGFIsStepping(pVCpu));
9295 uint8_t const fTrapFlag = !!(pVCpu->cpum.GstCtx.eflags.u32 & X86_EFL_TF);
9296 int rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, fTrapFlag << VMX_BF_VMCS_PENDING_DBG_XCPT_BS_SHIFT);
9297 AssertRC(rc);
9298 }
9299 else
9300 {
9301 /*
9302 * We must not deliver a debug exception when single-stepping over STI/Mov-SS in the
9303 * hypervisor debugger using EFLAGS.TF but rather clear interrupt inhibition. However,
9304             * we take care of this case in hmR0VmxExportSharedDebugState, as well as the case
9305             * where we use MTF, so just make sure it's called before executing guest code.
9306 */
9307 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR_MASK);
9308 }
9309 }
9310    /* else: for nested-guests, this is currently handled while merging controls. */
9311
9312 /*
9313 * Finally, update the guest-interruptibility state.
9314 *
9315     * This is required for real-on-v86 software interrupt injection, for
9316     * pending debug exceptions, as well as for updates to the guest state from ring-3 (IEM).
9317 */
9318 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
9319 AssertRC(rc);
9320
9321 /*
9322 * There's no need to clear the VM-entry interruption-information field here if we're not
9323 * injecting anything. VT-x clears the valid bit on every VM-exit.
9324 *
9325 * See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
9326 */
9327
9328 Assert(rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping));
9329 return rcStrict;
9330}
9331
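/*
 * Illustrative sketch, not part of the original source: deriving the BS bit of the
 * guest pending-debug-exceptions field from EFLAGS.TF, as hmR0VmxInjectPendingEvent()
 * above does while the guest sits in an interrupt shadow. BS lives at bit 14 of that
 * field (the same position as DR6.BS) and TF is bit 8 of EFLAGS. The helper name is
 * hypothetical; it only computes the value that would be written to the VMCS.
 */
static uint64_t vmxSketchPendingDbgXcptsFromEflags(uint32_t fEflags)
{
    uint64_t const fTrapFlag = (fEflags >> 8) & 1;  /* EFLAGS.TF (bit 8). */
    return fTrapFlag << 14;                         /* Pending-debug-exceptions BS (bit 14). */
}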
9332
9333/**
9334 * Enters the VT-x session.
9335 *
9336 * @returns VBox status code.
9337 * @param pVCpu The cross context virtual CPU structure.
9338 */
9339VMMR0DECL(int) VMXR0Enter(PVMCPUCC pVCpu)
9340{
9341 AssertPtr(pVCpu);
9342 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported);
9343 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
9344
9345 LogFlowFunc(("pVCpu=%p\n", pVCpu));
9346 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
9347 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
9348
9349#ifdef VBOX_STRICT
9350 /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */
9351 RTCCUINTREG uHostCr4 = ASMGetCR4();
9352 if (!(uHostCr4 & X86_CR4_VMXE))
9353 {
9354 LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n"));
9355 return VERR_VMX_X86_CR4_VMXE_CLEARED;
9356 }
9357#endif
9358
9359 /*
9360 * Do the EMT scheduled L1D and MDS flush here if needed.
9361 */
9362 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
9363 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
9364 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
9365 hmR0MdsClear();
9366
9367 /*
9368 * Load the appropriate VMCS as the current and active one.
9369 */
9370 PVMXVMCSINFO pVmcsInfo;
9371 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx);
9372 if (!fInNestedGuestMode)
9373 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfo;
9374 else
9375 pVmcsInfo = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
9376 int rc = hmR0VmxLoadVmcs(pVmcsInfo);
9377 if (RT_SUCCESS(rc))
9378 {
9379 pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs = fInNestedGuestMode;
9380 pVCpu->hm.s.vmx.fSwitchedToNstGstVmcsCopyForRing3 = fInNestedGuestMode;
9381 pVCpu->hmr0.s.fLeaveDone = false;
9382 Log4Func(("Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
9383 }
9384 return rc;
9385}
9386
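/*
 * Illustrative sketch, not part of the original source: the shape of the scheduled
 * L1D/MDS mitigation choice made above when entering the VT-x session. The flag
 * values, type and function names are hypothetical stand-ins; the only facts carried
 * over are the architectural ones (IA32_FLUSH_CMD is MSR 0x10b and bit 0 of it
 * requests an L1D flush; the MDS mitigation is a VERW-based CPU buffer clear).
 */
#define SKETCH_WSF_L1D_SCHED    UINT32_C(0x01)  /* Hypothetical "flush L1D when (re)scheduled" flag. */
#define SKETCH_WSF_MDS_SCHED    UINT32_C(0x02)  /* Hypothetical "clear CPU buffers when (re)scheduled" flag. */

typedef enum SKETCHMITIGATION
{
    SKETCH_MITIGATION_NONE = 0,
    SKETCH_MITIGATION_L1D_FLUSH,    /* Write 1 (L1D_FLUSH) to MSR 0x10b (IA32_FLUSH_CMD). */
    SKETCH_MITIGATION_MDS_CLEAR     /* VERW-based CPU buffer clear. */
} SKETCHMITIGATION;

static SKETCHMITIGATION sketchScheduledMitigation(uint32_t fWorldSwitcher)
{
    if (fWorldSwitcher & SKETCH_WSF_L1D_SCHED)      /* Mirrors the if / else-if ordering above: */
        return SKETCH_MITIGATION_L1D_FLUSH;         /* the L1D flush takes precedence over the MDS clear. */
    if (fWorldSwitcher & SKETCH_WSF_MDS_SCHED)
        return SKETCH_MITIGATION_MDS_CLEAR;
    return SKETCH_MITIGATION_NONE;
}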
9387
9388/**
9389 * The thread-context callback.
9390 *
9391 * This is used together with RTThreadCtxHookCreate() on platforms which
9392 * support it, and directly from VMMR0EmtPrepareForBlocking() and
9393 * VMMR0EmtResumeAfterBlocking() on platforms which don't.
9394 *
9395 * @param enmEvent The thread-context event.
9396 * @param pVCpu The cross context virtual CPU structure.
9397 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
9398 * @thread EMT(pVCpu)
9399 */
9400VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPUCC pVCpu, bool fGlobalInit)
9401{
9402 AssertPtr(pVCpu);
9403 RT_NOREF1(fGlobalInit);
9404
9405 switch (enmEvent)
9406 {
9407 case RTTHREADCTXEVENT_OUT:
9408 {
9409 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
9410 VMCPU_ASSERT_EMT(pVCpu);
9411
9412 /* No longjmps (logger flushes, locks) in this fragile context. */
9413 VMMRZCallRing3Disable(pVCpu);
9414 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
9415
9416 /* Restore host-state (FPU, debug etc.) */
9417 if (!pVCpu->hmr0.s.fLeaveDone)
9418 {
9419 /*
9420 * Do -not- import the guest-state here as we might already be in the middle of importing
9421 * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState().
9422 */
9423 hmR0VmxLeave(pVCpu, false /* fImportState */);
9424 pVCpu->hmr0.s.fLeaveDone = true;
9425 }
9426
9427 /* Leave HM context, takes care of local init (term). */
9428 int rc = HMR0LeaveCpu(pVCpu);
9429 AssertRC(rc);
9430
9431 /* Restore longjmp state. */
9432 VMMRZCallRing3Enable(pVCpu);
9433 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
9434 break;
9435 }
9436
9437 case RTTHREADCTXEVENT_IN:
9438 {
9439 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
9440 VMCPU_ASSERT_EMT(pVCpu);
9441
9442 /* Do the EMT scheduled L1D and MDS flush here if needed. */
9443 if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_L1D_SCHED)
9444 ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D);
9445 else if (pVCpu->hmr0.s.fWorldSwitcher & HM_WSF_MDS_SCHED)
9446 hmR0MdsClear();
9447
9448 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
9449 VMMRZCallRing3Disable(pVCpu);
9450 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
9451
9452 /* Initialize the bare minimum state required for HM. This takes care of
9453 initializing VT-x if necessary (onlined CPUs, local init etc.) */
9454 int rc = hmR0EnterCpu(pVCpu);
9455 AssertRC(rc);
9456 Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE))
9457 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE));
9458
9459 /* Load the active VMCS as the current one. */
9460 PVMXVMCSINFO pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
9461 rc = hmR0VmxLoadVmcs(pVmcsInfo);
9462 AssertRC(rc);
9463 Log4Func(("Resumed: Loaded Vmcs. HostCpuId=%u\n", RTMpCpuId()));
9464 pVCpu->hmr0.s.fLeaveDone = false;
9465
9466 /* Restore longjmp state. */
9467 VMMRZCallRing3Enable(pVCpu);
9468 break;
9469 }
9470
9471 default:
9472 break;
9473 }
9474}
9475
9476
9477/**
9478 * Exports the host state into the VMCS host-state area.
9479 * Sets up the VM-exit MSR-load area.
9480 *
9481 * The CPU state will be loaded from these fields on every successful VM-exit.
9482 *
9483 * @returns VBox status code.
9484 * @param pVCpu The cross context virtual CPU structure.
9485 *
9486 * @remarks No-long-jump zone!!!
9487 */
9488static int hmR0VmxExportHostState(PVMCPUCC pVCpu)
9489{
9490 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
9491
9492 int rc = VINF_SUCCESS;
9493 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
9494 {
9495 uint64_t uHostCr4 = hmR0VmxExportHostControlRegs();
9496
9497 rc = hmR0VmxExportHostSegmentRegs(pVCpu, uHostCr4);
9498 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
9499
9500 hmR0VmxExportHostMsrs(pVCpu);
9501
9502 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT;
9503 }
9504 return rc;
9505}
9506
9507
9508/**
9509 * Saves the host state in the VMCS host-state.
9510 *
9511 * @returns VBox status code.
9512 * @param pVCpu The cross context virtual CPU structure.
9513 *
9514 * @remarks No-long-jump zone!!!
9515 */
9516VMMR0DECL(int) VMXR0ExportHostState(PVMCPUCC pVCpu)
9517{
9518 AssertPtr(pVCpu);
9519 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
9520
9521 /*
9522 * Export the host state here while entering HM context.
9523 * When thread-context hooks are used, we might get preempted and have to re-save the host
9524 * state but most of the time we won't be, so do it here before we disable interrupts.
9525 */
9526 return hmR0VmxExportHostState(pVCpu);
9527}
9528
9529
9530/**
9531 * Exports the guest state into the VMCS guest-state area.
9532 *
9533 * This will typically be done before VM-entry when the guest-CPU state and the
9534 * VMCS state may potentially be out of sync.
9535 *
9536 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
9537 * VM-entry controls.
9538 * Sets up the appropriate VMX non-root function to execute guest code based on
9539 * the guest CPU mode.
9540 *
9541 * @returns VBox strict status code.
9542 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
9543 * without unrestricted guest execution and the VMMDev is not presently
9544 * mapped (e.g. EFI32).
9545 *
9546 * @param pVCpu The cross context virtual CPU structure.
9547 * @param pVmxTransient The VMX-transient structure.
9548 *
9549 * @remarks No-long-jump zone!!!
9550 */
9551static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
9552{
9553 AssertPtr(pVCpu);
9554 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
9555 LogFlowFunc(("pVCpu=%p\n", pVCpu));
9556
9557 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x);
9558
9559 /*
9560 * Determine real-on-v86 mode.
9561 * Used when the guest is in real-mode and unrestricted guest execution is not used.
9562 */
9563 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmxTransient->pVmcsInfo->pShared;
9564 if ( pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest
9565 || !CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx))
9566 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
9567 else
9568 {
9569 Assert(!pVmxTransient->fIsNestedGuest);
9570 pVmcsInfoShared->RealMode.fRealOnV86Active = true;
9571 }
9572
9573 /*
9574 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
9575 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
9576 */
9577 int rc = hmR0VmxExportGuestEntryExitCtls(pVCpu, pVmxTransient);
9578 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
9579
9580 rc = hmR0VmxExportGuestCR0(pVCpu, pVmxTransient);
9581 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
9582
9583 VBOXSTRICTRC rcStrict = hmR0VmxExportGuestCR3AndCR4(pVCpu, pVmxTransient);
9584 if (rcStrict == VINF_SUCCESS)
9585 { /* likely */ }
9586 else
9587 {
9588 Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict));
9589 return rcStrict;
9590 }
9591
9592 rc = hmR0VmxExportGuestSegRegsXdtr(pVCpu, pVmxTransient);
9593 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
9594
9595 rc = hmR0VmxExportGuestMsrs(pVCpu, pVmxTransient);
9596 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
9597
9598 hmR0VmxExportGuestApicTpr(pVCpu, pVmxTransient);
9599 hmR0VmxExportGuestXcptIntercepts(pVCpu, pVmxTransient);
9600 hmR0VmxExportGuestRip(pVCpu);
9601 hmR0VmxExportGuestRsp(pVCpu);
9602 hmR0VmxExportGuestRflags(pVCpu, pVmxTransient);
9603
9604 rc = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
9605 AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc);
9606
9607 /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */
9608 ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP)
9609 | HM_CHANGED_GUEST_CR2
9610 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7)
9611 | HM_CHANGED_GUEST_X87
9612 | HM_CHANGED_GUEST_SSE_AVX
9613 | HM_CHANGED_GUEST_OTHER_XSAVE
9614 | HM_CHANGED_GUEST_XCRx
9615 | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */
9616 | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */
9617 | HM_CHANGED_GUEST_TSC_AUX
9618 | HM_CHANGED_GUEST_OTHER_MSRS
9619 | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK)));
9620
9621 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x);
9622 return rc;
9623}
9624
9625
9626/**
9627 * Exports the state shared between the host and guest into the VMCS.
9628 *
9629 * @param pVCpu The cross context virtual CPU structure.
9630 * @param pVmxTransient The VMX-transient structure.
9631 *
9632 * @remarks No-long-jump zone!!!
9633 */
9634static void hmR0VmxExportSharedState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
9635{
9636 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
9637 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
9638
9639 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK)
9640 {
9641 int rc = hmR0VmxExportSharedDebugState(pVCpu, pVmxTransient);
9642 AssertRC(rc);
9643 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK;
9644
9645 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
9646 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS)
9647 hmR0VmxExportGuestRflags(pVCpu, pVmxTransient);
9648 }
9649
9650 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS)
9651 {
9652 hmR0VmxLazyLoadGuestMsrs(pVCpu);
9653 pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS;
9654 }
9655
9656 AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE),
9657 ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
9658}
9659
9660
9661/**
9662 * Worker for loading the guest-state bits in the inner VT-x execution loop.
9663 *
9664 * @returns Strict VBox status code (i.e. informational status codes too).
9665 * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code
9666 * without unrestricted guest execution and the VMMDev is not presently
9667 * mapped (e.g. EFI32).
9668 *
9669 * @param pVCpu The cross context virtual CPU structure.
9670 * @param pVmxTransient The VMX-transient structure.
9671 *
9672 * @remarks No-long-jump zone!!!
9673 */
9674static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
9675{
9676 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
9677 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
9678
9679#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
9680 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
9681#endif
9682
9683 /*
9684 * For many VM-exits only RIP/RSP/RFLAGS (and HWVIRT state when executing a nested-guest)
9685 * changes. First try to export only these without going through all other changed-flag checks.
9686 */
9687 VBOXSTRICTRC rcStrict;
9688 uint64_t const fCtxMask = HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE;
9689 uint64_t const fMinimalMask = HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT;
9690 uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
9691
9692 /* If only RIP/RSP/RFLAGS/HWVIRT changed, export only those (quicker, happens more often).*/
9693 if ( (fCtxChanged & fMinimalMask)
9694 && !(fCtxChanged & (fCtxMask & ~fMinimalMask)))
9695 {
9696 hmR0VmxExportGuestRip(pVCpu);
9697 hmR0VmxExportGuestRsp(pVCpu);
9698 hmR0VmxExportGuestRflags(pVCpu, pVmxTransient);
9699 rcStrict = hmR0VmxExportGuestHwvirtState(pVCpu, pVmxTransient);
9700 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal);
9701 }
9702 /* If anything else also changed, go through the full export routine and export as required. */
9703 else if (fCtxChanged & fCtxMask)
9704 {
9705 rcStrict = hmR0VmxExportGuestState(pVCpu, pVmxTransient);
9706 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
9707 { /* likely */}
9708 else
9709 {
9710 AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("Failed to export guest state! rc=%Rrc\n",
9711 VBOXSTRICTRC_VAL(rcStrict)));
9712 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
9713 return rcStrict;
9714 }
9715 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull);
9716 }
9717 /* Nothing changed, nothing to load here. */
9718 else
9719 rcStrict = VINF_SUCCESS;
9720
9721#ifdef VBOX_STRICT
9722 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
9723 uint64_t const fCtxChangedCur = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged);
9724 AssertMsg(!(fCtxChangedCur & fCtxMask), ("fCtxChangedCur=%#RX64\n", fCtxChangedCur));
9725#endif
9726 return rcStrict;
9727}
9728
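/*
 * Illustrative sketch, not part of the original source: the changed-flags test used
 * by hmR0VmxExportGuestStateOptimal() above. The fast path is taken only when at
 * least one of the "cheap" bits is dirty and no other exportable bit is. The flag
 * values below are hypothetical placeholders; the bit logic is the point.
 */
#define SKETCH_DIRTY_RIP        UINT64_C(0x0001)
#define SKETCH_DIRTY_RSP        UINT64_C(0x0002)
#define SKETCH_DIRTY_RFLAGS     UINT64_C(0x0004)
#define SKETCH_DIRTY_SREGS      UINT64_C(0x0008)
#define SKETCH_DIRTY_ALL        UINT64_C(0x000f)

static int sketchUseMinimalExport(uint64_t fDirty)
{
    uint64_t const fMinimal = SKETCH_DIRTY_RIP | SKETCH_DIRTY_RSP | SKETCH_DIRTY_RFLAGS;
    return (fDirty & fMinimal) != 0                         /* Something cheap changed... */
        && (fDirty & (SKETCH_DIRTY_ALL & ~fMinimal)) == 0;  /* ...and nothing else did. */
}
/* sketchUseMinimalExport(SKETCH_DIRTY_RIP) is true, while
   sketchUseMinimalExport(SKETCH_DIRTY_RIP | SKETCH_DIRTY_SREGS) is false. */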
9729
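/*
 * Illustrative sketch, not part of the original source: what a canonical-address
 * check such as the X86_IS_CANONICAL() calls in hmR0VmxCheckGuestState() below
 * amounts to on a CPU with 48 implemented linear-address bits - bits 63:47 must
 * all be copies of bit 47. The helper name is hypothetical.
 */
static int sketchIsCanonical48(uint64_t uAddr)
{
    uint64_t const uHighBits = uAddr >> 47;                   /* Bits 63:47 (17 bits). */
    return uHighBits == 0 || uHighBits == UINT64_C(0x1ffff);  /* All zero or all one. */
}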
9730/**
9731 * Tries to determine what part of the guest-state VT-x has deemed as invalid
9732 * and update error record fields accordingly.
9733 *
9734 * @returns VMX_IGS_* error codes.
9735 * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything
9736 * wrong with the guest state.
9737 *
9738 * @param pVCpu The cross context virtual CPU structure.
9739 * @param pVmcsInfo The VMCS info. object.
9740 *
9741 * @remarks This function assumes our cache of the VMCS controls
9742 * are valid, i.e. hmR0VmxCheckCachedVmcsCtls() succeeded.
9743 */
9744static uint32_t hmR0VmxCheckGuestState(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
9745{
9746#define HMVMX_ERROR_BREAK(err) { uError = (err); break; }
9747#define HMVMX_CHECK_BREAK(expr, err) do { \
9748 if (!(expr)) { uError = (err); break; } \
9749 } while (0)
9750
9751 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
9752 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
9753 uint32_t uError = VMX_IGS_ERROR;
9754 uint32_t u32IntrState = 0;
9755 bool const fUnrestrictedGuest = pVM->hmr0.s.vmx.fUnrestrictedGuest;
9756 do
9757 {
9758 int rc;
9759
9760 /*
9761 * Guest-interruptibility state.
9762 *
9763 * Read this first so that any check that fails prior to those that actually
9764 * require the guest-interruptibility state would still reflect the correct
9765 * VMCS value and avoids causing further confusion.
9766 */
9767 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &u32IntrState);
9768 AssertRC(rc);
9769
9770 uint32_t u32Val;
9771 uint64_t u64Val;
9772
9773 /*
9774 * CR0.
9775 */
9776 /** @todo Why do we need to OR and AND the fixed-0 and fixed-1 bits below? */
9777 uint64_t fSetCr0 = (g_HmMsrs.u.vmx.u64Cr0Fixed0 & g_HmMsrs.u.vmx.u64Cr0Fixed1);
9778 uint64_t const fZapCr0 = (g_HmMsrs.u.vmx.u64Cr0Fixed0 | g_HmMsrs.u.vmx.u64Cr0Fixed1);
9779 /* Exceptions for unrestricted guest execution for CR0 fixed bits (PE, PG).
9780 See Intel spec. 26.3.1 "Checks on Guest Control Registers, Debug Registers and MSRs." */
9781 if (fUnrestrictedGuest)
9782 fSetCr0 &= ~(uint64_t)(X86_CR0_PE | X86_CR0_PG);
9783
9784 uint64_t u64GuestCr0;
9785 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64GuestCr0);
9786 AssertRC(rc);
9787 HMVMX_CHECK_BREAK((u64GuestCr0 & fSetCr0) == fSetCr0, VMX_IGS_CR0_FIXED1);
9788 HMVMX_CHECK_BREAK(!(u64GuestCr0 & ~fZapCr0), VMX_IGS_CR0_FIXED0);
9789 if ( !fUnrestrictedGuest
9790 && (u64GuestCr0 & X86_CR0_PG)
9791 && !(u64GuestCr0 & X86_CR0_PE))
9792 HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO);
9793
9794 /*
9795 * CR4.
9796 */
9797 /** @todo Why do we need to OR and AND the fixed-0 and fixed-1 bits below? */
9798 uint64_t const fSetCr4 = (g_HmMsrs.u.vmx.u64Cr4Fixed0 & g_HmMsrs.u.vmx.u64Cr4Fixed1);
9799 uint64_t const fZapCr4 = (g_HmMsrs.u.vmx.u64Cr4Fixed0 | g_HmMsrs.u.vmx.u64Cr4Fixed1);
9800
9801 uint64_t u64GuestCr4;
9802 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR4, &u64GuestCr4);
9803 AssertRC(rc);
9804 HMVMX_CHECK_BREAK((u64GuestCr4 & fSetCr4) == fSetCr4, VMX_IGS_CR4_FIXED1);
9805 HMVMX_CHECK_BREAK(!(u64GuestCr4 & ~fZapCr4), VMX_IGS_CR4_FIXED0);
9806
9807 /*
9808 * IA32_DEBUGCTL MSR.
9809 */
9810 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val);
9811 AssertRC(rc);
9812 if ( (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
9813 && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. */
9814 {
9815 HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED);
9816 }
9817 uint64_t u64DebugCtlMsr = u64Val;
9818
9819#ifdef VBOX_STRICT
9820 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
9821 AssertRC(rc);
9822 Assert(u32Val == pVmcsInfo->u32EntryCtls);
9823#endif
9824 bool const fLongModeGuest = RT_BOOL(pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_IA32E_MODE_GUEST);
9825
9826 /*
9827 * RIP and RFLAGS.
9828 */
9829 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RIP, &u64Val);
9830 AssertRC(rc);
9831 /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */
9832 if ( !fLongModeGuest
9833 || !pCtx->cs.Attr.n.u1Long)
9834 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID);
9835 /** @todo If the processor supports N < 64 linear-address bits, bits 63:N
9836 * must be identical if the "IA-32e mode guest" VM-entry
9837 * control is 1 and CS.L is 1. No check applies if the
9838 * CPU supports 64 linear-address bits. */
9839
9840 /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */
9841 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_RFLAGS, &u64Val);
9842 AssertRC(rc);
9843 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */
9844 VMX_IGS_RFLAGS_RESERVED);
9845 HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
9846 uint32_t const u32Eflags = u64Val;
9847
9848 if ( fLongModeGuest
9849 || ( fUnrestrictedGuest
9850 && !(u64GuestCr0 & X86_CR0_PE)))
9851 {
9852 HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID);
9853 }
9854
9855 uint32_t u32EntryInfo;
9856 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo);
9857 AssertRC(rc);
9858 if (VMX_ENTRY_INT_INFO_IS_EXT_INT(u32EntryInfo))
9859 HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID);
9860
9861 /*
9862 * 64-bit checks.
9863 */
9864 if (fLongModeGuest)
9865 {
9866 HMVMX_CHECK_BREAK(u64GuestCr0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE);
9867 HMVMX_CHECK_BREAK(u64GuestCr4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE);
9868 }
9869
9870 if ( !fLongModeGuest
9871 && (u64GuestCr4 & X86_CR4_PCIDE))
9872 HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE);
9873
9874 /** @todo CR3 field must be such that bits 63:52 and bits in the range
9875 * 51:32 beyond the processor's physical-address width are 0. */
9876
9877 if ( (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG)
9878 && (pCtx->dr[7] & X86_DR7_MBZ_MASK))
9879 HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED);
9880
9881        rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_ESP, &u64Val);
9882 AssertRC(rc);
9883 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL);
9884
9885        rc = VMXReadVmcsNw(VMX_VMCS_GUEST_SYSENTER_EIP, &u64Val);
9886 AssertRC(rc);
9887 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL);
9888
9889 /*
9890 * PERF_GLOBAL MSR.
9891 */
9892 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_PERF_MSR)
9893 {
9894 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val);
9895 AssertRC(rc);
9896 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)),
9897 VMX_IGS_PERF_GLOBAL_MSR_RESERVED); /* Bits 63:35, bits 31:2 MBZ. */
9898 }
9899
9900 /*
9901 * PAT MSR.
9902 */
9903 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_PAT_MSR)
9904 {
9905 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val);
9906 AssertRC(rc);
9907 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0x707070707070707)), VMX_IGS_PAT_MSR_RESERVED);
9908 for (unsigned i = 0; i < 8; i++)
9909 {
9910 uint8_t u8Val = (u64Val & 0xff);
9911 if ( u8Val != 0 /* UC */
9912 && u8Val != 1 /* WC */
9913 && u8Val != 4 /* WT */
9914 && u8Val != 5 /* WP */
9915 && u8Val != 6 /* WB */
9916 && u8Val != 7 /* UC- */)
9917 HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID);
9918 u64Val >>= 8;
9919 }
9920 }
9921
9922 /*
9923 * EFER MSR.
9924 */
9925 if (pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_LOAD_EFER_MSR)
9926 {
9927 Assert(g_fHmVmxSupportsVmcsEfer);
9928 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val);
9929 AssertRC(rc);
9930 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)),
9931 VMX_IGS_EFER_MSR_RESERVED); /* Bits 63:12, bit 9, bits 7:1 MBZ. */
9932 HMVMX_CHECK_BREAK(RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL( pVmcsInfo->u32EntryCtls
9933 & VMX_ENTRY_CTLS_IA32E_MODE_GUEST),
9934 VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH);
9935 /** @todo r=ramshankar: Unrestricted check here is probably wrong, see
9936 * iemVmxVmentryCheckGuestState(). */
9937 HMVMX_CHECK_BREAK( fUnrestrictedGuest
9938 || !(u64GuestCr0 & X86_CR0_PG)
9939 || RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL(u64Val & MSR_K6_EFER_LME),
9940 VMX_IGS_EFER_LMA_LME_MISMATCH);
9941 }
9942
9943 /*
9944 * Segment registers.
9945 */
9946 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
9947 || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID);
9948 if (!(u32Eflags & X86_EFL_VM))
9949 {
9950 /* CS */
9951 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID);
9952 HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED);
9953 HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED);
9954 HMVMX_CHECK_BREAK( (pCtx->cs.u32Limit & 0xfff) == 0xfff
9955 || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
9956 HMVMX_CHECK_BREAK( !(pCtx->cs.u32Limit & 0xfff00000)
9957 || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
9958 /* CS cannot be loaded with NULL in protected mode. */
9959 HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE);
9960 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID);
9961 if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
9962 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL);
9963 else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
9964 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH);
9965 else if (fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3)
9966 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID);
9967 else
9968 HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID);
9969
9970 /* SS */
9971 HMVMX_CHECK_BREAK( fUnrestrictedGuest
9972 || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL);
9973 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL);
9974 if ( !(pCtx->cr0 & X86_CR0_PE)
9975 || pCtx->cs.Attr.n.u4Type == 3)
9976 HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID);
9977
9978 if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
9979 {
9980 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID);
9981 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID);
9982 HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED);
9983 HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED);
9984 HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff
9985 || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
9986 HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000)
9987 || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
9988 }
9989
9990 /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxExportGuestSReg(). */
9991 if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
9992 {
9993 HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID);
9994 HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID);
9995 HMVMX_CHECK_BREAK( fUnrestrictedGuest
9996 || pCtx->ds.Attr.n.u4Type > 11
9997 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
9998 HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED);
9999 HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED);
10000 HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff
10001 || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
10002 HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000)
10003 || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
10004 HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
10005 || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID);
10006 }
10007 if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
10008 {
10009 HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID);
10010 HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID);
10011 HMVMX_CHECK_BREAK( fUnrestrictedGuest
10012 || pCtx->es.Attr.n.u4Type > 11
10013 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
10014 HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED);
10015 HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED);
10016 HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff
10017 || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
10018 HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000)
10019 || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
10020 HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
10021 || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID);
10022 }
10023 if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
10024 {
10025 HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID);
10026 HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID);
10027 HMVMX_CHECK_BREAK( fUnrestrictedGuest
10028 || pCtx->fs.Attr.n.u4Type > 11
10029 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL);
10030 HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED);
10031 HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED);
10032 HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff
10033 || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
10034 HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000)
10035 || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
10036 HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
10037 || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID);
10038 }
10039 if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
10040 {
10041 HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID);
10042 HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID);
10043 HMVMX_CHECK_BREAK( fUnrestrictedGuest
10044 || pCtx->gs.Attr.n.u4Type > 11
10045 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL);
10046 HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED);
10047 HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED);
10048 HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff
10049 || !(pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
10050 HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000)
10051 || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
10052 HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
10053 || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID);
10054 }
10055 /* 64-bit capable CPUs. */
10056 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
10057 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
10058 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
10059 || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
10060 HMVMX_CHECK_BREAK(!RT_HI_U32(pCtx->cs.u64Base), VMX_IGS_LONGMODE_CS_BASE_INVALID);
10061 HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ss.u64Base),
10062 VMX_IGS_LONGMODE_SS_BASE_INVALID);
10063 HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ds.u64Base),
10064 VMX_IGS_LONGMODE_DS_BASE_INVALID);
10065 HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->es.u64Base),
10066 VMX_IGS_LONGMODE_ES_BASE_INVALID);
10067 }
10068 else
10069 {
10070 /* V86 mode checks. */
10071 uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
10072 if (pVmcsInfo->pShared->RealMode.fRealOnV86Active)
10073 {
10074 u32CSAttr = 0xf3; u32SSAttr = 0xf3;
10075 u32DSAttr = 0xf3; u32ESAttr = 0xf3;
10076 u32FSAttr = 0xf3; u32GSAttr = 0xf3;
10077 }
10078 else
10079 {
10080 u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u;
10081 u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u;
10082 u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
10083 }
10084
10085 /* CS */
10086 HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID);
10087 HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID);
10088 HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID);
10089 /* SS */
10090 HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID);
10091 HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID);
10092 HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID);
10093 /* DS */
10094 HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID);
10095 HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID);
10096 HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID);
10097 /* ES */
10098 HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID);
10099 HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID);
10100 HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID);
10101 /* FS */
10102 HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID);
10103 HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID);
10104 HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID);
10105 /* GS */
10106 HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID);
10107 HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID);
10108 HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID);
10109 /* 64-bit capable CPUs. */
10110 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
10111 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
10112 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
10113 || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
10114 HMVMX_CHECK_BREAK(!RT_HI_U32(pCtx->cs.u64Base), VMX_IGS_LONGMODE_CS_BASE_INVALID);
10115 HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ss.u64Base),
10116 VMX_IGS_LONGMODE_SS_BASE_INVALID);
10117 HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ds.u64Base),
10118 VMX_IGS_LONGMODE_DS_BASE_INVALID);
10119 HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->es.u64Base),
10120 VMX_IGS_LONGMODE_ES_BASE_INVALID);
10121 }
10122
10123 /*
10124 * TR.
10125 */
10126 HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID);
10127 /* 64-bit capable CPUs. */
10128 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL);
10129 if (fLongModeGuest)
10130 HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */
10131 VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID);
10132 else
10133 HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */
10134 || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS.*/
10135 VMX_IGS_TR_ATTR_TYPE_INVALID);
10136 HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID);
10137 HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID);
10138 HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */
10139 HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff
10140 || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
10141 HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000)
10142 || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
10143 HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE);
10144
10145 /*
10146 * GDTR and IDTR (64-bit capable checks).
10147 */
10148 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_GDTR_BASE, &u64Val);
10149 AssertRC(rc);
10150 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL);
10151
10152 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_IDTR_BASE, &u64Val);
10153 AssertRC(rc);
10154 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL);
10155
10156 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val);
10157 AssertRC(rc);
10158 HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
10159
10160 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val);
10161 AssertRC(rc);
10162 HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
10163
10164 /*
10165 * Guest Non-Register State.
10166 */
10167 /* Activity State. */
10168 uint32_t u32ActivityState;
10169 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState);
10170 AssertRC(rc);
10171 HMVMX_CHECK_BREAK( !u32ActivityState
10172 || (u32ActivityState & RT_BF_GET(g_HmMsrs.u.vmx.u64Misc, VMX_BF_MISC_ACTIVITY_STATES)),
10173 VMX_IGS_ACTIVITY_STATE_INVALID);
10174 HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl)
10175 || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID);
10176
10177 if ( u32IntrState == VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS
10178 || u32IntrState == VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
10179 HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID);
10180
10181        /** @todo Activity state and injecting interrupts. Left as a todo since we
10182         *        currently don't use any activity state other than ACTIVE. */
10183
10184 HMVMX_CHECK_BREAK( !(pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_ENTRY_TO_SMM)
10185 || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID);
10186
10187 /* Guest interruptibility-state. */
10188 HMVMX_CHECK_BREAK(!(u32IntrState & 0xffffffe0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED);
10189 HMVMX_CHECK_BREAK((u32IntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS))
10190 != (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
10191 VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID);
10192 HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF)
10193 || !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI),
10194 VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID);
10195 if (VMX_ENTRY_INT_INFO_IS_EXT_INT(u32EntryInfo))
10196 {
10197 HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
10198 && !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
10199 VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID);
10200 }
10201 else if (VMX_ENTRY_INT_INFO_IS_XCPT_NMI(u32EntryInfo))
10202 {
10203 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS),
10204 VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID);
10205 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI),
10206 VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID);
10207 }
10208 /** @todo Assumes the processor is not in SMM. */
10209 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI),
10210 VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID);
10211 HMVMX_CHECK_BREAK( !(pVmcsInfo->u32EntryCtls & VMX_ENTRY_CTLS_ENTRY_TO_SMM)
10212 || (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI),
10213 VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID);
10214 if ( (pVmcsInfo->u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)
10215 && VMX_ENTRY_INT_INFO_IS_XCPT_NMI(u32EntryInfo))
10216 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI), VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID);
10217
10218 /* Pending debug exceptions. */
10219 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, &u64Val);
10220 AssertRC(rc);
10221 /* Bits 63:15, Bit 13, Bits 11:4 MBZ. */
10222 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED);
10223 u32Val = u64Val; /* For pending debug exceptions checks below. */
10224
10225 if ( (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
10226 || (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS)
10227 || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT)
10228 {
10229 if ( (u32Eflags & X86_EFL_TF)
10230 && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
10231 {
10232 /* Bit 14 is PendingDebug.BS. */
10233 HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET);
10234 }
10235 if ( !(u32Eflags & X86_EFL_TF)
10236 || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
10237 {
10238 /* Bit 14 is PendingDebug.BS. */
10239 HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR);
10240 }
10241 }
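             /*
              * For reference (paraphrasing the Intel SDM guest non-register state checks): when
              * interrupts are blocked by STI/MOV-SS or the activity state is HLT, the pending
              * debug exceptions BS bit must mirror the single-stepping condition, i.e. BS is set
              * if and only if EFLAGS.TF is 1 and IA32_DEBUGCTL.BTF is 0, which is exactly what
              * the two checks above verify.
              */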
10242
10243 /* VMCS link pointer. */
10244 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val);
10245 AssertRC(rc);
10246 if (u64Val != UINT64_C(0xffffffffffffffff))
10247 {
10248 HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED);
10249 /** @todo Bits beyond the processor's physical-address width MBZ. */
10250 /** @todo SMM checks. */
10251 Assert(pVmcsInfo->HCPhysShadowVmcs == u64Val);
10252 Assert(pVmcsInfo->pvShadowVmcs);
10253 VMXVMCSREVID VmcsRevId;
10254 VmcsRevId.u = *(uint32_t *)pVmcsInfo->pvShadowVmcs;
10255 HMVMX_CHECK_BREAK(VmcsRevId.n.u31RevisionId == RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_ID),
10256 VMX_IGS_VMCS_LINK_PTR_SHADOW_VMCS_ID_INVALID);
10257 HMVMX_CHECK_BREAK(VmcsRevId.n.fIsShadowVmcs == (uint32_t)!!(pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING),
10258 VMX_IGS_VMCS_LINK_PTR_NOT_SHADOW);
10259 }
10260
10261 /** @todo Checks on Guest Page-Directory-Pointer-Table Entries when guest is
10262 * not using nested paging? */
10263 if ( pVM->hmr0.s.fNestedPaging
10264 && !fLongModeGuest
10265 && CPUMIsGuestInPAEModeEx(pCtx))
10266 {
10267 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &u64Val);
10268 AssertRC(rc);
10269 HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
10270
10271 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &u64Val);
10272 AssertRC(rc);
10273 HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
10274
10275 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &u64Val);
10276 AssertRC(rc);
10277 HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
10278
10279 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &u64Val);
10280 AssertRC(rc);
10281 HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
10282 }
10283
10284 /* Shouldn't happen but distinguish it from AssertRCBreak() errors. */
10285 if (uError == VMX_IGS_ERROR)
10286 uError = VMX_IGS_REASON_NOT_FOUND;
10287 } while (0);
10288
10289 pVCpu->hm.s.u32HMError = uError;
10290 pVCpu->hm.s.vmx.LastError.u32GuestIntrState = u32IntrState;
10291 return uError;
10292
10293#undef HMVMX_ERROR_BREAK
10294#undef HMVMX_CHECK_BREAK
10295}
10296
10297
10298/**
10299 * Map the APIC-access page for virtualizing APIC accesses.
10300 *
10301 * This can cause longjumps to R3 due to the acquisition of the PGM lock. Hence,
10302 * this is not done as part of exporting guest state, see @bugref{8721}.
10303 *
10304 * @returns VBox status code.
10305 * @param pVCpu The cross context virtual CPU structure.
10306 */
10307static int hmR0VmxMapHCApicAccessPage(PVMCPUCC pVCpu)
10308{
10309 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
10310 uint64_t const u64MsrApicBase = APICGetBaseMsrNoCheck(pVCpu);
10311
10312 Assert(PDMHasApic(pVM));
10313 Assert(u64MsrApicBase);
10314
10315 RTGCPHYS const GCPhysApicBase = u64MsrApicBase & PAGE_BASE_GC_MASK;
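         /* Note: PAGE_BASE_GC_MASK drops the low 12 bits, i.e. the flag bits of IA32_APIC_BASE,
            leaving only the 4K-aligned guest-physical base of the APIC MMIO range. */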
10316    Log4Func(("Mapping HC APIC-access page at %#RGp\n", GCPhysApicBase));
10317
10318 /* Unalias the existing mapping. */
10319 int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
10320 AssertRCReturn(rc, rc);
10321
10322 /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */
10323 Assert(pVM->hmr0.s.vmx.HCPhysApicAccess != NIL_RTHCPHYS);
10324 rc = IOMR0MmioMapMmioHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hmr0.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
10325 AssertRCReturn(rc, rc);
10326
10327 /* Update the per-VCPU cache of the APIC base MSR. */
10328 pVCpu->hm.s.vmx.u64GstMsrApicBase = u64MsrApicBase;
10329 return VINF_SUCCESS;
10330}
10331
10332
10333/**
10334 * Worker function passed to RTMpOnSpecific() that is to be called on the target
10335 * CPU.
10336 *
10337 * @param idCpu The ID for the CPU the function is called on.
10338 * @param pvUser1 Null, not used.
10339 * @param pvUser2 Null, not used.
10340 */
10341static DECLCALLBACK(void) hmR0DispatchHostNmi(RTCPUID idCpu, void *pvUser1, void *pvUser2)
10342{
10343 RT_NOREF3(idCpu, pvUser1, pvUser2);
10344 VMXDispatchHostNmi();
10345}
10346
10347
10348/**
10349 * Dispatching an NMI on the host CPU that received it.
10350 *
10351 * @returns VBox status code.
10352 * @param pVCpu The cross context virtual CPU structure.
10353 * @param pVmcsInfo The VMCS info. object corresponding to the VMCS that was
10354 * executing when receiving the host NMI in VMX non-root
10355 * operation.
10356 */
10357static int hmR0VmxExitHostNmi(PVMCPUCC pVCpu, PCVMXVMCSINFO pVmcsInfo)
10358{
10359 RTCPUID const idCpu = pVmcsInfo->idHostCpuExec;
10360 Assert(idCpu != NIL_RTCPUID);
10361
10362 /*
10363 * We don't want to delay dispatching the NMI any more than we have to. However,
10364 * we have already chosen -not- to dispatch NMIs when interrupts were still disabled
10365 * after executing guest or nested-guest code for the following reasons:
10366 *
10367 *   - We would need to perform VMREADs with interrupts disabled, which is orders of
10368 * magnitude worse when we run as a nested hypervisor without VMCS shadowing
10369 * supported by the host hypervisor.
10370 *
10371 * - It affects the common VM-exit scenario and keeps interrupts disabled for a
10372 * longer period of time just for handling an edge case like host NMIs which do
10373 * not occur nearly as frequently as other VM-exits.
10374 *
10375 * Let's cover the most likely scenario first. Check if we are on the target CPU
10376 * and dispatch the NMI right away. This should be much faster than calling into
10377 * RTMpOnSpecific() machinery.
10378 */
10379 bool fDispatched = false;
10380 RTCCUINTREG const fEFlags = ASMIntDisableFlags();
10381 if (idCpu == RTMpCpuId())
10382 {
10383 VMXDispatchHostNmi();
10384 fDispatched = true;
10385 }
10386 ASMSetFlags(fEFlags);
10387 if (fDispatched)
10388 {
10389 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
10390 return VINF_SUCCESS;
10391 }
10392
10393 /*
10394 * RTMpOnSpecific() waits until the worker function has run on the target CPU. So
10395 * there should be no race or recursion even if we are unlucky enough to be preempted
10396 * (to the target CPU) without dispatching the host NMI above.
10397 */
10398 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGCIpi);
10399 return RTMpOnSpecific(idCpu, &hmR0DispatchHostNmi, NULL /* pvUser1 */, NULL /* pvUser2 */);
10400}
10401
10402
10403#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
10404/**
10405 * Merges the guest with the nested-guest MSR bitmap in preparation of executing the
10406 * nested-guest using hardware-assisted VMX.
10407 *
10408 * @param pVCpu The cross context virtual CPU structure.
10409 * @param pVmcsInfoNstGst The nested-guest VMCS info. object.
10410 * @param pVmcsInfoGst The guest VMCS info. object.
10411 */
10412static void hmR0VmxMergeMsrBitmapNested(PCVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfoNstGst, PCVMXVMCSINFO pVmcsInfoGst)
10413{
10414 uint32_t const cbMsrBitmap = X86_PAGE_4K_SIZE;
10415 uint64_t *pu64MsrBitmap = (uint64_t *)pVmcsInfoNstGst->pvMsrBitmap;
10416 Assert(pu64MsrBitmap);
10417
10418 /*
10419 * We merge the guest MSR bitmap with the nested-guest MSR bitmap such that any
10420 * MSR that is intercepted by the guest is also intercepted while executing the
10421 * nested-guest using hardware-assisted VMX.
10422 *
10423 * Note! If the nested-guest is not using an MSR bitmap, every MSR must cause a
10424 * nested-guest VM-exit even if the outer guest is not intercepting some
10425 * MSRs. We cannot assume the caller has initialized the nested-guest
10426 * MSR bitmap in this case.
10427 *
10428 * The nested hypervisor may also switch whether it uses MSR bitmaps for
10429 *       each of its VM-entries, hence initializing it once per-VM while setting
10430 * up the nested-guest VMCS is not sufficient.
10431 */
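         /*
          * Illustration with hypothetical values: a set bit means "intercept this MSR access",
          * so OR-ing the two bitmaps intercepts an MSR whenever either party wants it, e.g.:
          *      guest fragment:        0x0000000000000021
          *      nested-guest fragment: 0x0000000000000030
          *      merged fragment:       0x0000000000000031
          */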
10432 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
10433 if (pVmcsNstGst->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
10434 {
10435 uint64_t const *pu64MsrBitmapNstGst = (uint64_t const *)&pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap[0];
10436 uint64_t const *pu64MsrBitmapGst = (uint64_t const *)pVmcsInfoGst->pvMsrBitmap;
10437 Assert(pu64MsrBitmapNstGst);
10438 Assert(pu64MsrBitmapGst);
10439
10440 /** @todo Detect and use EVEX.POR? */
10441 uint32_t const cFrags = cbMsrBitmap / sizeof(uint64_t);
10442 for (uint32_t i = 0; i < cFrags; i++)
10443 pu64MsrBitmap[i] = pu64MsrBitmapNstGst[i] | pu64MsrBitmapGst[i];
10444 }
10445 else
10446 ASMMemFill32(pu64MsrBitmap, cbMsrBitmap, UINT32_C(0xffffffff));
10447}
10448
10449
10450/**
10451 * Merges the guest VMCS into the nested-guest VMCS controls in preparation of
10452 * hardware-assisted VMX execution of the nested-guest.
10453 *
10454 * For a guest, we don't modify these controls once we set up the VMCS and hence
10455 * this function is never called.
10456 *
10457 * For nested-guests, since the nested hypervisor provides these controls on every
10458 * nested-guest VM-entry and could potentially change them every time, we need to
10459 * merge them before every nested-guest VM-entry.
10460 *
10461 * @returns VBox status code.
10462 * @param pVCpu The cross context virtual CPU structure.
10463 */
10464static int hmR0VmxMergeVmcsNested(PVMCPUCC pVCpu)
10465{
10466 PVMCC const pVM = pVCpu->CTX_SUFF(pVM);
10467 PCVMXVMCSINFO const pVmcsInfoGst = &pVCpu->hmr0.s.vmx.VmcsInfo;
10468 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
10469
10470 /*
10471 * Merge the controls with the requirements of the guest VMCS.
10472 *
10473 * We do not need to validate the nested-guest VMX features specified in the nested-guest
10474 * VMCS with the features supported by the physical CPU as it's already done by the
10475 * VMLAUNCH/VMRESUME instruction emulation.
10476 *
10477 * This is because the VMX features exposed by CPUM (through CPUID/MSRs) to the guest are
10478 * derived from the VMX features supported by the physical CPU.
10479 */
10480
10481 /* Pin-based VM-execution controls. */
10482 uint32_t const u32PinCtls = pVmcsNstGst->u32PinCtls | pVmcsInfoGst->u32PinCtls;
10483
10484 /* Processor-based VM-execution controls. */
10485 uint32_t u32ProcCtls = (pVmcsNstGst->u32ProcCtls & ~VMX_PROC_CTLS_USE_IO_BITMAPS)
10486 | (pVmcsInfoGst->u32ProcCtls & ~( VMX_PROC_CTLS_INT_WINDOW_EXIT
10487 | VMX_PROC_CTLS_NMI_WINDOW_EXIT
10488 | VMX_PROC_CTLS_MOV_DR_EXIT
10489 | VMX_PROC_CTLS_USE_TPR_SHADOW
10490 | VMX_PROC_CTLS_MONITOR_TRAP_FLAG));
10491
10492 /* Secondary processor-based VM-execution controls. */
10493 uint32_t const u32ProcCtls2 = (pVmcsNstGst->u32ProcCtls2 & ~VMX_PROC_CTLS2_VPID)
10494 | (pVmcsInfoGst->u32ProcCtls2 & ~( VMX_PROC_CTLS2_VIRT_APIC_ACCESS
10495 | VMX_PROC_CTLS2_INVPCID
10496 | VMX_PROC_CTLS2_VMCS_SHADOWING
10497 | VMX_PROC_CTLS2_RDTSCP
10498 | VMX_PROC_CTLS2_XSAVES_XRSTORS
10499 | VMX_PROC_CTLS2_APIC_REG_VIRT
10500 | VMX_PROC_CTLS2_VIRT_INT_DELIVERY
10501 | VMX_PROC_CTLS2_VMFUNC));
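         /*
          * A sketch of the merge semantics above: a control ends up set if either the guest VMCS
          * or the nested hypervisor requested it, except for the bits explicitly masked out above
          * (e.g. MOV-DR exiting, TPR shadowing, MTF on the guest side; I/O bitmaps on the
          * nested-guest side), which are taken from one side only.
          */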
10502
10503 /*
10504 * VM-entry controls:
10505 * These controls contain state that depends on the nested-guest state (primarily
10506 * EFER MSR) and is thus not constant between VMLAUNCH/VMRESUME and the nested-guest
10507 * VM-exit. Although the nested hypervisor cannot change it, we need to in order to
10508 * properly continue executing the nested-guest if the EFER MSR changes but does not
10509 * cause a nested-guest VM-exit.
10510 *
10511 * VM-exit controls:
10512 * These controls specify the host state on return. We cannot use the controls from
10513 * the nested hypervisor state as-is, as they would contain the guest state rather than
10514 * the host state. Since the host state is subject to change (e.g. preemption, trips
10515 * to ring-3, longjmp and rescheduling to a different host CPU) they are not constant
10516 * through VMLAUNCH/VMRESUME and the nested-guest VM-exit.
10517 *
10518 * VM-entry MSR-load:
10519 * The guest MSRs from the VM-entry MSR-load area are already loaded into the guest-CPU
10520 * context by the VMLAUNCH/VMRESUME instruction emulation.
10521 *
10522 * VM-exit MSR-store:
10523 * The VM-exit emulation will take care of populating the MSRs from the guest-CPU context
10524 * back into the VM-exit MSR-store area.
10525 *
10526 * VM-exit MSR-load areas:
10527 * This must contain the real host MSRs with hardware-assisted VMX execution. Hence, we
10528 * can entirely ignore what the nested hypervisor wants to load here.
10529 */
10530
10531 /*
10532 * Exception bitmap.
10533 *
10534 * We could remove #UD from the guest bitmap and merge it with the nested-guest bitmap
10535 * here (and avoid doing anything while exporting nested-guest state), but to keep the
10536 * code more flexible if intercepting exceptions becomes more dynamic in the future, we do
10537 * it as part of exporting the nested-guest state.
10538 */
10539 uint32_t const u32XcptBitmap = pVmcsNstGst->u32XcptBitmap | pVmcsInfoGst->u32XcptBitmap;
10540
10541 /*
10542 * CR0/CR4 guest/host mask.
10543 *
10544 * Modifications by the nested-guest to CR0/CR4 bits owned by the host and the guest must
10545 * cause VM-exits, so we need to merge them here.
10546 */
10547 uint64_t const u64Cr0Mask = pVmcsNstGst->u64Cr0Mask.u | pVmcsInfoGst->u64Cr0Mask;
10548 uint64_t const u64Cr4Mask = pVmcsNstGst->u64Cr4Mask.u | pVmcsInfoGst->u64Cr4Mask;
10549
10550 /*
10551 * Page-fault error-code mask and match.
10552 *
10553 * Although we require unrestricted guest execution (and thereby nested-paging) for
10554 * hardware-assisted VMX execution of nested-guests and thus the outer guest doesn't
10555 * normally intercept #PFs, it might intercept them for debugging purposes.
10556 *
10557 * If the outer guest is not intercepting #PFs, we can use the nested-guest #PF filters.
10558 * If the outer guest is intercepting #PFs, we must intercept all #PFs.
10559 */
10560 uint32_t u32XcptPFMask;
10561 uint32_t u32XcptPFMatch;
10562 if (!(pVmcsInfoGst->u32XcptBitmap & RT_BIT(X86_XCPT_PF)))
10563 {
10564 u32XcptPFMask = pVmcsNstGst->u32XcptPFMask;
10565 u32XcptPFMatch = pVmcsNstGst->u32XcptPFMatch;
10566 }
10567 else
10568 {
10569 u32XcptPFMask = 0;
10570 u32XcptPFMatch = 0;
10571 }
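         /*
          * For reference (paraphrasing the Intel SDM): with the #PF bit set in the exception
          * bitmap, a #PF causes a VM-exit iff (error-code & PFMask) == PFMatch; with the bit
          * clear the condition is inverted. So PFMask=0 and PFMatch=0 together with an
          * intercepted #PF (the else branch above) makes every #PF cause a VM-exit.
          */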
10572
10573 /*
10574 * Pause-Loop exiting.
10575 */
10576 /** @todo r=bird: given that both pVM->hm.s.vmx.cPleGapTicks and
10577 * pVM->hm.s.vmx.cPleWindowTicks defaults to zero, I cannot see how
10578 * this will work... */
10579 uint32_t const cPleGapTicks = RT_MIN(pVM->hm.s.vmx.cPleGapTicks, pVmcsNstGst->u32PleGap);
10580 uint32_t const cPleWindowTicks = RT_MIN(pVM->hm.s.vmx.cPleWindowTicks, pVmcsNstGst->u32PleWindow);
10581
10582 /*
10583 * Pending debug exceptions.
10584 * Currently just copy whatever the nested-guest provides us.
10585 */
10586 uint64_t const uPendingDbgXcpts = pVmcsNstGst->u64GuestPendingDbgXcpts.u;
10587
10588 /*
10589 * I/O Bitmap.
10590 *
10591 * We do not use the I/O bitmap that may be provided by the nested hypervisor as we always
10592 * intercept all I/O port accesses.
10593 */
10594 Assert(u32ProcCtls & VMX_PROC_CTLS_UNCOND_IO_EXIT);
10595 Assert(!(u32ProcCtls & VMX_PROC_CTLS_USE_IO_BITMAPS));
10596
10597 /*
10598 * VMCS shadowing.
10599 *
10600 * We do not yet expose VMCS shadowing to the guest and thus VMCS shadowing should not be
10601 * enabled while executing the nested-guest.
10602 */
10603 Assert(!(u32ProcCtls2 & VMX_PROC_CTLS2_VMCS_SHADOWING));
10604
10605 /*
10606 * APIC-access page.
10607 */
10608 RTHCPHYS HCPhysApicAccess;
10609 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
10610 {
10611 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS);
10612 RTGCPHYS const GCPhysApicAccess = pVmcsNstGst->u64AddrApicAccess.u;
10613
10614 /** @todo NSTVMX: This is not really correct but currently is required to make
10615     *        things work. We need to re-enable the page handler when we fall back to
10616 * IEM execution of the nested-guest! */
10617 PGMHandlerPhysicalPageTempOff(pVM, GCPhysApicAccess, GCPhysApicAccess);
10618
10619 void *pvPage;
10620 PGMPAGEMAPLOCK PgLockApicAccess;
10621 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysApicAccess, &pvPage, &PgLockApicAccess);
10622 if (RT_SUCCESS(rc))
10623 {
10624 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysApicAccess, &HCPhysApicAccess);
10625 AssertMsgRCReturn(rc, ("Failed to get host-physical address for APIC-access page at %#RGp\n", GCPhysApicAccess), rc);
10626
10627 /** @todo Handle proper releasing of page-mapping lock later. */
10628 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockApicAccess);
10629 }
10630 else
10631 return rc;
10632 }
10633 else
10634 HCPhysApicAccess = 0;
10635
10636 /*
10637 * Virtual-APIC page and TPR threshold.
10638 */
10639 RTHCPHYS HCPhysVirtApic;
10640 uint32_t u32TprThreshold;
10641 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
10642 {
10643 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW);
10644 RTGCPHYS const GCPhysVirtApic = pVmcsNstGst->u64AddrVirtApic.u;
10645
10646 void *pvPage;
10647 PGMPAGEMAPLOCK PgLockVirtApic;
10648 int rc = PGMPhysGCPhys2CCPtr(pVM, GCPhysVirtApic, &pvPage, &PgLockVirtApic);
10649 if (RT_SUCCESS(rc))
10650 {
10651 rc = PGMPhysGCPhys2HCPhys(pVM, GCPhysVirtApic, &HCPhysVirtApic);
10652 AssertMsgRCReturn(rc, ("Failed to get host-physical address for virtual-APIC page at %#RGp\n", GCPhysVirtApic), rc);
10653
10654 /** @todo Handle proper releasing of page-mapping lock later. */
10655 PGMPhysReleasePageMappingLock(pVCpu->CTX_SUFF(pVM), &PgLockVirtApic);
10656 }
10657 else
10658 return rc;
10659
10660 u32TprThreshold = pVmcsNstGst->u32TprThreshold;
10661 }
10662 else
10663 {
10664 HCPhysVirtApic = 0;
10665 u32TprThreshold = 0;
10666
10667 /*
10668     * We must make sure CR8 reads/writes cause VM-exits when TPR shadowing is not
10669 * used by the nested hypervisor. Preventing MMIO accesses to the physical APIC will
10670 * be taken care of by EPT/shadow paging.
10671 */
10672 if (pVM->hmr0.s.fAllow64BitGuests)
10673 u32ProcCtls |= VMX_PROC_CTLS_CR8_STORE_EXIT
10674 | VMX_PROC_CTLS_CR8_LOAD_EXIT;
10675 }
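         /*
          * For reference: in 64-bit mode CR8 is an architectural alias for the local APIC TPR,
          * which is why forcing CR8 load/store exits (only relevant when 64-bit guests are
          * allowed) is sufficient here; 32-bit TPR accesses go through the APIC MMIO page and
          * are caught as noted above.
          */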
10676
10677 /*
10678 * Validate basic assumptions.
10679 */
10680 PVMXVMCSINFO pVmcsInfoNstGst = &pVCpu->hmr0.s.vmx.VmcsInfoNstGst;
10681 Assert(pVM->hmr0.s.vmx.fUnrestrictedGuest);
10682 Assert(g_HmMsrs.u.vmx.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS);
10683 Assert(hmGetVmxActiveVmcsInfo(pVCpu) == pVmcsInfoNstGst);
10684
10685 /*
10686 * Commit it to the nested-guest VMCS.
10687 */
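         /*
          * Only fields that actually changed are written below; skipping redundant VMWRITEs is
          * presumably worthwhile in particular when we ourselves run as a nested hypervisor
          * without VMCS shadowing, where each VMWRITE traps to the host hypervisor.
          */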
10688 int rc = VINF_SUCCESS;
10689 if (pVmcsInfoNstGst->u32PinCtls != u32PinCtls)
10690 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, u32PinCtls);
10691 if (pVmcsInfoNstGst->u32ProcCtls != u32ProcCtls)
10692 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, u32ProcCtls);
10693 if (pVmcsInfoNstGst->u32ProcCtls2 != u32ProcCtls2)
10694 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, u32ProcCtls2);
10695 if (pVmcsInfoNstGst->u32XcptBitmap != u32XcptBitmap)
10696 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
10697 if (pVmcsInfoNstGst->u64Cr0Mask != u64Cr0Mask)
10698 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, u64Cr0Mask);
10699 if (pVmcsInfoNstGst->u64Cr4Mask != u64Cr4Mask)
10700 rc |= VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, u64Cr4Mask);
10701 if (pVmcsInfoNstGst->u32XcptPFMask != u32XcptPFMask)
10702 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, u32XcptPFMask);
10703 if (pVmcsInfoNstGst->u32XcptPFMatch != u32XcptPFMatch)
10704 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, u32XcptPFMatch);
10705 if ( !(u32ProcCtls & VMX_PROC_CTLS_PAUSE_EXIT)
10706 && (u32ProcCtls2 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
10707 {
10708 Assert(g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT);
10709 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, cPleGapTicks);
10710 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, cPleWindowTicks);
10711 }
10712 if (u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
10713 {
10714 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
10715 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, HCPhysVirtApic);
10716 }
10717 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
10718 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, HCPhysApicAccess);
10719 rc |= VMXWriteVmcsNw(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, uPendingDbgXcpts);
10720 AssertRC(rc);
10721
10722 /*
10723 * Update the nested-guest VMCS cache.
10724 */
10725 pVmcsInfoNstGst->u32PinCtls = u32PinCtls;
10726 pVmcsInfoNstGst->u32ProcCtls = u32ProcCtls;
10727 pVmcsInfoNstGst->u32ProcCtls2 = u32ProcCtls2;
10728 pVmcsInfoNstGst->u32XcptBitmap = u32XcptBitmap;
10729 pVmcsInfoNstGst->u64Cr0Mask = u64Cr0Mask;
10730 pVmcsInfoNstGst->u64Cr4Mask = u64Cr4Mask;
10731 pVmcsInfoNstGst->u32XcptPFMask = u32XcptPFMask;
10732 pVmcsInfoNstGst->u32XcptPFMatch = u32XcptPFMatch;
10733 pVmcsInfoNstGst->HCPhysVirtApic = HCPhysVirtApic;
10734
10735 /*
10736 * We need to flush the TLB if we are switching the APIC-access page address.
10737 * See Intel spec. 28.3.3.4 "Guidelines for Use of the INVEPT Instruction".
10738 */
10739 if (u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
10740 pVCpu->hm.s.vmx.fSwitchedNstGstFlushTlb = true;
10741
10742 /*
10743 * MSR bitmap.
10744 *
10745 * The MSR bitmap address has already been initialized while setting up the nested-guest
10746 * VMCS, here we need to merge the MSR bitmaps.
10747 */
10748 if (u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
10749 hmR0VmxMergeMsrBitmapNested(pVCpu, pVmcsInfoNstGst, pVmcsInfoGst);
10750
10751 return VINF_SUCCESS;
10752}
10753#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
10754
10755
10756/**
10757 * Does the preparations before executing guest code in VT-x.
10758 *
10759 * This may cause longjmps to ring-3 and may even result in rescheduling to the
10760 * recompiler/IEM. We must be cautious about committing guest-state information
10761 * into the VMCS here, since we cannot assume we will assuredly end up executing
10762 * the guest in VT-x mode.
10763 *
10764 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
10765 * the common-state (TRPM/forceflags), we must undo those changes so that the
10766 * recompiler/IEM can (and should) use them when it resumes guest execution.
10767 * Otherwise such operations must be done when we can no longer exit to ring-3.
10768 *
10769 * @returns Strict VBox status code (i.e. informational status codes too).
10770 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
10771 * have been disabled.
10772 * @retval VINF_VMX_VMEXIT if a nested-guest VM-exit occurs (e.g., while evaluating
10773 * pending events).
10774 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
10775 * double-fault into the guest.
10776 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
10777 * dispatched directly.
10778 * @retval VINF_* scheduling changes, we have to go back to ring-3.
10779 *
10780 * @param pVCpu The cross context virtual CPU structure.
10781 * @param pVmxTransient The VMX-transient structure.
10782 * @param fStepping Whether we are single-stepping the guest in the
10783 * hypervisor debugger. Makes us ignore some of the reasons
10784 * for returning to ring-3, and return VINF_EM_DBG_STEPPED
10785 * if event dispatching took place.
10786 */
10787static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping)
10788{
10789 Assert(VMMRZCallRing3IsEnabled(pVCpu));
10790
10791 Log4Func(("fIsNested=%RTbool fStepping=%RTbool\n", pVmxTransient->fIsNestedGuest, fStepping));
10792
10793#ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM
10794 if (pVmxTransient->fIsNestedGuest)
10795 {
10796 RT_NOREF2(pVCpu, fStepping);
10797 Log2Func(("Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n"));
10798 return VINF_EM_RESCHEDULE_REM;
10799 }
10800#endif
10801
10802 /*
10803 * Check and process force flag actions, some of which might require us to go back to ring-3.
10804 */
10805 VBOXSTRICTRC rcStrict = hmR0VmxCheckForceFlags(pVCpu, pVmxTransient, fStepping);
10806 if (rcStrict == VINF_SUCCESS)
10807 {
10808 /* FFs don't get set all the time. */
10809#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
10810 if ( pVmxTransient->fIsNestedGuest
10811 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
10812 {
10813 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
10814 return VINF_VMX_VMEXIT;
10815 }
10816#endif
10817 }
10818 else
10819 return rcStrict;
10820
10821 /*
10822 * Virtualize memory-mapped accesses to the physical APIC (may take locks).
10823 */
10824 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
10825 if ( !pVCpu->hm.s.vmx.u64GstMsrApicBase
10826 && (g_HmMsrs.u.vmx.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS)
10827 && PDMHasApic(pVM))
10828 {
10829 int rc = hmR0VmxMapHCApicAccessPage(pVCpu);
10830 AssertRCReturn(rc, rc);
10831 }
10832
10833#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
10834 /*
10835 * Merge guest VMCS controls with the nested-guest VMCS controls.
10836 *
10837 * Even if we have not executed the guest prior to this (e.g. when resuming from a
10838 * saved state), we should be okay with merging controls as we initialize the
10839     * guest VMCS controls as part of the VM setup phase.
10840 */
10841 if ( pVmxTransient->fIsNestedGuest
10842 && !pVCpu->hm.s.vmx.fMergedNstGstCtls)
10843 {
10844 int rc = hmR0VmxMergeVmcsNested(pVCpu);
10845 AssertRCReturn(rc, rc);
10846 pVCpu->hm.s.vmx.fMergedNstGstCtls = true;
10847 }
10848#endif
10849
10850 /*
10851 * Evaluate events to be injected into the guest.
10852 *
10853 * Events in TRPM can be injected without inspecting the guest state.
10854 * If any new events (interrupts/NMI) are pending currently, we try to set up the
10855     * guest to cause a VM-exit the next time it is ready to receive the event.
10856 */
10857 if (TRPMHasTrap(pVCpu))
10858 hmR0VmxTrpmTrapToPendingEvent(pVCpu);
10859
10860 uint32_t fIntrState;
10861 rcStrict = hmR0VmxEvaluatePendingEvent(pVCpu, pVmxTransient, &fIntrState);
10862
10863#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
10864 /*
10865     * While evaluating pending events, if something failed (unlikely) or if we were
10866 * preparing to run a nested-guest but performed a nested-guest VM-exit, we should bail.
10867 */
10868 if (rcStrict != VINF_SUCCESS)
10869 return rcStrict;
10870 if ( pVmxTransient->fIsNestedGuest
10871 && !CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
10872 {
10873 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
10874 return VINF_VMX_VMEXIT;
10875 }
10876#else
10877 Assert(rcStrict == VINF_SUCCESS);
10878#endif
10879
10880 /*
10881 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus
10882 * needs to be done with longjmps or interrupts + preemption enabled. Event injection might
10883 * also result in triple-faulting the VM.
10884 *
10885 * With nested-guests, the above does not apply since unrestricted guest execution is a
10886 * requirement. Regardless, we do this here to avoid duplicating code elsewhere.
10887 */
10888 rcStrict = hmR0VmxInjectPendingEvent(pVCpu, pVmxTransient, fIntrState, fStepping);
10889 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
10890 { /* likely */ }
10891 else
10892 {
10893 AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping),
10894 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
10895 return rcStrict;
10896 }
10897
10898 /*
10899 * A longjump might result in importing CR3 even for VM-exits that don't necessarily
10900 * import CR3 themselves. We will need to update them here, as even as late as the above
10901 * hmR0VmxInjectPendingEvent() call may lazily import guest-CPU state on demand causing
10902 * the below force flags to be set.
10903 */
10904 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
10905 {
10906 Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3));
10907 int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu), false /* fPdpesMapped */);
10908 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
10909 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
10910 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
10911 }
10912
10913#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
10914 /* Paranoia. */
10915 Assert(!pVmxTransient->fIsNestedGuest || CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
10916#endif
10917
10918 /*
10919 * No longjmps to ring-3 from this point on!!!
10920 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
10921 * This also disables flushing of the R0-logger instance (if any).
10922 */
10923 VMMRZCallRing3Disable(pVCpu);
10924
10925 /*
10926 * Export the guest state bits.
10927 *
10928 * We cannot perform longjmps while loading the guest state because we do not preserve the
10929 * host/guest state (although the VMCS will be preserved) across longjmps which can cause
10930 * CPU migration.
10931 *
10932 * If we are injecting events to a real-on-v86 mode guest, we would have updated RIP and some segment
10933 * registers. Hence, exporting of the guest state needs to be done -after- injection of events.
10934 */
10935 rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu, pVmxTransient);
10936 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
10937 { /* likely */ }
10938 else
10939 {
10940 VMMRZCallRing3Enable(pVCpu);
10941 return rcStrict;
10942 }
10943
10944 /*
10945 * We disable interrupts so that we don't miss any interrupts that would flag preemption
10946 * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with
10947 * preemption disabled for a while. Since this is purely to aid the
10948 * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and
10949     * disable interrupts on NT.
10950 *
10951     * We need to check for force-flags that could've possibly been altered since we last
10952 * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section,
10953 * see @bugref{6398}).
10954 *
10955 * We also check a couple of other force-flags as a last opportunity to get the EMT back
10956 * to ring-3 before executing guest code.
10957 */
10958 pVmxTransient->fEFlags = ASMIntDisableFlags();
10959
10960 if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
10961 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
10962 || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */
10963 && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
10964 {
10965 if (!RTThreadPreemptIsPending(NIL_RTTHREAD))
10966 {
10967#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
10968 /*
10969             * If we are executing a nested-guest, make sure that we intercept subsequent
10970 * events. The one we are injecting might be part of VM-entry. This is mainly to keep
10971 * the VM-exit instruction emulation happy.
10972 */
10973 if (pVmxTransient->fIsNestedGuest)
10974 CPUMSetGuestVmxInterceptEvents(&pVCpu->cpum.GstCtx, true);
10975#endif
10976
10977 /*
10978 * We've injected any pending events. This is really the point of no return (to ring-3).
10979 *
10980 * Note! The caller expects to continue with interrupts & longjmps disabled on successful
10981 * returns from this function, so do -not- enable them here.
10982 */
10983 pVCpu->hm.s.Event.fPending = false;
10984 return VINF_SUCCESS;
10985 }
10986
10987 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq);
10988 rcStrict = VINF_EM_RAW_INTERRUPT;
10989 }
10990 else
10991 {
10992 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
10993 rcStrict = VINF_EM_RAW_TO_R3;
10994 }
10995
10996 ASMSetFlags(pVmxTransient->fEFlags);
10997 VMMRZCallRing3Enable(pVCpu);
10998
10999 return rcStrict;
11000}
11001
11002
11003/**
11004 * Final preparations before executing guest code using hardware-assisted VMX.
11005 *
11006 * We can no longer get preempted to a different host CPU and there are no returns
11007 * to ring-3. We ignore any errors that may happen from this point (e.g. VMWRITE
11008 * failures), this function is not intended to fail sans unrecoverable hardware
11009 * failures); this function is not intended to fail barring unrecoverable hardware
11010 *
11011 * @param pVCpu The cross context virtual CPU structure.
11012 * @param pVmxTransient The VMX-transient structure.
11013 *
11014 * @remarks Called with preemption disabled.
11015 * @remarks No-long-jump zone!!!
11016 */
11017static void hmR0VmxPreRunGuestCommitted(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
11018{
11019 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
11020 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
11021 Assert(!pVCpu->hm.s.Event.fPending);
11022
11023 /*
11024 * Indicate start of guest execution and where poking EMT out of guest-context is recognized.
11025 */
11026 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
11027 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
11028
11029 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
11030 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
11031 PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu();
11032 RTCPUID const idCurrentCpu = pHostCpu->idCpu;
11033
11034 if (!CPUMIsGuestFPUStateActive(pVCpu))
11035 {
11036 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x);
11037 if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED)
11038 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT;
11039 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x);
11040 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu);
11041 }
11042
11043 /*
11044 * Re-export the host state bits as we may've been preempted (only happens when
11045 * thread-context hooks are used or when the VM start function changes) or if
11046 * the host CR0 is modified while loading the guest FPU state above.
11047 *
11048 * The 64-on-32 switcher saves the (64-bit) host state into the VMCS and if we
11049 * changed the switcher back to 32-bit, we *must* save the 32-bit host state here,
11050 * see @bugref{8432}.
11051 *
11052 * This may also happen when switching to/from a nested-guest VMCS without leaving
11053 * ring-0.
11054 */
11055 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)
11056 {
11057 hmR0VmxExportHostState(pVCpu);
11058 STAM_COUNTER_INC(&pVCpu->hm.s.StatExportHostState);
11059 }
11060 Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT));
11061
11062 /*
11063 * Export the state shared between host and guest (FPU, debug, lazy MSRs).
11064 */
11065 if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)
11066 hmR0VmxExportSharedState(pVCpu, pVmxTransient);
11067 AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged));
11068
11069 /*
11070 * Store status of the shared guest/host debug state at the time of VM-entry.
11071 */
11072 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
11073 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
11074
11075 /*
11076 * Always cache the TPR-shadow if the virtual-APIC page exists, thereby skipping
11077 * more than one conditional check. The post-run side of our code shall determine
11078     * if it needs to sync the virtual APIC TPR with the TPR-shadow.
11079 */
11080 if (pVmcsInfo->pbVirtApic)
11081 pVmxTransient->u8GuestTpr = pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR];
11082
11083 /*
11084 * Update the host MSRs values in the VM-exit MSR-load area.
11085 */
11086 if (!pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs)
11087 {
11088 if (pVmcsInfo->cExitMsrLoad > 0)
11089 hmR0VmxUpdateAutoLoadHostMsrs(pVCpu, pVmcsInfo);
11090 pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs = true;
11091 }
11092
11093 /*
11094 * Evaluate if we need to intercept guest RDTSC/P accesses. Set up the
11095 * VMX-preemption timer based on the next virtual sync clock deadline.
11096 */
11097 if ( !pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer
11098 || idCurrentCpu != pVCpu->hmr0.s.idLastCpu)
11099 {
11100 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pVmxTransient, idCurrentCpu);
11101 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = true;
11102 }
11103
11104 /* Record statistics of how often we use TSC offsetting as opposed to intercepting RDTSC/P. */
11105 bool const fIsRdtscIntercepted = RT_BOOL(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT);
11106 if (!fIsRdtscIntercepted)
11107 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
11108 else
11109 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
11110
11111 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
11112 hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu, pVmcsInfo); /* Invalidate the appropriate guest entries from the TLB. */
11113 Assert(idCurrentCpu == pVCpu->hmr0.s.idLastCpu);
11114 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Record the error reporting info. with the current host CPU. */
11115 pVmcsInfo->idHostCpuState = idCurrentCpu; /* Record the CPU for which the host-state has been exported. */
11116 pVmcsInfo->idHostCpuExec = idCurrentCpu; /* Record the CPU on which we shall execute. */
11117
11118 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
11119
11120 TMNotifyStartOfExecution(pVM, pVCpu); /* Notify TM to resume its clocks when TSC is tied to execution,
11121 as we're about to start executing the guest. */
11122
11123 /*
11124 * Load the guest TSC_AUX MSR when we are not intercepting RDTSCP.
11125 *
11126 * This is done this late as updating the TSC offsetting/preemption timer above
11127 * figures out if we can skip intercepting RDTSCP by calculating the number of
11128 * host CPU ticks till the next virtual sync deadline (for the dynamic case).
11129 */
11130 if ( (pVmcsInfo->u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP)
11131 && !fIsRdtscIntercepted)
11132 {
11133 hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_TSC_AUX);
11134
11135 /* NB: Because we call hmR0VmxAddAutoLoadStoreMsr with fUpdateHostMsr=true,
11136 it's safe even after hmR0VmxUpdateAutoLoadHostMsrs has already been done. */
11137 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu),
11138 true /* fSetReadWrite */, true /* fUpdateHostMsr */);
11139 AssertRC(rc);
11140 Assert(!pVmxTransient->fRemoveTscAuxMsr);
11141 pVmxTransient->fRemoveTscAuxMsr = true;
11142 }
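         /*
          * Rationale (a sketch): when RDTSCP runs unintercepted the CPU reads IA32_TSC_AUX
          * directly, so the guest value must be loaded on VM-entry and the host value restored
          * on VM-exit; the auto-load/store MSR area entry added above handles both directions.
          */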
11143
11144#ifdef VBOX_STRICT
11145 Assert(pVCpu->hmr0.s.vmx.fUpdatedHostAutoMsrs);
11146 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
11147 hmR0VmxCheckHostEferMsr(pVmcsInfo);
11148 AssertRC(hmR0VmxCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest));
11149#endif
11150
11151#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
11152 /** @todo r=ramshankar: We can now probably use iemVmxVmentryCheckGuestState here.
11153 * Add a PVMXMSRS parameter to it, so that IEM can look at the host MSRs,
11154 * see @bugref{9180#c54}. */
11155 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
11156 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
11157 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
11158#endif
11159}
11160
11161
11162/**
11163 * First C routine invoked after running guest code using hardware-assisted VMX.
11164 *
11165 * @param pVCpu The cross context virtual CPU structure.
11166 * @param pVmxTransient The VMX-transient structure.
11167 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
11168 *
11169 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
11170 *
11171 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
11172 * unconditionally when it is safe to do so.
11173 */
11174static void hmR0VmxPostRunGuest(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun)
11175{
11176 ASMAtomicUoWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
11177 ASMAtomicIncU32(&pVCpu->hmr0.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
11178 pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
11179 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
11180 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
11181 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
11182
11183 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
11184 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT))
11185 {
11186 uint64_t uGstTsc;
11187 if (!pVmxTransient->fIsNestedGuest)
11188 uGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
11189 else
11190 {
11191 uint64_t const uNstGstTsc = pVCpu->hmr0.s.uTscExit + pVmcsInfo->u64TscOffset;
11192 uGstTsc = CPUMRemoveNestedGuestTscOffset(pVCpu, uNstGstTsc);
11193 }
11194 TMCpuTickSetLastSeen(pVCpu, uGstTsc); /* Update TM with the guest TSC. */
11195 }
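         /*
          * A worked sketch of the arithmetic above: with TSC offsetting active (and RDTSC not
          * intercepted) the guest observes host-TSC + VMCS TSC-offset, hence
          *      uGstTsc = uTscExit + u64TscOffset;
          * and for a nested-guest the offset programmed by the nested hypervisor is subtracted
          * again by CPUMRemoveNestedGuestTscOffset() to arrive at the outer guest's TSC.
          */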
11196
11197 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x);
11198 TMNotifyEndOfExecution(pVCpu->CTX_SUFF(pVM), pVCpu, pVCpu->hmr0.s.uTscExit); /* Notify TM that the guest is no longer running. */
11199 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
11200
11201 pVCpu->hmr0.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Some host state messed up by VMX needs restoring. */
11202 pVmcsInfo->fVmcsState |= VMX_V_VMCS_LAUNCH_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
11203#ifdef VBOX_STRICT
11204 hmR0VmxCheckHostEferMsr(pVmcsInfo); /* Verify that the host EFER MSR wasn't modified. */
11205#endif
11206 Assert(!ASMIntAreEnabled());
11207 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
11208 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
11209
11210#ifdef HMVMX_ALWAYS_CLEAN_TRANSIENT
11211 /*
11212 * Clean all the VMCS fields in the transient structure before reading
11213 * anything from the VMCS.
11214 */
11215 pVmxTransient->uExitReason = 0;
11216 pVmxTransient->uExitIntErrorCode = 0;
11217 pVmxTransient->uExitQual = 0;
11218 pVmxTransient->uGuestLinearAddr = 0;
11219 pVmxTransient->uExitIntInfo = 0;
11220 pVmxTransient->cbExitInstr = 0;
11221 pVmxTransient->ExitInstrInfo.u = 0;
11222 pVmxTransient->uEntryIntInfo = 0;
11223 pVmxTransient->uEntryXcptErrorCode = 0;
11224 pVmxTransient->cbEntryInstr = 0;
11225 pVmxTransient->uIdtVectoringInfo = 0;
11226 pVmxTransient->uIdtVectoringErrorCode = 0;
11227#endif
11228
11229 /*
11230 * Save the basic VM-exit reason and check if the VM-entry failed.
11231 * See Intel spec. 24.9.1 "Basic VM-exit Information".
11232 */
11233 uint32_t uExitReason;
11234 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
11235 AssertRC(rc);
11236 pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason);
11237 pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason);
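         /*
          * The exit-reason field packs the basic reason into the low 16 bits and flags a failed
          * VM-entry in bit 31 (see the Intel SDM on basic VM-exit information); the two macros
          * above simply pick those apart.
          */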
11238
11239 /*
11240 * Log the VM-exit before logging anything else as otherwise it might be a
11241 * tad confusing what happens before and after the world-switch.
11242 */
11243 HMVMX_LOG_EXIT(pVCpu, uExitReason);
11244
11245 /*
11246 * Remove the TSC_AUX MSR from the auto-load/store MSR area and reset any MSR
11247 * bitmap permissions, if it was added before VM-entry.
11248 */
11249 if (pVmxTransient->fRemoveTscAuxMsr)
11250 {
11251 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, pVmxTransient, MSR_K8_TSC_AUX);
11252 pVmxTransient->fRemoveTscAuxMsr = false;
11253 }
11254
11255 /*
11256 * Check if VMLAUNCH/VMRESUME succeeded.
11257 * If this failed, we cause a guru meditation and cease further execution.
11258 *
11259 * However, if we are executing a nested-guest we might fail if we use the
11260     * fast path rather than fully emulating the VMLAUNCH/VMRESUME instruction in IEM.
11261 */
11262 if (RT_LIKELY(rcVMRun == VINF_SUCCESS))
11263 {
11264 /*
11265 * Update the VM-exit history array here even if the VM-entry failed due to:
11266 * - Invalid guest state.
11267 * - MSR loading.
11268 * - Machine-check event.
11269 *
11270 * In any of the above cases we will still have a "valid" VM-exit reason
11271         * regardless of @a fVMEntryFailed.
11272 *
11273 * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state".
11274 *
11275 * Note! We don't have CS or RIP at this point. Will probably address that later
11276 * by amending the history entry added here.
11277 */
11278 EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK),
11279 UINT64_MAX, pVCpu->hmr0.s.uTscExit);
11280
11281 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
11282 {
11283 VMMRZCallRing3Enable(pVCpu);
11284 Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
11285
11286#ifdef HMVMX_ALWAYS_SAVE_RO_GUEST_STATE
11287 hmR0VmxReadAllRoFieldsVmcs(pVmxTransient);
11288#endif
11289
11290 /*
11291             * Always import the guest-interruptibility state as we need it when evaluating
11292             * events for injection on re-entry.
11293 *
11294 * We don't import CR0 (when unrestricted guest execution is unavailable) despite
11295 * checking for real-mode while exporting the state because all bits that cause
11296 * mode changes wrt CR0 are intercepted.
11297 */
11298 uint64_t const fImportMask = CPUMCTX_EXTRN_HM_VMX_INT_STATE
11299#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
11300 | HMVMX_CPUMCTX_EXTRN_ALL
11301#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
11302 | CPUMCTX_EXTRN_RFLAGS
11303#endif
11304 ;
11305 rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fImportMask);
11306 AssertRC(rc);
11307
11308 /*
11309 * Sync the TPR shadow with our APIC state.
11310 */
11311 if ( !pVmxTransient->fIsNestedGuest
11312 && (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW))
11313 {
11314 Assert(pVmcsInfo->pbVirtApic);
11315 if (pVmxTransient->u8GuestTpr != pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR])
11316 {
11317 rc = APICSetTpr(pVCpu, pVmcsInfo->pbVirtApic[XAPIC_OFF_TPR]);
11318 AssertRC(rc);
11319 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
11320 }
11321 }
11322
11323 Assert(VMMRZCallRing3IsEnabled(pVCpu));
11324 Assert( pVmxTransient->fWasGuestDebugStateActive == false
11325 || pVmxTransient->fWasHyperDebugStateActive == false);
11326 return;
11327 }
11328 }
11329#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
11330 else if (pVmxTransient->fIsNestedGuest)
11331 AssertMsgFailed(("VMLAUNCH/VMRESUME failed but shouldn't happen when VMLAUNCH/VMRESUME was emulated in IEM!\n"));
11332#endif
11333 else
11334 Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed));
11335
11336 VMMRZCallRing3Enable(pVCpu);
11337}
11338
11339
11340/**
11341 * Runs the guest code using hardware-assisted VMX the normal way.
11342 *
11343 * @returns VBox status code.
11344 * @param pVCpu The cross context virtual CPU structure.
11345 * @param pcLoops Pointer to the number of executed loops.
11346 */
11347static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPUCC pVCpu, uint32_t *pcLoops)
11348{
11349 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
11350 Assert(pcLoops);
11351 Assert(*pcLoops <= cMaxResumeLoops);
11352 Assert(!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
11353
11354#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
11355 /*
11356 * Switch to the guest VMCS as we may have transitioned from executing the nested-guest
11357 * without leaving ring-0. Otherwise, if we came from ring-3 we would have loaded the
11358 * guest VMCS while entering the VMX ring-0 session.
11359 */
11360 if (pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
11361 {
11362 int rc = hmR0VmxSwitchToGstOrNstGstVmcs(pVCpu, false /* fSwitchToNstGstVmcs */);
11363 if (RT_SUCCESS(rc))
11364 { /* likely */ }
11365 else
11366 {
11367 LogRelFunc(("Failed to switch to the guest VMCS. rc=%Rrc\n", rc));
11368 return rc;
11369 }
11370 }
11371#endif
11372
11373 VMXTRANSIENT VmxTransient;
11374 RT_ZERO(VmxTransient);
11375 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
11376
11377 /* Paranoia. */
11378 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfo);
11379
11380 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
11381 for (;;)
11382 {
11383 Assert(!HMR0SuspendPending());
11384 HMVMX_ASSERT_CPU_SAFE(pVCpu);
11385 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
11386
11387 /*
11388 * Preparatory work for running nested-guest code, this may force us to
11389 * return to ring-3.
11390 *
11391 * Warning! This bugger disables interrupts on VINF_SUCCESS!
11392 */
11393 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
11394 if (rcStrict != VINF_SUCCESS)
11395 break;
11396
11397 /* Interrupts are disabled at this point! */
11398 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
11399 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
11400 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
11401 /* Interrupts are re-enabled at this point! */
11402
11403 /*
11404 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
11405 */
11406 if (RT_SUCCESS(rcRun))
11407 { /* very likely */ }
11408 else
11409 {
11410 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
11411 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
11412 return rcRun;
11413 }
11414
11415 /*
11416 * Profile the VM-exit.
11417 */
11418 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
11419 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
11420 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
11421 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
11422 HMVMX_START_EXIT_DISPATCH_PROF();
11423
11424 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
11425
11426 /*
11427 * Handle the VM-exit.
11428 */
11429#ifdef HMVMX_USE_FUNCTION_TABLE
11430 rcStrict = g_aVMExitHandlers[VmxTransient.uExitReason].pfn(pVCpu, &VmxTransient);
11431#else
11432 rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient);
11433#endif
11434 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
11435 if (rcStrict == VINF_SUCCESS)
11436 {
11437 if (++(*pcLoops) <= cMaxResumeLoops)
11438 continue;
11439 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
11440 rcStrict = VINF_EM_RAW_INTERRUPT;
11441 }
11442 break;
11443 }
11444
11445 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
11446 return rcStrict;
11447}
11448
11449
11450#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
11451/**
11452 * Runs the nested-guest code using hardware-assisted VMX.
11453 *
11454 * @returns VBox status code.
11455 * @param pVCpu The cross context virtual CPU structure.
11456 * @param pcLoops Pointer to the number of executed loops.
11457 *
11458 * @sa hmR0VmxRunGuestCodeNormal.
11459 */
11460static VBOXSTRICTRC hmR0VmxRunGuestCodeNested(PVMCPUCC pVCpu, uint32_t *pcLoops)
11461{
11462 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
11463 Assert(pcLoops);
11464 Assert(*pcLoops <= cMaxResumeLoops);
11465 Assert(CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx));
11466
11467 /*
11468 * Switch to the nested-guest VMCS as we may have transitioned from executing the
11469 * guest without leaving ring-0. Otherwise, if we came from ring-3 we would have
11470 * loaded the nested-guest VMCS while entering the VMX ring-0 session.
11471 */
11472 if (!pVCpu->hmr0.s.vmx.fSwitchedToNstGstVmcs)
11473 {
11474 int rc = hmR0VmxSwitchToGstOrNstGstVmcs(pVCpu, true /* fSwitchToNstGstVmcs */);
11475 if (RT_SUCCESS(rc))
11476 { /* likely */ }
11477 else
11478 {
11479 LogRelFunc(("Failed to switch to the nested-guest VMCS. rc=%Rrc\n", rc));
11480 return rc;
11481 }
11482 }
11483
11484 VMXTRANSIENT VmxTransient;
11485 RT_ZERO(VmxTransient);
11486 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
11487 VmxTransient.fIsNestedGuest = true;
11488
11489 /* Paranoia. */
11490 Assert(VmxTransient.pVmcsInfo == &pVCpu->hmr0.s.vmx.VmcsInfoNstGst);
11491
11492 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
11493 for (;;)
11494 {
11495 Assert(!HMR0SuspendPending());
11496 HMVMX_ASSERT_CPU_SAFE(pVCpu);
11497 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
11498
11499 /*
11500 * Preparatory work for running guest code, this may force us to
11501 * return to ring-3.
11502 *
11503 * Warning! This bugger disables interrupts on VINF_SUCCESS!
11504 */
11505 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */);
11506 if (rcStrict != VINF_SUCCESS)
11507 break;
11508
11509 /* Interrupts are disabled at this point! */
11510 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
11511 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
11512 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
11513 /* Interrupts are re-enabled at this point! */
11514
11515 /*
11516 * Check for errors with running the VM (VMLAUNCH/VMRESUME).
11517 */
11518 if (RT_SUCCESS(rcRun))
11519 { /* very likely */ }
11520 else
11521 {
11522 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
11523 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
11524 return rcRun;
11525 }
11526
11527 /*
11528 * Profile the VM-exit.
11529 */
11530 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
11531 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
11532 STAM_COUNTER_INC(&pVCpu->hm.s.StatNestedExitAll);
11533 STAM_COUNTER_INC(&pVCpu->hm.s.aStatNestedExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
11534 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
11535 HMVMX_START_EXIT_DISPATCH_PROF();
11536
11537 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
11538
11539 /*
11540 * Handle the VM-exit.
11541 */
11542 rcStrict = hmR0VmxHandleExitNested(pVCpu, &VmxTransient);
11543 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
11544 if (rcStrict == VINF_SUCCESS)
11545 {
11546 if (!CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
11547 {
11548 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchNstGstVmexit);
11549 rcStrict = VINF_VMX_VMEXIT;
11550 }
11551 else
11552 {
11553 if (++(*pcLoops) <= cMaxResumeLoops)
11554 continue;
11555 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
11556 rcStrict = VINF_EM_RAW_INTERRUPT;
11557 }
11558 }
11559 else
11560 Assert(rcStrict != VINF_VMX_VMEXIT);
11561 break;
11562 }
11563
11564 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
11565 return rcStrict;
11566}
11567#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
11568
11569
11570/** @name Execution loop for single stepping, DBGF events and expensive Dtrace
11571 * probes.
11572 *
11573 * The following few functions and associated structure contain the bloat
11574 * necessary for providing detailed debug events and dtrace probes as well as
11575 * reliable host side single stepping. This works on the principle of
11576 * "subclassing" the normal execution loop and workers. We replace the loop
11577 * method completely and override selected helpers to add necessary adjustments
11578 * to their core operation.
11579 *
11580 * The goal is to keep the "parent" code lean and mean, so as not to sacrifice
11581 * any performance for debug and analysis features.
11582 *
11583 * @{
11584 */
11585
11586/**
11587 * Transient per-VCPU debug state of VMCS and related info. we save/restore in
11588 * the debug run loop.
11589 */
11590typedef struct VMXRUNDBGSTATE
11591{
11592 /** The RIP we started executing at. This is for detecting that we stepped. */
11593 uint64_t uRipStart;
11594 /** The CS we started executing with. */
11595 uint16_t uCsStart;
11596
11597 /** Whether we've actually modified the 1st execution control field. */
11598 bool fModifiedProcCtls : 1;
11599 /** Whether we've actually modified the 2nd execution control field. */
11600 bool fModifiedProcCtls2 : 1;
11601 /** Whether we've actually modified the exception bitmap. */
11602 bool fModifiedXcptBitmap : 1;
11603
11604    /** We desire the CR0 mask to be cleared. */
11605 bool fClearCr0Mask : 1;
11606    /** We desire the CR4 mask to be cleared. */
11607 bool fClearCr4Mask : 1;
11608 /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC. */
11609 uint32_t fCpe1Extra;
11610 /** Stuff we do not want in VMX_VMCS32_CTRL_PROC_EXEC. */
11611 uint32_t fCpe1Unwanted;
11612 /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC2. */
11613 uint32_t fCpe2Extra;
11614 /** Extra stuff we need in VMX_VMCS32_CTRL_EXCEPTION_BITMAP. */
11615 uint32_t bmXcptExtra;
11616 /** The sequence number of the Dtrace provider settings the state was
11617 * configured against. */
11618 uint32_t uDtraceSettingsSeqNo;
11619 /** VM-exits to check (one bit per VM-exit). */
11620 uint32_t bmExitsToCheck[3];
11621
11622 /** The initial VMX_VMCS32_CTRL_PROC_EXEC value (helps with restore). */
11623 uint32_t fProcCtlsInitial;
11624 /** The initial VMX_VMCS32_CTRL_PROC_EXEC2 value (helps with restore). */
11625 uint32_t fProcCtls2Initial;
11626 /** The initial VMX_VMCS32_CTRL_EXCEPTION_BITMAP value (helps with restore). */
11627 uint32_t bmXcptInitial;
11628} VMXRUNDBGSTATE;
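/* Note: bmExitsToCheck must provide one bit for each possible VM-exit reason; the
   compile-time assertion below checks its size against VMX_EXIT_MAX. */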
11629AssertCompileMemberSize(VMXRUNDBGSTATE, bmExitsToCheck, (VMX_EXIT_MAX + 1 + 31) / 32 * 4);
11630typedef VMXRUNDBGSTATE *PVMXRUNDBGSTATE;
11631
11632
11633/**
11634 * Initializes the VMXRUNDBGSTATE structure.
11635 *
11636 * @param pVCpu The cross context virtual CPU structure of the
11637 * calling EMT.
11638 * @param pVmxTransient The VMX-transient structure.
11639 * @param pDbgState The debug state to initialize.
11640 */
11641static void hmR0VmxRunDebugStateInit(PVMCPUCC pVCpu, PCVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
11642{
11643 pDbgState->uRipStart = pVCpu->cpum.GstCtx.rip;
11644 pDbgState->uCsStart = pVCpu->cpum.GstCtx.cs.Sel;
11645
11646 pDbgState->fModifiedProcCtls = false;
11647 pDbgState->fModifiedProcCtls2 = false;
11648 pDbgState->fModifiedXcptBitmap = false;
11649 pDbgState->fClearCr0Mask = false;
11650 pDbgState->fClearCr4Mask = false;
11651 pDbgState->fCpe1Extra = 0;
11652 pDbgState->fCpe1Unwanted = 0;
11653 pDbgState->fCpe2Extra = 0;
11654 pDbgState->bmXcptExtra = 0;
11655 pDbgState->fProcCtlsInitial = pVmxTransient->pVmcsInfo->u32ProcCtls;
11656 pDbgState->fProcCtls2Initial = pVmxTransient->pVmcsInfo->u32ProcCtls2;
11657 pDbgState->bmXcptInitial = pVmxTransient->pVmcsInfo->u32XcptBitmap;
11658}
11659
11660
11661/**
11662 * Updates the VMCS fields with changes requested by @a pDbgState.
11663 *
11664 * This is performed after hmR0VmxPreRunGuestDebugStateUpdate as well as
11665 * immediately before executing guest code, i.e. when interrupts are disabled.
11666 * We don't check status codes here as we cannot easily assert or return in the
11667 * latter case.
11668 *
11669 * @param pVCpu The cross context virtual CPU structure.
11670 * @param pVmxTransient The VMX-transient structure.
11671 * @param pDbgState The debug state.
11672 */
11673static void hmR0VmxPreRunGuestDebugStateApply(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
11674{
11675 /*
11676 * Ensure desired flags in VMCS control fields are set.
11677 * (Ignoring write failure here, as we're committed and it's just debug extras.)
11678 *
11679 * Note! We load the shadow CR0 & CR4 bits when we flag the clearing, so
11680 * there should be no stale data in pCtx at this point.
11681 */
11682 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
11683 if ( (pVmcsInfo->u32ProcCtls & pDbgState->fCpe1Extra) != pDbgState->fCpe1Extra
11684 || (pVmcsInfo->u32ProcCtls & pDbgState->fCpe1Unwanted))
11685 {
11686 pVmcsInfo->u32ProcCtls |= pDbgState->fCpe1Extra;
11687 pVmcsInfo->u32ProcCtls &= ~pDbgState->fCpe1Unwanted;
11688 VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
11689 Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC: %#RX32\n", pVmcsInfo->u32ProcCtls));
11690 pDbgState->fModifiedProcCtls = true;
11691 }
11692
11693 if ((pVmcsInfo->u32ProcCtls2 & pDbgState->fCpe2Extra) != pDbgState->fCpe2Extra)
11694 {
11695 pVmcsInfo->u32ProcCtls2 |= pDbgState->fCpe2Extra;
11696 VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pVmcsInfo->u32ProcCtls2);
11697 Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC2: %#RX32\n", pVmcsInfo->u32ProcCtls2));
11698 pDbgState->fModifiedProcCtls2 = true;
11699 }
11700
11701 if ((pVmcsInfo->u32XcptBitmap & pDbgState->bmXcptExtra) != pDbgState->bmXcptExtra)
11702 {
11703 pVmcsInfo->u32XcptBitmap |= pDbgState->bmXcptExtra;
11704 VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVmcsInfo->u32XcptBitmap);
11705 Log6Func(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP: %#RX32\n", pVmcsInfo->u32XcptBitmap));
11706 pDbgState->fModifiedXcptBitmap = true;
11707 }
11708
11709 if (pDbgState->fClearCr0Mask && pVmcsInfo->u64Cr0Mask != 0)
11710 {
11711 pVmcsInfo->u64Cr0Mask = 0;
11712 VMXWriteVmcsNw(VMX_VMCS_CTRL_CR0_MASK, 0);
11713 Log6Func(("VMX_VMCS_CTRL_CR0_MASK: 0\n"));
11714 }
11715
11716 if (pDbgState->fClearCr4Mask && pVmcsInfo->u64Cr4Mask != 0)
11717 {
11718 pVmcsInfo->u64Cr4Mask = 0;
11719 VMXWriteVmcsNw(VMX_VMCS_CTRL_CR4_MASK, 0);
11720 Log6Func(("VMX_VMCS_CTRL_CR4_MASK: 0\n"));
11721 }
11722
11723 NOREF(pVCpu);
11724}
11725
11726
11727/**
11728 * Restores VMCS fields that were changed by hmR0VmxPreRunGuestDebugStateApply for
11729 * re-entry next time around.
11730 *
11731 * @returns Strict VBox status code (i.e. informational status codes too).
11732 * @param pVCpu The cross context virtual CPU structure.
11733 * @param pVmxTransient The VMX-transient structure.
11734 * @param pDbgState The debug state.
11735 * @param rcStrict The return code from executing the guest using single
11736 * stepping.
11737 */
11738static VBOXSTRICTRC hmR0VmxRunDebugStateRevert(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState,
11739 VBOXSTRICTRC rcStrict)
11740{
11741 /*
11742 * Restore VM-exit control settings as we may not reenter this function the
11743 * next time around.
11744 */
11745 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
11746
11747    /* We reload the initial value and trigger what recalculations we can the
11748       next time around. From the looks of things, that's all that's required atm. */
11749 if (pDbgState->fModifiedProcCtls)
11750 {
11751 if (!(pDbgState->fProcCtlsInitial & VMX_PROC_CTLS_MOV_DR_EXIT) && CPUMIsHyperDebugStateActive(pVCpu))
11752 pDbgState->fProcCtlsInitial |= VMX_PROC_CTLS_MOV_DR_EXIT; /* Avoid assertion in hmR0VmxLeave */
11753 int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pDbgState->fProcCtlsInitial);
11754 AssertRC(rc2);
11755 pVmcsInfo->u32ProcCtls = pDbgState->fProcCtlsInitial;
11756 }
11757
11758 /* We're currently the only ones messing with this one, so just restore the
11759 cached value and reload the field. */
11760 if ( pDbgState->fModifiedProcCtls2
11761 && pVmcsInfo->u32ProcCtls2 != pDbgState->fProcCtls2Initial)
11762 {
11763 int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pDbgState->fProcCtls2Initial);
11764 AssertRC(rc2);
11765 pVmcsInfo->u32ProcCtls2 = pDbgState->fProcCtls2Initial;
11766 }
11767
11768 /* If we've modified the exception bitmap, we restore it and trigger
11769 reloading and partial recalculation the next time around. */
11770 if (pDbgState->fModifiedXcptBitmap)
11771 pVmcsInfo->u32XcptBitmap = pDbgState->bmXcptInitial;
11772
11773 return rcStrict;
11774}
11775
11776
11777/**
11778 * Configures VM-exit controls for current DBGF and DTrace settings.
11779 *
11780 * This updates @a pDbgState and the VMCS execution control fields to reflect
11781 * the necessary VM-exits demanded by DBGF and DTrace.
11782 *
11783 * @param pVCpu The cross context virtual CPU structure.
11784 * @param pVmxTransient The VMX-transient structure. May update
11785 * fUpdatedTscOffsettingAndPreemptTimer.
11786 * @param pDbgState The debug state.
11787 */
11788static void hmR0VmxPreRunGuestDebugStateUpdate(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
11789{
11790 /*
11791 * Take down the dtrace serial number so we can spot changes.
11792     * Take down the dtrace settings sequence number so we can spot changes.
11793 pDbgState->uDtraceSettingsSeqNo = VBOXVMM_GET_SETTINGS_SEQ_NO();
11794 ASMCompilerBarrier();
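         /* The compiler barrier keeps the sequence number read ordered before the
            probe/event settings reads below; the debug run loop re-checks the sequence
            number to pick up concurrent changes. */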
11795
11796 /*
11797 * We'll rebuild most of the middle block of data members (holding the
11798 * current settings) as we go along here, so start by clearing it all.
11799 */
11800 pDbgState->bmXcptExtra = 0;
11801 pDbgState->fCpe1Extra = 0;
11802 pDbgState->fCpe1Unwanted = 0;
11803 pDbgState->fCpe2Extra = 0;
11804 for (unsigned i = 0; i < RT_ELEMENTS(pDbgState->bmExitsToCheck); i++)
11805 pDbgState->bmExitsToCheck[i] = 0;
11806
11807 /*
11808 * Software interrupts (INT XXh) - no idea how to trigger these...
11809 */
11810 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
11811 if ( DBGF_IS_EVENT_ENABLED(pVM, DBGFEVENT_INTERRUPT_SOFTWARE)
11812 || VBOXVMM_INT_SOFTWARE_ENABLED())
11813 {
11814 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI);
11815 }
11816
11817 /*
11818 * INT3 breakpoints - triggered by #BP exceptions.
11819 */
11820 if (pVM->dbgf.ro.cEnabledInt3Breakpoints > 0)
11821 pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP);
11822
11823 /*
11824 * Exception bitmap and XCPT events+probes.
11825 */
11826 for (int iXcpt = 0; iXcpt < (DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST + 1); iXcpt++)
11827 if (DBGF_IS_EVENT_ENABLED(pVM, (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + iXcpt)))
11828 pDbgState->bmXcptExtra |= RT_BIT_32(iXcpt);
11829
11830 if (VBOXVMM_XCPT_DE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DE);
11831 if (VBOXVMM_XCPT_DB_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DB);
11832 if (VBOXVMM_XCPT_BP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP);
11833 if (VBOXVMM_XCPT_OF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_OF);
11834 if (VBOXVMM_XCPT_BR_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BR);
11835 if (VBOXVMM_XCPT_UD_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_UD);
11836 if (VBOXVMM_XCPT_NM_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NM);
11837 if (VBOXVMM_XCPT_DF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DF);
11838 if (VBOXVMM_XCPT_TS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_TS);
11839 if (VBOXVMM_XCPT_NP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NP);
11840 if (VBOXVMM_XCPT_SS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SS);
11841 if (VBOXVMM_XCPT_GP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_GP);
11842 if (VBOXVMM_XCPT_PF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_PF);
11843 if (VBOXVMM_XCPT_MF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_MF);
11844 if (VBOXVMM_XCPT_AC_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_AC);
11845 if (VBOXVMM_XCPT_XF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_XF);
11846 if (VBOXVMM_XCPT_VE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_VE);
11847 if (VBOXVMM_XCPT_SX_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SX);
11848
11849 if (pDbgState->bmXcptExtra)
11850 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI);
11851
11852 /*
11853 * Process events and probes for VM-exits, making sure we get the wanted VM-exits.
11854 *
11855 * Note! This is the reverse of what hmR0VmxHandleExitDtraceEvents does.
11856 * So, when adding/changing/removing please don't forget to update it.
11857 *
11858     * Some of the macros are picking up local variables to save horizontal space
11859     * (being able to see it in a table is the lesser evil here).
11860 */
11861#define IS_EITHER_ENABLED(a_pVM, a_EventSubName) \
11862 ( DBGF_IS_EVENT_ENABLED(a_pVM, RT_CONCAT(DBGFEVENT_, a_EventSubName)) \
11863 || RT_CONCAT3(VBOXVMM_, a_EventSubName, _ENABLED)() )
11864#define SET_ONLY_XBM_IF_EITHER_EN(a_EventSubName, a_uExit) \
11865 if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
11866 { AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
11867 ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
11868 } else do { } while (0)
11869#define SET_CPE1_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec) \
11870 if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
11871 { \
11872 (pDbgState)->fCpe1Extra |= (a_fCtrlProcExec); \
11873 AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
11874 ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
11875 } else do { } while (0)
11876#define SET_CPEU_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fUnwantedCtrlProcExec) \
11877 if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
11878 { \
11879 (pDbgState)->fCpe1Unwanted |= (a_fUnwantedCtrlProcExec); \
11880 AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
11881 ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
11882 } else do { } while (0)
11883#define SET_CPE2_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec2) \
11884 if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \
11885 { \
11886 (pDbgState)->fCpe2Extra |= (a_fCtrlProcExec2); \
11887 AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \
11888 ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \
11889 } else do { } while (0)
11890
11891 SET_ONLY_XBM_IF_EITHER_EN(EXIT_TASK_SWITCH, VMX_EXIT_TASK_SWITCH); /* unconditional */
11892 SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_VIOLATION, VMX_EXIT_EPT_VIOLATION); /* unconditional */
11893 SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_MISCONFIG, VMX_EXIT_EPT_MISCONFIG); /* unconditional (unless #VE) */
11894 SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_ACCESS, VMX_EXIT_APIC_ACCESS); /* feature dependent, nothing to enable here */
11895 SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_WRITE, VMX_EXIT_APIC_WRITE); /* feature dependent, nothing to enable here */
11896
11897 SET_ONLY_XBM_IF_EITHER_EN(INSTR_CPUID, VMX_EXIT_CPUID); /* unconditional */
11898 SET_ONLY_XBM_IF_EITHER_EN( EXIT_CPUID, VMX_EXIT_CPUID);
11899 SET_ONLY_XBM_IF_EITHER_EN(INSTR_GETSEC, VMX_EXIT_GETSEC); /* unconditional */
11900 SET_ONLY_XBM_IF_EITHER_EN( EXIT_GETSEC, VMX_EXIT_GETSEC);
11901 SET_CPE1_XBM_IF_EITHER_EN(INSTR_HALT, VMX_EXIT_HLT, VMX_PROC_CTLS_HLT_EXIT); /* paranoia */
11902 SET_ONLY_XBM_IF_EITHER_EN( EXIT_HALT, VMX_EXIT_HLT);
11903 SET_ONLY_XBM_IF_EITHER_EN(INSTR_INVD, VMX_EXIT_INVD); /* unconditional */
11904 SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVD, VMX_EXIT_INVD);
11905 SET_CPE1_XBM_IF_EITHER_EN(INSTR_INVLPG, VMX_EXIT_INVLPG, VMX_PROC_CTLS_INVLPG_EXIT);
11906 SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVLPG, VMX_EXIT_INVLPG);
11907 SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDPMC, VMX_EXIT_RDPMC, VMX_PROC_CTLS_RDPMC_EXIT);
11908 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDPMC, VMX_EXIT_RDPMC);
11909 SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSC, VMX_EXIT_RDTSC, VMX_PROC_CTLS_RDTSC_EXIT);
11910 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSC, VMX_EXIT_RDTSC);
11911 SET_ONLY_XBM_IF_EITHER_EN(INSTR_RSM, VMX_EXIT_RSM); /* unconditional */
11912 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RSM, VMX_EXIT_RSM);
11913 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMM_CALL, VMX_EXIT_VMCALL); /* unconditional */
11914 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMM_CALL, VMX_EXIT_VMCALL);
11915 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMCLEAR, VMX_EXIT_VMCLEAR); /* unconditional */
11916 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMCLEAR, VMX_EXIT_VMCLEAR);
11917 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH); /* unconditional */
11918 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH);
11919 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRLD, VMX_EXIT_VMPTRLD); /* unconditional */
11920 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRLD, VMX_EXIT_VMPTRLD);
11921 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRST, VMX_EXIT_VMPTRST); /* unconditional */
11922 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRST, VMX_EXIT_VMPTRST);
11923 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMREAD, VMX_EXIT_VMREAD); /* unconditional */
11924 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMREAD, VMX_EXIT_VMREAD);
11925 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMRESUME, VMX_EXIT_VMRESUME); /* unconditional */
11926 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMRESUME, VMX_EXIT_VMRESUME);
11927 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMWRITE, VMX_EXIT_VMWRITE); /* unconditional */
11928 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMWRITE, VMX_EXIT_VMWRITE);
11929 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXOFF, VMX_EXIT_VMXOFF); /* unconditional */
11930 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXOFF, VMX_EXIT_VMXOFF);
11931 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXON, VMX_EXIT_VMXON); /* unconditional */
11932 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXON, VMX_EXIT_VMXON);
11933
11934 if ( IS_EITHER_ENABLED(pVM, INSTR_CRX_READ)
11935 || IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE))
11936 {
11937 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR4
11938 | CPUMCTX_EXTRN_APIC_TPR);
11939 AssertRC(rc);
11940
11941#if 0 /** @todo fix me */
11942 pDbgState->fClearCr0Mask = true;
11943 pDbgState->fClearCr4Mask = true;
11944#endif
11945 if (IS_EITHER_ENABLED(pVM, INSTR_CRX_READ))
11946 pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_STORE_EXIT | VMX_PROC_CTLS_CR8_STORE_EXIT;
11947 if (IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE))
11948 pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR8_LOAD_EXIT;
11949 pDbgState->fCpe1Unwanted |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* risky? */
11950 /* Note! We currently don't use VMX_VMCS32_CTRL_CR3_TARGET_COUNT. It would
11951 require clearing here and in the loop if we start using it. */
11952 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_CRX);
11953 }
11954 else
11955 {
11956 if (pDbgState->fClearCr0Mask)
11957 {
11958 pDbgState->fClearCr0Mask = false;
11959 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0);
11960 }
11961 if (pDbgState->fClearCr4Mask)
11962 {
11963 pDbgState->fClearCr4Mask = false;
11964 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR4);
11965 }
11966 }
11967 SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_READ, VMX_EXIT_MOV_CRX);
11968 SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_WRITE, VMX_EXIT_MOV_CRX);
11969
11970 if ( IS_EITHER_ENABLED(pVM, INSTR_DRX_READ)
11971 || IS_EITHER_ENABLED(pVM, INSTR_DRX_WRITE))
11972 {
11973 /** @todo later, need to fix handler as it assumes this won't usually happen. */
11974 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_DRX);
11975 }
11976 SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_READ, VMX_EXIT_MOV_DRX);
11977 SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_WRITE, VMX_EXIT_MOV_DRX);
11978
11979 SET_CPEU_XBM_IF_EITHER_EN(INSTR_RDMSR, VMX_EXIT_RDMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS); /* risky clearing this? */
11980 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDMSR, VMX_EXIT_RDMSR);
11981 SET_CPEU_XBM_IF_EITHER_EN(INSTR_WRMSR, VMX_EXIT_WRMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS);
11982 SET_ONLY_XBM_IF_EITHER_EN( EXIT_WRMSR, VMX_EXIT_WRMSR);
11983 SET_CPE1_XBM_IF_EITHER_EN(INSTR_MWAIT, VMX_EXIT_MWAIT, VMX_PROC_CTLS_MWAIT_EXIT); /* paranoia */
11984 SET_ONLY_XBM_IF_EITHER_EN( EXIT_MWAIT, VMX_EXIT_MWAIT);
11985 SET_CPE1_XBM_IF_EITHER_EN(INSTR_MONITOR, VMX_EXIT_MONITOR, VMX_PROC_CTLS_MONITOR_EXIT); /* paranoia */
11986 SET_ONLY_XBM_IF_EITHER_EN( EXIT_MONITOR, VMX_EXIT_MONITOR);
11987#if 0 /** @todo too slow, fix handler. */
11988 SET_CPE1_XBM_IF_EITHER_EN(INSTR_PAUSE, VMX_EXIT_PAUSE, VMX_PROC_CTLS_PAUSE_EXIT);
11989#endif
11990 SET_ONLY_XBM_IF_EITHER_EN( EXIT_PAUSE, VMX_EXIT_PAUSE);
11991
11992 if ( IS_EITHER_ENABLED(pVM, INSTR_SGDT)
11993 || IS_EITHER_ENABLED(pVM, INSTR_SIDT)
11994 || IS_EITHER_ENABLED(pVM, INSTR_LGDT)
11995 || IS_EITHER_ENABLED(pVM, INSTR_LIDT))
11996 {
11997 pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT;
11998 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_GDTR_IDTR_ACCESS);
11999 }
12000 SET_ONLY_XBM_IF_EITHER_EN( EXIT_SGDT, VMX_EXIT_GDTR_IDTR_ACCESS);
12001 SET_ONLY_XBM_IF_EITHER_EN( EXIT_SIDT, VMX_EXIT_GDTR_IDTR_ACCESS);
12002 SET_ONLY_XBM_IF_EITHER_EN( EXIT_LGDT, VMX_EXIT_GDTR_IDTR_ACCESS);
12003 SET_ONLY_XBM_IF_EITHER_EN( EXIT_LIDT, VMX_EXIT_GDTR_IDTR_ACCESS);
12004
12005 if ( IS_EITHER_ENABLED(pVM, INSTR_SLDT)
12006 || IS_EITHER_ENABLED(pVM, INSTR_STR)
12007 || IS_EITHER_ENABLED(pVM, INSTR_LLDT)
12008 || IS_EITHER_ENABLED(pVM, INSTR_LTR))
12009 {
12010 pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT;
12011 ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_LDTR_TR_ACCESS);
12012 }
12013 SET_ONLY_XBM_IF_EITHER_EN( EXIT_SLDT, VMX_EXIT_LDTR_TR_ACCESS);
12014 SET_ONLY_XBM_IF_EITHER_EN( EXIT_STR, VMX_EXIT_LDTR_TR_ACCESS);
12015 SET_ONLY_XBM_IF_EITHER_EN( EXIT_LLDT, VMX_EXIT_LDTR_TR_ACCESS);
12016 SET_ONLY_XBM_IF_EITHER_EN( EXIT_LTR, VMX_EXIT_LDTR_TR_ACCESS);
12017
12018 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVEPT, VMX_EXIT_INVEPT); /* unconditional */
12019 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVEPT, VMX_EXIT_INVEPT);
12020 SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSCP, VMX_EXIT_RDTSCP, VMX_PROC_CTLS_RDTSC_EXIT);
12021 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSCP, VMX_EXIT_RDTSCP);
12022 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVVPID, VMX_EXIT_INVVPID); /* unconditional */
12023 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVVPID, VMX_EXIT_INVVPID);
12024 SET_CPE2_XBM_IF_EITHER_EN(INSTR_WBINVD, VMX_EXIT_WBINVD, VMX_PROC_CTLS2_WBINVD_EXIT);
12025 SET_ONLY_XBM_IF_EITHER_EN( EXIT_WBINVD, VMX_EXIT_WBINVD);
12026 SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSETBV, VMX_EXIT_XSETBV); /* unconditional */
12027 SET_ONLY_XBM_IF_EITHER_EN( EXIT_XSETBV, VMX_EXIT_XSETBV);
12028 SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDRAND, VMX_EXIT_RDRAND, VMX_PROC_CTLS2_RDRAND_EXIT);
12029 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDRAND, VMX_EXIT_RDRAND);
12030 SET_CPE1_XBM_IF_EITHER_EN(INSTR_VMX_INVPCID, VMX_EXIT_INVPCID, VMX_PROC_CTLS_INVLPG_EXIT);
12031 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVPCID, VMX_EXIT_INVPCID);
12032 SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMFUNC, VMX_EXIT_VMFUNC); /* unconditional for the current setup */
12033 SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMFUNC, VMX_EXIT_VMFUNC);
12034 SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDSEED, VMX_EXIT_RDSEED, VMX_PROC_CTLS2_RDSEED_EXIT);
12035 SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDSEED, VMX_EXIT_RDSEED);
12036 SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSAVES, VMX_EXIT_XSAVES); /* unconditional (enabled by host, guest cfg) */
12037    SET_ONLY_XBM_IF_EITHER_EN( EXIT_XSAVES, VMX_EXIT_XSAVES);
12038 SET_ONLY_XBM_IF_EITHER_EN(INSTR_XRSTORS, VMX_EXIT_XRSTORS); /* unconditional (enabled by host, guest cfg) */
12039 SET_ONLY_XBM_IF_EITHER_EN( EXIT_XRSTORS, VMX_EXIT_XRSTORS);
12040
12041#undef IS_EITHER_ENABLED
12042#undef SET_ONLY_XBM_IF_EITHER_EN
12043#undef SET_CPE1_XBM_IF_EITHER_EN
12044#undef SET_CPEU_XBM_IF_EITHER_EN
12045#undef SET_CPE2_XBM_IF_EITHER_EN
12046
12047 /*
12048     * Sanitize the requested controls against what the CPU supports (the VMX capability MSRs).
12049 */
12050 pDbgState->fCpe2Extra &= g_HmMsrs.u.vmx.ProcCtls2.n.allowed1;
12051 if (pDbgState->fCpe2Extra)
12052 pDbgState->fCpe1Extra |= VMX_PROC_CTLS_USE_SECONDARY_CTLS;
12053 pDbgState->fCpe1Extra &= g_HmMsrs.u.vmx.ProcCtls.n.allowed1;
12054 pDbgState->fCpe1Unwanted &= ~g_HmMsrs.u.vmx.ProcCtls.n.allowed0;
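         /* If the desired RDTSC-exiting state changed, force TSC offsetting and the
            VMX-preemption timer to be re-evaluated before the next VM-entry. */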
12055 if (pVCpu->hmr0.s.fDebugWantRdTscExit != RT_BOOL(pDbgState->fCpe1Extra & VMX_PROC_CTLS_RDTSC_EXIT))
12056 {
12057 pVCpu->hmr0.s.fDebugWantRdTscExit ^= true;
12058 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
12059 }
12060
12061 Log6(("HM: debug state: cpe1=%#RX32 cpeu=%#RX32 cpe2=%#RX32%s%s\n",
12062 pDbgState->fCpe1Extra, pDbgState->fCpe1Unwanted, pDbgState->fCpe2Extra,
12063 pDbgState->fClearCr0Mask ? " clr-cr0" : "",
12064 pDbgState->fClearCr4Mask ? " clr-cr4" : ""));
12065}
12066
12067
12068/**
12069 * Fires off DBGF events and dtrace probes for a VM-exit, when it's
12070 * appropriate.
12071 *
12072 * The caller has checked the VM-exit against the
12073 * VMXRUNDBGSTATE::bmExitsToCheck bitmap. The caller has checked for NMIs
12074 * already, so we don't have to do that either.
12075 *
12076 * @returns Strict VBox status code (i.e. informational status codes too).
12077 * @param pVCpu The cross context virtual CPU structure.
12078 * @param pVmxTransient The VMX-transient structure.
12079 * @param uExitReason The VM-exit reason.
12080 *
12081 * @remarks The name of this function is displayed by dtrace, so keep it short
12082 *          and to the point. No longer than 33 chars, please.
12083 */
12084static VBOXSTRICTRC hmR0VmxHandleExitDtraceEvents(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uExitReason)
12085{
12086 /*
12087 * Translate the event into a DBGF event (enmEvent + uEventArg) and at the
12088 * same time check whether any corresponding Dtrace event is enabled (fDtrace).
12089 *
12090 * Note! This is the reverse operation of what hmR0VmxPreRunGuestDebugStateUpdate
12091     * does. Must add/change/remove in both places. Same ordering, please.
12092 *
12093 * Added/removed events must also be reflected in the next section
12094 * where we dispatch dtrace events.
12095 */
12096 bool fDtrace1 = false;
12097 bool fDtrace2 = false;
12098 DBGFEVENTTYPE enmEvent1 = DBGFEVENT_END;
12099 DBGFEVENTTYPE enmEvent2 = DBGFEVENT_END;
12100 uint32_t uEventArg = 0;
12101#define SET_EXIT(a_EventSubName) \
12102 do { \
12103 enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \
12104 fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \
12105 } while (0)
12106#define SET_BOTH(a_EventSubName) \
12107 do { \
12108 enmEvent1 = RT_CONCAT(DBGFEVENT_INSTR_, a_EventSubName); \
12109 enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \
12110 fDtrace1 = RT_CONCAT3(VBOXVMM_INSTR_, a_EventSubName, _ENABLED)(); \
12111 fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \
12112 } while (0)
12113 switch (uExitReason)
12114 {
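             /* Monitor-trap-flag exits are handed straight to the regular MTF handler;
                no DBGF event or DTrace probe is raised for them here. */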
12115 case VMX_EXIT_MTF:
12116 return hmR0VmxExitMtf(pVCpu, pVmxTransient);
12117
12118 case VMX_EXIT_XCPT_OR_NMI:
12119 {
12120 uint8_t const idxVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
12121 switch (VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo))
12122 {
12123 case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
12124 case VMX_EXIT_INT_INFO_TYPE_SW_XCPT:
12125 case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT:
12126 if (idxVector <= (unsigned)(DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST))
12127 {
12128 if (VMX_EXIT_INT_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uExitIntInfo))
12129 {
12130 hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
12131 uEventArg = pVmxTransient->uExitIntErrorCode;
12132 }
12133 enmEvent1 = (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + idxVector);
12134 switch (enmEvent1)
12135 {
12136 case DBGFEVENT_XCPT_DE: fDtrace1 = VBOXVMM_XCPT_DE_ENABLED(); break;
12137 case DBGFEVENT_XCPT_DB: fDtrace1 = VBOXVMM_XCPT_DB_ENABLED(); break;
12138 case DBGFEVENT_XCPT_BP: fDtrace1 = VBOXVMM_XCPT_BP_ENABLED(); break;
12139 case DBGFEVENT_XCPT_OF: fDtrace1 = VBOXVMM_XCPT_OF_ENABLED(); break;
12140 case DBGFEVENT_XCPT_BR: fDtrace1 = VBOXVMM_XCPT_BR_ENABLED(); break;
12141 case DBGFEVENT_XCPT_UD: fDtrace1 = VBOXVMM_XCPT_UD_ENABLED(); break;
12142 case DBGFEVENT_XCPT_NM: fDtrace1 = VBOXVMM_XCPT_NM_ENABLED(); break;
12143 case DBGFEVENT_XCPT_DF: fDtrace1 = VBOXVMM_XCPT_DF_ENABLED(); break;
12144 case DBGFEVENT_XCPT_TS: fDtrace1 = VBOXVMM_XCPT_TS_ENABLED(); break;
12145 case DBGFEVENT_XCPT_NP: fDtrace1 = VBOXVMM_XCPT_NP_ENABLED(); break;
12146 case DBGFEVENT_XCPT_SS: fDtrace1 = VBOXVMM_XCPT_SS_ENABLED(); break;
12147 case DBGFEVENT_XCPT_GP: fDtrace1 = VBOXVMM_XCPT_GP_ENABLED(); break;
12148 case DBGFEVENT_XCPT_PF: fDtrace1 = VBOXVMM_XCPT_PF_ENABLED(); break;
12149 case DBGFEVENT_XCPT_MF: fDtrace1 = VBOXVMM_XCPT_MF_ENABLED(); break;
12150 case DBGFEVENT_XCPT_AC: fDtrace1 = VBOXVMM_XCPT_AC_ENABLED(); break;
12151 case DBGFEVENT_XCPT_XF: fDtrace1 = VBOXVMM_XCPT_XF_ENABLED(); break;
12152 case DBGFEVENT_XCPT_VE: fDtrace1 = VBOXVMM_XCPT_VE_ENABLED(); break;
12153 case DBGFEVENT_XCPT_SX: fDtrace1 = VBOXVMM_XCPT_SX_ENABLED(); break;
12154 default: break;
12155 }
12156 }
12157 else
12158 AssertFailed();
12159 break;
12160
12161 case VMX_EXIT_INT_INFO_TYPE_SW_INT:
12162 uEventArg = idxVector;
12163 enmEvent1 = DBGFEVENT_INTERRUPT_SOFTWARE;
12164 fDtrace1 = VBOXVMM_INT_SOFTWARE_ENABLED();
12165 break;
12166 }
12167 break;
12168 }
12169
12170 case VMX_EXIT_TRIPLE_FAULT:
12171 enmEvent1 = DBGFEVENT_TRIPLE_FAULT;
12172 //fDtrace1 = VBOXVMM_EXIT_TRIPLE_FAULT_ENABLED();
12173 break;
12174 case VMX_EXIT_TASK_SWITCH: SET_EXIT(TASK_SWITCH); break;
12175 case VMX_EXIT_EPT_VIOLATION: SET_EXIT(VMX_EPT_VIOLATION); break;
12176 case VMX_EXIT_EPT_MISCONFIG: SET_EXIT(VMX_EPT_MISCONFIG); break;
12177 case VMX_EXIT_APIC_ACCESS: SET_EXIT(VMX_VAPIC_ACCESS); break;
12178 case VMX_EXIT_APIC_WRITE: SET_EXIT(VMX_VAPIC_WRITE); break;
12179
12180 /* Instruction specific VM-exits: */
12181 case VMX_EXIT_CPUID: SET_BOTH(CPUID); break;
12182 case VMX_EXIT_GETSEC: SET_BOTH(GETSEC); break;
12183 case VMX_EXIT_HLT: SET_BOTH(HALT); break;
12184 case VMX_EXIT_INVD: SET_BOTH(INVD); break;
12185 case VMX_EXIT_INVLPG: SET_BOTH(INVLPG); break;
12186 case VMX_EXIT_RDPMC: SET_BOTH(RDPMC); break;
12187 case VMX_EXIT_RDTSC: SET_BOTH(RDTSC); break;
12188 case VMX_EXIT_RSM: SET_BOTH(RSM); break;
12189 case VMX_EXIT_VMCALL: SET_BOTH(VMM_CALL); break;
12190 case VMX_EXIT_VMCLEAR: SET_BOTH(VMX_VMCLEAR); break;
12191 case VMX_EXIT_VMLAUNCH: SET_BOTH(VMX_VMLAUNCH); break;
12192 case VMX_EXIT_VMPTRLD: SET_BOTH(VMX_VMPTRLD); break;
12193 case VMX_EXIT_VMPTRST: SET_BOTH(VMX_VMPTRST); break;
12194 case VMX_EXIT_VMREAD: SET_BOTH(VMX_VMREAD); break;
12195 case VMX_EXIT_VMRESUME: SET_BOTH(VMX_VMRESUME); break;
12196 case VMX_EXIT_VMWRITE: SET_BOTH(VMX_VMWRITE); break;
12197 case VMX_EXIT_VMXOFF: SET_BOTH(VMX_VMXOFF); break;
12198 case VMX_EXIT_VMXON: SET_BOTH(VMX_VMXON); break;
12199 case VMX_EXIT_MOV_CRX:
12200 hmR0VmxReadExitQualVmcs(pVmxTransient);
12201 if (VMX_EXIT_QUAL_CRX_ACCESS(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_CRX_ACCESS_READ)
12202 SET_BOTH(CRX_READ);
12203 else
12204 SET_BOTH(CRX_WRITE);
12205 uEventArg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual);
12206 break;
12207 case VMX_EXIT_MOV_DRX:
12208 hmR0VmxReadExitQualVmcs(pVmxTransient);
12209 if ( VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual)
12210 == VMX_EXIT_QUAL_DRX_DIRECTION_READ)
12211 SET_BOTH(DRX_READ);
12212 else
12213 SET_BOTH(DRX_WRITE);
12214 uEventArg = VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual);
12215 break;
12216 case VMX_EXIT_RDMSR: SET_BOTH(RDMSR); break;
12217 case VMX_EXIT_WRMSR: SET_BOTH(WRMSR); break;
12218 case VMX_EXIT_MWAIT: SET_BOTH(MWAIT); break;
12219 case VMX_EXIT_MONITOR: SET_BOTH(MONITOR); break;
12220 case VMX_EXIT_PAUSE: SET_BOTH(PAUSE); break;
12221 case VMX_EXIT_GDTR_IDTR_ACCESS:
12222 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
12223 switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_XDTR_INSINFO_INSTR_ID))
12224 {
12225 case VMX_XDTR_INSINFO_II_SGDT: SET_BOTH(SGDT); break;
12226 case VMX_XDTR_INSINFO_II_SIDT: SET_BOTH(SIDT); break;
12227 case VMX_XDTR_INSINFO_II_LGDT: SET_BOTH(LGDT); break;
12228 case VMX_XDTR_INSINFO_II_LIDT: SET_BOTH(LIDT); break;
12229 }
12230 break;
12231
12232 case VMX_EXIT_LDTR_TR_ACCESS:
12233 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
12234 switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_YYTR_INSINFO_INSTR_ID))
12235 {
12236 case VMX_YYTR_INSINFO_II_SLDT: SET_BOTH(SLDT); break;
12237 case VMX_YYTR_INSINFO_II_STR: SET_BOTH(STR); break;
12238 case VMX_YYTR_INSINFO_II_LLDT: SET_BOTH(LLDT); break;
12239 case VMX_YYTR_INSINFO_II_LTR: SET_BOTH(LTR); break;
12240 }
12241 break;
12242
12243 case VMX_EXIT_INVEPT: SET_BOTH(VMX_INVEPT); break;
12244 case VMX_EXIT_RDTSCP: SET_BOTH(RDTSCP); break;
12245 case VMX_EXIT_INVVPID: SET_BOTH(VMX_INVVPID); break;
12246 case VMX_EXIT_WBINVD: SET_BOTH(WBINVD); break;
12247 case VMX_EXIT_XSETBV: SET_BOTH(XSETBV); break;
12248 case VMX_EXIT_RDRAND: SET_BOTH(RDRAND); break;
12249 case VMX_EXIT_INVPCID: SET_BOTH(VMX_INVPCID); break;
12250 case VMX_EXIT_VMFUNC: SET_BOTH(VMX_VMFUNC); break;
12251 case VMX_EXIT_RDSEED: SET_BOTH(RDSEED); break;
12252 case VMX_EXIT_XSAVES: SET_BOTH(XSAVES); break;
12253 case VMX_EXIT_XRSTORS: SET_BOTH(XRSTORS); break;
12254
12255 /* Events that aren't relevant at this point. */
12256 case VMX_EXIT_EXT_INT:
12257 case VMX_EXIT_INT_WINDOW:
12258 case VMX_EXIT_NMI_WINDOW:
12259 case VMX_EXIT_TPR_BELOW_THRESHOLD:
12260 case VMX_EXIT_PREEMPT_TIMER:
12261 case VMX_EXIT_IO_INSTR:
12262 break;
12263
12264 /* Errors and unexpected events. */
12265 case VMX_EXIT_INIT_SIGNAL:
12266 case VMX_EXIT_SIPI:
12267 case VMX_EXIT_IO_SMI:
12268 case VMX_EXIT_SMI:
12269 case VMX_EXIT_ERR_INVALID_GUEST_STATE:
12270 case VMX_EXIT_ERR_MSR_LOAD:
12271 case VMX_EXIT_ERR_MACHINE_CHECK:
12272 case VMX_EXIT_PML_FULL:
12273 case VMX_EXIT_VIRTUALIZED_EOI:
12274 break;
12275
12276 default:
12277 AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason));
12278 break;
12279 }
12280#undef SET_BOTH
12281#undef SET_EXIT
12282
12283 /*
12284 * Dtrace tracepoints go first. We do them here at once so we don't
12285     * have to repeat the guest state saving and related code a few dozen times.
12286     * The downside is that we've got to repeat the switch, though this time
12287 * we use enmEvent since the probes are a subset of what DBGF does.
12288 */
12289 if (fDtrace1 || fDtrace2)
12290 {
12291 hmR0VmxReadExitQualVmcs(pVmxTransient);
12292 hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
12293 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
12294 switch (enmEvent1)
12295 {
12296 /** @todo consider which extra parameters would be helpful for each probe. */
12297 case DBGFEVENT_END: break;
12298 case DBGFEVENT_XCPT_DE: VBOXVMM_XCPT_DE(pVCpu, pCtx); break;
12299 case DBGFEVENT_XCPT_DB: VBOXVMM_XCPT_DB(pVCpu, pCtx, pCtx->dr[6]); break;
12300 case DBGFEVENT_XCPT_BP: VBOXVMM_XCPT_BP(pVCpu, pCtx); break;
12301 case DBGFEVENT_XCPT_OF: VBOXVMM_XCPT_OF(pVCpu, pCtx); break;
12302 case DBGFEVENT_XCPT_BR: VBOXVMM_XCPT_BR(pVCpu, pCtx); break;
12303 case DBGFEVENT_XCPT_UD: VBOXVMM_XCPT_UD(pVCpu, pCtx); break;
12304 case DBGFEVENT_XCPT_NM: VBOXVMM_XCPT_NM(pVCpu, pCtx); break;
12305 case DBGFEVENT_XCPT_DF: VBOXVMM_XCPT_DF(pVCpu, pCtx); break;
12306 case DBGFEVENT_XCPT_TS: VBOXVMM_XCPT_TS(pVCpu, pCtx, uEventArg); break;
12307 case DBGFEVENT_XCPT_NP: VBOXVMM_XCPT_NP(pVCpu, pCtx, uEventArg); break;
12308 case DBGFEVENT_XCPT_SS: VBOXVMM_XCPT_SS(pVCpu, pCtx, uEventArg); break;
12309 case DBGFEVENT_XCPT_GP: VBOXVMM_XCPT_GP(pVCpu, pCtx, uEventArg); break;
12310 case DBGFEVENT_XCPT_PF: VBOXVMM_XCPT_PF(pVCpu, pCtx, uEventArg, pCtx->cr2); break;
12311 case DBGFEVENT_XCPT_MF: VBOXVMM_XCPT_MF(pVCpu, pCtx); break;
12312 case DBGFEVENT_XCPT_AC: VBOXVMM_XCPT_AC(pVCpu, pCtx); break;
12313 case DBGFEVENT_XCPT_XF: VBOXVMM_XCPT_XF(pVCpu, pCtx); break;
12314 case DBGFEVENT_XCPT_VE: VBOXVMM_XCPT_VE(pVCpu, pCtx); break;
12315 case DBGFEVENT_XCPT_SX: VBOXVMM_XCPT_SX(pVCpu, pCtx, uEventArg); break;
12316 case DBGFEVENT_INTERRUPT_SOFTWARE: VBOXVMM_INT_SOFTWARE(pVCpu, pCtx, (uint8_t)uEventArg); break;
12317 case DBGFEVENT_INSTR_CPUID: VBOXVMM_INSTR_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break;
12318 case DBGFEVENT_INSTR_GETSEC: VBOXVMM_INSTR_GETSEC(pVCpu, pCtx); break;
12319 case DBGFEVENT_INSTR_HALT: VBOXVMM_INSTR_HALT(pVCpu, pCtx); break;
12320 case DBGFEVENT_INSTR_INVD: VBOXVMM_INSTR_INVD(pVCpu, pCtx); break;
12321 case DBGFEVENT_INSTR_INVLPG: VBOXVMM_INSTR_INVLPG(pVCpu, pCtx); break;
12322 case DBGFEVENT_INSTR_RDPMC: VBOXVMM_INSTR_RDPMC(pVCpu, pCtx); break;
12323 case DBGFEVENT_INSTR_RDTSC: VBOXVMM_INSTR_RDTSC(pVCpu, pCtx); break;
12324 case DBGFEVENT_INSTR_RSM: VBOXVMM_INSTR_RSM(pVCpu, pCtx); break;
12325 case DBGFEVENT_INSTR_CRX_READ: VBOXVMM_INSTR_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
12326 case DBGFEVENT_INSTR_CRX_WRITE: VBOXVMM_INSTR_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
12327 case DBGFEVENT_INSTR_DRX_READ: VBOXVMM_INSTR_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
12328 case DBGFEVENT_INSTR_DRX_WRITE: VBOXVMM_INSTR_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
12329 case DBGFEVENT_INSTR_RDMSR: VBOXVMM_INSTR_RDMSR(pVCpu, pCtx, pCtx->ecx); break;
12330 case DBGFEVENT_INSTR_WRMSR: VBOXVMM_INSTR_WRMSR(pVCpu, pCtx, pCtx->ecx,
12331 RT_MAKE_U64(pCtx->eax, pCtx->edx)); break;
12332 case DBGFEVENT_INSTR_MWAIT: VBOXVMM_INSTR_MWAIT(pVCpu, pCtx); break;
12333 case DBGFEVENT_INSTR_MONITOR: VBOXVMM_INSTR_MONITOR(pVCpu, pCtx); break;
12334 case DBGFEVENT_INSTR_PAUSE: VBOXVMM_INSTR_PAUSE(pVCpu, pCtx); break;
12335 case DBGFEVENT_INSTR_SGDT: VBOXVMM_INSTR_SGDT(pVCpu, pCtx); break;
12336 case DBGFEVENT_INSTR_SIDT: VBOXVMM_INSTR_SIDT(pVCpu, pCtx); break;
12337 case DBGFEVENT_INSTR_LGDT: VBOXVMM_INSTR_LGDT(pVCpu, pCtx); break;
12338 case DBGFEVENT_INSTR_LIDT: VBOXVMM_INSTR_LIDT(pVCpu, pCtx); break;
12339 case DBGFEVENT_INSTR_SLDT: VBOXVMM_INSTR_SLDT(pVCpu, pCtx); break;
12340 case DBGFEVENT_INSTR_STR: VBOXVMM_INSTR_STR(pVCpu, pCtx); break;
12341 case DBGFEVENT_INSTR_LLDT: VBOXVMM_INSTR_LLDT(pVCpu, pCtx); break;
12342 case DBGFEVENT_INSTR_LTR: VBOXVMM_INSTR_LTR(pVCpu, pCtx); break;
12343 case DBGFEVENT_INSTR_RDTSCP: VBOXVMM_INSTR_RDTSCP(pVCpu, pCtx); break;
12344 case DBGFEVENT_INSTR_WBINVD: VBOXVMM_INSTR_WBINVD(pVCpu, pCtx); break;
12345 case DBGFEVENT_INSTR_XSETBV: VBOXVMM_INSTR_XSETBV(pVCpu, pCtx); break;
12346 case DBGFEVENT_INSTR_RDRAND: VBOXVMM_INSTR_RDRAND(pVCpu, pCtx); break;
12347 case DBGFEVENT_INSTR_RDSEED: VBOXVMM_INSTR_RDSEED(pVCpu, pCtx); break;
12348 case DBGFEVENT_INSTR_XSAVES: VBOXVMM_INSTR_XSAVES(pVCpu, pCtx); break;
12349 case DBGFEVENT_INSTR_XRSTORS: VBOXVMM_INSTR_XRSTORS(pVCpu, pCtx); break;
12350 case DBGFEVENT_INSTR_VMM_CALL: VBOXVMM_INSTR_VMM_CALL(pVCpu, pCtx); break;
12351 case DBGFEVENT_INSTR_VMX_VMCLEAR: VBOXVMM_INSTR_VMX_VMCLEAR(pVCpu, pCtx); break;
12352 case DBGFEVENT_INSTR_VMX_VMLAUNCH: VBOXVMM_INSTR_VMX_VMLAUNCH(pVCpu, pCtx); break;
12353 case DBGFEVENT_INSTR_VMX_VMPTRLD: VBOXVMM_INSTR_VMX_VMPTRLD(pVCpu, pCtx); break;
12354 case DBGFEVENT_INSTR_VMX_VMPTRST: VBOXVMM_INSTR_VMX_VMPTRST(pVCpu, pCtx); break;
12355 case DBGFEVENT_INSTR_VMX_VMREAD: VBOXVMM_INSTR_VMX_VMREAD(pVCpu, pCtx); break;
12356 case DBGFEVENT_INSTR_VMX_VMRESUME: VBOXVMM_INSTR_VMX_VMRESUME(pVCpu, pCtx); break;
12357 case DBGFEVENT_INSTR_VMX_VMWRITE: VBOXVMM_INSTR_VMX_VMWRITE(pVCpu, pCtx); break;
12358 case DBGFEVENT_INSTR_VMX_VMXOFF: VBOXVMM_INSTR_VMX_VMXOFF(pVCpu, pCtx); break;
12359 case DBGFEVENT_INSTR_VMX_VMXON: VBOXVMM_INSTR_VMX_VMXON(pVCpu, pCtx); break;
12360 case DBGFEVENT_INSTR_VMX_INVEPT: VBOXVMM_INSTR_VMX_INVEPT(pVCpu, pCtx); break;
12361 case DBGFEVENT_INSTR_VMX_INVVPID: VBOXVMM_INSTR_VMX_INVVPID(pVCpu, pCtx); break;
12362 case DBGFEVENT_INSTR_VMX_INVPCID: VBOXVMM_INSTR_VMX_INVPCID(pVCpu, pCtx); break;
12363 case DBGFEVENT_INSTR_VMX_VMFUNC: VBOXVMM_INSTR_VMX_VMFUNC(pVCpu, pCtx); break;
12364 default: AssertMsgFailed(("enmEvent1=%d uExitReason=%d\n", enmEvent1, uExitReason)); break;
12365 }
12366 switch (enmEvent2)
12367 {
12368 /** @todo consider which extra parameters would be helpful for each probe. */
12369 case DBGFEVENT_END: break;
12370 case DBGFEVENT_EXIT_TASK_SWITCH: VBOXVMM_EXIT_TASK_SWITCH(pVCpu, pCtx); break;
12371 case DBGFEVENT_EXIT_CPUID: VBOXVMM_EXIT_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break;
12372 case DBGFEVENT_EXIT_GETSEC: VBOXVMM_EXIT_GETSEC(pVCpu, pCtx); break;
12373 case DBGFEVENT_EXIT_HALT: VBOXVMM_EXIT_HALT(pVCpu, pCtx); break;
12374 case DBGFEVENT_EXIT_INVD: VBOXVMM_EXIT_INVD(pVCpu, pCtx); break;
12375 case DBGFEVENT_EXIT_INVLPG: VBOXVMM_EXIT_INVLPG(pVCpu, pCtx); break;
12376 case DBGFEVENT_EXIT_RDPMC: VBOXVMM_EXIT_RDPMC(pVCpu, pCtx); break;
12377 case DBGFEVENT_EXIT_RDTSC: VBOXVMM_EXIT_RDTSC(pVCpu, pCtx); break;
12378 case DBGFEVENT_EXIT_RSM: VBOXVMM_EXIT_RSM(pVCpu, pCtx); break;
12379 case DBGFEVENT_EXIT_CRX_READ: VBOXVMM_EXIT_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
12380 case DBGFEVENT_EXIT_CRX_WRITE: VBOXVMM_EXIT_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
12381 case DBGFEVENT_EXIT_DRX_READ: VBOXVMM_EXIT_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break;
12382 case DBGFEVENT_EXIT_DRX_WRITE: VBOXVMM_EXIT_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break;
12383 case DBGFEVENT_EXIT_RDMSR: VBOXVMM_EXIT_RDMSR(pVCpu, pCtx, pCtx->ecx); break;
12384 case DBGFEVENT_EXIT_WRMSR: VBOXVMM_EXIT_WRMSR(pVCpu, pCtx, pCtx->ecx,
12385 RT_MAKE_U64(pCtx->eax, pCtx->edx)); break;
12386 case DBGFEVENT_EXIT_MWAIT: VBOXVMM_EXIT_MWAIT(pVCpu, pCtx); break;
12387 case DBGFEVENT_EXIT_MONITOR: VBOXVMM_EXIT_MONITOR(pVCpu, pCtx); break;
12388 case DBGFEVENT_EXIT_PAUSE: VBOXVMM_EXIT_PAUSE(pVCpu, pCtx); break;
12389 case DBGFEVENT_EXIT_SGDT: VBOXVMM_EXIT_SGDT(pVCpu, pCtx); break;
12390 case DBGFEVENT_EXIT_SIDT: VBOXVMM_EXIT_SIDT(pVCpu, pCtx); break;
12391 case DBGFEVENT_EXIT_LGDT: VBOXVMM_EXIT_LGDT(pVCpu, pCtx); break;
12392 case DBGFEVENT_EXIT_LIDT: VBOXVMM_EXIT_LIDT(pVCpu, pCtx); break;
12393 case DBGFEVENT_EXIT_SLDT: VBOXVMM_EXIT_SLDT(pVCpu, pCtx); break;
12394 case DBGFEVENT_EXIT_STR: VBOXVMM_EXIT_STR(pVCpu, pCtx); break;
12395 case DBGFEVENT_EXIT_LLDT: VBOXVMM_EXIT_LLDT(pVCpu, pCtx); break;
12396 case DBGFEVENT_EXIT_LTR: VBOXVMM_EXIT_LTR(pVCpu, pCtx); break;
12397 case DBGFEVENT_EXIT_RDTSCP: VBOXVMM_EXIT_RDTSCP(pVCpu, pCtx); break;
12398 case DBGFEVENT_EXIT_WBINVD: VBOXVMM_EXIT_WBINVD(pVCpu, pCtx); break;
12399 case DBGFEVENT_EXIT_XSETBV: VBOXVMM_EXIT_XSETBV(pVCpu, pCtx); break;
12400 case DBGFEVENT_EXIT_RDRAND: VBOXVMM_EXIT_RDRAND(pVCpu, pCtx); break;
12401 case DBGFEVENT_EXIT_RDSEED: VBOXVMM_EXIT_RDSEED(pVCpu, pCtx); break;
12402 case DBGFEVENT_EXIT_XSAVES: VBOXVMM_EXIT_XSAVES(pVCpu, pCtx); break;
12403 case DBGFEVENT_EXIT_XRSTORS: VBOXVMM_EXIT_XRSTORS(pVCpu, pCtx); break;
12404 case DBGFEVENT_EXIT_VMM_CALL: VBOXVMM_EXIT_VMM_CALL(pVCpu, pCtx); break;
12405 case DBGFEVENT_EXIT_VMX_VMCLEAR: VBOXVMM_EXIT_VMX_VMCLEAR(pVCpu, pCtx); break;
12406 case DBGFEVENT_EXIT_VMX_VMLAUNCH: VBOXVMM_EXIT_VMX_VMLAUNCH(pVCpu, pCtx); break;
12407 case DBGFEVENT_EXIT_VMX_VMPTRLD: VBOXVMM_EXIT_VMX_VMPTRLD(pVCpu, pCtx); break;
12408 case DBGFEVENT_EXIT_VMX_VMPTRST: VBOXVMM_EXIT_VMX_VMPTRST(pVCpu, pCtx); break;
12409 case DBGFEVENT_EXIT_VMX_VMREAD: VBOXVMM_EXIT_VMX_VMREAD(pVCpu, pCtx); break;
12410 case DBGFEVENT_EXIT_VMX_VMRESUME: VBOXVMM_EXIT_VMX_VMRESUME(pVCpu, pCtx); break;
12411 case DBGFEVENT_EXIT_VMX_VMWRITE: VBOXVMM_EXIT_VMX_VMWRITE(pVCpu, pCtx); break;
12412 case DBGFEVENT_EXIT_VMX_VMXOFF: VBOXVMM_EXIT_VMX_VMXOFF(pVCpu, pCtx); break;
12413 case DBGFEVENT_EXIT_VMX_VMXON: VBOXVMM_EXIT_VMX_VMXON(pVCpu, pCtx); break;
12414 case DBGFEVENT_EXIT_VMX_INVEPT: VBOXVMM_EXIT_VMX_INVEPT(pVCpu, pCtx); break;
12415 case DBGFEVENT_EXIT_VMX_INVVPID: VBOXVMM_EXIT_VMX_INVVPID(pVCpu, pCtx); break;
12416 case DBGFEVENT_EXIT_VMX_INVPCID: VBOXVMM_EXIT_VMX_INVPCID(pVCpu, pCtx); break;
12417 case DBGFEVENT_EXIT_VMX_VMFUNC: VBOXVMM_EXIT_VMX_VMFUNC(pVCpu, pCtx); break;
12418 case DBGFEVENT_EXIT_VMX_EPT_MISCONFIG: VBOXVMM_EXIT_VMX_EPT_MISCONFIG(pVCpu, pCtx); break;
12419 case DBGFEVENT_EXIT_VMX_EPT_VIOLATION: VBOXVMM_EXIT_VMX_EPT_VIOLATION(pVCpu, pCtx); break;
12420 case DBGFEVENT_EXIT_VMX_VAPIC_ACCESS: VBOXVMM_EXIT_VMX_VAPIC_ACCESS(pVCpu, pCtx); break;
12421 case DBGFEVENT_EXIT_VMX_VAPIC_WRITE: VBOXVMM_EXIT_VMX_VAPIC_WRITE(pVCpu, pCtx); break;
12422 default: AssertMsgFailed(("enmEvent2=%d uExitReason=%d\n", enmEvent2, uExitReason)); break;
12423 }
12424 }
12425
12426 /*
12427     * Fire off the DBGF event, if enabled (our check here is just a quick one,
12428 * the DBGF call will do a full check).
12429 *
12430 * Note! DBGF sets DBGFEVENT_INTERRUPT_SOFTWARE in the bitmap.
12431     * Note! If we have two events, we prioritize the first, i.e. the instruction
12432     *       one, in order to avoid event nesting.
12433 */
12434 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
12435 if ( enmEvent1 != DBGFEVENT_END
12436 && DBGF_IS_EVENT_ENABLED(pVM, enmEvent1))
12437 {
12438 hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
12439 VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent1, DBGFEVENTCTX_HM, 1, uEventArg);
12440 if (rcStrict != VINF_SUCCESS)
12441 return rcStrict;
12442 }
12443 else if ( enmEvent2 != DBGFEVENT_END
12444 && DBGF_IS_EVENT_ENABLED(pVM, enmEvent2))
12445 {
12446 hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
12447 VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent2, DBGFEVENTCTX_HM, 1, uEventArg);
12448 if (rcStrict != VINF_SUCCESS)
12449 return rcStrict;
12450 }
12451
12452 return VINF_SUCCESS;
12453}
12454
12455
12456/**
12457 * Single-stepping VM-exit filtering.
12458 *
12459 * This is preprocessing the VM-exits and deciding whether we've gotten far
12460 * enough to return VINF_EM_DBG_STEPPED already. If not, normal VM-exit
12461 * handling is performed.
12462 *
12463 * @returns Strict VBox status code (i.e. informational status codes too).
12464 * @param pVCpu The cross context virtual CPU structure of the calling EMT.
12465 * @param pVmxTransient The VMX-transient structure.
12466 * @param pDbgState The debug state.
12467 */
12468DECLINLINE(VBOXSTRICTRC) hmR0VmxRunDebugHandleExit(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState)
12469{
12470 /*
12471 * Expensive (saves context) generic dtrace VM-exit probe.
12472 */
12473 uint32_t const uExitReason = pVmxTransient->uExitReason;
12474 if (!VBOXVMM_R0_HMVMX_VMEXIT_ENABLED())
12475 { /* more likely */ }
12476 else
12477 {
12478 hmR0VmxReadExitQualVmcs(pVmxTransient);
12479 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
12480 AssertRC(rc);
12481 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, &pVCpu->cpum.GstCtx, pVmxTransient->uExitReason, pVmxTransient->uExitQual);
12482 }
12483
12484 /*
12485 * Check for host NMI, just to get that out of the way.
12486 */
12487 if (uExitReason != VMX_EXIT_XCPT_OR_NMI)
12488 { /* normally likely */ }
12489 else
12490 {
12491 hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
12492 uint32_t const uIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo);
12493 if (uIntType == VMX_EXIT_INT_INFO_TYPE_NMI)
12494 return hmR0VmxExitHostNmi(pVCpu, pVmxTransient->pVmcsInfo);
12495 }
12496
12497 /*
12498 * Check for single stepping event if we're stepping.
12499 */
12500 if (pVCpu->hm.s.fSingleInstruction)
12501 {
12502 switch (uExitReason)
12503 {
12504 case VMX_EXIT_MTF:
12505 return hmR0VmxExitMtf(pVCpu, pVmxTransient);
12506
12507 /* Various events: */
12508 case VMX_EXIT_XCPT_OR_NMI:
12509 case VMX_EXIT_EXT_INT:
12510 case VMX_EXIT_TRIPLE_FAULT:
12511 case VMX_EXIT_INT_WINDOW:
12512 case VMX_EXIT_NMI_WINDOW:
12513 case VMX_EXIT_TASK_SWITCH:
12514 case VMX_EXIT_TPR_BELOW_THRESHOLD:
12515 case VMX_EXIT_APIC_ACCESS:
12516 case VMX_EXIT_EPT_VIOLATION:
12517 case VMX_EXIT_EPT_MISCONFIG:
12518 case VMX_EXIT_PREEMPT_TIMER:
12519
12520 /* Instruction specific VM-exits: */
12521 case VMX_EXIT_CPUID:
12522 case VMX_EXIT_GETSEC:
12523 case VMX_EXIT_HLT:
12524 case VMX_EXIT_INVD:
12525 case VMX_EXIT_INVLPG:
12526 case VMX_EXIT_RDPMC:
12527 case VMX_EXIT_RDTSC:
12528 case VMX_EXIT_RSM:
12529 case VMX_EXIT_VMCALL:
12530 case VMX_EXIT_VMCLEAR:
12531 case VMX_EXIT_VMLAUNCH:
12532 case VMX_EXIT_VMPTRLD:
12533 case VMX_EXIT_VMPTRST:
12534 case VMX_EXIT_VMREAD:
12535 case VMX_EXIT_VMRESUME:
12536 case VMX_EXIT_VMWRITE:
12537 case VMX_EXIT_VMXOFF:
12538 case VMX_EXIT_VMXON:
12539 case VMX_EXIT_MOV_CRX:
12540 case VMX_EXIT_MOV_DRX:
12541 case VMX_EXIT_IO_INSTR:
12542 case VMX_EXIT_RDMSR:
12543 case VMX_EXIT_WRMSR:
12544 case VMX_EXIT_MWAIT:
12545 case VMX_EXIT_MONITOR:
12546 case VMX_EXIT_PAUSE:
12547 case VMX_EXIT_GDTR_IDTR_ACCESS:
12548 case VMX_EXIT_LDTR_TR_ACCESS:
12549 case VMX_EXIT_INVEPT:
12550 case VMX_EXIT_RDTSCP:
12551 case VMX_EXIT_INVVPID:
12552 case VMX_EXIT_WBINVD:
12553 case VMX_EXIT_XSETBV:
12554 case VMX_EXIT_RDRAND:
12555 case VMX_EXIT_INVPCID:
12556 case VMX_EXIT_VMFUNC:
12557 case VMX_EXIT_RDSEED:
12558 case VMX_EXIT_XSAVES:
12559 case VMX_EXIT_XRSTORS:
12560 {
12561 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
12562 AssertRCReturn(rc, rc);
12563 if ( pVCpu->cpum.GstCtx.rip != pDbgState->uRipStart
12564 || pVCpu->cpum.GstCtx.cs.Sel != pDbgState->uCsStart)
12565 return VINF_EM_DBG_STEPPED;
12566 break;
12567 }
12568
12569 /* Errors and unexpected events: */
12570 case VMX_EXIT_INIT_SIGNAL:
12571 case VMX_EXIT_SIPI:
12572 case VMX_EXIT_IO_SMI:
12573 case VMX_EXIT_SMI:
12574 case VMX_EXIT_ERR_INVALID_GUEST_STATE:
12575 case VMX_EXIT_ERR_MSR_LOAD:
12576 case VMX_EXIT_ERR_MACHINE_CHECK:
12577 case VMX_EXIT_PML_FULL:
12578 case VMX_EXIT_VIRTUALIZED_EOI:
12579 case VMX_EXIT_APIC_WRITE: /* Some talk about this being fault like, so I guess we must process it? */
12580 break;
12581
12582 default:
12583 AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason));
12584 break;
12585 }
12586 }
12587
12588 /*
12589 * Check for debugger event breakpoints and dtrace probes.
12590 */
12591 if ( uExitReason < RT_ELEMENTS(pDbgState->bmExitsToCheck) * 32U
12592 && ASMBitTest(pDbgState->bmExitsToCheck, uExitReason) )
12593 {
12594 VBOXSTRICTRC rcStrict = hmR0VmxHandleExitDtraceEvents(pVCpu, pVmxTransient, uExitReason);
12595 if (rcStrict != VINF_SUCCESS)
12596 return rcStrict;
12597 }
12598
12599 /*
12600 * Normal processing.
12601 */
12602#ifdef HMVMX_USE_FUNCTION_TABLE
12603 return g_aVMExitHandlers[uExitReason].pfn(pVCpu, pVmxTransient);
12604#else
12605 return hmR0VmxHandleExit(pVCpu, pVmxTransient, uExitReason);
12606#endif
12607}
12608
12609
12610/**
12611 * Single steps guest code using hardware-assisted VMX.
12612 *
12613 * This is -not- the same as the guest single-stepping itself (say, using EFLAGS.TF),
12614 * but rather single-stepping the guest from the hypervisor debugger.
12615 *
12616 * @returns Strict VBox status code (i.e. informational status codes too).
12617 * @param pVCpu The cross context virtual CPU structure.
12618 * @param pcLoops Pointer to the number of executed loops.
12619 *
12620 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
12621 */
12622static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPUCC pVCpu, uint32_t *pcLoops)
12623{
12624 uint32_t const cMaxResumeLoops = pVCpu->CTX_SUFF(pVM)->hmr0.s.cMaxResumeLoops;
12625 Assert(pcLoops);
12626 Assert(*pcLoops <= cMaxResumeLoops);
12627
12628 VMXTRANSIENT VmxTransient;
12629 RT_ZERO(VmxTransient);
12630 VmxTransient.pVmcsInfo = hmGetVmxActiveVmcsInfo(pVCpu);
12631
12632 /* Set HMCPU indicators. */
12633 bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction;
12634 pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu);
12635 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
12636 pVCpu->hmr0.s.fUsingDebugLoop = true;
12637
12638 /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */
12639 VMXRUNDBGSTATE DbgState;
12640 hmR0VmxRunDebugStateInit(pVCpu, &VmxTransient, &DbgState);
12641 hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
12642
12643 /*
12644 * The loop.
12645 */
12646 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
12647 for (;;)
12648 {
12649 Assert(!HMR0SuspendPending());
12650 HMVMX_ASSERT_CPU_SAFE(pVCpu);
12651 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
12652 bool fStepping = pVCpu->hm.s.fSingleInstruction;
12653
12654 /* Set up VM-execution controls the next two can respond to. */
12655 hmR0VmxPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
12656
12657 /*
12658 * Preparatory work for running guest code, this may force us to
12659 * return to ring-3.
12660 *
12661 * Warning! This bugger disables interrupts on VINF_SUCCESS!
12662 */
12663 rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping);
12664 if (rcStrict != VINF_SUCCESS)
12665 break;
12666
12667 /* Interrupts are disabled at this point! */
12668 hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient);
12669
12670 /* Override any obnoxious code in the above two calls. */
12671 hmR0VmxPreRunGuestDebugStateApply(pVCpu, &VmxTransient, &DbgState);
12672
12673 /*
12674 * Finally execute the guest.
12675 */
12676 int rcRun = hmR0VmxRunGuest(pVCpu, &VmxTransient);
12677
12678 hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun);
12679 /* Interrupts are re-enabled at this point! */
12680
12681 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
12682 if (RT_SUCCESS(rcRun))
12683 { /* very likely */ }
12684 else
12685 {
12686 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x);
12687 hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient);
12688 return rcRun;
12689 }
12690
12691 /* Profile the VM-exit. */
12692 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
12693 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
12694 STAM_COUNTER_INC(&pVCpu->hm.s.aStatExitReason[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
12695 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x);
12696 HMVMX_START_EXIT_DISPATCH_PROF();
12697
12698 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason);
12699
12700 /*
12701         * Handle the VM-exit - we quit earlier on certain VM-exits, see hmR0VmxRunDebugHandleExit().
12702 */
12703 rcStrict = hmR0VmxRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState);
12704 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x);
12705 if (rcStrict != VINF_SUCCESS)
12706 break;
12707 if (++(*pcLoops) > cMaxResumeLoops)
12708 {
12709 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
12710 rcStrict = VINF_EM_RAW_INTERRUPT;
12711 break;
12712 }
12713
12714 /*
12715         * Stepping: Did the RIP change? If so, consider it a single step.
12716 * Otherwise, make sure one of the TFs gets set.
12717 */
12718 if (fStepping)
12719 {
12720 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
12721 AssertRC(rc);
12722 if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart
12723 || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart)
12724 {
12725 rcStrict = VINF_EM_DBG_STEPPED;
12726 break;
12727 }
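                 /* RIP/CS unchanged: mark DR7 as dirty so the trap-flag/stepping setup is
                    re-evaluated and re-applied before the next VM-entry. */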
12728 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
12729 }
12730
12731 /*
12732         * Update when the dtrace settings change (DBGF kicks us, so no need to check).
12733 */
12734 if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo)
12735 hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &VmxTransient, &DbgState);
12736
12737 /* Restore all controls applied by hmR0VmxPreRunGuestDebugStateApply above. */
12738 rcStrict = hmR0VmxRunDebugStateRevert(pVCpu, &VmxTransient, &DbgState, rcStrict);
12739 Assert(rcStrict == VINF_SUCCESS);
12740 }
12741
12742 /*
12743 * Clear the X86_EFL_TF if necessary.
12744 */
12745 if (pVCpu->hmr0.s.fClearTrapFlag)
12746 {
12747 int rc = hmR0VmxImportGuestState(pVCpu, VmxTransient.pVmcsInfo, CPUMCTX_EXTRN_RFLAGS);
12748 AssertRC(rc);
12749 pVCpu->hmr0.s.fClearTrapFlag = false;
12750 pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0;
12751 }
12752    /** @todo There seem to be issues with the resume flag when the monitor trap
12753     *        flag is pending without being used. Seen early in BIOS init when
12754     *        accessing the APIC page in protected mode. */
12755
12756 /* Restore HMCPU indicators. */
12757 pVCpu->hmr0.s.fUsingDebugLoop = false;
12758 pVCpu->hmr0.s.fDebugWantRdTscExit = false;
12759 pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction;
12760
12761 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
12762 return rcStrict;
12763}
12764
12765
12766/** @} */
12767
12768
12769/**
12770 * Checks if any expensive dtrace probes are enabled and we should go to the
12771 * debug loop.
12772 *
12773 * @returns true if we should use debug loop, false if not.
12774 */
12775static bool hmR0VmxAnyExpensiveProbesEnabled(void)
12776{
12777 /* It's probably faster to OR the raw 32-bit counter variables together.
12778 Since the variables are in an array and the probes are next to one
12779 another (more or less), we have good locality. So, better read
12780       eight-nine cache lines every time and only have one conditional, than
12781 128+ conditionals, right? */
12782 return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */
12783 | VBOXVMM_XCPT_DE_ENABLED_RAW()
12784 | VBOXVMM_XCPT_DB_ENABLED_RAW()
12785 | VBOXVMM_XCPT_BP_ENABLED_RAW()
12786 | VBOXVMM_XCPT_OF_ENABLED_RAW()
12787 | VBOXVMM_XCPT_BR_ENABLED_RAW()
12788 | VBOXVMM_XCPT_UD_ENABLED_RAW()
12789 | VBOXVMM_XCPT_NM_ENABLED_RAW()
12790 | VBOXVMM_XCPT_DF_ENABLED_RAW()
12791 | VBOXVMM_XCPT_TS_ENABLED_RAW()
12792 | VBOXVMM_XCPT_NP_ENABLED_RAW()
12793 | VBOXVMM_XCPT_SS_ENABLED_RAW()
12794 | VBOXVMM_XCPT_GP_ENABLED_RAW()
12795 | VBOXVMM_XCPT_PF_ENABLED_RAW()
12796 | VBOXVMM_XCPT_MF_ENABLED_RAW()
12797 | VBOXVMM_XCPT_AC_ENABLED_RAW()
12798 | VBOXVMM_XCPT_XF_ENABLED_RAW()
12799 | VBOXVMM_XCPT_VE_ENABLED_RAW()
12800 | VBOXVMM_XCPT_SX_ENABLED_RAW()
12801 | VBOXVMM_INT_SOFTWARE_ENABLED_RAW()
12802 | VBOXVMM_INT_HARDWARE_ENABLED_RAW()
12803 ) != 0
12804 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW()
12805 | VBOXVMM_INSTR_MWAIT_ENABLED_RAW()
12806 | VBOXVMM_INSTR_MONITOR_ENABLED_RAW()
12807 | VBOXVMM_INSTR_CPUID_ENABLED_RAW()
12808 | VBOXVMM_INSTR_INVD_ENABLED_RAW()
12809 | VBOXVMM_INSTR_WBINVD_ENABLED_RAW()
12810 | VBOXVMM_INSTR_INVLPG_ENABLED_RAW()
12811 | VBOXVMM_INSTR_RDTSC_ENABLED_RAW()
12812 | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW()
12813 | VBOXVMM_INSTR_RDPMC_ENABLED_RAW()
12814 | VBOXVMM_INSTR_RDMSR_ENABLED_RAW()
12815 | VBOXVMM_INSTR_WRMSR_ENABLED_RAW()
12816 | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW()
12817 | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW()
12818 | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW()
12819 | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW()
12820 | VBOXVMM_INSTR_PAUSE_ENABLED_RAW()
12821 | VBOXVMM_INSTR_XSETBV_ENABLED_RAW()
12822 | VBOXVMM_INSTR_SIDT_ENABLED_RAW()
12823 | VBOXVMM_INSTR_LIDT_ENABLED_RAW()
12824 | VBOXVMM_INSTR_SGDT_ENABLED_RAW()
12825 | VBOXVMM_INSTR_LGDT_ENABLED_RAW()
12826 | VBOXVMM_INSTR_SLDT_ENABLED_RAW()
12827 | VBOXVMM_INSTR_LLDT_ENABLED_RAW()
12828 | VBOXVMM_INSTR_STR_ENABLED_RAW()
12829 | VBOXVMM_INSTR_LTR_ENABLED_RAW()
12830 | VBOXVMM_INSTR_GETSEC_ENABLED_RAW()
12831 | VBOXVMM_INSTR_RSM_ENABLED_RAW()
12832 | VBOXVMM_INSTR_RDRAND_ENABLED_RAW()
12833 | VBOXVMM_INSTR_RDSEED_ENABLED_RAW()
12834 | VBOXVMM_INSTR_XSAVES_ENABLED_RAW()
12835 | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW()
12836 | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW()
12837 | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW()
12838 | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW()
12839 | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW()
12840 | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW()
12841 | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW()
12842 | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW()
12843 | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW()
12844 | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW()
12845 | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW()
12846 | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW()
12847 | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW()
12848 | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW()
12849 | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW()
12850 ) != 0
12851 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW()
12852 | VBOXVMM_EXIT_HALT_ENABLED_RAW()
12853 | VBOXVMM_EXIT_MWAIT_ENABLED_RAW()
12854 | VBOXVMM_EXIT_MONITOR_ENABLED_RAW()
12855 | VBOXVMM_EXIT_CPUID_ENABLED_RAW()
12856 | VBOXVMM_EXIT_INVD_ENABLED_RAW()
12857 | VBOXVMM_EXIT_WBINVD_ENABLED_RAW()
12858 | VBOXVMM_EXIT_INVLPG_ENABLED_RAW()
12859 | VBOXVMM_EXIT_RDTSC_ENABLED_RAW()
12860 | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW()
12861 | VBOXVMM_EXIT_RDPMC_ENABLED_RAW()
12862 | VBOXVMM_EXIT_RDMSR_ENABLED_RAW()
12863 | VBOXVMM_EXIT_WRMSR_ENABLED_RAW()
12864 | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW()
12865 | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW()
12866 | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW()
12867 | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW()
12868 | VBOXVMM_EXIT_PAUSE_ENABLED_RAW()
12869 | VBOXVMM_EXIT_XSETBV_ENABLED_RAW()
12870 | VBOXVMM_EXIT_SIDT_ENABLED_RAW()
12871 | VBOXVMM_EXIT_LIDT_ENABLED_RAW()
12872 | VBOXVMM_EXIT_SGDT_ENABLED_RAW()
12873 | VBOXVMM_EXIT_LGDT_ENABLED_RAW()
12874 | VBOXVMM_EXIT_SLDT_ENABLED_RAW()
12875 | VBOXVMM_EXIT_LLDT_ENABLED_RAW()
12876 | VBOXVMM_EXIT_STR_ENABLED_RAW()
12877 | VBOXVMM_EXIT_LTR_ENABLED_RAW()
12878 | VBOXVMM_EXIT_GETSEC_ENABLED_RAW()
12879 | VBOXVMM_EXIT_RSM_ENABLED_RAW()
12880 | VBOXVMM_EXIT_RDRAND_ENABLED_RAW()
12881 | VBOXVMM_EXIT_RDSEED_ENABLED_RAW()
12882 | VBOXVMM_EXIT_XSAVES_ENABLED_RAW()
12883 | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW()
12884 | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW()
12885 | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW()
12886 | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW()
12887 | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW()
12888 | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW()
12889 | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW()
12890 | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW()
12891 | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW()
12892 | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW()
12893 | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW()
12894 | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW()
12895 | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW()
12896 | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW()
12897 | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW()
12898 | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW()
12899 | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW()
12900 | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW()
12901 | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW()
12902 ) != 0;
12903}
12904
12905
12906/**
12907 * Runs the guest using hardware-assisted VMX.
12908 *
12909 * @returns Strict VBox status code (i.e. informational status codes too).
12910 * @param pVCpu The cross context virtual CPU structure.
12911 */
12912VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPUCC pVCpu)
12913{
12914 AssertPtr(pVCpu);
12915 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
12916 Assert(VMMRZCallRing3IsEnabled(pVCpu));
12917 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
12918 HMVMX_ASSERT_PREEMPT_SAFE(pVCpu);
12919
12920 VBOXSTRICTRC rcStrict;
12921 uint32_t cLoops = 0;
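    /* Re-enter the appropriate execution loop whenever we switch between ordinary guest
       execution and nested-guest (VMX non-root) execution; see the VINF_VMX_VMLAUNCH_VMRESUME
       and VINF_VMX_VMEXIT handling below. */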
12922 for (;;)
12923 {
12924#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
12925 bool const fInNestedGuestMode = CPUMIsGuestInVmxNonRootMode(pCtx);
12926#else
12927 NOREF(pCtx);
12928 bool const fInNestedGuestMode = false;
12929#endif
12930 if (!fInNestedGuestMode)
12931 {
12932 if ( !pVCpu->hm.s.fUseDebugLoop
12933 && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled())
12934 && !DBGFIsStepping(pVCpu)
12935 && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints)
12936 rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu, &cLoops);
12937 else
12938 rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu, &cLoops);
12939 }
12940#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
12941 else
12942 rcStrict = hmR0VmxRunGuestCodeNested(pVCpu, &cLoops);
12943
12944 if (rcStrict == VINF_VMX_VMLAUNCH_VMRESUME)
12945 {
12946 Assert(CPUMIsGuestInVmxNonRootMode(pCtx));
12947 continue;
12948 }
12949 if (rcStrict == VINF_VMX_VMEXIT)
12950 {
12951 Assert(!CPUMIsGuestInVmxNonRootMode(pCtx));
12952 continue;
12953 }
12954#endif
12955 break;
12956 }
12957
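    /* Fix up loop-internal status codes before going back to ring-3: let EM re-emulate the
       instruction for VERR_EM_INTERPRETER and convert a guest reset into a triple fault. */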
12958 int const rcLoop = VBOXSTRICTRC_VAL(rcStrict);
12959 switch (rcLoop)
12960 {
12961 case VERR_EM_INTERPRETER: rcStrict = VINF_EM_RAW_EMULATE_INSTR; break;
12962 case VINF_EM_RESET: rcStrict = VINF_EM_TRIPLE_FAULT; break;
12963 }
12964
12965 int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict);
12966 if (RT_FAILURE(rc2))
12967 {
12968 pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict);
12969 rcStrict = rc2;
12970 }
12971 Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn));
12972 Assert(!VMMRZCallRing3IsNotificationSet(pVCpu));
12973 return rcStrict;
12974}
12975
12976
12977#ifndef HMVMX_USE_FUNCTION_TABLE
12978/**
12979 * Handles a guest VM-exit from hardware-assisted VMX execution.
12980 *
12981 * @returns Strict VBox status code (i.e. informational status codes too).
12982 * @param pVCpu The cross context virtual CPU structure.
12983 * @param pVmxTransient The VMX-transient structure.
12984 */
12985DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExit(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
12986{
12987#ifdef DEBUG_ramshankar
12988# define VMEXIT_CALL_RET(a_fSave, a_CallExpr) \
12989 do { \
12990 if (a_fSave != 0) \
12991 hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL); \
12992 VBOXSTRICTRC rcStrict = a_CallExpr; \
12993 if (a_fSave != 0) \
12994 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); \
12995 return rcStrict; \
12996 } while (0)
12997#else
12998# define VMEXIT_CALL_RET(a_fSave, a_CallExpr) return a_CallExpr
12999#endif
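    /* Dispatch the VM-exit to its individual handler; anything not handled explicitly below
       is treated as an unexpected VM-exit. */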
13000 uint32_t const uExitReason = pVmxTransient->uExitReason;
13001 switch (uExitReason)
13002 {
13003 case VMX_EXIT_EPT_MISCONFIG: VMEXIT_CALL_RET(0, hmR0VmxExitEptMisconfig(pVCpu, pVmxTransient));
13004 case VMX_EXIT_EPT_VIOLATION: VMEXIT_CALL_RET(0, hmR0VmxExitEptViolation(pVCpu, pVmxTransient));
13005 case VMX_EXIT_IO_INSTR: VMEXIT_CALL_RET(0, hmR0VmxExitIoInstr(pVCpu, pVmxTransient));
13006 case VMX_EXIT_CPUID: VMEXIT_CALL_RET(0, hmR0VmxExitCpuid(pVCpu, pVmxTransient));
13007 case VMX_EXIT_RDTSC: VMEXIT_CALL_RET(0, hmR0VmxExitRdtsc(pVCpu, pVmxTransient));
13008 case VMX_EXIT_RDTSCP: VMEXIT_CALL_RET(0, hmR0VmxExitRdtscp(pVCpu, pVmxTransient));
13009 case VMX_EXIT_APIC_ACCESS: VMEXIT_CALL_RET(0, hmR0VmxExitApicAccess(pVCpu, pVmxTransient));
13010 case VMX_EXIT_XCPT_OR_NMI: VMEXIT_CALL_RET(0, hmR0VmxExitXcptOrNmi(pVCpu, pVmxTransient));
13011 case VMX_EXIT_MOV_CRX: VMEXIT_CALL_RET(0, hmR0VmxExitMovCRx(pVCpu, pVmxTransient));
13012 case VMX_EXIT_EXT_INT: VMEXIT_CALL_RET(0, hmR0VmxExitExtInt(pVCpu, pVmxTransient));
13013 case VMX_EXIT_INT_WINDOW: VMEXIT_CALL_RET(0, hmR0VmxExitIntWindow(pVCpu, pVmxTransient));
13014 case VMX_EXIT_TPR_BELOW_THRESHOLD: VMEXIT_CALL_RET(0, hmR0VmxExitTprBelowThreshold(pVCpu, pVmxTransient));
13015 case VMX_EXIT_MWAIT: VMEXIT_CALL_RET(0, hmR0VmxExitMwait(pVCpu, pVmxTransient));
13016 case VMX_EXIT_MONITOR: VMEXIT_CALL_RET(0, hmR0VmxExitMonitor(pVCpu, pVmxTransient));
13017 case VMX_EXIT_TASK_SWITCH: VMEXIT_CALL_RET(0, hmR0VmxExitTaskSwitch(pVCpu, pVmxTransient));
13018 case VMX_EXIT_PREEMPT_TIMER: VMEXIT_CALL_RET(0, hmR0VmxExitPreemptTimer(pVCpu, pVmxTransient));
13019 case VMX_EXIT_RDMSR: VMEXIT_CALL_RET(0, hmR0VmxExitRdmsr(pVCpu, pVmxTransient));
13020 case VMX_EXIT_WRMSR: VMEXIT_CALL_RET(0, hmR0VmxExitWrmsr(pVCpu, pVmxTransient));
13021 case VMX_EXIT_VMCALL: VMEXIT_CALL_RET(0, hmR0VmxExitVmcall(pVCpu, pVmxTransient));
13022 case VMX_EXIT_MOV_DRX: VMEXIT_CALL_RET(0, hmR0VmxExitMovDRx(pVCpu, pVmxTransient));
13023 case VMX_EXIT_HLT: VMEXIT_CALL_RET(0, hmR0VmxExitHlt(pVCpu, pVmxTransient));
13024 case VMX_EXIT_INVD: VMEXIT_CALL_RET(0, hmR0VmxExitInvd(pVCpu, pVmxTransient));
13025 case VMX_EXIT_INVLPG: VMEXIT_CALL_RET(0, hmR0VmxExitInvlpg(pVCpu, pVmxTransient));
13026 case VMX_EXIT_MTF: VMEXIT_CALL_RET(0, hmR0VmxExitMtf(pVCpu, pVmxTransient));
13027 case VMX_EXIT_PAUSE: VMEXIT_CALL_RET(0, hmR0VmxExitPause(pVCpu, pVmxTransient));
13028 case VMX_EXIT_WBINVD: VMEXIT_CALL_RET(0, hmR0VmxExitWbinvd(pVCpu, pVmxTransient));
13029 case VMX_EXIT_XSETBV: VMEXIT_CALL_RET(0, hmR0VmxExitXsetbv(pVCpu, pVmxTransient));
13030 case VMX_EXIT_INVPCID: VMEXIT_CALL_RET(0, hmR0VmxExitInvpcid(pVCpu, pVmxTransient));
13031 case VMX_EXIT_GETSEC: VMEXIT_CALL_RET(0, hmR0VmxExitGetsec(pVCpu, pVmxTransient));
13032 case VMX_EXIT_RDPMC: VMEXIT_CALL_RET(0, hmR0VmxExitRdpmc(pVCpu, pVmxTransient));
13033#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
13034 case VMX_EXIT_VMCLEAR: VMEXIT_CALL_RET(0, hmR0VmxExitVmclear(pVCpu, pVmxTransient));
13035 case VMX_EXIT_VMLAUNCH: VMEXIT_CALL_RET(0, hmR0VmxExitVmlaunch(pVCpu, pVmxTransient));
13036 case VMX_EXIT_VMPTRLD: VMEXIT_CALL_RET(0, hmR0VmxExitVmptrld(pVCpu, pVmxTransient));
13037 case VMX_EXIT_VMPTRST: VMEXIT_CALL_RET(0, hmR0VmxExitVmptrst(pVCpu, pVmxTransient));
13038 case VMX_EXIT_VMREAD: VMEXIT_CALL_RET(0, hmR0VmxExitVmread(pVCpu, pVmxTransient));
13039 case VMX_EXIT_VMRESUME: VMEXIT_CALL_RET(0, hmR0VmxExitVmresume(pVCpu, pVmxTransient));
13040 case VMX_EXIT_VMWRITE: VMEXIT_CALL_RET(0, hmR0VmxExitVmwrite(pVCpu, pVmxTransient));
13041 case VMX_EXIT_VMXOFF: VMEXIT_CALL_RET(0, hmR0VmxExitVmxoff(pVCpu, pVmxTransient));
13042 case VMX_EXIT_VMXON: VMEXIT_CALL_RET(0, hmR0VmxExitVmxon(pVCpu, pVmxTransient));
13043 case VMX_EXIT_INVVPID: VMEXIT_CALL_RET(0, hmR0VmxExitInvvpid(pVCpu, pVmxTransient));
13044 case VMX_EXIT_INVEPT: VMEXIT_CALL_RET(0, hmR0VmxExitSetPendingXcptUD(pVCpu, pVmxTransient));
13045#else
13046 case VMX_EXIT_VMCLEAR:
13047 case VMX_EXIT_VMLAUNCH:
13048 case VMX_EXIT_VMPTRLD:
13049 case VMX_EXIT_VMPTRST:
13050 case VMX_EXIT_VMREAD:
13051 case VMX_EXIT_VMRESUME:
13052 case VMX_EXIT_VMWRITE:
13053 case VMX_EXIT_VMXOFF:
13054 case VMX_EXIT_VMXON:
13055 case VMX_EXIT_INVVPID:
13056 case VMX_EXIT_INVEPT:
13057 return hmR0VmxExitSetPendingXcptUD(pVCpu, pVmxTransient);
13058#endif
13059
13060 case VMX_EXIT_TRIPLE_FAULT: return hmR0VmxExitTripleFault(pVCpu, pVmxTransient);
13061 case VMX_EXIT_NMI_WINDOW: return hmR0VmxExitNmiWindow(pVCpu, pVmxTransient);
13062 case VMX_EXIT_ERR_INVALID_GUEST_STATE: return hmR0VmxExitErrInvalidGuestState(pVCpu, pVmxTransient);
13063
13064 case VMX_EXIT_INIT_SIGNAL:
13065 case VMX_EXIT_SIPI:
13066 case VMX_EXIT_IO_SMI:
13067 case VMX_EXIT_SMI:
13068 case VMX_EXIT_ERR_MSR_LOAD:
13069 case VMX_EXIT_ERR_MACHINE_CHECK:
13070 case VMX_EXIT_PML_FULL:
13071 case VMX_EXIT_VIRTUALIZED_EOI:
13072 case VMX_EXIT_GDTR_IDTR_ACCESS:
13073 case VMX_EXIT_LDTR_TR_ACCESS:
13074 case VMX_EXIT_APIC_WRITE:
13075 case VMX_EXIT_RDRAND:
13076 case VMX_EXIT_RSM:
13077 case VMX_EXIT_VMFUNC:
13078 case VMX_EXIT_ENCLS:
13079 case VMX_EXIT_RDSEED:
13080 case VMX_EXIT_XSAVES:
13081 case VMX_EXIT_XRSTORS:
13082 case VMX_EXIT_UMWAIT:
13083 case VMX_EXIT_TPAUSE:
13084 case VMX_EXIT_LOADIWKEY:
13085 default:
13086 return hmR0VmxExitErrUnexpected(pVCpu, pVmxTransient);
13087 }
13088#undef VMEXIT_CALL_RET
13089}
13090#endif /* !HMVMX_USE_FUNCTION_TABLE */
13091
13092
13093#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
13094/**
13095 * Handles a nested-guest VM-exit from hardware-assisted VMX execution.
13096 *
13097 * @returns Strict VBox status code (i.e. informational status codes too).
13098 * @param pVCpu The cross context virtual CPU structure.
13099 * @param pVmxTransient The VMX-transient structure.
13100 */
13101DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExitNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
13102{
13103 uint32_t const uExitReason = pVmxTransient->uExitReason;
13104 switch (uExitReason)
13105 {
13106 case VMX_EXIT_EPT_MISCONFIG: return hmR0VmxExitEptMisconfig(pVCpu, pVmxTransient);
13107 case VMX_EXIT_EPT_VIOLATION: return hmR0VmxExitEptViolation(pVCpu, pVmxTransient);
13108 case VMX_EXIT_XCPT_OR_NMI: return hmR0VmxExitXcptOrNmiNested(pVCpu, pVmxTransient);
13109 case VMX_EXIT_IO_INSTR: return hmR0VmxExitIoInstrNested(pVCpu, pVmxTransient);
13110 case VMX_EXIT_HLT: return hmR0VmxExitHltNested(pVCpu, pVmxTransient);
13111
13112 /*
13113 * We shouldn't direct host physical interrupts to the nested-guest.
13114 */
13115 case VMX_EXIT_EXT_INT:
13116 return hmR0VmxExitExtInt(pVCpu, pVmxTransient);
13117
13118 /*
13119 * Instructions that cause VM-exits unconditionally, or where the condition is
13120 * taken solely from the nested hypervisor (meaning if the VM-exit
13121 * happens, it's guaranteed to be a nested-guest VM-exit).
13122 *
13123 * - Provides VM-exit instruction length ONLY.
13124 */
13125 case VMX_EXIT_CPUID: /* Unconditional. */
13126 case VMX_EXIT_VMCALL:
13127 case VMX_EXIT_GETSEC:
13128 case VMX_EXIT_INVD:
13129 case VMX_EXIT_XSETBV:
13130 case VMX_EXIT_VMLAUNCH:
13131 case VMX_EXIT_VMRESUME:
13132 case VMX_EXIT_VMXOFF:
13133 case VMX_EXIT_ENCLS: /* Condition specified solely by nested hypervisor. */
13134 case VMX_EXIT_VMFUNC:
13135 return hmR0VmxExitInstrNested(pVCpu, pVmxTransient);
13136
13137 /*
13138 * Instructions that cause VM-exits unconditionally, or where the condition is
13139 * taken solely from the nested hypervisor (meaning if the VM-exit
13140 * happens, it's guaranteed to be a nested-guest VM-exit).
13141 *
13142 * - Provides VM-exit instruction length.
13143 * - Provides VM-exit information.
13144 * - Optionally provides Exit qualification.
13145 *
13146 * Since Exit qualification is 0 for all VM-exits where it is not
13147 * applicable, reading and passing it to the guest should produce
13148 * defined behavior.
13149 *
13150 * See Intel spec. 27.2.1 "Basic VM-Exit Information".
13151 */
13152 case VMX_EXIT_INVEPT: /* Unconditional. */
13153 case VMX_EXIT_INVVPID:
13154 case VMX_EXIT_VMCLEAR:
13155 case VMX_EXIT_VMPTRLD:
13156 case VMX_EXIT_VMPTRST:
13157 case VMX_EXIT_VMXON:
13158 case VMX_EXIT_GDTR_IDTR_ACCESS: /* Condition specified solely by nested hypervisor. */
13159 case VMX_EXIT_LDTR_TR_ACCESS:
13160 case VMX_EXIT_RDRAND:
13161 case VMX_EXIT_RDSEED:
13162 case VMX_EXIT_XSAVES:
13163 case VMX_EXIT_XRSTORS:
13164 case VMX_EXIT_UMWAIT:
13165 case VMX_EXIT_TPAUSE:
13166 return hmR0VmxExitInstrWithInfoNested(pVCpu, pVmxTransient);
13167
13168 case VMX_EXIT_RDTSC: return hmR0VmxExitRdtscNested(pVCpu, pVmxTransient);
13169 case VMX_EXIT_RDTSCP: return hmR0VmxExitRdtscpNested(pVCpu, pVmxTransient);
13170 case VMX_EXIT_RDMSR: return hmR0VmxExitRdmsrNested(pVCpu, pVmxTransient);
13171 case VMX_EXIT_WRMSR: return hmR0VmxExitWrmsrNested(pVCpu, pVmxTransient);
13172 case VMX_EXIT_INVLPG: return hmR0VmxExitInvlpgNested(pVCpu, pVmxTransient);
13173 case VMX_EXIT_INVPCID: return hmR0VmxExitInvpcidNested(pVCpu, pVmxTransient);
13174 case VMX_EXIT_TASK_SWITCH: return hmR0VmxExitTaskSwitchNested(pVCpu, pVmxTransient);
13175 case VMX_EXIT_WBINVD: return hmR0VmxExitWbinvdNested(pVCpu, pVmxTransient);
13176 case VMX_EXIT_MTF: return hmR0VmxExitMtfNested(pVCpu, pVmxTransient);
13177 case VMX_EXIT_APIC_ACCESS: return hmR0VmxExitApicAccessNested(pVCpu, pVmxTransient);
13178 case VMX_EXIT_APIC_WRITE: return hmR0VmxExitApicWriteNested(pVCpu, pVmxTransient);
13179 case VMX_EXIT_VIRTUALIZED_EOI: return hmR0VmxExitVirtEoiNested(pVCpu, pVmxTransient);
13180 case VMX_EXIT_MOV_CRX: return hmR0VmxExitMovCRxNested(pVCpu, pVmxTransient);
13181 case VMX_EXIT_INT_WINDOW: return hmR0VmxExitIntWindowNested(pVCpu, pVmxTransient);
13182 case VMX_EXIT_NMI_WINDOW: return hmR0VmxExitNmiWindowNested(pVCpu, pVmxTransient);
13183 case VMX_EXIT_TPR_BELOW_THRESHOLD: return hmR0VmxExitTprBelowThresholdNested(pVCpu, pVmxTransient);
13184 case VMX_EXIT_MWAIT: return hmR0VmxExitMwaitNested(pVCpu, pVmxTransient);
13185 case VMX_EXIT_MONITOR: return hmR0VmxExitMonitorNested(pVCpu, pVmxTransient);
13186 case VMX_EXIT_PAUSE: return hmR0VmxExitPauseNested(pVCpu, pVmxTransient);
13187
13188 case VMX_EXIT_PREEMPT_TIMER:
13189 {
13190 /** @todo NSTVMX: Preempt timer. */
13191 return hmR0VmxExitPreemptTimer(pVCpu, pVmxTransient);
13192 }
13193
13194 case VMX_EXIT_MOV_DRX: return hmR0VmxExitMovDRxNested(pVCpu, pVmxTransient);
13195 case VMX_EXIT_RDPMC: return hmR0VmxExitRdpmcNested(pVCpu, pVmxTransient);
13196
13197 case VMX_EXIT_VMREAD:
13198 case VMX_EXIT_VMWRITE: return hmR0VmxExitVmreadVmwriteNested(pVCpu, pVmxTransient);
13199
13200 case VMX_EXIT_TRIPLE_FAULT: return hmR0VmxExitTripleFaultNested(pVCpu, pVmxTransient);
13201 case VMX_EXIT_ERR_INVALID_GUEST_STATE: return hmR0VmxExitErrInvalidGuestStateNested(pVCpu, pVmxTransient);
13202
13203 case VMX_EXIT_INIT_SIGNAL:
13204 case VMX_EXIT_SIPI:
13205 case VMX_EXIT_IO_SMI:
13206 case VMX_EXIT_SMI:
13207 case VMX_EXIT_ERR_MSR_LOAD:
13208 case VMX_EXIT_ERR_MACHINE_CHECK:
13209 case VMX_EXIT_PML_FULL:
13210 case VMX_EXIT_RSM:
13211 default:
13212 return hmR0VmxExitErrUnexpected(pVCpu, pVmxTransient);
13213 }
13214}
13215#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
13216
13217
13218/** @name VM-exit helpers.
13219 * @{
13220 */
13221/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
13222/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= VM-exit helpers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
13223/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
13224
13225/** Macro for VM-exits called unexpectedly. */
13226#define HMVMX_UNEXPECTED_EXIT_RET(a_pVCpu, a_HmError) \
13227 do { \
13228 (a_pVCpu)->hm.s.u32HMError = (a_HmError); \
13229 return VERR_VMX_UNEXPECTED_EXIT; \
13230 } while (0)
13231
13232#ifdef VBOX_STRICT
13233/* Is there some generic IPRT define for this that is not in Runtime/internal/\* ?? */
13234# define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \
13235 RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
13236
13237# define HMVMX_ASSERT_PREEMPT_CPUID() \
13238 do { \
13239 RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
13240 AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
13241 } while (0)
13242
13243# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
13244 do { \
13245 AssertPtr((a_pVCpu)); \
13246 AssertPtr((a_pVmxTransient)); \
13247 Assert((a_pVmxTransient)->fVMEntryFailed == false); \
13248 Assert((a_pVmxTransient)->pVmcsInfo); \
13249 Assert(ASMIntAreEnabled()); \
13250 HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu); \
13251 HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \
13252 Log4Func(("vcpu[%RU32]\n", (a_pVCpu)->idCpu)); \
13253 HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu); \
13254 if (!VMMRZCallRing3IsEnabled((a_pVCpu))) \
13255 HMVMX_ASSERT_PREEMPT_CPUID(); \
13256 HMVMX_STOP_EXIT_DISPATCH_PROF(); \
13257 } while (0)
13258
13259# define HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
13260 do { \
13261 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient); \
13262 Assert((a_pVmxTransient)->fIsNestedGuest); \
13263 } while (0)
13264
13265# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
13266 do { \
13267 Log4Func(("\n")); \
13268 } while (0)
13269#else
13270# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
13271 do { \
13272 HMVMX_STOP_EXIT_DISPATCH_PROF(); \
13273 NOREF((a_pVCpu)); NOREF((a_pVmxTransient)); \
13274 } while (0)
13275
13276# define HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \
13277 do { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient); } while (0)
13278
13279# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) do { } while (0)
13280#endif
13281
13282#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
13283/** Macro that performs the necessary privilege checks and handles intercepted VM-exits for
13284 * guests that attempted to execute a VMX instruction. */
13285# define HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(a_pVCpu, a_uExitReason) \
13286 do \
13287 { \
13288 VBOXSTRICTRC rcStrictTmp = hmR0VmxCheckExitDueToVmxInstr((a_pVCpu), (a_uExitReason)); \
13289 if (rcStrictTmp == VINF_SUCCESS) \
13290 { /* likely */ } \
13291 else if (rcStrictTmp == VINF_HM_PENDING_XCPT) \
13292 { \
13293 Assert((a_pVCpu)->hm.s.Event.fPending); \
13294 Log4Func(("Privilege checks failed -> %#x\n", VMX_ENTRY_INT_INFO_VECTOR((a_pVCpu)->hm.s.Event.u64IntInfo))); \
13295 return VINF_SUCCESS; \
13296 } \
13297 else \
13298 { \
13299 int rcTmp = VBOXSTRICTRC_VAL(rcStrictTmp); \
13300 AssertMsgFailedReturn(("Unexpected failure. rc=%Rrc", rcTmp), rcTmp); \
13301 } \
13302 } while (0)
13303
13304/** Macro that decodes a memory operand for a VM-exit caused by an instruction. */
13305# define HMVMX_DECODE_MEM_OPERAND(a_pVCpu, a_uExitInstrInfo, a_uExitQual, a_enmMemAccess, a_pGCPtrEffAddr) \
13306 do \
13307 { \
13308 VBOXSTRICTRC rcStrictTmp = hmR0VmxDecodeMemOperand((a_pVCpu), (a_uExitInstrInfo), (a_uExitQual), (a_enmMemAccess), \
13309 (a_pGCPtrEffAddr)); \
13310 if (rcStrictTmp == VINF_SUCCESS) \
13311 { /* likely */ } \
13312 else if (rcStrictTmp == VINF_HM_PENDING_XCPT) \
13313 { \
13314 uint8_t const uXcptTmp = VMX_ENTRY_INT_INFO_VECTOR((a_pVCpu)->hm.s.Event.u64IntInfo); \
13315 Log4Func(("Memory operand decoding failed, raising xcpt %#x\n", uXcptTmp)); \
13316 NOREF(uXcptTmp); \
13317 return VINF_SUCCESS; \
13318 } \
13319 else \
13320 { \
13321 Log4Func(("hmR0VmxDecodeMemOperand failed. rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrictTmp))); \
13322 return rcStrictTmp; \
13323 } \
13324 } while (0)
13325#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
13326
13327
13328/**
13329 * Advances the guest RIP by the specified number of bytes.
13330 *
13331 * @param pVCpu The cross context virtual CPU structure.
13332 * @param cbInstr Number of bytes to advance the RIP by.
13333 *
13334 * @remarks No-long-jump zone!!!
13335 */
13336DECLINLINE(void) hmR0VmxAdvanceGuestRipBy(PVMCPUCC pVCpu, uint32_t cbInstr)
13337{
13338 /* Advance the RIP. */
13339 pVCpu->cpum.GstCtx.rip += cbInstr;
13340 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
13341
13342 /* Update interrupt inhibition. */
13343 if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
13344 && pVCpu->cpum.GstCtx.rip != EMGetInhibitInterruptsPC(pVCpu))
13345 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
13346}
13347
13348
13349/**
13350 * Advances the guest RIP after reading it from the VMCS.
13351 *
13352 * @returns VBox status code, no informational status codes.
13353 * @param pVCpu The cross context virtual CPU structure.
13354 * @param pVmxTransient The VMX-transient structure.
13355 *
13356 * @remarks No-long-jump zone!!!
13357 */
13358static int hmR0VmxAdvanceGuestRip(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
13359{
13360 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
13361 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS);
13362 AssertRCReturn(rc, rc);
13363
13364 hmR0VmxAdvanceGuestRipBy(pVCpu, pVmxTransient->cbExitInstr);
13365 return VINF_SUCCESS;
13366}
13367
13368
13369/**
13370 * Handles a condition that occurred while delivering an event through the guest or
13371 * nested-guest IDT.
13372 *
13373 * @returns Strict VBox status code (i.e. informational status codes too).
13374 * @retval VINF_SUCCESS if we should continue handling the VM-exit.
13375 * @retval VINF_HM_DOUBLE_FAULT if a \#DF condition was detected and we ought
13376 * to continue execution of the guest which will deliver the \#DF.
13377 * @retval VINF_EM_RESET if we detected a triple-fault condition.
13378 * @retval VERR_EM_GUEST_CPU_HANG if we detected a guest CPU hang.
13379 *
13380 * @param pVCpu The cross context virtual CPU structure.
13381 * @param pVmxTransient The VMX-transient structure.
13382 *
13383 * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
13384 * Additionally, HMVMX_READ_EXIT_QUALIFICATION is required if the VM-exit
13385 * is due to an EPT violation, PML full or SPP-related event.
13386 *
13387 * @remarks No-long-jump zone!!!
13388 */
13389static VBOXSTRICTRC hmR0VmxCheckExitDueToEventDelivery(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
13390{
13391 Assert(!pVCpu->hm.s.Event.fPending);
13392 HMVMX_ASSERT_READ(pVmxTransient, HMVMX_READ_XCPT_INFO);
13393 if ( pVmxTransient->uExitReason == VMX_EXIT_EPT_VIOLATION
13394 || pVmxTransient->uExitReason == VMX_EXIT_PML_FULL
13395 || pVmxTransient->uExitReason == VMX_EXIT_SPP_EVENT)
13396 HMVMX_ASSERT_READ(pVmxTransient, HMVMX_READ_EXIT_QUALIFICATION);
13397
13398 VBOXSTRICTRC rcStrict = VINF_SUCCESS;
13399 PCVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
13400 uint32_t const uIdtVectorInfo = pVmxTransient->uIdtVectoringInfo;
13401 uint32_t const uExitIntInfo = pVmxTransient->uExitIntInfo;
13402 if (VMX_IDT_VECTORING_INFO_IS_VALID(uIdtVectorInfo))
13403 {
13404 uint32_t const uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(uIdtVectorInfo);
13405 uint32_t const uIdtVectorType = VMX_IDT_VECTORING_INFO_TYPE(uIdtVectorInfo);
13406
13407 /*
13408 * If the event was a software interrupt (generated with INT n) or a software exception
13409 * (generated by INT3/INTO) or a privileged software exception (generated by INT1), we
13410 * can handle the VM-exit and continue guest execution which will re-execute the
13411 * instruction rather than re-injecting the exception, as that can cause premature
13412 * trips to ring-3 before injection and involve TRPM which currently has no way of
13413 * storing that these exceptions were caused by these instructions (ICEBP's #DB poses
13414 * the problem).
13415 */
13416 IEMXCPTRAISE enmRaise;
13417 IEMXCPTRAISEINFO fRaiseInfo;
13418 if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
13419 || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
13420 || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
13421 {
13422 enmRaise = IEMXCPTRAISE_REEXEC_INSTR;
13423 fRaiseInfo = IEMXCPTRAISEINFO_NONE;
13424 }
13425 else if (VMX_EXIT_INT_INFO_IS_VALID(uExitIntInfo))
13426 {
13427 uint32_t const uExitVectorType = VMX_EXIT_INT_INFO_TYPE(uExitIntInfo);
13428 uint8_t const uExitVector = VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo);
13429 Assert(uExitVectorType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT);
13430
13431 uint32_t const fIdtVectorFlags = hmR0VmxGetIemXcptFlags(uIdtVector, uIdtVectorType);
13432 uint32_t const fExitVectorFlags = hmR0VmxGetIemXcptFlags(uExitVector, uExitVectorType);
13433
13434 enmRaise = IEMEvaluateRecursiveXcpt(pVCpu, fIdtVectorFlags, uIdtVector, fExitVectorFlags, uExitVector, &fRaiseInfo);
13435
13436 /* Determine a vectoring #PF condition, see comment in hmR0VmxExitXcptPF(). */
13437 if (fRaiseInfo & (IEMXCPTRAISEINFO_EXT_INT_PF | IEMXCPTRAISEINFO_NMI_PF))
13438 {
13439 pVmxTransient->fVectoringPF = true;
13440 enmRaise = IEMXCPTRAISE_PREV_EVENT;
13441 }
13442 }
13443 else
13444 {
13445 /*
13446 * If an exception or hardware interrupt delivery caused an EPT violation/misconfig or APIC access
13447 * VM-exit, then the VM-exit interruption-information will not be valid and we end up here.
13448 * It is sufficient to reflect the original event to the guest after handling the VM-exit.
13449 */
13450 Assert( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
13451 || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI
13452 || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT);
13453 enmRaise = IEMXCPTRAISE_PREV_EVENT;
13454 fRaiseInfo = IEMXCPTRAISEINFO_NONE;
13455 }
13456
13457 /*
13458 * On CPUs that support Virtual NMIs, if this VM-exit (be it an exception or EPT violation/misconfig
13459 * etc.) occurred while delivering the NMI, we need to clear the block-by-NMI field in the guest
13460 * interruptibility-state before re-delivering the NMI after handling the VM-exit. Otherwise the
13461 * subsequent VM-entry would fail, see @bugref{7445}.
13462 *
13463 * See Intel spec. 30.7.1.2 "Resuming Guest Software after Handling an Exception".
13464 */
13465 if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI
13466 && enmRaise == IEMXCPTRAISE_PREV_EVENT
13467 && (pVmcsInfo->u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)
13468 && CPUMIsGuestNmiBlocking(pVCpu))
13469 {
13470 CPUMSetGuestNmiBlocking(pVCpu, false);
13471 }
13472
13473 switch (enmRaise)
13474 {
13475 case IEMXCPTRAISE_CURRENT_XCPT:
13476 {
13477 Log4Func(("IDT: Pending secondary Xcpt: idtinfo=%#RX64 exitinfo=%#RX64\n", uIdtVectorInfo, uExitIntInfo));
13478 Assert(rcStrict == VINF_SUCCESS);
13479 break;
13480 }
13481
13482 case IEMXCPTRAISE_PREV_EVENT:
13483 {
13484 uint32_t u32ErrCode;
13485 if (VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(uIdtVectorInfo))
13486 u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
13487 else
13488 u32ErrCode = 0;
13489
13490 /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF, see hmR0VmxExitXcptPF(). */
13491 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectReflect);
13492 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(uIdtVectorInfo), 0 /* cbInstr */,
13493 u32ErrCode, pVCpu->cpum.GstCtx.cr2);
13494
13495 Log4Func(("IDT: Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->hm.s.Event.u64IntInfo,
13496 pVCpu->hm.s.Event.u32ErrCode));
13497 Assert(rcStrict == VINF_SUCCESS);
13498 break;
13499 }
13500
13501 case IEMXCPTRAISE_REEXEC_INSTR:
13502 Assert(rcStrict == VINF_SUCCESS);
13503 break;
13504
13505 case IEMXCPTRAISE_DOUBLE_FAULT:
13506 {
13507 /*
13508 * Determine a vectoring double #PF condition. Used later, when PGM evaluates the
13509 * second #PF as a guest #PF (and not a shadow #PF) and needs to be converted into a #DF.
13510 */
13511 if (fRaiseInfo & IEMXCPTRAISEINFO_PF_PF)
13512 {
13513 pVmxTransient->fVectoringDoublePF = true;
13514 Log4Func(("IDT: Vectoring double #PF %#RX64 cr2=%#RX64\n", pVCpu->hm.s.Event.u64IntInfo,
13515 pVCpu->cpum.GstCtx.cr2));
13516 rcStrict = VINF_SUCCESS;
13517 }
13518 else
13519 {
13520 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectConvertDF);
13521 hmR0VmxSetPendingXcptDF(pVCpu);
13522 Log4Func(("IDT: Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->hm.s.Event.u64IntInfo,
13523 uIdtVector, VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo)));
13524 rcStrict = VINF_HM_DOUBLE_FAULT;
13525 }
13526 break;
13527 }
13528
13529 case IEMXCPTRAISE_TRIPLE_FAULT:
13530 {
13531 Log4Func(("IDT: Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", uIdtVector,
13532 VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo)));
13533 rcStrict = VINF_EM_RESET;
13534 break;
13535 }
13536
13537 case IEMXCPTRAISE_CPU_HANG:
13538 {
13539 Log4Func(("IDT: Bad guest! Entering CPU hang. fRaiseInfo=%#x\n", fRaiseInfo));
13540 rcStrict = VERR_EM_GUEST_CPU_HANG;
13541 break;
13542 }
13543
13544 default:
13545 {
13546 AssertMsgFailed(("IDT: vcpu[%RU32] Unexpected/invalid value! enmRaise=%#x\n", pVCpu->idCpu, enmRaise));
13547 rcStrict = VERR_VMX_IPE_2;
13548 break;
13549 }
13550 }
13551 }
13552 else if ( (pVmcsInfo->u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)
13553 && !CPUMIsGuestNmiBlocking(pVCpu))
13554 {
13555 if ( VMX_EXIT_INT_INFO_IS_VALID(uExitIntInfo)
13556 && VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo) != X86_XCPT_DF
13557 && VMX_EXIT_INT_INFO_IS_NMI_UNBLOCK_IRET(uExitIntInfo))
13558 {
13559 /*
13560 * Execution of IRET caused a fault when NMI blocking was in effect (i.e. we're in
13561 * the guest or nested-guest NMI handler). We need to set the block-by-NMI field so
13562 * that virtual NMIs remain blocked until the IRET execution is completed.
13563 *
13564 * See Intel spec. 31.7.1.2 "Resuming Guest Software After Handling An Exception".
13565 */
13566 CPUMSetGuestNmiBlocking(pVCpu, true);
13567 Log4Func(("Set NMI blocking. uExitReason=%u\n", pVmxTransient->uExitReason));
13568 }
13569 else if ( pVmxTransient->uExitReason == VMX_EXIT_EPT_VIOLATION
13570 || pVmxTransient->uExitReason == VMX_EXIT_PML_FULL
13571 || pVmxTransient->uExitReason == VMX_EXIT_SPP_EVENT)
13572 {
13573 /*
13574 * Execution of IRET caused an EPT violation, page-modification log-full event or
13575 * SPP-related event VM-exit when NMI blocking was in effect (i.e. we're in the
13576 * guest or nested-guest NMI handler). We need to set the block-by-NMI field so
13577 * that virtual NMIs remain blocked until the IRET execution is completed.
13578 *
13579 * See Intel spec. 27.2.3 "Information about NMI unblocking due to IRET"
13580 */
13581 if (VMX_EXIT_QUAL_EPT_IS_NMI_UNBLOCK_IRET(pVmxTransient->uExitQual))
13582 {
13583 CPUMSetGuestNmiBlocking(pVCpu, true);
13584 Log4Func(("Set NMI blocking. uExitReason=%u\n", pVmxTransient->uExitReason));
13585 }
13586 }
13587 }
13588
13589 Assert( rcStrict == VINF_SUCCESS || rcStrict == VINF_HM_DOUBLE_FAULT
13590 || rcStrict == VINF_EM_RESET || rcStrict == VERR_EM_GUEST_CPU_HANG);
13591 return rcStrict;
13592}
13593
13594
13595#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
13596/**
13597 * Performs the relevant VMX instruction checks for VM-exits that occurred due to the
13598 * guest attempting to execute a VMX instruction.
13599 *
13600 * @returns Strict VBox status code (i.e. informational status codes too).
13601 * @retval VINF_SUCCESS if we should continue handling the VM-exit.
13602 * @retval VINF_HM_PENDING_XCPT if an exception was raised.
13603 *
13604 * @param pVCpu The cross context virtual CPU structure.
13605 * @param uExitReason The VM-exit reason.
13606 *
13607 * @todo NSTVMX: Document other error codes when VM-exit is implemented.
13608 * @remarks No-long-jump zone!!!
13609 */
13610static VBOXSTRICTRC hmR0VmxCheckExitDueToVmxInstr(PVMCPUCC pVCpu, uint32_t uExitReason)
13611{
13612 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS
13613 | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER);
13614
13615 /*
13616 * The physical CPU would have already checked the CPU mode/code segment.
13617 * We shall just assert here for paranoia.
13618 * See Intel spec. 25.1.1 "Relative Priority of Faults and VM Exits".
13619 */
13620 Assert(!CPUMIsGuestInRealOrV86ModeEx(&pVCpu->cpum.GstCtx));
13621 Assert( !CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx)
13622 || CPUMIsGuestIn64BitCodeEx(&pVCpu->cpum.GstCtx));
13623
13624 if (uExitReason == VMX_EXIT_VMXON)
13625 {
13626 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR4);
13627
13628 /*
13629 * We check CR4.VMXE because it is required to be always set while in VMX operation
13630 * by physical CPUs and our CR4 read-shadow is only consulted when executing specific
13631 * instructions (CLTS, LMSW, MOV CR, and SMSW) and thus doesn't affect CPU operation
13632 * otherwise (i.e. physical CPU won't automatically #UD if Cr4Shadow.VMXE is 0).
13633 */
13634 if (!CPUMIsGuestVmxEnabled(&pVCpu->cpum.GstCtx))
13635 {
13636 Log4Func(("CR4.VMXE is not set -> #UD\n"));
13637 hmR0VmxSetPendingXcptUD(pVCpu);
13638 return VINF_HM_PENDING_XCPT;
13639 }
13640 }
13641 else if (!CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx))
13642 {
13643 /*
13644 * The guest has not entered VMX operation but attempted to execute a VMX instruction
13645 * (other than VMXON), so we need to raise a #UD.
13646 */
13647 Log4Func(("Not in VMX root mode -> #UD\n"));
13648 hmR0VmxSetPendingXcptUD(pVCpu);
13649 return VINF_HM_PENDING_XCPT;
13650 }
13651
13652 /* All other checks (including VM-exit intercepts) are handled by IEM instruction emulation. */
13653 return VINF_SUCCESS;
13654}
13655
13656
13657/**
13658 * Decodes the memory operand of an instruction that caused a VM-exit.
13659 *
13660 * The Exit qualification field provides the displacement field for memory
13661 * operand instructions, if any.
13662 *
13663 * @returns Strict VBox status code (i.e. informational status codes too).
13664 * @retval VINF_SUCCESS if the operand was successfully decoded.
13665 * @retval VINF_HM_PENDING_XCPT if an exception was raised while decoding the
13666 * operand.
13667 * @param pVCpu The cross context virtual CPU structure.
13668 * @param uExitInstrInfo The VM-exit instruction information field.
13669 * @param enmMemAccess The memory operand's access type (read or write).
13670 * @param GCPtrDisp The instruction displacement field, if any. For
13671 * RIP-relative addressing pass RIP + displacement here.
13672 * @param pGCPtrMem Where to store the effective destination memory address.
13673 *
13674 * @remarks Warning! This function ASSUMES the instruction cannot be used in real or
13675 * virtual-8086 mode and hence skips those checks while verifying that the
13676 * segment is valid.
13677 */
13678static VBOXSTRICTRC hmR0VmxDecodeMemOperand(PVMCPUCC pVCpu, uint32_t uExitInstrInfo, RTGCPTR GCPtrDisp, VMXMEMACCESS enmMemAccess,
13679 PRTGCPTR pGCPtrMem)
13680{
13681 Assert(pGCPtrMem);
13682 Assert(!CPUMIsGuestInRealOrV86Mode(pVCpu));
13683 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_EFER
13684 | CPUMCTX_EXTRN_CR0);
13685
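    /* Lookup tables indexed by the 2-bit address-size field of the VM-exit instruction
       information: 0 = 16-bit, 1 = 32-bit, 2 = 64-bit. */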
13686 static uint64_t const s_auAddrSizeMasks[] = { UINT64_C(0xffff), UINT64_C(0xffffffff), UINT64_C(0xffffffffffffffff) };
13687 static uint64_t const s_auAccessSizeMasks[] = { sizeof(uint16_t), sizeof(uint32_t), sizeof(uint64_t) };
13688 AssertCompile(RT_ELEMENTS(s_auAccessSizeMasks) == RT_ELEMENTS(s_auAddrSizeMasks));
13689
13690 VMXEXITINSTRINFO ExitInstrInfo;
13691 ExitInstrInfo.u = uExitInstrInfo;
13692 uint8_t const uAddrSize = ExitInstrInfo.All.u3AddrSize;
13693 uint8_t const iSegReg = ExitInstrInfo.All.iSegReg;
13694 bool const fIdxRegValid = !ExitInstrInfo.All.fIdxRegInvalid;
13695 uint8_t const iIdxReg = ExitInstrInfo.All.iIdxReg;
13696 uint8_t const uScale = ExitInstrInfo.All.u2Scaling;
13697 bool const fBaseRegValid = !ExitInstrInfo.All.fBaseRegInvalid;
13698 uint8_t const iBaseReg = ExitInstrInfo.All.iBaseReg;
13699 bool const fIsMemOperand = !ExitInstrInfo.All.fIsRegOperand;
13700 bool const fIsLongMode = CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx);
13701
13702 /*
13703 * Validate instruction information.
13704 * This shouldn't happen on real hardware but is useful while testing our nested hardware-virtualization code.
13705 */
13706 AssertLogRelMsgReturn(uAddrSize < RT_ELEMENTS(s_auAddrSizeMasks),
13707 ("Invalid address size. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_1);
13708 AssertLogRelMsgReturn(iSegReg < X86_SREG_COUNT,
13709 ("Invalid segment register. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_2);
13710 AssertLogRelMsgReturn(fIsMemOperand,
13711 ("Expected memory operand. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_3);
13712
13713 /*
13714 * Compute the complete effective address.
13715 *
13716 * See AMD instruction spec. 1.4.2 "SIB Byte Format"
13717 * See AMD spec. 4.5.2 "Segment Registers".
13718 */
13719 RTGCPTR GCPtrMem = GCPtrDisp;
13720 if (fBaseRegValid)
13721 GCPtrMem += pVCpu->cpum.GstCtx.aGRegs[iBaseReg].u64;
13722 if (fIdxRegValid)
13723 GCPtrMem += pVCpu->cpum.GstCtx.aGRegs[iIdxReg].u64 << uScale;
13724
13725 RTGCPTR const GCPtrOff = GCPtrMem;
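    /* Apply the segment base: outside long mode every segment base applies, while in
       long mode only the FS and GS bases are used. */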
13726 if ( !fIsLongMode
13727 || iSegReg >= X86_SREG_FS)
13728 GCPtrMem += pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base;
13729 GCPtrMem &= s_auAddrSizeMasks[uAddrSize];
13730
13731 /*
13732 * Validate effective address.
13733 * See AMD spec. 4.5.3 "Segment Registers in 64-Bit Mode".
13734 */
13735 uint8_t const cbAccess = s_auAccessSizeMasks[uAddrSize];
13736 Assert(cbAccess > 0);
13737 if (fIsLongMode)
13738 {
13739 if (X86_IS_CANONICAL(GCPtrMem))
13740 {
13741 *pGCPtrMem = GCPtrMem;
13742 return VINF_SUCCESS;
13743 }
13744
13745 /** @todo r=ramshankar: We should probably raise \#SS or \#GP. See AMD spec. 4.12.2
13746 * "Data Limit Checks in 64-bit Mode". */
13747 Log4Func(("Long mode effective address is not canonical GCPtrMem=%#RX64\n", GCPtrMem));
13748 hmR0VmxSetPendingXcptGP(pVCpu, 0);
13749 return VINF_HM_PENDING_XCPT;
13750 }
13751
13752 /*
13753 * This is a watered down version of iemMemApplySegment().
13754 * Parts that are not applicable for VMX instructions like real-or-v8086 mode
13755 * and segment CPL/DPL checks are skipped.
13756 */
13757 RTGCPTR32 const GCPtrFirst32 = (RTGCPTR32)GCPtrOff;
13758 RTGCPTR32 const GCPtrLast32 = GCPtrFirst32 + cbAccess - 1;
13759 PCCPUMSELREG pSel = &pVCpu->cpum.GstCtx.aSRegs[iSegReg];
13760
13761 /* Check if the segment is present and usable. */
13762 if ( pSel->Attr.n.u1Present
13763 && !pSel->Attr.n.u1Unusable)
13764 {
13765 Assert(pSel->Attr.n.u1DescType);
13766 if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_CODE))
13767 {
13768 /* Check permissions for the data segment. */
13769 if ( enmMemAccess == VMXMEMACCESS_WRITE
13770 && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_WRITE))
13771 {
13772 Log4Func(("Data segment access invalid. iSegReg=%#x Attr=%#RX32\n", iSegReg, pSel->Attr.u));
13773 hmR0VmxSetPendingXcptGP(pVCpu, iSegReg);
13774 return VINF_HM_PENDING_XCPT;
13775 }
13776
13777 /* Check limits if it's a normal data segment. */
13778 if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_DOWN))
13779 {
13780 if ( GCPtrFirst32 > pSel->u32Limit
13781 || GCPtrLast32 > pSel->u32Limit)
13782 {
13783 Log4Func(("Data segment limit exceeded. "
13784 "iSegReg=%#x GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", iSegReg, GCPtrFirst32,
13785 GCPtrLast32, pSel->u32Limit));
13786 if (iSegReg == X86_SREG_SS)
13787 hmR0VmxSetPendingXcptSS(pVCpu, 0);
13788 else
13789 hmR0VmxSetPendingXcptGP(pVCpu, 0);
13790 return VINF_HM_PENDING_XCPT;
13791 }
13792 }
13793 else
13794 {
13795 /* Check limits if it's an expand-down data segment.
13796 Note! The upper boundary is defined by the B bit, not the G bit! */
13797 if ( GCPtrFirst32 < pSel->u32Limit + UINT32_C(1)
13798 || GCPtrLast32 > (pSel->Attr.n.u1DefBig ? UINT32_MAX : UINT32_C(0xffff)))
13799 {
13800 Log4Func(("Expand-down data segment limit exceeded. "
13801 "iSegReg=%#x GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", iSegReg, GCPtrFirst32,
13802 GCPtrLast32, pSel->u32Limit));
13803 if (iSegReg == X86_SREG_SS)
13804 hmR0VmxSetPendingXcptSS(pVCpu, 0);
13805 else
13806 hmR0VmxSetPendingXcptGP(pVCpu, 0);
13807 return VINF_HM_PENDING_XCPT;
13808 }
13809 }
13810 }
13811 else
13812 {
13813 /* Check permissions for the code segment. */
13814 if ( enmMemAccess == VMXMEMACCESS_WRITE
13815 || ( enmMemAccess == VMXMEMACCESS_READ
13816 && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_READ)))
13817 {
13818 Log4Func(("Code segment access invalid. Attr=%#RX32\n", pSel->Attr.u));
13819 Assert(!CPUMIsGuestInRealOrV86ModeEx(&pVCpu->cpum.GstCtx));
13820 hmR0VmxSetPendingXcptGP(pVCpu, 0);
13821 return VINF_HM_PENDING_XCPT;
13822 }
13823
13824 /* Check limits for the code segment (normal/expand-down not applicable for code segments). */
13825 if ( GCPtrFirst32 > pSel->u32Limit
13826 || GCPtrLast32 > pSel->u32Limit)
13827 {
13828 Log4Func(("Code segment limit exceeded. GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n",
13829 GCPtrFirst32, GCPtrLast32, pSel->u32Limit));
13830 if (iSegReg == X86_SREG_SS)
13831 hmR0VmxSetPendingXcptSS(pVCpu, 0);
13832 else
13833 hmR0VmxSetPendingXcptGP(pVCpu, 0);
13834 return VINF_HM_PENDING_XCPT;
13835 }
13836 }
13837 }
13838 else
13839 {
13840 Log4Func(("Not present or unusable segment. iSegReg=%#x Attr=%#RX32\n", iSegReg, pSel->Attr.u));
13841 hmR0VmxSetPendingXcptGP(pVCpu, 0);
13842 return VINF_HM_PENDING_XCPT;
13843 }
13844
13845 *pGCPtrMem = GCPtrMem;
13846 return VINF_SUCCESS;
13847}
13848#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
13849
13850
13851/**
13852 * VM-exit helper for LMSW.
13853 */
13854static VBOXSTRICTRC hmR0VmxExitLmsw(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint8_t cbInstr, uint16_t uMsw, RTGCPTR GCPtrEffDst)
13855{
13856 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
13857 AssertRCReturn(rc, rc);
13858
13859 VBOXSTRICTRC rcStrict = IEMExecDecodedLmsw(pVCpu, cbInstr, uMsw, GCPtrEffDst);
13860 AssertMsg( rcStrict == VINF_SUCCESS
13861 || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
13862
13863 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0);
13864 if (rcStrict == VINF_IEM_RAISED_XCPT)
13865 {
13866 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
13867 rcStrict = VINF_SUCCESS;
13868 }
13869
13870 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw);
13871 Log4Func(("rcStrict=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
13872 return rcStrict;
13873}
13874
13875
13876/**
13877 * VM-exit helper for CLTS.
13878 */
13879static VBOXSTRICTRC hmR0VmxExitClts(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint8_t cbInstr)
13880{
13881 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
13882 AssertRCReturn(rc, rc);
13883
13884 VBOXSTRICTRC rcStrict = IEMExecDecodedClts(pVCpu, cbInstr);
13885 AssertMsg( rcStrict == VINF_SUCCESS
13886 || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
13887
13888 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0);
13889 if (rcStrict == VINF_IEM_RAISED_XCPT)
13890 {
13891 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
13892 rcStrict = VINF_SUCCESS;
13893 }
13894
13895 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
13896 Log4Func(("rcStrict=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
13897 return rcStrict;
13898}
13899
13900
13901/**
13902 * VM-exit helper for MOV from CRx (CRx read).
13903 */
13904static VBOXSTRICTRC hmR0VmxExitMovFromCrX(PVMCPUCC pVCpu, PVMXVMCSINFO pVmcsInfo, uint8_t cbInstr, uint8_t iGReg, uint8_t iCrReg)
13905{
13906 Assert(iCrReg < 16);
13907 Assert(iGReg < RT_ELEMENTS(pVCpu->cpum.GstCtx.aGRegs));
13908
13909 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
13910 AssertRCReturn(rc, rc);
13911
13912 VBOXSTRICTRC rcStrict = IEMExecDecodedMovCRxRead(pVCpu, cbInstr, iGReg, iCrReg);
13913 AssertMsg( rcStrict == VINF_SUCCESS
13914 || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
13915
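    /* If the destination GPR was RSP, mark RSP as changed too since HM tracks it separately. */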
13916 if (iGReg == X86_GREG_xSP)
13917 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_RSP);
13918 else
13919 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
13920#ifdef VBOX_WITH_STATISTICS
13921 switch (iCrReg)
13922 {
13923 case 0: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Read); break;
13924 case 2: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Read); break;
13925 case 3: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Read); break;
13926 case 4: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Read); break;
13927 case 8: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Read); break;
13928 }
13929#endif
13930 Log4Func(("CR%d Read access rcStrict=%Rrc\n", iCrReg, VBOXSTRICTRC_VAL(rcStrict)));
13931 return rcStrict;
13932}
13933
13934
13935/**
13936 * VM-exit helper for MOV to CRx (CRx write).
13937 */
13938static VBOXSTRICTRC hmR0VmxExitMovToCrX(PVMCPUCC pVCpu, uint8_t cbInstr, uint8_t iGReg, uint8_t iCrReg)
13939{
13940 HMVMX_CPUMCTX_ASSERT(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK);
13941
13942 VBOXSTRICTRC rcStrict = IEMExecDecodedMovCRxWrite(pVCpu, cbInstr, iCrReg, iGReg);
13943 AssertMsg( rcStrict == VINF_SUCCESS
13944 || rcStrict == VINF_IEM_RAISED_XCPT
13945 || rcStrict == VINF_PGM_SYNC_CR3, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
13946
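    /* Mark the guest-state bits IEM may have modified, depending on which control register was written. */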
13947 switch (iCrReg)
13948 {
13949 case 0:
13950 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0
13951 | HM_CHANGED_GUEST_EFER_MSR | HM_CHANGED_VMX_ENTRY_EXIT_CTLS);
13952 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Write);
13953 Log4Func(("CR0 write. rcStrict=%Rrc CR0=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr0));
13954 break;
13955
13956 case 2:
13957 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Write);
13958 /* Nothing to do here; CR2 is not part of the VMCS. */
13959 break;
13960
13961 case 3:
13962 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR3);
13963 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Write);
13964 Log4Func(("CR3 write. rcStrict=%Rrc CR3=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr3));
13965 break;
13966
13967 case 4:
13968 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR4);
13969 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Write);
13970 Log4Func(("CR4 write. rc=%Rrc CR4=%#RX64 fLoadSaveGuestXcr0=%u\n", VBOXSTRICTRC_VAL(rcStrict),
13971 pVCpu->cpum.GstCtx.cr4, pVCpu->hmr0.s.fLoadSaveGuestXcr0));
13972 break;
13973
13974 case 8:
13975 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged,
13976 HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_APIC_TPR);
13977 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Write);
13978 break;
13979
13980 default:
13981 AssertMsgFailed(("Invalid CRx register %#x\n", iCrReg));
13982 break;
13983 }
13984
13985 if (rcStrict == VINF_IEM_RAISED_XCPT)
13986 {
13987 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
13988 rcStrict = VINF_SUCCESS;
13989 }
13990 return rcStrict;
13991}
13992
13993
13994/**
13995 * VM-exit exception handler for \#PF (Page-fault exception).
13996 *
13997 * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
13998 */
13999static VBOXSTRICTRC hmR0VmxExitXcptPF(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14000{
14001 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14002 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
14003 hmR0VmxReadExitQualVmcs(pVmxTransient);
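    /* For #PF VM-exits the Exit qualification holds the faulting linear address. */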
14004
14005 if (!pVM->hmr0.s.fNestedPaging)
14006 { /* likely */ }
14007 else
14008 {
14009#if !defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) && !defined(HMVMX_ALWAYS_TRAP_PF)
14010 Assert(pVmxTransient->fIsNestedGuest || pVCpu->hmr0.s.fUsingDebugLoop);
14011#endif
14012 pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
14013 if (!pVmxTransient->fVectoringDoublePF)
14014 {
14015 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), 0 /* cbInstr */,
14016 pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQual);
14017 }
14018 else
14019 {
14020 /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
14021 Assert(!pVmxTransient->fIsNestedGuest);
14022 hmR0VmxSetPendingXcptDF(pVCpu);
14023 Log4Func(("Pending #DF due to vectoring #PF w/ NestedPaging\n"));
14024 }
14025 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
14026 return VINF_SUCCESS;
14027 }
14028
14029 Assert(!pVmxTransient->fIsNestedGuest);
14030
14031 /* If it's a vectoring #PF, emulate injecting the original event as PGMTrap0eHandler() is incapable
14032 of differentiating between instruction emulation and event injection that caused a #PF. See @bugref{6607}. */
14033 if (pVmxTransient->fVectoringPF)
14034 {
14035 Assert(pVCpu->hm.s.Event.fPending);
14036 return VINF_EM_RAW_INJECT_TRPM_EVENT;
14037 }
14038
14039 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
14040 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
14041 AssertRCReturn(rc, rc);
14042
14043 Log4Func(("#PF: cs:rip=%#04x:%#RX64 err_code=%#RX32 exit_qual=%#RX64 cr3=%#RX64\n", pCtx->cs.Sel, pCtx->rip,
14044 pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQual, pCtx->cr3));
14045
14046 TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQual, (RTGCUINT)pVmxTransient->uExitIntErrorCode);
14047 rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntErrorCode, CPUMCTX2CORE(pCtx), (RTGCPTR)pVmxTransient->uExitQual);
14048
14049 Log4Func(("#PF: rc=%Rrc\n", rc));
14050 if (rc == VINF_SUCCESS)
14051 {
14052 /*
14053 * This is typically a shadow page table sync or an MMIO instruction. But we may have
14054 * emulated something like LTR or a far jump. Any part of the CPU context may have changed.
14055 */
14056 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
14057 TRPMResetTrap(pVCpu);
14058 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
14059 return rc;
14060 }
14061
14062 if (rc == VINF_EM_RAW_GUEST_TRAP)
14063 {
14064 if (!pVmxTransient->fVectoringDoublePF)
14065 {
14066 /* It's a guest page fault and needs to be reflected to the guest. */
14067 uint32_t const uGstErrorCode = TRPMGetErrorCode(pVCpu);
14068 TRPMResetTrap(pVCpu);
14069 pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */
14070 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), 0 /* cbInstr */,
14071 uGstErrorCode, pVmxTransient->uExitQual);
14072 }
14073 else
14074 {
14075 /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
14076 TRPMResetTrap(pVCpu);
14077 pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */
14078 hmR0VmxSetPendingXcptDF(pVCpu);
14079 Log4Func(("#PF: Pending #DF due to vectoring #PF\n"));
14080 }
14081
14082 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
14083 return VINF_SUCCESS;
14084 }
14085
14086 TRPMResetTrap(pVCpu);
14087 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
14088 return rc;
14089}
14090
14091
14092/**
14093 * VM-exit exception handler for \#MF (Math Fault: floating point exception).
14094 *
14095 * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
14096 */
14097static VBOXSTRICTRC hmR0VmxExitXcptMF(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14098{
14099 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14100 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
14101
14102 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CR0);
14103 AssertRCReturn(rc, rc);
14104
14105 if (!(pVCpu->cpum.GstCtx.cr0 & X86_CR0_NE))
14106 {
14107 /* Convert a #MF into a FERR -> IRQ 13. See @bugref{6117}. */
14108 rc = PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13, 1, 0 /* uTagSrc */);
14109
14110 /** @todo r=ramshankar: The Intel spec. does -not- specify that this VM-exit
14111 * provides VM-exit instruction length. If this causes problems later,
14112 * disassemble the instruction like it's done on AMD-V. */
14113 int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
14114 AssertRCReturn(rc2, rc2);
14115 return rc;
14116 }
14117
14118 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbExitInstr,
14119 pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
14120 return VINF_SUCCESS;
14121}
14122
14123
14124/**
14125 * VM-exit exception handler for \#BP (Breakpoint exception).
14126 *
14127 * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
14128 */
14129static VBOXSTRICTRC hmR0VmxExitXcptBP(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14130{
14131 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14132 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
14133
14134 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
14135 AssertRCReturn(rc, rc);
14136
14137 if (!pVmxTransient->fIsNestedGuest)
14138 rc = DBGFTrap03Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(&pVCpu->cpum.GstCtx));
14139 else
14140 rc = VINF_EM_RAW_GUEST_TRAP;
14141
14142 if (rc == VINF_EM_RAW_GUEST_TRAP)
14143 {
14144 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
14145 pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
14146 rc = VINF_SUCCESS;
14147 }
14148
14149 Assert(rc == VINF_SUCCESS || rc == VINF_EM_DBG_BREAKPOINT);
14150 return rc;
14151}
14152
14153
14154/**
14155 * VM-exit exception handler for \#AC (Alignment-check exception).
14156 *
14157 * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
14158 */
14159static VBOXSTRICTRC hmR0VmxExitXcptAC(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14160{
14161 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14162
14163 /*
14164     * Detect #ACs caused by the host having enabled split-lock detection.
14165 * Emulate such instructions.
14166 */
14167 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo,
14168 CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS | CPUMCTX_EXTRN_CS);
14169 AssertRCReturn(rc, rc);
14170 /** @todo detect split lock in cpu feature? */
14171 if ( /* 1. If 486-style alignment checks aren't enabled, then this must be a split-lock exception */
14172 !(pVCpu->cpum.GstCtx.cr0 & X86_CR0_AM)
14173 /* 2. #AC cannot happen in rings 0-2 except for split-lock detection. */
14174 || CPUMGetGuestCPL(pVCpu) != 3
14175 /* 3. When the EFLAGS.AC != 0 this can only be a split-lock case. */
14176 || !(pVCpu->cpum.GstCtx.eflags.u & X86_EFL_AC) )
14177 {
14178 /*
14179 * Check for debug/trace events and import state accordingly.
14180 */
14181 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitGuestACSplitLock);
14182 PVMCC pVM = pVCpu->pVMR0;
14183 if ( !DBGF_IS_EVENT_ENABLED(pVM, DBGFEVENT_VMX_SPLIT_LOCK)
14184 && !VBOXVMM_VMX_SPLIT_LOCK_ENABLED())
14185 {
14186 if (pVM->cCpus == 1)
14187 {
14188#if 0 /** @todo r=bird: This is potentially wrong. Might have to just do a whole state sync above and mark everything changed to be safe... */
14189 rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
14190#else
14191 rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
14192#endif
14193 AssertRCReturn(rc, rc);
14194 }
14195 }
14196 else
14197 {
14198 rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
14199 AssertRCReturn(rc, rc);
14200
14201             VBOXVMM_VMX_SPLIT_LOCK(pVCpu, &pVCpu->cpum.GstCtx);
14202
14203 if (DBGF_IS_EVENT_ENABLED(pVM, DBGFEVENT_VMX_SPLIT_LOCK))
14204 {
14205 VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, DBGFEVENT_VMX_SPLIT_LOCK, DBGFEVENTCTX_HM, 0);
14206 if (rcStrict != VINF_SUCCESS)
14207 return rcStrict;
14208 }
14209 }
14210
14211 /*
14212 * Emulate the instruction.
14213 *
14214 * We have to ignore the LOCK prefix here as we must not retrigger the
14215 * detection on the host. This isn't all that satisfactory, though...
14216 */
14217 if (pVM->cCpus == 1)
14218 {
14219 Log8Func(("cs:rip=%#04x:%#RX64 rflags=%#RX64 cr0=%#RX64 split-lock #AC\n", pVCpu->cpum.GstCtx.cs.Sel,
14220 pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.rflags, pVCpu->cpum.GstCtx.cr0));
14221
14222 /** @todo For SMP configs we should do a rendezvous here. */
14223 VBOXSTRICTRC rcStrict = IEMExecOneIgnoreLock(pVCpu);
14224 if (rcStrict == VINF_SUCCESS)
14225#if 0 /** @todo r=bird: This is potentially wrong. Might have to just do a whole state sync above and mark everything changed to be safe... */
14226 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged,
14227 HM_CHANGED_GUEST_RIP
14228 | HM_CHANGED_GUEST_RFLAGS
14229 | HM_CHANGED_GUEST_GPRS_MASK
14230 | HM_CHANGED_GUEST_CS
14231 | HM_CHANGED_GUEST_SS);
14232#else
14233 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
14234#endif
14235 else if (rcStrict == VINF_IEM_RAISED_XCPT)
14236 {
14237 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
14238 rcStrict = VINF_SUCCESS;
14239 }
14240 return rcStrict;
14241 }
14242 Log8Func(("cs:rip=%#04x:%#RX64 rflags=%#RX64 cr0=%#RX64 split-lock #AC -> VINF_EM_EMULATE_SPLIT_LOCK\n",
14243 pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.rflags, pVCpu->cpum.GstCtx.cr0));
14244 return VINF_EM_EMULATE_SPLIT_LOCK;
14245 }
14246
14247 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitGuestAC);
14248 Log8Func(("cs:rip=%#04x:%#RX64 rflags=%#RX64 cr0=%#RX64 cpl=%d -> #AC\n", pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
14249 pVCpu->cpum.GstCtx.rflags, pVCpu->cpum.GstCtx.cr0, CPUMGetGuestCPL(pVCpu) ));
14250
14251 /* Re-inject it. We'll detect any nesting before getting here. */
14252 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
14253 pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
14254 return VINF_SUCCESS;
14255}
14256
14257
14258/**
14259 * VM-exit exception handler for \#DB (Debug exception).
14260 *
14261 * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
14262 */
14263static VBOXSTRICTRC hmR0VmxExitXcptDB(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14264{
14265 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14266 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
14267
14268 /*
14269 * Get the DR6-like values from the Exit qualification and pass it to DBGF for processing.
14270 */
14271 hmR0VmxReadExitQualVmcs(pVmxTransient);
14272
14273 /* Refer Intel spec. Table 27-1. "Exit Qualifications for debug exceptions" for the format. */
14274 uint64_t const uDR6 = X86_DR6_INIT_VAL
14275 | (pVmxTransient->uExitQual & ( X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3
14276 | X86_DR6_BD | X86_DR6_BS));
14277
14278 int rc;
14279 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
14280 if (!pVmxTransient->fIsNestedGuest)
14281 {
14282 rc = DBGFTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx), uDR6, pVCpu->hm.s.fSingleInstruction);
14283
14284 /*
14285 * Prevents stepping twice over the same instruction when the guest is stepping using
14286 * EFLAGS.TF and the hypervisor debugger is stepping using MTF.
14287 * Testcase: DOSQEMM, break (using "ba x 1") at cs:rip 0x70:0x774 and step (using "t").
14288 */
14289 if ( rc == VINF_EM_DBG_STEPPED
14290 && (pVmxTransient->pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_MONITOR_TRAP_FLAG))
14291 {
14292 Assert(pVCpu->hm.s.fSingleInstruction);
14293 rc = VINF_EM_RAW_GUEST_TRAP;
14294 }
14295 }
14296 else
14297 rc = VINF_EM_RAW_GUEST_TRAP;
14298 Log6Func(("rc=%Rrc\n", rc));
14299 if (rc == VINF_EM_RAW_GUEST_TRAP)
14300 {
14301 /*
14302 * The exception was for the guest. Update DR6, DR7.GD and
14303 * IA32_DEBUGCTL.LBR before forwarding it.
14304 * See Intel spec. 27.1 "Architectural State before a VM-Exit".
14305 */
14306 VMMRZCallRing3Disable(pVCpu);
14307 HM_DISABLE_PREEMPT(pVCpu);
14308
14309 pCtx->dr[6] &= ~X86_DR6_B_MASK;
14310 pCtx->dr[6] |= uDR6;
14311 if (CPUMIsGuestDebugStateActive(pVCpu))
14312 ASMSetDR6(pCtx->dr[6]);
14313
14314 HM_RESTORE_PREEMPT();
14315 VMMRZCallRing3Enable(pVCpu);
14316
14317 rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_DR7);
14318 AssertRCReturn(rc, rc);
14319
14320 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
14321 pCtx->dr[7] &= ~(uint64_t)X86_DR7_GD;
14322
14323 /* Paranoia. */
14324 pCtx->dr[7] &= ~(uint64_t)X86_DR7_RAZ_MASK;
14325 pCtx->dr[7] |= X86_DR7_RA1_MASK;
14326
14327 rc = VMXWriteVmcsNw(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
14328 AssertRC(rc);
14329
14330 /*
14331 * Raise #DB in the guest.
14332 *
14333 * It is important to reflect exactly what the VM-exit gave us (preserving the
14334 * interruption-type) rather than use hmR0VmxSetPendingXcptDB() as the #DB could've
14335 * been raised while executing ICEBP (INT1) and not the regular #DB. Thus it may
14336 * trigger different handling in the CPU (like skipping DPL checks), see @bugref{6398}.
14337 *
14338     * Intel re-documented ICEBP/INT1 in May 2018 (it was previously only documented as
14339     * part of the Intel 386), see Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
14340 */
14341 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
14342 pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
14343 return VINF_SUCCESS;
14344 }
14345
14346 /*
14347 * Not a guest trap, must be a hypervisor related debug event then.
14348 * Update DR6 in case someone is interested in it.
14349 */
14350 AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc));
14351 AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5);
14352 CPUMSetHyperDR6(pVCpu, uDR6);
14353
14354 return rc;
14355}
14356
14357
14358/**
14359 * Hacks its way around the lovely mesa driver's backdoor accesses.
14360 *
14361 * @sa hmR0SvmHandleMesaDrvGp.
14362 */
14363static int hmR0VmxHandleMesaDrvGp(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PCPUMCTX pCtx)
14364{
14365 LogFunc(("cs:rip=%#04x:%#RX64 rcx=%#RX64 rbx=%#RX64\n", pCtx->cs.Sel, pCtx->rip, pCtx->rcx, pCtx->rbx));
14366 RT_NOREF(pCtx);
14367
14368 /* For now we'll just skip the instruction. */
14369 return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
14370}
14371
14372
14373/**
14374 * Checks if the \#GP'ing instruction is the mesa driver doing its lovely
14375 * backdoor logging w/o checking what it is running inside.
14376 *
14377 * This recognizes an "IN EAX,DX" instruction executed in flat ring-3, with the
14378 * backdoor port and magic numbers loaded in registers.
14379 *
14380 * @returns true if it is, false if it isn't.
14381 * @sa hmR0SvmIsMesaDrvGp.
14382 */
14383DECLINLINE(bool) hmR0VmxIsMesaDrvGp(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient, PCPUMCTX pCtx)
14384{
14385 /* 0xed: IN eAX,dx */
14386 uint8_t abInstr[1];
14387 if (pVmxTransient->cbExitInstr != sizeof(abInstr))
14388 return false;
14389
14390 /* Check that it is #GP(0). */
14391 if (pVmxTransient->uExitIntErrorCode != 0)
14392 return false;
14393
14394 /* Check magic and port. */
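    /* 0x564d5868 is the VMware backdoor magic 'VMXh' expected in EAX and 0x5658 ('VX') is
       the VMware backdoor I/O port expected in DX. */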
14395 Assert(!(pCtx->fExtrn & (CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RDX | CPUMCTX_EXTRN_RCX)));
14396 /*Log(("hmR0VmxIsMesaDrvGp: rax=%RX64 rdx=%RX64\n", pCtx->rax, pCtx->rdx));*/
14397 if (pCtx->rax != UINT32_C(0x564d5868))
14398 return false;
14399 if (pCtx->dx != UINT32_C(0x5658))
14400 return false;
14401
14402 /* Flat ring-3 CS. */
14403 AssertCompile(HMVMX_CPUMCTX_EXTRN_ALL & CPUMCTX_EXTRN_CS);
14404 Assert(!(pCtx->fExtrn & CPUMCTX_EXTRN_CS));
14405 /*Log(("hmR0VmxIsMesaDrvGp: cs.Attr.n.u2Dpl=%d base=%Rx64\n", pCtx->cs.Attr.n.u2Dpl, pCtx->cs.u64Base));*/
14406 if (pCtx->cs.Attr.n.u2Dpl != 3)
14407 return false;
14408 if (pCtx->cs.u64Base != 0)
14409 return false;
14410
14411 /* Check opcode. */
14412 AssertCompile(HMVMX_CPUMCTX_EXTRN_ALL & CPUMCTX_EXTRN_RIP);
14413 Assert(!(pCtx->fExtrn & CPUMCTX_EXTRN_RIP));
14414 int rc = PGMPhysSimpleReadGCPtr(pVCpu, abInstr, pCtx->rip, sizeof(abInstr));
14415 /*Log(("hmR0VmxIsMesaDrvGp: PGMPhysSimpleReadGCPtr -> %Rrc %#x\n", rc, abInstr[0]));*/
14416 if (RT_FAILURE(rc))
14417 return false;
14418 if (abInstr[0] != 0xed)
14419 return false;
14420
14421 return true;
14422}
14423
14424
14425/**
14426 * VM-exit exception handler for \#GP (General-protection exception).
14427 *
14428 * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
14429 */
14430static VBOXSTRICTRC hmR0VmxExitXcptGP(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14431{
14432 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14433 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
14434
14435 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
14436 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14437 PVMXVMCSINFOSHARED pVmcsInfoShared = pVmcsInfo->pShared;
14438 if (pVmcsInfoShared->RealMode.fRealOnV86Active)
14439 { /* likely */ }
14440 else
14441 {
14442#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
14443 Assert(pVCpu->hmr0.s.fUsingDebugLoop || pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv || pVmxTransient->fIsNestedGuest);
14444#endif
14445 /*
14446 * If the guest is not in real-mode or we have unrestricted guest execution support, or if we are
14447 * executing a nested-guest, reflect #GP to the guest or nested-guest.
14448 */
14449 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
14450 AssertRCReturn(rc, rc);
14451 Log4Func(("Gst: cs:rip=%#04x:%#RX64 ErrorCode=%#x cr0=%#RX64 cpl=%u tr=%#04x\n", pCtx->cs.Sel, pCtx->rip,
14452 pVmxTransient->uExitIntErrorCode, pCtx->cr0, CPUMGetGuestCPL(pVCpu), pCtx->tr.Sel));
14453
14454 if ( pVmxTransient->fIsNestedGuest
14455 || !pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv
14456 || !hmR0VmxIsMesaDrvGp(pVCpu, pVmxTransient, pCtx))
14457 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
14458 pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
14459 else
14460 rc = hmR0VmxHandleMesaDrvGp(pVCpu, pVmxTransient, pCtx);
14461 return rc;
14462 }
14463
14464 Assert(CPUMIsGuestInRealModeEx(pCtx));
14465 Assert(!pVCpu->CTX_SUFF(pVM)->hmr0.s.vmx.fUnrestrictedGuest);
14466 Assert(!pVmxTransient->fIsNestedGuest);
14467
14468 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
14469 AssertRCReturn(rc, rc);
14470
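    /* In real-on-v86 mode a #GP is most likely an instruction the virtual-8086 setup cannot
       execute directly (e.g. a privileged or IOPL-sensitive one); emulate a single
       instruction with IEM and re-check the resulting mode below. */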
14471 VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu);
14472 if (rcStrict == VINF_SUCCESS)
14473 {
14474 if (!CPUMIsGuestInRealModeEx(pCtx))
14475 {
14476 /*
14477 * The guest is no longer in real-mode, check if we can continue executing the
14478 * guest using hardware-assisted VMX. Otherwise, fall back to emulation.
14479 */
14480 pVmcsInfoShared->RealMode.fRealOnV86Active = false;
14481 if (HMCanExecuteVmxGuest(pVCpu->pVMR0, pVCpu, pCtx))
14482 {
14483 Log4Func(("Mode changed but guest still suitable for executing using hardware-assisted VMX\n"));
14484 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
14485 }
14486 else
14487 {
14488 Log4Func(("Mode changed -> VINF_EM_RESCHEDULE\n"));
14489 rcStrict = VINF_EM_RESCHEDULE;
14490 }
14491 }
14492 else
14493 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
14494 }
14495 else if (rcStrict == VINF_IEM_RAISED_XCPT)
14496 {
14497 rcStrict = VINF_SUCCESS;
14498 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
14499 }
14500 return VBOXSTRICTRC_VAL(rcStrict);
14501}
14502
14503
14504/**
14505 * VM-exit exception handler wrapper for all other exceptions that are not handled
14506 * by a specific handler.
14507 *
14508 * This simply re-injects the exception back into the VM without any special
14509 * processing.
14510 *
14511 * @remarks Requires all fields in HMVMX_READ_XCPT_INFO to be read from the VMCS.
14512 */
14513static VBOXSTRICTRC hmR0VmxExitXcptOthers(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14514{
14515 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14516
14517#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
14518 PCVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14519 AssertMsg(pVCpu->hmr0.s.fUsingDebugLoop || pVmcsInfo->pShared->RealMode.fRealOnV86Active || pVmxTransient->fIsNestedGuest,
14520 ("uVector=%#x u32XcptBitmap=%#X32\n",
14521 VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo), pVmcsInfo->u32XcptBitmap));
14522 NOREF(pVmcsInfo);
14523#endif
14524
14525 /*
14526 * Re-inject the exception into the guest. This cannot be a double-fault condition which
14527 * would have been handled while checking exits due to event delivery.
14528 */
14529 uint8_t const uVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
14530
14531#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
14532 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP);
14533 AssertRCReturn(rc, rc);
14534     Log4Func(("Reinjecting Xcpt. uVector=%#x cs:rip=%#04x:%#RX64\n", uVector, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
14535#endif
14536
14537#ifdef VBOX_WITH_STATISTICS
14538 switch (uVector)
14539 {
14540 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); break;
14541 case X86_XCPT_DB: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB); break;
14542 case X86_XCPT_BP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP); break;
14543 case X86_XCPT_OF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestOF); break;
14544 case X86_XCPT_BR: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBR); break;
14545 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); break;
14546         case X86_XCPT_NM: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM); break;
14547 case X86_XCPT_DF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDF); break;
14548 case X86_XCPT_TS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestTS); break;
14549 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); break;
14550 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); break;
14551 case X86_XCPT_GP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP); break;
14552 case X86_XCPT_PF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); break;
14553 case X86_XCPT_MF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF); break;
14554 case X86_XCPT_AC: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestAC); break;
14555 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); break;
14556 default:
14557 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
14558 break;
14559 }
14560#endif
14561
14562 /* We should never call this function for a page-fault, we'd need to pass on the fault address below otherwise. */
14563 Assert(!VMX_EXIT_INT_INFO_IS_XCPT_PF(pVmxTransient->uExitIntInfo));
14564 NOREF(uVector);
14565
14566 /* Re-inject the original exception into the guest. */
14567 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
14568 pVmxTransient->cbExitInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
14569 return VINF_SUCCESS;
14570}
14571
14572
14573/**
14574 * VM-exit exception handler for all exceptions (except NMIs!).
14575 *
14576 * @remarks This may be called for both guests and nested-guests. Take care to not
14577 * make assumptions and avoid doing anything that is not relevant when
14578 * executing a nested-guest (e.g., Mesa driver hacks).
14579 */
14580static VBOXSTRICTRC hmR0VmxExitXcpt(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14581{
14582 HMVMX_ASSERT_READ(pVmxTransient, HMVMX_READ_XCPT_INFO);
14583
14584 /*
14585 * If this VM-exit occurred while delivering an event through the guest IDT, take
14586 * action based on the return code and additional hints (e.g. for page-faults)
14587 * that will be updated in the VMX transient structure.
14588 */
14589 VBOXSTRICTRC rcStrict = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
14590 if (rcStrict == VINF_SUCCESS)
14591 {
14592 /*
14593 * If an exception caused a VM-exit due to delivery of an event, the original
14594 * event may have to be re-injected into the guest. We shall reinject it and
14595 * continue guest execution. However, page-fault is a complicated case and
14596 * needs additional processing done in hmR0VmxExitXcptPF().
14597 */
14598 Assert(VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo));
14599 uint8_t const uVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
14600 if ( !pVCpu->hm.s.Event.fPending
14601 || uVector == X86_XCPT_PF)
14602 {
14603 switch (uVector)
14604 {
14605 case X86_XCPT_PF: return hmR0VmxExitXcptPF(pVCpu, pVmxTransient);
14606 case X86_XCPT_GP: return hmR0VmxExitXcptGP(pVCpu, pVmxTransient);
14607 case X86_XCPT_MF: return hmR0VmxExitXcptMF(pVCpu, pVmxTransient);
14608 case X86_XCPT_DB: return hmR0VmxExitXcptDB(pVCpu, pVmxTransient);
14609 case X86_XCPT_BP: return hmR0VmxExitXcptBP(pVCpu, pVmxTransient);
14610 case X86_XCPT_AC: return hmR0VmxExitXcptAC(pVCpu, pVmxTransient);
14611 default:
14612 return hmR0VmxExitXcptOthers(pVCpu, pVmxTransient);
14613 }
14614 }
14615 /* else: inject pending event before resuming guest execution. */
14616 }
14617 else if (rcStrict == VINF_HM_DOUBLE_FAULT)
14618 {
14619 Assert(pVCpu->hm.s.Event.fPending);
14620 rcStrict = VINF_SUCCESS;
14621 }
14622
14623 return rcStrict;
14624}
14625/** @} */
14626
14627
14628/** @name VM-exit handlers.
14629 * @{
14630 */
14631/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
14632/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
14633/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
14634
14635/**
14636 * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT).
14637 */
14638HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14639{
14640 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14641 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
14642 /* Windows hosts (32-bit and 64-bit) have DPC latency issues. See @bugref{6853}. */
14643 if (VMMR0ThreadCtxHookIsEnabled(pVCpu))
14644 return VINF_SUCCESS;
14645 return VINF_EM_RAW_INTERRUPT;
14646}
14647
14648
14649/**
14650 * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI). Conditional
14651 * VM-exit.
14652 */
14653HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14654{
14655 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14656 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3);
14657
14658 hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
14659
14660 uint32_t const uExitIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo);
14661 uint8_t const uVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo);
14662 Assert(VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo));
14663
14664 PCVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14665 Assert( !(pVmcsInfo->u32ExitCtls & VMX_EXIT_CTLS_ACK_EXT_INT)
14666 && uExitIntType != VMX_EXIT_INT_INFO_TYPE_EXT_INT);
14667 NOREF(pVmcsInfo);
14668
14669 VBOXSTRICTRC rcStrict;
14670 switch (uExitIntType)
14671 {
14672 /*
14673 * Host physical NMIs:
14674 * This cannot be a guest NMI as the only way for the guest to receive an NMI is if we
14675 * injected it ourselves and anything we inject is not going to cause a VM-exit directly
14676 * for the event being injected[1]. Go ahead and dispatch the NMI to the host[2].
14677 *
14678 * See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery".
14679 * See Intel spec. 27.5.5 "Updating Non-Register State".
14680 */
14681 case VMX_EXIT_INT_INFO_TYPE_NMI:
14682 {
14683 rcStrict = hmR0VmxExitHostNmi(pVCpu, pVmcsInfo);
14684 break;
14685 }
14686
14687 /*
14688 * Privileged software exceptions (#DB from ICEBP),
14689 * Software exceptions (#BP and #OF),
14690 * Hardware exceptions:
14691 * Process the required exceptions and resume guest execution if possible.
14692 */
14693 case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT:
14694 Assert(uVector == X86_XCPT_DB);
14695 RT_FALL_THRU();
14696 case VMX_EXIT_INT_INFO_TYPE_SW_XCPT:
14697 Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF || uExitIntType == VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT);
14698 RT_FALL_THRU();
14699 case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
14700 {
14701 NOREF(uVector);
14702 hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
14703 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
14704 hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
14705 hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
14706
14707 rcStrict = hmR0VmxExitXcpt(pVCpu, pVmxTransient);
14708 break;
14709 }
14710
14711 default:
14712 {
14713 pVCpu->hm.s.u32HMError = pVmxTransient->uExitIntInfo;
14714 rcStrict = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE;
14715 AssertMsgFailed(("Invalid/unexpected VM-exit interruption info %#x\n", pVmxTransient->uExitIntInfo));
14716 break;
14717 }
14718 }
14719
14720 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
14721 return rcStrict;
14722}
14723
14724
14725/**
14726 * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW).
14727 */
14728HMVMX_EXIT_NSRC_DECL hmR0VmxExitIntWindow(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14729{
14730 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14731
14732 /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts, it is now ready. */
14733 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14734 hmR0VmxClearIntWindowExitVmcs(pVmcsInfo);
14735
14736 /* Evaluate and deliver pending events and resume guest execution. */
14737 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
14738 return VINF_SUCCESS;
14739}
14740
14741
14742/**
14743 * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW).
14744 */
14745HMVMX_EXIT_NSRC_DECL hmR0VmxExitNmiWindow(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14746{
14747 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14748
14749 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14750 if (RT_UNLIKELY(!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT))) /** @todo NSTVMX: Turn this into an assertion. */
14751 {
14752 AssertMsgFailed(("Unexpected NMI-window exit.\n"));
14753 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient->uExitReason);
14754 }
14755
14756 Assert(!CPUMIsGuestNmiBlocking(pVCpu));
14757
14758 /*
14759 * If block-by-STI is set when we get this VM-exit, it means the CPU doesn't block NMIs following STI.
14760 * It is therefore safe to unblock STI and deliver the NMI ourselves. See @bugref{7445}.
14761 */
14762 uint32_t fIntrState;
14763 int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState);
14764 AssertRC(rc);
14765 Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS));
14766 if (fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)
14767 {
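        /* The inhibit-interrupts force-flag mirrors the interrupt shadow; clear it along
           with the block-by-STI bit we remove from the VMCS below. */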
14768 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
14769 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
14770
14771 fIntrState &= ~VMX_VMCS_GUEST_INT_STATE_BLOCK_STI;
14772 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState);
14773 AssertRC(rc);
14774 }
14775
14776 /* Indicate that we no longer need to VM-exit when the guest is ready to receive NMIs, it is now ready */
14777 hmR0VmxClearNmiWindowExitVmcs(pVmcsInfo);
14778
14779 /* Evaluate and deliver pending events and resume guest execution. */
14780 return VINF_SUCCESS;
14781}
14782
14783
14784/**
14785 * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit.
14786 */
14787HMVMX_EXIT_NSRC_DECL hmR0VmxExitWbinvd(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14788{
14789 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14790 return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
14791}
14792
14793
14794/**
14795 * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit.
14796 */
14797HMVMX_EXIT_NSRC_DECL hmR0VmxExitInvd(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14798{
14799 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14800 return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
14801}
14802
14803
14804/**
14805 * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit.
14806 */
14807HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14808{
14809 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14810
14811 /*
14812 * Get the state we need and update the exit history entry.
14813 */
14814 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14815 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
14816
14817 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
14818 AssertRCReturn(rc, rc);
14819
14820 VBOXSTRICTRC rcStrict;
14821 PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
14822 EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_CPUID),
14823 pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
14824 if (!pExitRec)
14825 {
14826 /*
14827 * Regular CPUID instruction execution.
14828 */
14829 rcStrict = IEMExecDecodedCpuid(pVCpu, pVmxTransient->cbExitInstr);
14830 if (rcStrict == VINF_SUCCESS)
14831 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
14832 else if (rcStrict == VINF_IEM_RAISED_XCPT)
14833 {
14834 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
14835 rcStrict = VINF_SUCCESS;
14836 }
14837 }
14838 else
14839 {
14840 /*
14841 * Frequent exit or something needing probing. Get state and call EMHistoryExec.
14842 */
14843 int rc2 = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
14844 AssertRCReturn(rc2, rc2);
14845
14846 Log4(("CpuIdExit/%u: %04x:%08RX64: %#x/%#x -> EMHistoryExec\n",
14847 pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ecx));
14848
14849 rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
14850 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
14851
14852 Log4(("CpuIdExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
14853 pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
14854 VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
14855 }
14856 return rcStrict;
14857}
14858
14859
14860/**
14861 * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit.
14862 */
14863HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14864{
14865 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14866
14867 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14868 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_CR4);
14869 AssertRCReturn(rc, rc);
14870
14871 if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_SMXE)
14872 return VINF_EM_RAW_EMULATE_INSTR;
14873
14874 AssertMsgFailed(("hmR0VmxExitGetsec: Unexpected VM-exit when CR4.SMXE is 0.\n"));
14875 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient->uExitReason);
14876}
14877
14878
14879/**
14880 * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit.
14881 */
14882HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14883{
14884 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14885
14886 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14887 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
14888 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
14889 AssertRCReturn(rc, rc);
14890
14891 VBOXSTRICTRC rcStrict = IEMExecDecodedRdtsc(pVCpu, pVmxTransient->cbExitInstr);
14892 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
14893 {
14894 /* If we get a spurious VM-exit when TSC offsetting is enabled,
14895 we must reset offsetting on VM-entry. See @bugref{6634}. */
14896 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING)
14897 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
14898 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
14899 }
14900 else if (rcStrict == VINF_IEM_RAISED_XCPT)
14901 {
14902 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
14903 rcStrict = VINF_SUCCESS;
14904 }
14905 return rcStrict;
14906}
14907
14908
14909/**
14910 * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit.
14911 */
14912HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14913{
14914 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14915
14916 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14917 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
14918 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_TSC_AUX);
14919 AssertRCReturn(rc, rc);
14920
14921 VBOXSTRICTRC rcStrict = IEMExecDecodedRdtscp(pVCpu, pVmxTransient->cbExitInstr);
14922 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
14923 {
14924 /* If we get a spurious VM-exit when TSC offsetting is enabled,
14925 we must reset offsetting on VM-reentry. See @bugref{6634}. */
14926 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING)
14927 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
14928 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
14929 }
14930 else if (rcStrict == VINF_IEM_RAISED_XCPT)
14931 {
14932 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
14933 rcStrict = VINF_SUCCESS;
14934 }
14935 return rcStrict;
14936}
14937
14938
14939/**
14940 * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit.
14941 */
14942HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14943{
14944 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14945
14946 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14947 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_CR0
14948 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS);
14949 AssertRCReturn(rc, rc);
14950
14951 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
14952 rc = EMInterpretRdpmc(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx));
14953 if (RT_LIKELY(rc == VINF_SUCCESS))
14954 {
14955 rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
14956 Assert(pVmxTransient->cbExitInstr == 2);
14957 }
14958 else
14959 {
14960 AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc));
14961 rc = VERR_EM_INTERPRETER;
14962 }
14963 return rc;
14964}
14965
14966
14967/**
14968 * VM-exit handler for VMCALL (VMX_EXIT_VMCALL). Unconditional VM-exit.
14969 */
14970HMVMX_EXIT_DECL hmR0VmxExitVmcall(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
14971{
14972 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
14973
14974 VBOXSTRICTRC rcStrict = VERR_VMX_IPE_3;
14975 if (EMAreHypercallInstructionsEnabled(pVCpu))
14976 {
14977 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
14978 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_CR0
14979 | CPUMCTX_EXTRN_SS | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER);
14980 AssertRCReturn(rc, rc);
14981
14982 /* Perform the hypercall. */
14983 rcStrict = GIMHypercall(pVCpu, &pVCpu->cpum.GstCtx);
14984 if (rcStrict == VINF_SUCCESS)
14985 {
14986 rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
14987 AssertRCReturn(rc, rc);
14988 }
14989 else
14990 Assert( rcStrict == VINF_GIM_R3_HYPERCALL
14991 || rcStrict == VINF_GIM_HYPERCALL_CONTINUING
14992 || RT_FAILURE(rcStrict));
14993
14994 /* If the hypercall changes anything other than guest's general-purpose registers,
14995 we would need to reload the guest changed bits here before VM-entry. */
14996 }
14997 else
14998 Log4Func(("Hypercalls not enabled\n"));
14999
15000 /* If hypercalls are disabled or the hypercall failed for some reason, raise #UD and continue. */
15001 if (RT_FAILURE(rcStrict))
15002 {
15003 hmR0VmxSetPendingXcptUD(pVCpu);
15004 rcStrict = VINF_SUCCESS;
15005 }
15006
15007 return rcStrict;
15008}
15009
15010
15011/**
15012 * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit.
15013 */
15014HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15015{
15016 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15017 Assert(!pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging || pVCpu->hmr0.s.fUsingDebugLoop);
15018
15019 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15020 hmR0VmxReadExitQualVmcs(pVmxTransient);
15021 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15022 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
15023 AssertRCReturn(rc, rc);
15024
15025 VBOXSTRICTRC rcStrict = IEMExecDecodedInvlpg(pVCpu, pVmxTransient->cbExitInstr, pVmxTransient->uExitQual);
15026
15027 if (rcStrict == VINF_SUCCESS || rcStrict == VINF_PGM_SYNC_CR3)
15028 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
15029 else if (rcStrict == VINF_IEM_RAISED_XCPT)
15030 {
15031 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
15032 rcStrict = VINF_SUCCESS;
15033 }
15034 else
15035 AssertMsgFailed(("Unexpected IEMExecDecodedInvlpg(%#RX64) status: %Rrc\n", pVmxTransient->uExitQual,
15036 VBOXSTRICTRC_VAL(rcStrict)));
15037 return rcStrict;
15038}
15039
15040
15041/**
15042 * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit.
15043 */
15044HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15045{
15046 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15047
15048 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15049 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15050 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK | CPUMCTX_EXTRN_DS);
15051 AssertRCReturn(rc, rc);
15052
15053 VBOXSTRICTRC rcStrict = IEMExecDecodedMonitor(pVCpu, pVmxTransient->cbExitInstr);
15054 if (rcStrict == VINF_SUCCESS)
15055 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
15056 else if (rcStrict == VINF_IEM_RAISED_XCPT)
15057 {
15058 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
15059 rcStrict = VINF_SUCCESS;
15060 }
15061
15062 return rcStrict;
15063}
15064
15065
15066/**
15067 * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit.
15068 */
15069HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15070{
15071 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15072
15073 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15074 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15075 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
15076 AssertRCReturn(rc, rc);
15077
15078 VBOXSTRICTRC rcStrict = IEMExecDecodedMwait(pVCpu, pVmxTransient->cbExitInstr);
15079 if (RT_SUCCESS(rcStrict))
15080 {
15081 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
15082 if (EMMonitorWaitShouldContinue(pVCpu, &pVCpu->cpum.GstCtx))
15083 rcStrict = VINF_SUCCESS;
15084 }
15085
15086 return rcStrict;
15087}
15088
15089
15090/**
15091 * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional
15092 * VM-exit.
15093 */
15094HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15095{
15096 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15097 return VINF_EM_RESET;
15098}
15099
15100
15101/**
15102 * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit.
15103 */
15104HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15105{
15106 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15107
15108 int rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
15109 AssertRCReturn(rc, rc);
15110
15111 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS); /* Advancing the RIP above should've imported eflags. */
15112 if (EMShouldContinueAfterHalt(pVCpu, &pVCpu->cpum.GstCtx)) /* Requires eflags. */
15113 rc = VINF_SUCCESS;
15114 else
15115 rc = VINF_EM_HALT;
15116
15117 if (rc != VINF_SUCCESS)
15118 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHltToR3);
15119 return rc;
15120}
15121
15122
15123/**
15124 * VM-exit handler for instructions that result in a \#UD exception delivered to
15125 * the guest.
15126 */
15127HMVMX_EXIT_NSRC_DECL hmR0VmxExitSetPendingXcptUD(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15128{
15129 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15130 hmR0VmxSetPendingXcptUD(pVCpu);
15131 return VINF_SUCCESS;
15132}
15133
15134
15135/**
15136 * VM-exit handler for expiry of the VMX-preemption timer.
15137 */
15138HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15139{
15140 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15141
15142 /* If the VMX-preemption timer has expired, reinitialize the preemption timer on next VM-entry. */
15143 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
15144    Log12(("hmR0VmxExitPreemptTimer:\n"));
15145
15146 /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */
15147 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
15148 bool fTimersPending = TMTimerPollBool(pVM, pVCpu);
15149 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer);
15150 return fTimersPending ? VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS;
15151}
15152
15153
15154/**
15155 * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit.
15156 */
15157HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15158{
15159 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15160
15161 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15162 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15163 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_CR4);
15164 AssertRCReturn(rc, rc);
15165
15166 VBOXSTRICTRC rcStrict = IEMExecDecodedXsetbv(pVCpu, pVmxTransient->cbExitInstr);
15167 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, rcStrict != VINF_IEM_RAISED_XCPT ? HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS
15168 : HM_CHANGED_RAISED_XCPT_MASK);
15169
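    /* XSETBV may have changed the guest's XCR0; re-evaluate whether XCR0 must be swapped
       around VM-entry/exit and refresh the start-VM function if that requirement changed. */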
15170 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
15171 bool const fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0();
15172 if (fLoadSaveGuestXcr0 != pVCpu->hmr0.s.fLoadSaveGuestXcr0)
15173 {
15174 pVCpu->hmr0.s.fLoadSaveGuestXcr0 = fLoadSaveGuestXcr0;
15175 hmR0VmxUpdateStartVmFunction(pVCpu);
15176 }
15177
15178 return rcStrict;
15179}
15180
15181
15182/**
15183 * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit.
15184 */
15185HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15186{
15187 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15188
15189    /** @todo Enable the new code after finding a reliable guest test-case. */
15190#if 1
15191 return VERR_EM_INTERPRETER;
15192#else
15193 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15194 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
15195 hmR0VmxReadExitQualVmcs(pVmxTransient);
15196 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
15197 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
15198 AssertRCReturn(rc, rc);
15199
15200 /* Paranoia. Ensure this has a memory operand. */
15201 Assert(!pVmxTransient->ExitInstrInfo.Inv.u1Cleared0);
15202
15203 uint8_t const iGReg = pVmxTransient->ExitInstrInfo.VmreadVmwrite.iReg2;
15204 Assert(iGReg < RT_ELEMENTS(pVCpu->cpum.GstCtx.aGRegs));
15205 uint64_t const uType = CPUMIsGuestIn64BitCode(pVCpu) ? pVCpu->cpum.GstCtx.aGRegs[iGReg].u64
15206 : pVCpu->cpum.GstCtx.aGRegs[iGReg].u32;
15207
15208 RTGCPTR GCPtrDesc;
15209 HMVMX_DECODE_MEM_OPERAND(pVCpu, pVmxTransient->ExitInstrInfo.u, pVmxTransient->uExitQual, VMXMEMACCESS_READ, &GCPtrDesc);
15210
15211 VBOXSTRICTRC rcStrict = IEMExecDecodedInvpcid(pVCpu, pVmxTransient->cbExitInstr, pVmxTransient->ExitInstrInfo.Inv.iSegReg,
15212 GCPtrDesc, uType);
15213 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
15214 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
15215 else if (rcStrict == VINF_IEM_RAISED_XCPT)
15216 {
15217 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
15218 rcStrict = VINF_SUCCESS;
15219 }
15220 return rcStrict;
15221#endif
15222}
15223
15224
15225/**
15226 * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE). Error
15227 * VM-exit.
15228 */
15229HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrInvalidGuestState(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15230{
15231 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15232 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
15233 AssertRCReturn(rc, rc);
15234
15235 rc = hmR0VmxCheckCachedVmcsCtls(pVCpu, pVmcsInfo, pVmxTransient->fIsNestedGuest);
15236 if (RT_FAILURE(rc))
15237 return rc;
15238
15239 uint32_t const uInvalidReason = hmR0VmxCheckGuestState(pVCpu, pVmcsInfo);
15240 NOREF(uInvalidReason);
15241
15242#ifdef VBOX_STRICT
15243 uint32_t fIntrState;
15244 uint64_t u64Val;
15245 hmR0VmxReadEntryIntInfoVmcs(pVmxTransient);
15246 hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient);
15247 hmR0VmxReadEntryInstrLenVmcs(pVmxTransient);
15248
15249 Log4(("uInvalidReason %u\n", uInvalidReason));
15250 Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntInfo));
15251 Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode));
15252 Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr));
15253
15254 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState); AssertRC(rc);
15255 Log4(("VMX_VMCS32_GUEST_INT_STATE %#RX32\n", fIntrState));
15256 rc = VMXReadVmcsNw(VMX_VMCS_GUEST_CR0, &u64Val); AssertRC(rc);
15257 Log4(("VMX_VMCS_GUEST_CR0 %#RX64\n", u64Val));
15258 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_MASK, &u64Val); AssertRC(rc);
15259 Log4(("VMX_VMCS_CTRL_CR0_MASK %#RX64\n", u64Val));
15260 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u64Val); AssertRC(rc);
15261     Log4(("VMX_VMCS_CTRL_CR0_READ_SHADOW %#RX64\n", u64Val));
15262 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_MASK, &u64Val); AssertRC(rc);
15263 Log4(("VMX_VMCS_CTRL_CR4_MASK %#RX64\n", u64Val));
15264 rc = VMXReadVmcsNw(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u64Val); AssertRC(rc);
15265 Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RX64\n", u64Val));
15266 if (pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging)
15267 {
15268 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
15269 Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
15270 }
15271 hmR0DumpRegs(pVCpu, HM_DUMP_REG_FLAGS_ALL);
15272#endif
15273
15274 return VERR_VMX_INVALID_GUEST_STATE;
15275}
15276
15277/**
15278 * VM-exit handler for all undefined/unexpected reasons. Should never happen.
15279 */
15280HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrUnexpected(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15281{
15282 /*
15283 * Cumulative notes of all recognized but unexpected VM-exits.
15284 *
15285 * 1. This does -not- cover scenarios like a page-fault VM-exit occurring when
15286 * nested-paging is used.
15287 *
15288  *    2. Any instruction that causes a VM-exit unconditionally (e.g. VMXON) must be
15289  *       emulated or a #UD must be raised in the guest. Therefore, we should -not- be using
15290  *       this function (and thereby stopping VM execution) for handling such instructions.
15291 *
15292 *
15293 * VMX_EXIT_INIT_SIGNAL:
15294 * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM.
15295  *    They are -NOT- blocked in VMX non-root operation so we can, in theory, still get these
15296  *    VM-exits. However, we should not receive INIT signal VM-exits while executing a VM.
15297  *
15298  *    See Intel spec. 33.14.1 "Default Treatment of SMI Delivery".
15299 * See Intel spec. 29.3 "VMX Instructions" for "VMXON".
15300 * See Intel spec. "23.8 Restrictions on VMX operation".
15301 *
15302 * VMX_EXIT_SIPI:
15303 * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest
15304 * activity state is used. We don't make use of it as our guests don't have direct
15305 * access to the host local APIC.
15306 *
15307 * See Intel spec. 25.3 "Other Causes of VM-exits".
15308 *
15309 * VMX_EXIT_IO_SMI:
15310 * VMX_EXIT_SMI:
15311 * This can only happen if we support dual-monitor treatment of SMI, which can be
15312 * activated by executing VMCALL in VMX root operation. Only an STM (SMM transfer
15313 * monitor) would get this VM-exit when we (the executive monitor) execute a VMCALL in
15314 * VMX root mode or receive an SMI. If we get here, something funny is going on.
15315 *
15316 * See Intel spec. 33.15.6 "Activating the Dual-Monitor Treatment"
15317 * See Intel spec. 25.3 "Other Causes of VM-Exits"
15318 *
15319 * VMX_EXIT_ERR_MSR_LOAD:
15320  *    Failures while loading MSRs that are part of the VM-entry MSR-load area are unexpected
15321  *    and typically indicate a bug in the hypervisor code. We thus cannot resume guest
15322  *    execution.
15323 *
15324 * See Intel spec. 26.7 "VM-Entry Failures During Or After Loading Guest State".
15325 *
15326 * VMX_EXIT_ERR_MACHINE_CHECK:
15327  *    Machine-check exceptions indicate a fatal/unrecoverable hardware condition,
15328  *    including but not limited to system bus, ECC, parity, cache and TLB errors. An
15329  *    abort-class #MC exception is raised. We thus cannot assume a
15330 * reasonable chance of continuing any sort of execution and we bail.
15331 *
15332 * See Intel spec. 15.1 "Machine-check Architecture".
15333 * See Intel spec. 27.1 "Architectural State Before A VM Exit".
15334 *
15335 * VMX_EXIT_PML_FULL:
15336 * VMX_EXIT_VIRTUALIZED_EOI:
15337 * VMX_EXIT_APIC_WRITE:
15338 * We do not currently support any of these features and thus they are all unexpected
15339 * VM-exits.
15340 *
15341 * VMX_EXIT_GDTR_IDTR_ACCESS:
15342 * VMX_EXIT_LDTR_TR_ACCESS:
15343 * VMX_EXIT_RDRAND:
15344 * VMX_EXIT_RSM:
15345 * VMX_EXIT_VMFUNC:
15346 * VMX_EXIT_ENCLS:
15347 * VMX_EXIT_RDSEED:
15348 * VMX_EXIT_XSAVES:
15349 * VMX_EXIT_XRSTORS:
15350 * VMX_EXIT_UMWAIT:
15351 * VMX_EXIT_TPAUSE:
15352 * VMX_EXIT_LOADIWKEY:
15353 * These VM-exits are -not- caused unconditionally by execution of the corresponding
15354  *    instruction. Any VM-exit for these instructions indicates a hardware problem,
15355 * unsupported CPU modes (like SMM) or potentially corrupt VMCS controls.
15356 *
15357 * See Intel spec. 25.1.3 "Instructions That Cause VM Exits Conditionally".
15358 */
15359 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15360 AssertMsgFailed(("Unexpected VM-exit %u\n", pVmxTransient->uExitReason));
15361 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient->uExitReason);
15362}
15363
15364
15365/**
15366 * VM-exit handler for RDMSR (VMX_EXIT_RDMSR).
15367 */
15368HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15369{
15370 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15371
15372 /** @todo Optimize this: We currently drag in the whole MSR state
15373 * (CPUMCTX_EXTRN_ALL_MSRS) here. We should optimize this to only get
15374     *        the MSRs required. That would require changes to IEM and possibly CPUM too.
15375     *        (Should probably do it in a lazy fashion from CPUMAllMsrs.cpp). */
15376 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15377 uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx;
15378 uint64_t fImport = IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS;
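    /* Like for WRMSR further below, the FS and GS base MSRs are not covered by the all-MSRs
       mask; import the full segment registers so IEM sees the current base values. */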
15379 switch (idMsr)
15380 {
15381 case MSR_K8_FS_BASE: fImport |= CPUMCTX_EXTRN_FS; break;
15382 case MSR_K8_GS_BASE: fImport |= CPUMCTX_EXTRN_GS; break;
15383 }
15384
15385 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15386 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fImport);
15387 AssertRCReturn(rc, rc);
15388
15389 Log4Func(("ecx=%#RX32\n", idMsr));
15390
15391#ifdef VBOX_STRICT
15392 Assert(!pVmxTransient->fIsNestedGuest);
15393 if (pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)
15394 {
15395 if ( hmR0VmxIsAutoLoadGuestMsr(pVmcsInfo, idMsr)
15396 && idMsr != MSR_K6_EFER)
15397 {
15398 AssertMsgFailed(("Unexpected RDMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n", idMsr));
15399 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
15400 }
15401 if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
15402 {
15403 Assert(pVmcsInfo->pvMsrBitmap);
15404 uint32_t fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, idMsr);
15405 if (fMsrpm & VMXMSRPM_ALLOW_RD)
15406 {
15407 AssertMsgFailed(("Unexpected RDMSR for a passthru lazy-restore MSR. ecx=%#RX32\n", idMsr));
15408 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
15409 }
15410 }
15411 }
15412#endif
15413
15414 VBOXSTRICTRC rcStrict = IEMExecDecodedRdmsr(pVCpu, pVmxTransient->cbExitInstr);
15415 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
15416 if (rcStrict == VINF_SUCCESS)
15417 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
15418 else if (rcStrict == VINF_IEM_RAISED_XCPT)
15419 {
15420 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
15421 rcStrict = VINF_SUCCESS;
15422 }
15423 else
15424 AssertMsg(rcStrict == VINF_CPUM_R3_MSR_READ || rcStrict == VINF_EM_TRIPLE_FAULT,
15425 ("Unexpected IEMExecDecodedRdmsr rc (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
15426
15427 return rcStrict;
15428}
15429
15430
15431/**
15432 * VM-exit handler for WRMSR (VMX_EXIT_WRMSR).
15433 */
15434HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15435{
15436 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15437
15438 /** @todo Optimize this: We currently drag in the whole MSR state
15439 * (CPUMCTX_EXTRN_ALL_MSRS) here. We should optimize this to only get
15440     *        the MSRs required. That would require changes to IEM and possibly CPUM too.
15441     *        (Should probably do it in a lazy fashion from CPUMAllMsrs.cpp). */
15442 uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx;
15443 uint64_t fImport = IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS;
15444
15445 /*
15446 * The FS and GS base MSRs are not part of the above all-MSRs mask.
15447     * Although we don't need to fetch the base as it will be overwritten shortly, when
15448     * loading the guest state we also load the entire segment register, including its
15449     * limit and attributes, and thus we need to import them here.
15450 */
15451 switch (idMsr)
15452 {
15453 case MSR_K8_FS_BASE: fImport |= CPUMCTX_EXTRN_FS; break;
15454 case MSR_K8_GS_BASE: fImport |= CPUMCTX_EXTRN_GS; break;
15455 }
15456
15457 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15458 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15459 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, fImport);
15460 AssertRCReturn(rc, rc);
15461
15462 Log4Func(("ecx=%#RX32 edx:eax=%#RX32:%#RX32\n", idMsr, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.eax));
15463
15464 VBOXSTRICTRC rcStrict = IEMExecDecodedWrmsr(pVCpu, pVmxTransient->cbExitInstr);
15465 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
15466
15467 if (rcStrict == VINF_SUCCESS)
15468 {
15469 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
15470
15471 /* If this is an X2APIC WRMSR access, update the APIC state as well. */
15472 if ( idMsr == MSR_IA32_APICBASE
15473 || ( idMsr >= MSR_IA32_X2APIC_START
15474 && idMsr <= MSR_IA32_X2APIC_END))
15475 {
15476 /*
15477 * We've already saved the APIC related guest-state (TPR) in post-run phase.
15478 * When full APIC register virtualization is implemented we'll have to make
15479 * sure APIC state is saved from the VMCS before IEM changes it.
15480 */
15481 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR);
15482 }
15483 else if (idMsr == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */
15484 pVmxTransient->fUpdatedTscOffsettingAndPreemptTimer = false;
15485 else if (idMsr == MSR_K6_EFER)
15486 {
15487 /*
15488 * If the guest touches the EFER MSR we need to update the VM-Entry and VM-Exit controls
15489 * as well, even if it is -not- touching bits that cause paging mode changes (LMA/LME).
15490 * We care about the other bits as well, SCE and NXE. See @bugref{7368}.
15491 */
15492 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_EFER_MSR | HM_CHANGED_VMX_ENTRY_EXIT_CTLS);
15493 }
15494
15495 /* Update MSRs that are part of the VMCS and auto-load/store area when MSR-bitmaps are not used. */
15496 if (!(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS))
15497 {
15498 switch (idMsr)
15499 {
15500 case MSR_IA32_SYSENTER_CS: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break;
15501 case MSR_IA32_SYSENTER_EIP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break;
15502 case MSR_IA32_SYSENTER_ESP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break;
15503 case MSR_K8_FS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_FS); break;
15504 case MSR_K8_GS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_GS); break;
15505 case MSR_K6_EFER: /* Nothing to do, already handled above. */ break;
15506 default:
15507 {
15508 if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
15509 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_VMX_GUEST_LAZY_MSRS);
15510 else if (hmR0VmxIsAutoLoadGuestMsr(pVmcsInfo, idMsr))
15511 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_VMX_GUEST_AUTO_MSRS);
15512 break;
15513 }
15514 }
15515 }
15516#ifdef VBOX_STRICT
15517 else
15518 {
15519 /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */
15520 switch (idMsr)
15521 {
15522 case MSR_IA32_SYSENTER_CS:
15523 case MSR_IA32_SYSENTER_EIP:
15524 case MSR_IA32_SYSENTER_ESP:
15525 case MSR_K8_FS_BASE:
15526 case MSR_K8_GS_BASE:
15527 {
15528 AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", idMsr));
15529 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
15530 }
15531
15532 /* Writes to MSRs in the auto-load/store area or to swapped MSRs shouldn't cause VM-exits when MSR-bitmaps are used. */
15533 default:
15534 {
15535 if (hmR0VmxIsAutoLoadGuestMsr(pVmcsInfo, idMsr))
15536 {
15537 /* EFER MSR writes are always intercepted. */
15538 if (idMsr != MSR_K6_EFER)
15539 {
15540 AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n",
15541 idMsr));
15542 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
15543 }
15544 }
15545
15546 if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr))
15547 {
15548 Assert(pVmcsInfo->pvMsrBitmap);
15549 uint32_t fMsrpm = CPUMGetVmxMsrPermission(pVmcsInfo->pvMsrBitmap, idMsr);
15550 if (fMsrpm & VMXMSRPM_ALLOW_WR)
15551 {
15552 AssertMsgFailed(("Unexpected WRMSR for passthru, lazy-restore MSR. ecx=%#RX32\n", idMsr));
15553 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, idMsr);
15554 }
15555 }
15556 break;
15557 }
15558 }
15559 }
15560#endif /* VBOX_STRICT */
15561 }
15562 else if (rcStrict == VINF_IEM_RAISED_XCPT)
15563 {
15564 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
15565 rcStrict = VINF_SUCCESS;
15566 }
15567 else
15568 AssertMsg(rcStrict == VINF_CPUM_R3_MSR_WRITE || rcStrict == VINF_EM_TRIPLE_FAULT,
15569 ("Unexpected IEMExecDecodedWrmsr rc (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict)));
15570
15571 return rcStrict;
15572}
15573
15574
15575/**
15576 * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit.
15577 */
15578HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15579{
15580 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15581
15582 /** @todo The guest has likely hit a contended spinlock. We might want to
15583 * poke and schedule a different guest VCPU. */
15584 int rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
15585 if (RT_SUCCESS(rc))
15586 return VINF_EM_RAW_INTERRUPT;
15587
15588 AssertMsgFailed(("hmR0VmxExitPause: Failed to increment RIP. rc=%Rrc\n", rc));
15589 return rc;
15590}
15591
15592
15593/**
15594 * VM-exit handler for when the TPR value is lowered below the specified
15595 * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit.
15596 */
15597HMVMX_EXIT_NSRC_DECL hmR0VmxExitTprBelowThreshold(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15598{
15599 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15600 Assert(pVmxTransient->pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW);
15601
15602 /*
15603 * The TPR shadow would've been synced with the APIC TPR in the post-run phase.
15604 * We'll re-evaluate pending interrupts and inject them before the next VM
15605 * entry so we can just continue execution here.
15606 */
15607 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold);
15608 return VINF_SUCCESS;
15609}
15610
15611
15612/**
15613 * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional
15614 * VM-exit.
15615 *
15616 * @retval VINF_SUCCESS when guest execution can continue.
15617 * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3.
15618 * @retval VERR_EM_RESCHEDULE_REM when we need to return to ring-3 due to
15619 * incompatible guest state for VMX execution (real-on-v86 case).
15620 */
15621HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15622{
15623 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15624 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2);
15625
15626 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15627 hmR0VmxReadExitQualVmcs(pVmxTransient);
15628 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15629
15630 VBOXSTRICTRC rcStrict;
15631 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
15632 uint64_t const uExitQual = pVmxTransient->uExitQual;
15633 uint32_t const uAccessType = VMX_EXIT_QUAL_CRX_ACCESS(uExitQual);
15634 switch (uAccessType)
15635 {
15636 /*
15637 * MOV to CRx.
15638 */
15639 case VMX_EXIT_QUAL_CRX_ACCESS_WRITE:
15640 {
15641 /*
15642 * When PAE paging is used, the CPU will reload PAE PDPTEs from CR3 when the guest
15643 * changes certain bits in CR0 or CR4 (and not just CR3). We are currently fine
15644 * since IEM_CPUMCTX_EXTRN_MUST_MASK (used below) includes CR3 which will import
15645 * PAE PDPTEs as well.
15646 */
15647 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
15648 AssertRCReturn(rc, rc);
15649
15650 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0);
15651 uint32_t const uOldCr0 = pVCpu->cpum.GstCtx.cr0;
15652 uint8_t const iGReg = VMX_EXIT_QUAL_CRX_GENREG(uExitQual);
15653 uint8_t const iCrReg = VMX_EXIT_QUAL_CRX_REGISTER(uExitQual);
15654
15655 /*
15656 * MOV to CR3 only causes a VM-exit when one or more of the following are true:
15657 * - When nested paging isn't used.
15658 * - If the guest doesn't have paging enabled (intercept CR3 to update shadow page tables).
15659 * - We are executing in the VM debug loop.
15660 */
15661 Assert( iCrReg != 3
15662 || !pVM->hmr0.s.fNestedPaging
15663 || !CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx)
15664 || pVCpu->hmr0.s.fUsingDebugLoop);
15665
15666 /* MOV to CR8 writes only cause VM-exits when TPR shadow is not used. */
15667 Assert( iCrReg != 8
15668 || !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW));
15669
15670 rcStrict = hmR0VmxExitMovToCrX(pVCpu, pVmxTransient->cbExitInstr, iGReg, iCrReg);
15671 AssertMsg( rcStrict == VINF_SUCCESS
15672 || rcStrict == VINF_PGM_SYNC_CR3, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
15673
15674 /*
15675 * This is a kludge for handling switches back to real mode when we try to use
15676 * V86 mode to run real mode code directly. Problem is that V86 mode cannot
15677 * deal with special selector values, so we have to return to ring-3 and run
15678 * there till the selector values are V86 mode compatible.
15679 *
15680 * Note! Using VINF_EM_RESCHEDULE_REM here rather than VINF_EM_RESCHEDULE since the
15681 * latter is an alias for VINF_IEM_RAISED_XCPT which is asserted at the end of
15682 * this function.
15683 */
15684 if ( iCrReg == 0
15685 && rcStrict == VINF_SUCCESS
15686 && !pVM->hmr0.s.vmx.fUnrestrictedGuest
15687 && CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx)
15688 && (uOldCr0 & X86_CR0_PE)
15689 && !(pVCpu->cpum.GstCtx.cr0 & X86_CR0_PE))
15690 {
15691 /** @todo Check selectors rather than returning all the time. */
15692 Assert(!pVmxTransient->fIsNestedGuest);
15693 Log4Func(("CR0 write, back to real mode -> VINF_EM_RESCHEDULE_REM\n"));
15694 rcStrict = VINF_EM_RESCHEDULE_REM;
15695 }
15696 break;
15697 }
15698
15699 /*
15700 * MOV from CRx.
15701 */
15702 case VMX_EXIT_QUAL_CRX_ACCESS_READ:
15703 {
15704 uint8_t const iGReg = VMX_EXIT_QUAL_CRX_GENREG(uExitQual);
15705 uint8_t const iCrReg = VMX_EXIT_QUAL_CRX_REGISTER(uExitQual);
15706
15707 /*
15708 * MOV from CR3 only causes a VM-exit when one or more of the following are true:
15709 * - When nested paging isn't used.
15710 * - If the guest doesn't have paging enabled (pass guest's CR3 rather than our identity mapped CR3).
15711 * - We are executing in the VM debug loop.
15712 */
15713 Assert( iCrReg != 3
15714 || !pVM->hmr0.s.fNestedPaging
15715 || !CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx)
15716 || pVCpu->hmr0.s.fLeaveDone);
15717
15718 /* MOV from CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */
15719 Assert( iCrReg != 8
15720 || !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW));
15721
15722 rcStrict = hmR0VmxExitMovFromCrX(pVCpu, pVmcsInfo, pVmxTransient->cbExitInstr, iGReg, iCrReg);
15723 break;
15724 }
15725
15726 /*
15727 * CLTS (Clear Task-Switch Flag in CR0).
15728 */
15729 case VMX_EXIT_QUAL_CRX_ACCESS_CLTS:
15730 {
15731 rcStrict = hmR0VmxExitClts(pVCpu, pVmcsInfo, pVmxTransient->cbExitInstr);
15732 break;
15733 }
15734
15735 /*
15736 * LMSW (Load Machine-Status Word into CR0).
15737 * LMSW cannot clear CR0.PE, so no fRealOnV86Active kludge needed here.
15738 */
15739 case VMX_EXIT_QUAL_CRX_ACCESS_LMSW:
15740 {
15741 RTGCPTR GCPtrEffDst;
15742 uint8_t const cbInstr = pVmxTransient->cbExitInstr;
15743 uint16_t const uMsw = VMX_EXIT_QUAL_CRX_LMSW_DATA(uExitQual);
15744 bool const fMemOperand = VMX_EXIT_QUAL_CRX_LMSW_OP_MEM(uExitQual);
15745 if (fMemOperand)
15746 {
15747 hmR0VmxReadGuestLinearAddrVmcs(pVmxTransient);
15748 GCPtrEffDst = pVmxTransient->uGuestLinearAddr;
15749 }
15750 else
15751 GCPtrEffDst = NIL_RTGCPTR;
15752 rcStrict = hmR0VmxExitLmsw(pVCpu, pVmcsInfo, cbInstr, uMsw, GCPtrEffDst);
15753 break;
15754 }
15755
15756 default:
15757 {
15758 AssertMsgFailed(("Unrecognized Mov CRX access type %#x\n", uAccessType));
15759 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, uAccessType);
15760 }
15761 }
15762
15763 Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS))
15764 == (HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS));
15765 Assert(rcStrict != VINF_IEM_RAISED_XCPT);
15766
15767 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2);
15768 NOREF(pVM);
15769 return rcStrict;
15770}
15771
15772
15773/**
15774 * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional
15775 * VM-exit.
15776 */
15777HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
15778{
15779 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
15780 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1);
15781
15782 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
15783 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
15784 hmR0VmxReadExitQualVmcs(pVmxTransient);
15785 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
15786 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_SREG_MASK
15787 | CPUMCTX_EXTRN_EFER);
15788 /* The EFER MSR is also required for long-mode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */
15789 AssertRCReturn(rc, rc);
15790
15791 /* Refer to Intel spec. 27-5 "Exit Qualifications for I/O Instructions" for the format. */
15792 uint32_t const uIOPort = VMX_EXIT_QUAL_IO_PORT(pVmxTransient->uExitQual);
15793 uint8_t const uIOSize = VMX_EXIT_QUAL_IO_SIZE(pVmxTransient->uExitQual);
15794 bool const fIOWrite = (VMX_EXIT_QUAL_IO_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_IO_DIRECTION_OUT);
15795 bool const fIOString = VMX_EXIT_QUAL_IO_IS_STRING(pVmxTransient->uExitQual);
15796 bool const fGstStepping = RT_BOOL(pCtx->eflags.Bits.u1TF);
15797 bool const fDbgStepping = pVCpu->hm.s.fSingleInstruction;
15798 AssertReturn(uIOSize <= 3 && uIOSize != 2, VERR_VMX_IPE_1);
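 /*
  * Worked example of the decoding above (illustrative only, assuming the usual layout of
  * the I/O exit qualification): a 1-byte "out dx, al" with DX=0x80 would yield an exit
  * qualification of roughly 0x00800000 -- bits 31:16 hold the port (0x80), bit 4 is clear
  * (not a string instruction), bit 3 is clear (OUT, i.e. a write) and bits 2:0 are 0
  * (1-byte access) -- so uIOPort=0x80, uIOSize=0, fIOWrite=true and fIOString=false.
  */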
15799
15800 /*
15801 * Update exit history to see if this exit can be optimized.
15802 */
15803 VBOXSTRICTRC rcStrict;
15804 PCEMEXITREC pExitRec = NULL;
15805 if ( !fGstStepping
15806 && !fDbgStepping)
15807 pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
15808 !fIOString
15809 ? !fIOWrite
15810 ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_READ)
15811 : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_WRITE)
15812 : !fIOWrite
15813 ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_READ)
15814 : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_WRITE),
15815 pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
15816 if (!pExitRec)
15817 {
15818 static uint32_t const s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses in bytes. */
15819 static uint32_t const s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving result in AL/AX/EAX. */
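 /*
  * Example of how these tables are used (illustrative only): for uIOSize == 1 (a 2-byte access)
  * we get cbValue = 2 and uAndVal = 0xffff, so an IN merging u32Result = 0xBEEF into
  * eax = 0x12345678 further down produces eax = 0x1234BEEF, i.e. only AX is updated.
  */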
15820
15821 uint32_t const cbValue = s_aIOSizes[uIOSize];
15822 uint32_t const cbInstr = pVmxTransient->cbExitInstr;
15823 bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */
15824 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
15825 if (fIOString)
15826 {
15827 /*
15828 * INS/OUTS - I/O String instruction.
15829 *
15830 * Use instruction-information if available, otherwise fall back on
15831 * interpreting the instruction.
15832 */
15833 Log4Func(("cs:rip=%#04x:%#RX64 %#06x/%u %c str\n", pCtx->cs.Sel, pCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
15834 AssertReturn(pCtx->dx == uIOPort, VERR_VMX_IPE_2);
15835 bool const fInsOutsInfo = RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_INS_OUTS);
15836 if (fInsOutsInfo)
15837 {
15838 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
15839 AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_VMX_IPE_3);
15840 AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2);
15841 IEMMODE const enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize;
15842 bool const fRep = VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual);
15843 if (fIOWrite)
15844 rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr,
15845 pVmxTransient->ExitInstrInfo.StrIo.iSegReg, true /*fIoChecked*/);
15846 else
15847 {
15848 /*
15849 * The segment prefix for INS cannot be overridden and is always ES. We can safely assume X86_SREG_ES.
15850 * Hence "iSegReg" field is undefined in the instruction-information field in VT-x for INS.
15851 * See Intel Instruction spec. for "INS".
15852 * See Intel spec. Table 27-8 "Format of the VM-Exit Instruction-Information Field as Used for INS and OUTS".
15853 */
15854 rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr, true /*fIoChecked*/);
15855 }
15856 }
15857 else
15858 rcStrict = IEMExecOne(pVCpu);
15859
15860 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
15861 fUpdateRipAlready = true;
15862 }
15863 else
15864 {
15865 /*
15866 * IN/OUT - I/O instruction.
15867 */
15868 Log4Func(("cs:rip=%04x:%08RX64 %#06x/%u %c\n", pCtx->cs.Sel, pCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
15869 uint32_t const uAndVal = s_aIOOpAnd[uIOSize];
15870 Assert(!VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual));
15871 if (fIOWrite)
15872 {
15873 rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pCtx->eax & uAndVal, cbValue);
15874 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
15875 if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
15876 && !pCtx->eflags.Bits.u1TF)
15877 rcStrict = EMRZSetPendingIoPortWrite(pVCpu, uIOPort, cbInstr, cbValue, pCtx->eax & uAndVal);
15878 }
15879 else
15880 {
15881 uint32_t u32Result = 0;
15882 rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue);
15883 if (IOM_SUCCESS(rcStrict))
15884 {
15885 /* Save result of I/O IN instr. in AL/AX/EAX. */
15886 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Result & uAndVal);
15887 }
15888 if ( rcStrict == VINF_IOM_R3_IOPORT_READ
15889 && !pCtx->eflags.Bits.u1TF)
15890 rcStrict = EMRZSetPendingIoPortRead(pVCpu, uIOPort, cbInstr, cbValue);
15891 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
15892 }
15893 }
15894
15895 if (IOM_SUCCESS(rcStrict))
15896 {
15897 if (!fUpdateRipAlready)
15898 {
15899 hmR0VmxAdvanceGuestRipBy(pVCpu, cbInstr);
15900 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP);
15901 }
15902
15903 /*
15904 * INS/OUTS with a REP prefix updates RFLAGS; this can be observed as a triple-fault
15905 * guru meditation while booting a Fedora 17 64-bit guest.
15906 *
15907 * See Intel Instruction reference for REP/REPE/REPZ/REPNE/REPNZ.
15908 */
15909 if (fIOString)
15910 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RFLAGS);
15911
15912 /*
15913 * If any I/O breakpoints are armed, we need to check if one triggered
15914 * and take appropriate action.
15915 * Note that the I/O breakpoint type is undefined if CR4.DE is 0.
15916 */
15917 rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_DR7);
15918 AssertRCReturn(rc, rc);
15919
15920 /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the
15921 * execution engines about whether hyper BPs and such are pending. */
15922 uint32_t const uDr7 = pCtx->dr[7];
15923 if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK)
15924 && X86_DR7_ANY_RW_IO(uDr7)
15925 && (pCtx->cr4 & X86_CR4_DE))
15926 || DBGFBpIsHwIoArmed(pVM)))
15927 {
15928 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
15929
15930 /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
15931 VMMRZCallRing3Disable(pVCpu);
15932 HM_DISABLE_PREEMPT(pVCpu);
15933
15934 bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /* fDr6 */);
15935
15936 VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pCtx, uIOPort, cbValue);
15937 if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP)
15938 {
15939 /* Raise #DB. */
15940 if (fIsGuestDbgActive)
15941 ASMSetDR6(pCtx->dr[6]);
15942 if (pCtx->dr[7] != uDr7)
15943 pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_DR7;
15944
15945 hmR0VmxSetPendingXcptDB(pVCpu);
15946 }
15947 /* rcStrict is VINF_SUCCESS, VINF_IOM_R3_IOPORT_COMMIT_WRITE, or in [VINF_EM_FIRST..VINF_EM_LAST],
15948 however we can ditch VINF_IOM_R3_IOPORT_COMMIT_WRITE as it has VMCPU_FF_IOM as backup. */
15949 else if ( rcStrict2 != VINF_SUCCESS
15950 && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict))
15951 rcStrict = rcStrict2;
15952 AssertCompile(VINF_EM_LAST < VINF_IOM_R3_IOPORT_COMMIT_WRITE);
15953
15954 HM_RESTORE_PREEMPT();
15955 VMMRZCallRing3Enable(pVCpu);
15956 }
15957 }
15958
15959#ifdef VBOX_STRICT
15960 if ( rcStrict == VINF_IOM_R3_IOPORT_READ
15961 || rcStrict == VINF_EM_PENDING_R3_IOPORT_READ)
15962 Assert(!fIOWrite);
15963 else if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE
15964 || rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE
15965 || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE)
15966 Assert(fIOWrite);
15967 else
15968 {
15969# if 0 /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST
15970 * statuses, that the VMM device and some others may return. See
15971 * IOM_SUCCESS() for guidance. */
15972 AssertMsg( RT_FAILURE(rcStrict)
15973 || rcStrict == VINF_SUCCESS
15974 || rcStrict == VINF_EM_RAW_EMULATE_INSTR
15975 || rcStrict == VINF_EM_DBG_BREAKPOINT
15976 || rcStrict == VINF_EM_RAW_GUEST_TRAP
15977 || rcStrict == VINF_EM_RAW_TO_R3, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
15978# endif
15979 }
15980#endif
15981 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1);
15982 }
15983 else
15984 {
15985 /*
15986 * Frequent exit or something needing probing. Get state and call EMHistoryExec.
15987 */
15988 int rc2 = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
15989 AssertRCReturn(rc2, rc2);
15990 STAM_COUNTER_INC(!fIOString ? fIOWrite ? &pVCpu->hm.s.StatExitIOWrite : &pVCpu->hm.s.StatExitIORead
15991 : fIOWrite ? &pVCpu->hm.s.StatExitIOStringWrite : &pVCpu->hm.s.StatExitIOStringRead);
15992 Log4(("IOExit/%u: %04x:%08RX64: %s%s%s %#x LB %u -> EMHistoryExec\n",
15993 pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
15994 VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual) ? "REP " : "",
15995 fIOWrite ? "OUT" : "IN", fIOString ? "S" : "", uIOPort, uIOSize));
15996
15997 rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
15998 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
15999
16000 Log4(("IOExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
16001 pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
16002 VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
16003 }
16004 return rcStrict;
16005}
16006
16007
16008/**
16009 * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional
16010 * VM-exit.
16011 */
16012HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16013{
16014 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16015
16016 /* Check if this task-switch occurred while delivering an event through the guest IDT. */
16017 hmR0VmxReadExitQualVmcs(pVmxTransient);
16018 if (VMX_EXIT_QUAL_TASK_SWITCH_TYPE(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_TASK_SWITCH_TYPE_IDT)
16019 {
16020 hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
16021 if (VMX_IDT_VECTORING_INFO_IS_VALID(pVmxTransient->uIdtVectoringInfo))
16022 {
16023 uint32_t uErrCode;
16024 if (VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uIdtVectoringInfo))
16025 {
16026 hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
16027 uErrCode = pVmxTransient->uIdtVectoringErrorCode;
16028 }
16029 else
16030 uErrCode = 0;
16031
16032 RTGCUINTPTR GCPtrFaultAddress;
16033 if (VMX_IDT_VECTORING_INFO_IS_XCPT_PF(pVmxTransient->uIdtVectoringInfo))
16034 GCPtrFaultAddress = pVCpu->cpum.GstCtx.cr2;
16035 else
16036 GCPtrFaultAddress = 0;
16037
16038 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16039
16040 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo),
16041 pVmxTransient->cbExitInstr, uErrCode, GCPtrFaultAddress);
16042
16043 Log4Func(("Pending event. uIntType=%#x uVector=%#x\n", VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo),
16044 VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo)));
16045 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
16046 return VINF_EM_RAW_INJECT_TRPM_EVENT;
16047 }
16048 }
16049
16050 /* Fall back to the interpreter to emulate the task-switch. */
16051 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
16052 return VERR_EM_INTERPRETER;
16053}
16054
16055
16056/**
16057 * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit.
16058 */
16059HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16060{
16061 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16062
16063 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
16064 pVmcsInfo->u32ProcCtls &= ~VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
16065 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
16066 AssertRC(rc);
16067 return VINF_EM_DBG_STEPPED;
16068}
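
/*
 * For reference, arming the monitor trap flag is the mirror image of the handler above
 * (a minimal sketch only, using the same VMCS write helper; how and where the flag gets
 * armed is not shown in this excerpt):
 *
 *     pVmcsInfo->u32ProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG;
 *     int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
 *     AssertRC(rc);
 */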
16069
16070
16071/**
16072 * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit.
16073 */
16074HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16075{
16076 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16077 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess);
16078
16079 hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
16080 hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
16081 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16082 hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
16083 hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
16084
16085 /*
16086 * If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly.
16087 */
16088 VBOXSTRICTRC rcStrict = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
16089 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16090 {
16091 /* If event delivery causes an APIC-access VM-exit (some guests do this), fall back to instruction emulation. */
16092 if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending))
16093 {
16094 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterpret);
16095 return VINF_EM_RAW_INJECT_TRPM_EVENT;
16096 }
16097 }
16098 else
16099 {
16100 Assert(rcStrict != VINF_HM_DOUBLE_FAULT);
16101 return rcStrict;
16102 }
16103
16104 /* IOMR0MmioPhysHandler() below may call into IEM, so save the necessary state. */
16105 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
16106 hmR0VmxReadExitQualVmcs(pVmxTransient);
16107 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
16108 AssertRCReturn(rc, rc);
16109
16110 /* See Intel spec. 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Physical Addresses" */
16111 uint32_t const uAccessType = VMX_EXIT_QUAL_APIC_ACCESS_TYPE(pVmxTransient->uExitQual);
16112 switch (uAccessType)
16113 {
16114 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
16115 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
16116 {
16117 AssertMsg( !(pVmcsInfo->u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)
16118 || VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual) != XAPIC_OFF_TPR,
16119 ("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n"));
16120
16121 RTGCPHYS GCPhys = pVCpu->hm.s.vmx.u64GstMsrApicBase; /* Always up-to-date, as it is not part of the VMCS. */
16122 GCPhys &= PAGE_BASE_GC_MASK;
16123 GCPhys += VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual);
16124 Log4Func(("Linear access uAccessType=%#x GCPhys=%#RGp Off=%#x\n", uAccessType, GCPhys,
16125 VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual)));
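 /*
  * Illustrative example (assuming the typical default APIC base of 0xFEE00000): a linear
  * access at page offset 0x300 (the ICR low register) makes the computation above yield
  * GCPhys = 0xFEE00300, which is then handed to the MMIO handler below as if it were an
  * ordinary physical APIC access.
  */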
16126
16127 rcStrict = IOMR0MmioPhysHandler(pVCpu->CTX_SUFF(pVM), pVCpu,
16128 uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ ? 0 : X86_TRAP_PF_RW, GCPhys);
16129 Log4Func(("IOMMMIOPhysHandler returned %Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
16130 if ( rcStrict == VINF_SUCCESS
16131 || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
16132 || rcStrict == VERR_PAGE_NOT_PRESENT)
16133 {
16134 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
16135 | HM_CHANGED_GUEST_APIC_TPR);
16136 rcStrict = VINF_SUCCESS;
16137 }
16138 break;
16139 }
16140
16141 default:
16142 {
16143 Log4Func(("uAccessType=%#x\n", uAccessType));
16144 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
16145 break;
16146 }
16147 }
16148
16149 if (rcStrict != VINF_SUCCESS)
16150 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchApicAccessToR3);
16151 return rcStrict;
16152}
16153
16154
16155/**
16156 * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional
16157 * VM-exit.
16158 */
16159HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16160{
16161 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16162 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
16163
16164 /*
16165 * We might also get this VM-exit if the nested-guest isn't intercepting MOV DRx accesses.
16166 * In such a case, rather than disabling MOV DRx intercepts and resuming execution, we
16167 * must emulate the MOV DRx access.
16168 */
16169 if (!pVmxTransient->fIsNestedGuest)
16170 {
16171 /* We should -not- get this VM-exit if the guest's debug registers were active. */
16172 if (pVmxTransient->fWasGuestDebugStateActive)
16173 {
16174 AssertMsgFailed(("Unexpected MOV DRx exit\n"));
16175 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient->uExitReason);
16176 }
16177
16178 if ( !pVCpu->hm.s.fSingleInstruction
16179 && !pVmxTransient->fWasHyperDebugStateActive)
16180 {
16181 Assert(!DBGFIsStepping(pVCpu));
16182 Assert(pVmcsInfo->u32XcptBitmap & RT_BIT(X86_XCPT_DB));
16183
16184 /* Don't intercept MOV DRx any more. */
16185 pVmcsInfo->u32ProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT;
16186 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVmcsInfo->u32ProcCtls);
16187 AssertRC(rc);
16188
16189 /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */
16190 VMMRZCallRing3Disable(pVCpu);
16191 HM_DISABLE_PREEMPT(pVCpu);
16192
16193 /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
16194 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
16195 Assert(CPUMIsGuestDebugStateActive(pVCpu));
16196
16197 HM_RESTORE_PREEMPT();
16198 VMMRZCallRing3Enable(pVCpu);
16199
16200#ifdef VBOX_WITH_STATISTICS
16201 hmR0VmxReadExitQualVmcs(pVmxTransient);
16202 if (VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_WRITE)
16203 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
16204 else
16205 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
16206#endif
16207 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
16208 return VINF_SUCCESS;
16209 }
16210 }
16211
16212 /*
16213 * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode() which requires the EFER MSR and CS.
16214 * The EFER MSR is always up-to-date.
16215 * Update the segment registers and DR7 from the CPU.
16216 */
16217 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
16218 hmR0VmxReadExitQualVmcs(pVmxTransient);
16219 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_DR7);
16220 AssertRCReturn(rc, rc);
16221 Log4Func(("cs:rip=%#04x:%#RX64\n", pCtx->cs.Sel, pCtx->rip));
16222
16223 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
16224 if (VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_WRITE)
16225 {
16226 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
16227 VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual),
16228 VMX_EXIT_QUAL_DRX_GENREG(pVmxTransient->uExitQual));
16229 if (RT_SUCCESS(rc))
16230 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7);
16231 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
16232 }
16233 else
16234 {
16235 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
16236 VMX_EXIT_QUAL_DRX_GENREG(pVmxTransient->uExitQual),
16237 VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual));
16238 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
16239 }
16240
16241 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
16242 if (RT_SUCCESS(rc))
16243 {
16244 int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient);
16245 AssertRCReturn(rc2, rc2);
16246 return VINF_SUCCESS;
16247 }
16248 return rc;
16249}
16250
16251
16252/**
16253 * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG).
16254 * Conditional VM-exit.
16255 */
16256HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16257{
16258 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16259 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging);
16260
16261 hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
16262 hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
16263 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16264 hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
16265 hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
16266
16267 /*
16268 * If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly.
16269 */
16270 VBOXSTRICTRC rcStrict = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
16271 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16272 {
16273 /*
16274 * In the unlikely case where delivering an event causes an EPT misconfig (MMIO), go back to
16275 * instruction emulation to inject the original event. Otherwise, injecting the original event
16276 * using hardware-assisted VMX would trigger the same EPT misconfig VM-exit again.
16277 */
16278 if (!pVCpu->hm.s.Event.fPending)
16279 { /* likely */ }
16280 else
16281 {
16282 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterpret);
16283#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
16284 /** @todo NSTVMX: Think about how this should be handled. */
16285 if (pVmxTransient->fIsNestedGuest)
16286 return VERR_VMX_IPE_3;
16287#endif
16288 return VINF_EM_RAW_INJECT_TRPM_EVENT;
16289 }
16290 }
16291 else
16292 {
16293 Assert(rcStrict != VINF_HM_DOUBLE_FAULT);
16294 return rcStrict;
16295 }
16296
16297 /*
16298 * Get sufficient state and update the exit history entry.
16299 */
16300 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
16301 hmR0VmxReadGuestPhysicalAddrVmcs(pVmxTransient);
16302 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
16303 AssertRCReturn(rc, rc);
16304
16305 RTGCPHYS const GCPhys = pVmxTransient->uGuestPhysicalAddr;
16306 PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu,
16307 EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_MMIO),
16308 pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base);
16309 if (!pExitRec)
16310 {
16311 /*
16312 * If we succeed, resume guest execution.
16313 * If we fail to interpret the instruction because we couldn't get the guest-physical address
16314 * of the page containing the instruction via the guest's page tables (we would invalidate the
16315 * guest page in the host TLB), resume execution; the resulting guest page fault lets the guest
16316 * handle this weird case. See @bugref{6043}.
16317 */
16318 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
16319 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
16320/** @todo bird: We can probably just go straight to IOM here and assume that
16321 * it's MMIO, then fall back on PGM if that hunch didn't work out so
16322 * well. However, we need to address the aliasing workarounds that
16323 * PGMR0Trap0eHandlerNPMisconfig implements. So, some care is needed.
16324 *
16325 * Might also be interesting to see if we can get this done more or
16326 * less locklessly inside IOM. Need to consider the lookup table
16327 * updating and use a bit more carefully first (or do all updates via
16328 * rendezvous) */
16329 rcStrict = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
16330 Log4Func(("At %#RGp RIP=%#RX64 rc=%Rrc\n", GCPhys, pCtx->rip, VBOXSTRICTRC_VAL(rcStrict)));
16331 if ( rcStrict == VINF_SUCCESS
16332 || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
16333 || rcStrict == VERR_PAGE_NOT_PRESENT)
16334 {
16335 /* Successfully handled MMIO operation. */
16336 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
16337 | HM_CHANGED_GUEST_APIC_TPR);
16338 rcStrict = VINF_SUCCESS;
16339 }
16340 }
16341 else
16342 {
16343 /*
16344 * Frequent exit or something needing probing. Call EMHistoryExec.
16345 */
16346 Log4(("EptMisscfgExit/%u: %04x:%08RX64: %RGp -> EMHistoryExec\n",
16347 pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhys));
16348
16349 rcStrict = EMHistoryExec(pVCpu, pExitRec, 0);
16350 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
16351
16352 Log4(("EptMisscfgExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n",
16353 pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip,
16354 VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip));
16355 }
16356 return rcStrict;
16357}
16358
16359
16360/**
16361 * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional
16362 * VM-exit.
16363 */
16364HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16365{
16366 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16367 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging);
16368
16369 hmR0VmxReadExitQualVmcs(pVmxTransient);
16370 hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
16371 hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
16372 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16373 hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
16374 hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
16375
16376 /*
16377 * If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly.
16378 */
16379 VBOXSTRICTRC rcStrict = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient);
16380 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16381 {
16382 /*
16383 * If delivery of an event causes an EPT violation (true nested #PF and not MMIO),
16384 * we shall resolve the nested #PF and re-inject the original event.
16385 */
16386 if (pVCpu->hm.s.Event.fPending)
16387 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectReflectNPF);
16388 }
16389 else
16390 {
16391 Assert(rcStrict != VINF_HM_DOUBLE_FAULT);
16392 return rcStrict;
16393 }
16394
16395 PVMXVMCSINFO pVmcsInfo = pVmxTransient->pVmcsInfo;
16396 hmR0VmxReadGuestPhysicalAddrVmcs(pVmxTransient);
16397 int rc = hmR0VmxImportGuestState(pVCpu, pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
16398 AssertRCReturn(rc, rc);
16399
16400 RTGCPHYS const GCPhys = pVmxTransient->uGuestPhysicalAddr;
16401 uint64_t const uExitQual = pVmxTransient->uExitQual;
16402 AssertMsg(((pVmxTransient->uExitQual >> 7) & 3) != 2, ("%#RX64", uExitQual));
16403
16404 RTGCUINT uErrorCode = 0;
16405 if (uExitQual & VMX_EXIT_QUAL_EPT_ACCESS_INSTR_FETCH)
16406 uErrorCode |= X86_TRAP_PF_ID;
16407 if (uExitQual & VMX_EXIT_QUAL_EPT_ACCESS_WRITE)
16408 uErrorCode |= X86_TRAP_PF_RW;
16409 if (uExitQual & (VMX_EXIT_QUAL_EPT_ENTRY_READ | VMX_EXIT_QUAL_EPT_ENTRY_WRITE | VMX_EXIT_QUAL_EPT_ENTRY_EXECUTE))
16410 uErrorCode |= X86_TRAP_PF_P;
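 /*
  * Worked example (illustrative only): a guest write that hits a not-present EPT mapping sets
  * the write-access bit in the exit qualification while the entry-permission bits are all clear,
  * so the code above produces uErrorCode = X86_TRAP_PF_RW (write, not-present) for the
  * nested-paging #PF handling below.
  */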
16411
16412 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
16413 PCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
16414 Log4Func(("at %#RX64 (%#RX64 errcode=%#x) cs:rip=%#04x:%#RX64\n", GCPhys, uExitQual, uErrorCode, pCtx->cs.Sel, pCtx->rip));
16415
16416 /*
16417 * Handle the pagefault trap for the nested shadow table.
16418 */
16419 TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode);
16420 rcStrict = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pCtx), GCPhys);
16421 TRPMResetTrap(pVCpu);
16422
16423 /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */
16424 if ( rcStrict == VINF_SUCCESS
16425 || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT
16426 || rcStrict == VERR_PAGE_NOT_PRESENT)
16427 {
16428 /* Successfully synced our nested page tables. */
16429 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
16430 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS);
16431 return VINF_SUCCESS;
16432 }
16433
16434 Log4Func(("EPT return to ring-3 rcStrict2=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
16435 return rcStrict;
16436}
16437
16438
16439#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
16440/**
16441 * VM-exit handler for VMCLEAR (VMX_EXIT_VMCLEAR). Unconditional VM-exit.
16442 */
16443HMVMX_EXIT_DECL hmR0VmxExitVmclear(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16444{
16445 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16446
16447 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16448 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
16449 hmR0VmxReadExitQualVmcs(pVmxTransient);
16450 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
16451 | CPUMCTX_EXTRN_HWVIRT
16452 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
16453 AssertRCReturn(rc, rc);
16454
16455 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16456
16457 VMXVEXITINFO ExitInfo;
16458 RT_ZERO(ExitInfo);
16459 ExitInfo.uReason = pVmxTransient->uExitReason;
16460 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16461 ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
16462 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
16463 HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
16464
16465 VBOXSTRICTRC rcStrict = IEMExecDecodedVmclear(pVCpu, &ExitInfo);
16466 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16467 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
16468 else if (rcStrict == VINF_IEM_RAISED_XCPT)
16469 {
16470 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
16471 rcStrict = VINF_SUCCESS;
16472 }
16473 return rcStrict;
16474}
16475
16476
16477/**
16478 * VM-exit handler for VMLAUNCH (VMX_EXIT_VMLAUNCH). Unconditional VM-exit.
16479 */
16480HMVMX_EXIT_DECL hmR0VmxExitVmlaunch(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16481{
16482 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16483
16484 /* Import the entire VMCS state for now as we would be switching VMCS on a successful VMLAUNCH;
16485 otherwise we could import just IEM_CPUMCTX_EXTRN_VMX_VMENTRY_MASK. */
16486 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16487 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
16488 AssertRCReturn(rc, rc);
16489
16490 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16491
16492 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitVmentry, z);
16493 VBOXSTRICTRC rcStrict = IEMExecDecodedVmlaunchVmresume(pVCpu, pVmxTransient->cbExitInstr, VMXINSTRID_VMLAUNCH);
16494 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitVmentry, z);
16495 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16496 {
16497 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
16498 if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
16499 rcStrict = VINF_VMX_VMLAUNCH_VMRESUME;
16500 }
16501 Assert(rcStrict != VINF_IEM_RAISED_XCPT);
16502 return rcStrict;
16503}
16504
16505
16506/**
16507 * VM-exit handler for VMPTRLD (VMX_EXIT_VMPTRLD). Unconditional VM-exit.
16508 */
16509HMVMX_EXIT_DECL hmR0VmxExitVmptrld(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16510{
16511 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16512
16513 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16514 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
16515 hmR0VmxReadExitQualVmcs(pVmxTransient);
16516 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
16517 | CPUMCTX_EXTRN_HWVIRT
16518 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
16519 AssertRCReturn(rc, rc);
16520
16521 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16522
16523 VMXVEXITINFO ExitInfo;
16524 RT_ZERO(ExitInfo);
16525 ExitInfo.uReason = pVmxTransient->uExitReason;
16526 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16527 ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
16528 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
16529 HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
16530
16531 VBOXSTRICTRC rcStrict = IEMExecDecodedVmptrld(pVCpu, &ExitInfo);
16532 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16533 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
16534 else if (rcStrict == VINF_IEM_RAISED_XCPT)
16535 {
16536 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
16537 rcStrict = VINF_SUCCESS;
16538 }
16539 return rcStrict;
16540}
16541
16542
16543/**
16544 * VM-exit handler for VMPTRST (VMX_EXIT_VMPTRST). Unconditional VM-exit.
16545 */
16546HMVMX_EXIT_DECL hmR0VmxExitVmptrst(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16547{
16548 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16549
16550 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16551 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
16552 hmR0VmxReadExitQualVmcs(pVmxTransient);
16553 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
16554 | CPUMCTX_EXTRN_HWVIRT
16555 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
16556 AssertRCReturn(rc, rc);
16557
16558 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16559
16560 VMXVEXITINFO ExitInfo;
16561 RT_ZERO(ExitInfo);
16562 ExitInfo.uReason = pVmxTransient->uExitReason;
16563 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16564 ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
16565 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
16566 HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_WRITE, &ExitInfo.GCPtrEffAddr);
16567
16568 VBOXSTRICTRC rcStrict = IEMExecDecodedVmptrst(pVCpu, &ExitInfo);
16569 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16570 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
16571 else if (rcStrict == VINF_IEM_RAISED_XCPT)
16572 {
16573 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
16574 rcStrict = VINF_SUCCESS;
16575 }
16576 return rcStrict;
16577}
16578
16579
16580/**
16581 * VM-exit handler for VMREAD (VMX_EXIT_VMREAD). Conditional VM-exit.
16582 */
16583HMVMX_EXIT_DECL hmR0VmxExitVmread(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16584{
16585 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16586
16587 /*
16588 * Strictly speaking we should not get VMREAD VM-exits for shadow VMCS fields and
16589 * thus might not need to import the shadow VMCS state, but it's safer to do so in
16590 * case code elsewhere dares to look at unsynced VMCS fields.
16591 */
16592 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16593 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
16594 hmR0VmxReadExitQualVmcs(pVmxTransient);
16595 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
16596 | CPUMCTX_EXTRN_HWVIRT
16597 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
16598 AssertRCReturn(rc, rc);
16599
16600 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16601
16602 VMXVEXITINFO ExitInfo;
16603 RT_ZERO(ExitInfo);
16604 ExitInfo.uReason = pVmxTransient->uExitReason;
16605 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16606 ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
16607 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
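 /* Note: for VMREAD the memory operand is the destination the field value gets written to,
    hence VMXMEMACCESS_WRITE below; the VMWRITE handler further down uses VMXMEMACCESS_READ
    for the mirror-image reason. */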
16608 if (!ExitInfo.InstrInfo.VmreadVmwrite.fIsRegOperand)
16609 HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_WRITE, &ExitInfo.GCPtrEffAddr);
16610
16611 VBOXSTRICTRC rcStrict = IEMExecDecodedVmread(pVCpu, &ExitInfo);
16612 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16613 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
16614 else if (rcStrict == VINF_IEM_RAISED_XCPT)
16615 {
16616 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
16617 rcStrict = VINF_SUCCESS;
16618 }
16619 return rcStrict;
16620}
16621
16622
16623/**
16624 * VM-exit handler for VMRESUME (VMX_EXIT_VMRESUME). Unconditional VM-exit.
16625 */
16626HMVMX_EXIT_DECL hmR0VmxExitVmresume(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16627{
16628 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16629
16630 /* Import the entire VMCS state for now as we would be switching VMCS on a successful VMRESUME;
16631 otherwise we could import just IEM_CPUMCTX_EXTRN_VMX_VMENTRY_MASK. */
16632 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16633 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
16634 AssertRCReturn(rc, rc);
16635
16636 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16637
16638 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitVmentry, z);
16639 VBOXSTRICTRC rcStrict = IEMExecDecodedVmlaunchVmresume(pVCpu, pVmxTransient->cbExitInstr, VMXINSTRID_VMRESUME);
16640 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitVmentry, z);
16641 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16642 {
16643 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST);
16644 if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx))
16645 rcStrict = VINF_VMX_VMLAUNCH_VMRESUME;
16646 }
16647 Assert(rcStrict != VINF_IEM_RAISED_XCPT);
16648 return rcStrict;
16649}
16650
16651
16652/**
16653 * VM-exit handler for VMWRITE (VMX_EXIT_VMWRITE). Conditional VM-exit.
16654 */
16655HMVMX_EXIT_DECL hmR0VmxExitVmwrite(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16656{
16657 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16658
16659 /*
16660 * Although we should not get VMWRITE VM-exits for shadow VMCS fields, our HM hook
16661 * gets invoked when IEM's VMWRITE instruction emulation modifies the current VMCS and
16662 * flags re-loading the entire shadow VMCS, so we should save the entire shadow VMCS here.
16663 */
16664 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16665 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
16666 hmR0VmxReadExitQualVmcs(pVmxTransient);
16667 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
16668 | CPUMCTX_EXTRN_HWVIRT
16669 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
16670 AssertRCReturn(rc, rc);
16671
16672 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16673
16674 VMXVEXITINFO ExitInfo;
16675 RT_ZERO(ExitInfo);
16676 ExitInfo.uReason = pVmxTransient->uExitReason;
16677 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16678 ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
16679 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
16680 if (!ExitInfo.InstrInfo.VmreadVmwrite.fIsRegOperand)
16681 HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
16682
16683 VBOXSTRICTRC rcStrict = IEMExecDecodedVmwrite(pVCpu, &ExitInfo);
16684 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16685 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
16686 else if (rcStrict == VINF_IEM_RAISED_XCPT)
16687 {
16688 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
16689 rcStrict = VINF_SUCCESS;
16690 }
16691 return rcStrict;
16692}
16693
16694
16695/**
16696 * VM-exit handler for VMXOFF (VMX_EXIT_VMXOFF). Unconditional VM-exit.
16697 */
16698HMVMX_EXIT_DECL hmR0VmxExitVmxoff(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16699{
16700 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16701
16702 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16703 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_CR4
16704 | CPUMCTX_EXTRN_HWVIRT
16705 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK);
16706 AssertRCReturn(rc, rc);
16707
16708 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16709
16710 VBOXSTRICTRC rcStrict = IEMExecDecodedVmxoff(pVCpu, pVmxTransient->cbExitInstr);
16711 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16712 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_HWVIRT);
16713 else if (rcStrict == VINF_IEM_RAISED_XCPT)
16714 {
16715 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
16716 rcStrict = VINF_SUCCESS;
16717 }
16718 return rcStrict;
16719}
16720
16721
16722/**
16723 * VM-exit handler for VMXON (VMX_EXIT_VMXON). Unconditional VM-exit.
16724 */
16725HMVMX_EXIT_DECL hmR0VmxExitVmxon(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16726{
16727 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16728
16729 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16730 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
16731 hmR0VmxReadExitQualVmcs(pVmxTransient);
16732 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
16733 | CPUMCTX_EXTRN_HWVIRT
16734 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
16735 AssertRCReturn(rc, rc);
16736
16737 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16738
16739 VMXVEXITINFO ExitInfo;
16740 RT_ZERO(ExitInfo);
16741 ExitInfo.uReason = pVmxTransient->uExitReason;
16742 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16743 ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
16744 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
16745 HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
16746
16747 VBOXSTRICTRC rcStrict = IEMExecDecodedVmxon(pVCpu, &ExitInfo);
16748 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16749 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT);
16750 else if (rcStrict == VINF_IEM_RAISED_XCPT)
16751 {
16752 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
16753 rcStrict = VINF_SUCCESS;
16754 }
16755 return rcStrict;
16756}
16757
16758
16759/**
16760 * VM-exit handler for INVVPID (VMX_EXIT_INVVPID). Unconditional VM-exit.
16761 */
16762HMVMX_EXIT_DECL hmR0VmxExitInvvpid(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16763{
16764 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16765
16766 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16767 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
16768 hmR0VmxReadExitQualVmcs(pVmxTransient);
16769 int rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK
16770 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK);
16771 AssertRCReturn(rc, rc);
16772
16773 HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason);
16774
16775 VMXVEXITINFO ExitInfo;
16776 RT_ZERO(ExitInfo);
16777 ExitInfo.uReason = pVmxTransient->uExitReason;
16778 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16779 ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u;
16780 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
16781 HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr);
16782
16783 VBOXSTRICTRC rcStrict = IEMExecDecodedInvvpid(pVCpu, &ExitInfo);
16784 if (RT_LIKELY(rcStrict == VINF_SUCCESS))
16785 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
16786 else if (rcStrict == VINF_IEM_RAISED_XCPT)
16787 {
16788 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
16789 rcStrict = VINF_SUCCESS;
16790 }
16791 return rcStrict;
16792}
16793#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
16794/** @} */
16795
16796
16797#ifdef VBOX_WITH_NESTED_HWVIRT_VMX
16798/** @name Nested-guest VM-exit handlers.
16799 * @{
16800 */
16801/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
16802/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- Nested-guest VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
16803/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
16804
16805/**
16806 * Nested-guest VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI).
16807 * Conditional VM-exit.
16808 */
16809HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmiNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16810{
16811 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16812
16813 hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
16814
16815 uint64_t const uExitIntInfo = pVmxTransient->uExitIntInfo;
16816 uint32_t const uExitIntType = VMX_EXIT_INT_INFO_TYPE(uExitIntInfo);
16817 Assert(VMX_EXIT_INT_INFO_IS_VALID(uExitIntInfo));
16818
16819 switch (uExitIntType)
16820 {
16821 /*
16822 * Physical NMIs:
16823 * We shouldn't direct host physical NMIs to the nested-guest. Dispatch them to the host.
16824 */
16825 case VMX_EXIT_INT_INFO_TYPE_NMI:
16826 return hmR0VmxExitHostNmi(pVCpu, pVmxTransient->pVmcsInfo);
16827
16828 /*
16829 * Hardware exceptions,
16830 * Software exceptions,
16831 * Privileged software exceptions:
16832 * Figure out if the exception must be delivered to the guest or the nested-guest.
16833 */
16834 case VMX_EXIT_INT_INFO_TYPE_SW_XCPT:
16835 case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT:
16836 case VMX_EXIT_INT_INFO_TYPE_HW_XCPT:
16837 {
16838 hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
16839 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16840 hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
16841 hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
16842
16843 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
16844 bool const fIntercept = CPUMIsGuestVmxXcptInterceptSet(pCtx, VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo),
16845 pVmxTransient->uExitIntErrorCode);
16846 if (fIntercept)
16847 {
16848 /* Exit qualification is required for debug and page-fault exceptions. */
16849 hmR0VmxReadExitQualVmcs(pVmxTransient);
16850
16851 /*
16852 * For VM-exits due to software exceptions (those generated by INT3 or INTO) and privileged
16853 * software exceptions (those generated by INT1/ICEBP) we need to supply the VM-exit instruction
16854 * length. However, if delivery of a software interrupt, software exception or privileged
16855 * software exception causes a VM-exit, that too provides the VM-exit instruction length.
16856 */
16857 VMXVEXITINFO ExitInfo;
16858 RT_ZERO(ExitInfo);
16859 ExitInfo.uReason = pVmxTransient->uExitReason;
16860 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
16861 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16862
16863 VMXVEXITEVENTINFO ExitEventInfo;
16864 RT_ZERO(ExitEventInfo);
16865 ExitEventInfo.uExitIntInfo = pVmxTransient->uExitIntInfo;
16866 ExitEventInfo.uExitIntErrCode = pVmxTransient->uExitIntErrorCode;
16867 ExitEventInfo.uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
16868 ExitEventInfo.uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
16869
16870#ifdef DEBUG_ramshankar
16871 hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, HMVMX_CPUMCTX_EXTRN_ALL);
16872 Log4Func(("exit_int_info=%#RX32 err_code=%#RX32 exit_qual=%#RX64\n", pVmxTransient->uExitIntInfo,
16873 pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQual));
16874 if (VMX_IDT_VECTORING_INFO_IS_VALID(pVmxTransient->uIdtVectoringInfo))
16875 {
16876 Log4Func(("idt_info=%#RX32 idt_errcode=%#RX32 cr2=%#RX64\n", pVmxTransient->uIdtVectoringInfo,
16877 pVmxTransient->uIdtVectoringErrorCode, pCtx->cr2));
16878 }
16879#endif
16880 return IEMExecVmxVmexitXcpt(pVCpu, &ExitInfo, &ExitEventInfo);
16881 }
16882
16883 /* Nested paging is currently a requirement, otherwise we would need to handle shadow #PFs in hmR0VmxExitXcptPF. */
16884 Assert(pVCpu->CTX_SUFF(pVM)->hmr0.s.fNestedPaging);
16885 return hmR0VmxExitXcpt(pVCpu, pVmxTransient);
16886 }
16887
16888 /*
16889 * Software interrupts:
16890 * VM-exits cannot be caused by software interrupts.
16891 *
16892 * External interrupts:
16893 * This should only happen when "acknowledge external interrupts on VM-exit"
16894 * control is set. However, we never set this when executing a guest or
16895 * nested-guest. For nested-guests it is emulated while injecting interrupts into
16896 * the guest.
16897 */
16898 case VMX_EXIT_INT_INFO_TYPE_SW_INT:
16899 case VMX_EXIT_INT_INFO_TYPE_EXT_INT:
16900 default:
16901 {
16902 pVCpu->hm.s.u32HMError = pVmxTransient->uExitIntInfo;
16903 return VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE;
16904 }
16905 }
16906}
16907
16908
16909/**
16910 * Nested-guest VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT).
16911 * Unconditional VM-exit.
16912 */
16913HMVMX_EXIT_DECL hmR0VmxExitTripleFaultNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16914{
16915 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16916 return IEMExecVmxVmexitTripleFault(pVCpu);
16917}
16918
16919
16920/**
16921 * Nested-guest VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW).
16922 */
16923HMVMX_EXIT_NSRC_DECL hmR0VmxExitIntWindowNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16924{
16925 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16926
16927 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_INT_WINDOW_EXIT))
16928 return IEMExecVmxVmexit(pVCpu, pVmxTransient->uExitReason, 0 /* uExitQual */);
16929 return hmR0VmxExitIntWindow(pVCpu, pVmxTransient);
16930}
16931
16932
16933/**
16934 * Nested-guest VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW).
16935 */
16936HMVMX_EXIT_NSRC_DECL hmR0VmxExitNmiWindowNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16937{
16938 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16939
16940 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_NMI_WINDOW_EXIT))
16941 return IEMExecVmxVmexit(pVCpu, pVmxTransient->uExitReason, 0 /* uExitQual */);
16942    return hmR0VmxExitNmiWindow(pVCpu, pVmxTransient);
16943}
16944
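/*
 * Illustrative sketch, compiled out: the CPUMIsGuestVmxProcCtlsSet checks used by these
 * handlers decide whether the nested hypervisor asked for the intercept; if it did, the
 * exit is reflected to it via IEM, otherwise it is handled as a regular guest exit. The
 * helper below shows roughly what such a check amounts to; the helper name and the
 * direct access to the virtual VMCS are assumptions made for illustration.
 */
#if 0
DECLINLINE(bool) hmR0VmxSketchIsNstGstProcCtlsSet(PCCPUMCTX pCtx, uint32_t fCtls)
{
    /* The nested hypervisor's primary processor-based VM-execution controls live in the
       virtual VMCS; the intercept is considered active when all requested bits are set. */
    return (pCtx->hwvirt.vmx.Vmcs.u32ProcCtls & fCtls) == fCtls;
}
#endif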
16945
16946/**
16947 * Nested-guest VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH).
16948 * Unconditional VM-exit.
16949 */
16950HMVMX_EXIT_DECL hmR0VmxExitTaskSwitchNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16951{
16952 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16953
16954 hmR0VmxReadExitQualVmcs(pVmxTransient);
16955 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16956 hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
16957 hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
16958
16959 VMXVEXITINFO ExitInfo;
16960 RT_ZERO(ExitInfo);
16961 ExitInfo.uReason = pVmxTransient->uExitReason;
16962 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
16963 ExitInfo.u64Qual = pVmxTransient->uExitQual;
16964
16965 VMXVEXITEVENTINFO ExitEventInfo;
16966 RT_ZERO(ExitEventInfo);
16967 ExitEventInfo.uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
16968 ExitEventInfo.uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
16969 return IEMExecVmxVmexitTaskSwitch(pVCpu, &ExitInfo, &ExitEventInfo);
16970}
16971
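/*
 * Note on the IDT-vectoring fields read above: if the task switch was itself triggered
 * while delivering an event (e.g. a hardware interrupt through a task gate), the
 * IDT-vectoring info/error code identify that original event. Passing them along in
 * VMXVEXITEVENTINFO lets IEM record them in the virtual VMCS so the nested hypervisor
 * can re-inject the event after handling the VM-exit.
 */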
16972
16973/**
16974 * Nested-guest VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit.
16975 */
16976HMVMX_EXIT_DECL hmR0VmxExitHltNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16977{
16978 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16979
16980 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_HLT_EXIT))
16981 {
16982 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16983 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
16984 }
16985 return hmR0VmxExitHlt(pVCpu, pVmxTransient);
16986}
16987
16988
16989/**
16990 * Nested-guest VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit.
16991 */
16992HMVMX_EXIT_DECL hmR0VmxExitInvlpgNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
16993{
16994 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
16995
16996 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_INVLPG_EXIT))
16997 {
16998 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
16999 hmR0VmxReadExitQualVmcs(pVmxTransient);
17000
17001 VMXVEXITINFO ExitInfo;
17002 RT_ZERO(ExitInfo);
17003 ExitInfo.uReason = pVmxTransient->uExitReason;
17004 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17005 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17006 return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17007 }
17008 return hmR0VmxExitInvlpg(pVCpu, pVmxTransient);
17009}
17010
17011
17012/**
17013 * Nested-guest VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit.
17014 */
17015HMVMX_EXIT_DECL hmR0VmxExitRdpmcNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17016{
17017 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17018
17019 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_RDPMC_EXIT))
17020 {
17021 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17022 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17023 }
17024 return hmR0VmxExitRdpmc(pVCpu, pVmxTransient);
17025}
17026
17027
17028/**
17029 * Nested-guest VM-exit handler for VMREAD (VMX_EXIT_VMREAD) and VMWRITE
17030 * (VMX_EXIT_VMWRITE). Conditional VM-exit.
17031 */
17032HMVMX_EXIT_DECL hmR0VmxExitVmreadVmwriteNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17033{
17034 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17035
17036 Assert( pVmxTransient->uExitReason == VMX_EXIT_VMREAD
17037 || pVmxTransient->uExitReason == VMX_EXIT_VMWRITE);
17038
17039 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
17040
17041 uint8_t const iGReg = pVmxTransient->ExitInstrInfo.VmreadVmwrite.iReg2;
17042 Assert(iGReg < RT_ELEMENTS(pVCpu->cpum.GstCtx.aGRegs));
17043 uint64_t u64VmcsField = pVCpu->cpum.GstCtx.aGRegs[iGReg].u64;
17044
17045 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER);
17046 if (!CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx))
17047 u64VmcsField &= UINT64_C(0xffffffff);
17048
17049 if (CPUMIsGuestVmxVmreadVmwriteInterceptSet(pVCpu, pVmxTransient->uExitReason, u64VmcsField))
17050 {
17051 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17052 hmR0VmxReadExitQualVmcs(pVmxTransient);
17053
17054 VMXVEXITINFO ExitInfo;
17055 RT_ZERO(ExitInfo);
17056 ExitInfo.uReason = pVmxTransient->uExitReason;
17057 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17058 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17059 ExitInfo.InstrInfo = pVmxTransient->ExitInstrInfo;
17060 return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17061 }
17062
17063 if (pVmxTransient->uExitReason == VMX_EXIT_VMREAD)
17064 return hmR0VmxExitVmread(pVCpu, pVmxTransient);
17065 return hmR0VmxExitVmwrite(pVCpu, pVmxTransient);
17066}
17067
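/*
 * Illustrative sketch, compiled out: with VMCS shadowing, whether a VMREAD/VMWRITE of a
 * given field is intercepted is decided by the nested hypervisor's VMREAD/VMWRITE
 * bitmaps, which is what CPUMIsGuestVmxVmreadVmwriteInterceptSet consults above. The
 * simplified check below ignores the additional rule that field encodings with any of
 * bits 63:15 set always cause a VM-exit; the helper name is hypothetical.
 */
#if 0
static bool hmR0VmxSketchIsVmcsFieldIntercepted(uint8_t const *pbBitmap, uint64_t u64VmcsField)
{
    /* Bits 14:0 of the field encoding index a 4K bitmap; a set bit means the access exits. */
    uint32_t const idxField = (uint32_t)u64VmcsField & 0x7fff;
    return RT_BOOL(pbBitmap[idxField >> 3] & RT_BIT_32(idxField & 7));
}
#endif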
17068
17069/**
17070 * Nested-guest VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit.
17071 */
17072HMVMX_EXIT_DECL hmR0VmxExitRdtscNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17073{
17074 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17075
17076 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_RDTSC_EXIT))
17077 {
17078 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17079 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17080 }
17081
17082 return hmR0VmxExitRdtsc(pVCpu, pVmxTransient);
17083}
17084
17085
17086/**
17087 * Nested-guest VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX).
17088 * Conditional VM-exit.
17089 */
17090HMVMX_EXIT_DECL hmR0VmxExitMovCRxNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17091{
17092 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17093
17094 hmR0VmxReadExitQualVmcs(pVmxTransient);
17095 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17096
17097 VBOXSTRICTRC rcStrict;
17098 uint32_t const uAccessType = VMX_EXIT_QUAL_CRX_ACCESS(pVmxTransient->uExitQual);
17099 switch (uAccessType)
17100 {
17101 case VMX_EXIT_QUAL_CRX_ACCESS_WRITE:
17102 {
17103 uint8_t const iCrReg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual);
17104 uint8_t const iGReg = VMX_EXIT_QUAL_CRX_GENREG(pVmxTransient->uExitQual);
17105 Assert(iGReg < RT_ELEMENTS(pVCpu->cpum.GstCtx.aGRegs));
17106 uint64_t const uNewCrX = pVCpu->cpum.GstCtx.aGRegs[iGReg].u64;
17107
17108 bool fIntercept;
17109 switch (iCrReg)
17110 {
17111 case 0:
17112 case 4:
17113 fIntercept = CPUMIsGuestVmxMovToCr0Cr4InterceptSet(&pVCpu->cpum.GstCtx, iCrReg, uNewCrX);
17114 break;
17115
17116 case 3:
17117 fIntercept = CPUMIsGuestVmxMovToCr3InterceptSet(pVCpu, uNewCrX);
17118 break;
17119
17120 case 8:
17121 fIntercept = CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_CR8_LOAD_EXIT);
17122 break;
17123
17124 default:
17125 fIntercept = false;
17126 break;
17127 }
17128 if (fIntercept)
17129 {
17130 VMXVEXITINFO ExitInfo;
17131 RT_ZERO(ExitInfo);
17132 ExitInfo.uReason = pVmxTransient->uExitReason;
17133 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17134 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17135 rcStrict = IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17136 }
17137 else
17138 {
17139 int const rc = hmR0VmxImportGuestState(pVCpu, pVmxTransient->pVmcsInfo, IEM_CPUMCTX_EXTRN_MUST_MASK);
17140 AssertRCReturn(rc, rc);
17141 rcStrict = hmR0VmxExitMovToCrX(pVCpu, pVmxTransient->cbExitInstr, iGReg, iCrReg);
17142 }
17143 break;
17144 }
17145
17146 case VMX_EXIT_QUAL_CRX_ACCESS_READ:
17147 {
17148 /*
17149 * CR0/CR4 reads do not cause VM-exits, the read-shadow is used (subject to masking).
17150 * CR2 reads do not cause a VM-exit.
17151 * CR3 reads cause a VM-exit depending on the "CR3 store exiting" control.
17152 * CR8 reads cause a VM-exit depending on the "CR8 store exiting" control.
17153 */
17154 uint8_t const iCrReg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual);
17155 if ( iCrReg == 3
17156 || iCrReg == 8)
17157 {
17158 static const uint32_t s_auCrXReadIntercepts[] = { 0, 0, 0, VMX_PROC_CTLS_CR3_STORE_EXIT, 0,
17159 0, 0, 0, VMX_PROC_CTLS_CR8_STORE_EXIT };
17160 uint32_t const uIntercept = s_auCrXReadIntercepts[iCrReg];
17161 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, uIntercept))
17162 {
17163 VMXVEXITINFO ExitInfo;
17164 RT_ZERO(ExitInfo);
17165 ExitInfo.uReason = pVmxTransient->uExitReason;
17166 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17167 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17168 rcStrict = IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17169 }
17170 else
17171 {
17172 uint8_t const iGReg = VMX_EXIT_QUAL_CRX_GENREG(pVmxTransient->uExitQual);
17173 rcStrict = hmR0VmxExitMovFromCrX(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->cbExitInstr, iGReg, iCrReg);
17174 }
17175 }
17176 else
17177 {
17178 AssertMsgFailed(("MOV from CR%d VM-exit must not happen\n", iCrReg));
17179 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, iCrReg);
17180 }
17181 break;
17182 }
17183
17184 case VMX_EXIT_QUAL_CRX_ACCESS_CLTS:
17185 {
17186 PCVMXVVMCS const pVmcsNstGst = &pVCpu->cpum.GstCtx.hwvirt.vmx.Vmcs;
17187 uint64_t const uGstHostMask = pVmcsNstGst->u64Cr0Mask.u;
17188 uint64_t const uReadShadow = pVmcsNstGst->u64Cr0ReadShadow.u;
17189 if ( (uGstHostMask & X86_CR0_TS)
17190 && (uReadShadow & X86_CR0_TS))
17191 {
17192 VMXVEXITINFO ExitInfo;
17193 RT_ZERO(ExitInfo);
17194 ExitInfo.uReason = pVmxTransient->uExitReason;
17195 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17196 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17197 rcStrict = IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17198 }
17199 else
17200 rcStrict = hmR0VmxExitClts(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->cbExitInstr);
17201 break;
17202 }
17203
17204 case VMX_EXIT_QUAL_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */
17205 {
17206 RTGCPTR GCPtrEffDst;
17207 uint16_t const uNewMsw = VMX_EXIT_QUAL_CRX_LMSW_DATA(pVmxTransient->uExitQual);
17208 bool const fMemOperand = VMX_EXIT_QUAL_CRX_LMSW_OP_MEM(pVmxTransient->uExitQual);
17209 if (fMemOperand)
17210 {
17211 hmR0VmxReadGuestLinearAddrVmcs(pVmxTransient);
17212 GCPtrEffDst = pVmxTransient->uGuestLinearAddr;
17213 }
17214 else
17215 GCPtrEffDst = NIL_RTGCPTR;
17216
17217 if (CPUMIsGuestVmxLmswInterceptSet(&pVCpu->cpum.GstCtx, uNewMsw))
17218 {
17219 VMXVEXITINFO ExitInfo;
17220 RT_ZERO(ExitInfo);
17221 ExitInfo.uReason = pVmxTransient->uExitReason;
17222 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17223 ExitInfo.u64GuestLinearAddr = GCPtrEffDst;
17224 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17225 rcStrict = IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17226 }
17227 else
17228 rcStrict = hmR0VmxExitLmsw(pVCpu, pVmxTransient->pVmcsInfo, pVmxTransient->cbExitInstr, uNewMsw, GCPtrEffDst);
17229 break;
17230 }
17231
17232 default:
17233 {
17234 AssertMsgFailed(("Unrecognized Mov CRX access type %#x\n", uAccessType));
17235 HMVMX_UNEXPECTED_EXIT_RET(pVCpu, uAccessType);
17236 }
17237 }
17238
17239 if (rcStrict == VINF_IEM_RAISED_XCPT)
17240 {
17241 ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK);
17242 rcStrict = VINF_SUCCESS;
17243 }
17244 return rcStrict;
17245}
17246
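/*
 * Illustrative sketch, compiled out: the CPUMIsGuestVmxMovToCr0Cr4InterceptSet decision
 * used above boils down to the CR0/CR4 guest/host mask and read shadow in the virtual
 * VMCS: a MOV to CR0/CR4 is intercepted when it would give a bit owned by the nested
 * hypervisor a value different from the read shadow (simplified; the real check also
 * honours fixed-bit requirements). The helper name and parameters are illustrative.
 */
#if 0
DECLINLINE(bool) hmR0VmxSketchIsMovToCr0Cr4Intercepted(uint64_t fGstHostMask, uint64_t uReadShadow,
                                                       uint64_t uNewCrX)
{
    /* Bits set in the guest/host mask are owned by the nested hypervisor; changing any of
       them relative to the read shadow must cause a VM-exit. */
    return RT_BOOL((uNewCrX ^ uReadShadow) & fGstHostMask);
}
#endif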
17247
17248/**
17249 * Nested-guest VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX).
17250 * Conditional VM-exit.
17251 */
17252HMVMX_EXIT_DECL hmR0VmxExitMovDRxNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17253{
17254 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17255
17256 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_MOV_DR_EXIT))
17257 {
17258 hmR0VmxReadExitQualVmcs(pVmxTransient);
17259 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17260
17261 VMXVEXITINFO ExitInfo;
17262 RT_ZERO(ExitInfo);
17263 ExitInfo.uReason = pVmxTransient->uExitReason;
17264 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17265 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17266 return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17267 }
17268 return hmR0VmxExitMovDRx(pVCpu, pVmxTransient);
17269}
17270
17271
17272/**
17273 * Nested-guest VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR).
17274 * Conditional VM-exit.
17275 */
17276HMVMX_EXIT_DECL hmR0VmxExitIoInstrNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17277{
17278 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17279
17280 hmR0VmxReadExitQualVmcs(pVmxTransient);
17281
17282 uint32_t const uIOPort = VMX_EXIT_QUAL_IO_PORT(pVmxTransient->uExitQual);
17283 uint8_t const uIOSize = VMX_EXIT_QUAL_IO_SIZE(pVmxTransient->uExitQual);
17284 AssertReturn(uIOSize <= 3 && uIOSize != 2, VERR_VMX_IPE_1);
17285
17286 static uint32_t const s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses in bytes. */
17287 uint8_t const cbAccess = s_aIOSizes[uIOSize];
17288 if (CPUMIsGuestVmxIoInterceptSet(pVCpu, uIOPort, cbAccess))
17289 {
17290 /*
17291 * IN/OUT instruction:
17292 * - Provides VM-exit instruction length.
17293 *
17294 * INS/OUTS instruction:
17295 * - Provides VM-exit instruction length.
17296 * - Provides Guest-linear address.
17297 * - Optionally provides VM-exit instruction info (depends on CPU feature).
17298 */
17299 PVMCC pVM = pVCpu->CTX_SUFF(pVM);
17300 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17301
17302        /* Make sure we don't use stale/uninitialized VMX-transient info below. */
17303 pVmxTransient->ExitInstrInfo.u = 0;
17304 pVmxTransient->uGuestLinearAddr = 0;
17305
17306 bool const fVmxInsOutsInfo = pVM->cpum.ro.GuestFeatures.fVmxInsOutInfo;
17307 bool const fIOString = VMX_EXIT_QUAL_IO_IS_STRING(pVmxTransient->uExitQual);
17308 if (fIOString)
17309 {
17310 hmR0VmxReadGuestLinearAddrVmcs(pVmxTransient);
17311 if (fVmxInsOutsInfo)
17312 {
17313 Assert(RT_BF_GET(g_HmMsrs.u.vmx.u64Basic, VMX_BF_BASIC_VMCS_INS_OUTS)); /* Paranoia. */
17314 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
17315 }
17316 }
17317
17318 VMXVEXITINFO ExitInfo;
17319 RT_ZERO(ExitInfo);
17320 ExitInfo.uReason = pVmxTransient->uExitReason;
17321 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17322 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17323 ExitInfo.InstrInfo = pVmxTransient->ExitInstrInfo;
17324 ExitInfo.u64GuestLinearAddr = pVmxTransient->uGuestLinearAddr;
17325 return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17326 }
17327 return hmR0VmxExitIoInstr(pVCpu, pVmxTransient);
17328}
17329
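/*
 * Illustrative sketch, compiled out: CPUMIsGuestVmxIoInterceptSet above consults the
 * nested hypervisor's I/O bitmaps when "use I/O bitmaps" is enabled. Bitmap A covers
 * ports 0x0000-0x7fff and bitmap B covers 0x8000-0xffff; every byte of the access is
 * checked and a single set bit suffices to cause a VM-exit. The sketch ignores the
 * unconditional-exit case when an access wraps around port 0xffff; the helper name is
 * hypothetical.
 */
#if 0
static bool hmR0VmxSketchIsIoIntercepted(uint8_t const *pbIoBitmapA, uint8_t const *pbIoBitmapB,
                                         uint16_t uPort, uint8_t cbAccess)
{
    for (uint8_t i = 0; i < cbAccess; i++)
    {
        uint16_t const uThisPort = uPort + i;
        uint8_t const *pbBitmap  = uThisPort < 0x8000 ? pbIoBitmapA : pbIoBitmapB;
        uint16_t const idxPort   = uThisPort & 0x7fff;
        if (pbBitmap[idxPort >> 3] & RT_BIT_32(idxPort & 7))
            return true;
    }
    return false;
}
#endif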
17330
17331/**
17332 * Nested-guest VM-exit handler for RDMSR (VMX_EXIT_RDMSR). Conditional VM-exit.
17333 */
17334HMVMX_EXIT_DECL hmR0VmxExitRdmsrNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17335{
17336 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17337
17338 uint32_t fMsrpm;
17339 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_USE_MSR_BITMAPS))
17340 fMsrpm = CPUMGetVmxMsrPermission(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, pVCpu->cpum.GstCtx.ecx);
17341 else
17342 fMsrpm = VMXMSRPM_EXIT_RD;
17343
17344 if (fMsrpm & VMXMSRPM_EXIT_RD)
17345 {
17346 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17347 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17348 }
17349 return hmR0VmxExitRdmsr(pVCpu, pVmxTransient);
17350}
17351
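/*
 * Illustrative sketch, compiled out: CPUMGetVmxMsrPermission above looks up the nested
 * hypervisor's 4K MSR bitmap, which is split into four 1K regions: read-low (offset
 * 0x000), read-high (0x400), write-low (0x800) and write-high (0xc00). "Low" covers
 * MSRs 0x00000000-0x00001fff, "high" covers 0xc0000000-0xc0001fff; any other MSR always
 * causes a VM-exit. The sketch below returns only the exit bits and is simpler than the
 * real CPUM helper; its name is hypothetical.
 */
#if 0
static uint32_t hmR0VmxSketchGetMsrPermission(uint8_t const *pbMsrBitmap, uint32_t idMsr)
{
    uint32_t offMsr;
    if (idMsr <= UINT32_C(0x00001fff))
        offMsr = 0;                                     /* Low MSR range. */
    else if (idMsr - UINT32_C(0xc0000000) <= UINT32_C(0x1fff))
        offMsr = 0x400;                                 /* High MSR range. */
    else
        return VMXMSRPM_EXIT_RD | VMXMSRPM_EXIT_WR;     /* Outside both ranges: always exit. */

    uint32_t const idxMsr = idMsr & UINT32_C(0x1fff);
    uint32_t       fPerm  = 0;
    if (pbMsrBitmap[offMsr + (idxMsr >> 3)] & RT_BIT_32(idxMsr & 7))            /* Read bitmaps.  */
        fPerm |= VMXMSRPM_EXIT_RD;
    if (pbMsrBitmap[offMsr + 0x800 + (idxMsr >> 3)] & RT_BIT_32(idxMsr & 7))    /* Write bitmaps. */
        fPerm |= VMXMSRPM_EXIT_WR;
    return fPerm;
}
#endif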
17352
17353/**
17354 * Nested-guest VM-exit handler for WRMSR (VMX_EXIT_WRMSR). Conditional VM-exit.
17355 */
17356HMVMX_EXIT_DECL hmR0VmxExitWrmsrNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17357{
17358 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17359
17360 uint32_t fMsrpm;
17361 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_USE_MSR_BITMAPS))
17362 fMsrpm = CPUMGetVmxMsrPermission(pVCpu->cpum.GstCtx.hwvirt.vmx.abMsrBitmap, pVCpu->cpum.GstCtx.ecx);
17363 else
17364 fMsrpm = VMXMSRPM_EXIT_WR;
17365
17366 if (fMsrpm & VMXMSRPM_EXIT_WR)
17367 {
17368 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17369 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17370 }
17371 return hmR0VmxExitWrmsr(pVCpu, pVmxTransient);
17372}
17373
17374
17375/**
17376 * Nested-guest VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit.
17377 */
17378HMVMX_EXIT_DECL hmR0VmxExitMwaitNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17379{
17380 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17381
17382 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_MWAIT_EXIT))
17383 {
17384 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17385 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17386 }
17387 return hmR0VmxExitMwait(pVCpu, pVmxTransient);
17388}
17389
17390
17391/**
17392 * Nested-guest VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional
17393 * VM-exit.
17394 */
17395HMVMX_EXIT_DECL hmR0VmxExitMtfNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17396{
17397 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17398
17399 /** @todo NSTVMX: Should consider debugging nested-guests using VM debugger. */
17400 hmR0VmxReadGuestPendingDbgXctps(pVmxTransient);
17401 VMXVEXITINFO ExitInfo;
17402 RT_ZERO(ExitInfo);
17403 ExitInfo.uReason = pVmxTransient->uExitReason;
17404 ExitInfo.u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
17405 return IEMExecVmxVmexitTrapLike(pVCpu, &ExitInfo);
17406}
17407
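/*
 * Note: MTF is reflected as a "trap-like" VM-exit: the instruction under the monitor
 * trap flag has already completed and guest RIP points past it, so no instruction
 * length or exit qualification is needed; only the pending debug exceptions read above
 * have to be propagated into the virtual VMCS, which IEMExecVmxVmexitTrapLike handles.
 */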
17408
17409/**
17410 * Nested-guest VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit.
17411 */
17412HMVMX_EXIT_DECL hmR0VmxExitMonitorNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17413{
17414 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17415
17416 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_MONITOR_EXIT))
17417 {
17418 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17419 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17420 }
17421 return hmR0VmxExitMonitor(pVCpu, pVmxTransient);
17422}
17423
17424
17425/**
17426 * Nested-guest VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit.
17427 */
17428HMVMX_EXIT_DECL hmR0VmxExitPauseNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17429{
17430 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17431
17432 /** @todo NSTVMX: Think about this more. Does the outer guest need to intercept
17433 * PAUSE when executing a nested-guest? If it does not, we would not need
17434 * to check for the intercepts here. Just call VM-exit... */
17435
17436 /* The CPU would have already performed the necessary CPL checks for PAUSE-loop exiting. */
17437 if ( CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_PAUSE_EXIT)
17438 || CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_PAUSE_LOOP_EXIT))
17439 {
17440 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17441 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17442 }
17443 return hmR0VmxExitPause(pVCpu, pVmxTransient);
17444}
17445
17446
17447/**
17448 * Nested-guest VM-exit handler for when the TPR value is lowered below the
17449 * specified threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit.
17450 */
17451HMVMX_EXIT_NSRC_DECL hmR0VmxExitTprBelowThresholdNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17452{
17453 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17454
17455 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_USE_TPR_SHADOW))
17456 {
17457 hmR0VmxReadGuestPendingDbgXctps(pVmxTransient);
17458 VMXVEXITINFO ExitInfo;
17459 RT_ZERO(ExitInfo);
17460 ExitInfo.uReason = pVmxTransient->uExitReason;
17461 ExitInfo.u64GuestPendingDbgXcpts = pVmxTransient->uGuestPendingDbgXcpts;
17462 return IEMExecVmxVmexitTrapLike(pVCpu, &ExitInfo);
17463 }
17464 return hmR0VmxExitTprBelowThreshold(pVCpu, pVmxTransient);
17465}
17466
17467
17468/**
17469 * Nested-guest VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional
17470 * VM-exit.
17471 */
17472HMVMX_EXIT_DECL hmR0VmxExitApicAccessNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17473{
17474 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17475
17476 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17477 hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
17478 hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
17479 hmR0VmxReadExitQualVmcs(pVmxTransient);
17480
17481 Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_VIRT_APIC_ACCESS));
17482
17483 Log4Func(("at offset %#x type=%u\n", VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual),
17484 VMX_EXIT_QUAL_APIC_ACCESS_TYPE(pVmxTransient->uExitQual)));
17485
17486 VMXVEXITINFO ExitInfo;
17487 RT_ZERO(ExitInfo);
17488 ExitInfo.uReason = pVmxTransient->uExitReason;
17489 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17490 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17491
17492 VMXVEXITEVENTINFO ExitEventInfo;
17493 RT_ZERO(ExitEventInfo);
17494 ExitEventInfo.uIdtVectoringInfo = pVmxTransient->uIdtVectoringInfo;
17495 ExitEventInfo.uIdtVectoringErrCode = pVmxTransient->uIdtVectoringErrorCode;
17496 return IEMExecVmxVmexitApicAccess(pVCpu, &ExitInfo, &ExitEventInfo);
17497}
17498
17499
17500/**
17501 * Nested-guest VM-exit handler for APIC write emulation (VMX_EXIT_APIC_WRITE).
17502 * Conditional VM-exit.
17503 */
17504HMVMX_EXIT_DECL hmR0VmxExitApicWriteNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17505{
17506 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17507
17508 Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_APIC_REG_VIRT));
17509 hmR0VmxReadExitQualVmcs(pVmxTransient);
17510 return IEMExecVmxVmexit(pVCpu, pVmxTransient->uExitReason, pVmxTransient->uExitQual);
17511}
17512
17513
17514/**
17515 * Nested-guest VM-exit handler for virtualized EOI (VMX_EXIT_VIRTUALIZED_EOI).
17516 * Conditional VM-exit.
17517 */
17518HMVMX_EXIT_DECL hmR0VmxExitVirtEoiNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17519{
17520 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17521
17522 Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_VIRT_INT_DELIVERY));
17523 hmR0VmxReadExitQualVmcs(pVmxTransient);
17524 return IEMExecVmxVmexit(pVCpu, pVmxTransient->uExitReason, pVmxTransient->uExitQual);
17525}
17526
17527
17528/**
17529 * Nested-guest VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit.
17530 */
17531HMVMX_EXIT_DECL hmR0VmxExitRdtscpNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17532{
17533 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17534
17535 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_RDTSC_EXIT))
17536 {
17537 Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_RDTSCP));
17538 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17539 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17540 }
17541 return hmR0VmxExitRdtscp(pVCpu, pVmxTransient);
17542}
17543
17544
17545/**
17546 * Nested-guest VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit.
17547 */
17548HMVMX_EXIT_NSRC_DECL hmR0VmxExitWbinvdNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17549{
17550 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17551
17552 if (CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_WBINVD_EXIT))
17553 {
17554 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17555 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17556 }
17557 return hmR0VmxExitWbinvd(pVCpu, pVmxTransient);
17558}
17559
17560
17561/**
17562 * Nested-guest VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit.
17563 */
17564HMVMX_EXIT_DECL hmR0VmxExitInvpcidNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17565{
17566 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17567
17568 if (CPUMIsGuestVmxProcCtlsSet(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS_INVLPG_EXIT))
17569 {
17570 Assert(CPUMIsGuestVmxProcCtls2Set(&pVCpu->cpum.GstCtx, VMX_PROC_CTLS2_INVPCID));
17571 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17572 hmR0VmxReadExitQualVmcs(pVmxTransient);
17573 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
17574
17575 VMXVEXITINFO ExitInfo;
17576 RT_ZERO(ExitInfo);
17577 ExitInfo.uReason = pVmxTransient->uExitReason;
17578 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17579 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17580 ExitInfo.InstrInfo = pVmxTransient->ExitInstrInfo;
17581 return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17582 }
17583 return hmR0VmxExitInvpcid(pVCpu, pVmxTransient);
17584}
17585
17586
17587/**
17588 * Nested-guest VM-exit handler for an invalid guest state
17589 * (VMX_EXIT_ERR_INVALID_GUEST_STATE). Error VM-exit.
17590 */
17591HMVMX_EXIT_DECL hmR0VmxExitErrInvalidGuestStateNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17592{
17593 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17594
17595 /*
17596 * Currently this should never happen because we fully emulate VMLAUNCH/VMRESUME in IEM.
17597     * So if it does happen, it indicates a bug, possibly in the hardware-assisted VMX code.
17598     * Handle it as if the outer guest were in an invalid guest state.
17599 *
17600 * When the fast path is implemented, this should be changed to cause the corresponding
17601 * nested-guest VM-exit.
17602 */
17603 return hmR0VmxExitErrInvalidGuestState(pVCpu, pVmxTransient);
17604}
17605
17606
17607/**
17608 * Nested-guest VM-exit handler for instructions that cause VM-exits unconditionally
17609 * and only provide the instruction length.
17610 *
17611 * Unconditional VM-exit.
17612 */
17613HMVMX_EXIT_DECL hmR0VmxExitInstrNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17614{
17615 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17616
17617#ifdef VBOX_STRICT
17618 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
17619 switch (pVmxTransient->uExitReason)
17620 {
17621 case VMX_EXIT_ENCLS:
17622 Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_ENCLS_EXIT));
17623 break;
17624
17625 case VMX_EXIT_VMFUNC:
17626 Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_VMFUNC));
17627 break;
17628 }
17629#endif
17630
17631 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17632 return IEMExecVmxVmexitInstr(pVCpu, pVmxTransient->uExitReason, pVmxTransient->cbExitInstr);
17633}
17634
17635
17636/**
17637 * Nested-guest VM-exit handler for instructions that provide the VM-exit instruction
17638 * length as well as additional exit information.
17639 *
17640 * Unconditional VM-exit.
17641 */
17642HMVMX_EXIT_DECL hmR0VmxExitInstrWithInfoNested(PVMCPUCC pVCpu, PVMXTRANSIENT pVmxTransient)
17643{
17644 HMVMX_VALIDATE_NESTED_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient);
17645
17646#ifdef VBOX_STRICT
17647 PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx;
17648 switch (pVmxTransient->uExitReason)
17649 {
17650 case VMX_EXIT_GDTR_IDTR_ACCESS:
17651 case VMX_EXIT_LDTR_TR_ACCESS:
17652 Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_DESC_TABLE_EXIT));
17653 break;
17654
17655 case VMX_EXIT_RDRAND:
17656 Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_RDRAND_EXIT));
17657 break;
17658
17659 case VMX_EXIT_RDSEED:
17660 Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_RDSEED_EXIT));
17661 break;
17662
17663 case VMX_EXIT_XSAVES:
17664 case VMX_EXIT_XRSTORS:
17665 /** @todo NSTVMX: Verify XSS-bitmap. */
17666 Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_XSAVES_XRSTORS));
17667 break;
17668
17669 case VMX_EXIT_UMWAIT:
17670 case VMX_EXIT_TPAUSE:
17671 Assert(CPUMIsGuestVmxProcCtlsSet(pCtx, VMX_PROC_CTLS_RDTSC_EXIT));
17672 Assert(CPUMIsGuestVmxProcCtls2Set(pCtx, VMX_PROC_CTLS2_USER_WAIT_PAUSE));
17673 break;
17674
17675 case VMX_EXIT_LOADIWKEY:
17676 Assert(CPUMIsGuestVmxProcCtls3Set(pCtx, VMX_PROC_CTLS3_LOADIWKEY_EXIT));
17677 break;
17678 }
17679#endif
17680
17681 hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
17682 hmR0VmxReadExitQualVmcs(pVmxTransient);
17683 hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
17684
17685 VMXVEXITINFO ExitInfo;
17686 RT_ZERO(ExitInfo);
17687 ExitInfo.uReason = pVmxTransient->uExitReason;
17688 ExitInfo.cbInstr = pVmxTransient->cbExitInstr;
17689 ExitInfo.u64Qual = pVmxTransient->uExitQual;
17690 ExitInfo.InstrInfo = pVmxTransient->ExitInstrInfo;
17691 return IEMExecVmxVmexitInstrWithInfo(pVCpu, &ExitInfo);
17692}
17693
17694/** @} */
17695#endif /* VBOX_WITH_NESTED_HWVIRT_VMX */
17696