VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@58022

Last change on this file since 58022 was 58014, checked in by vboxsync, 9 years ago

VMM/GIM: Fix Hyper-V hypercall vs hypercall page confusion.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 505.5 KB
1/* $Id: HMVMXR0.cpp 58014 2015-10-02 17:33:16Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#include <iprt/x86.h>
24#include <iprt/asm-amd64-x86.h>
25#include <iprt/thread.h>
26
27#include <VBox/vmm/pdmapi.h>
28#include <VBox/vmm/dbgf.h>
29#include <VBox/vmm/iem.h>
30#include <VBox/vmm/iom.h>
31#include <VBox/vmm/selm.h>
32#include <VBox/vmm/tm.h>
33#include <VBox/vmm/gim.h>
34#ifdef VBOX_WITH_REM
35# include <VBox/vmm/rem.h>
36#endif
37#include "HMInternal.h"
38#include <VBox/vmm/vm.h>
39#include "HMVMXR0.h"
40#include "dtrace/VBoxVMM.h"
41
42#ifdef DEBUG_ramshankar
43# define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS
44# define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE
45# define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
46# define HMVMX_ALWAYS_CHECK_GUEST_STATE
47# define HMVMX_ALWAYS_TRAP_ALL_XCPTS
48# define HMVMX_ALWAYS_TRAP_PF
49# define HMVMX_ALWAYS_SWAP_FPU_STATE
50# define HMVMX_ALWAYS_FLUSH_TLB
51# define HMVMX_ALWAYS_SWAP_EFER
52#endif
53
54
55/*********************************************************************************************************************************
56* Defined Constants And Macros *
57*********************************************************************************************************************************/
58/** Use the function table. */
59#define HMVMX_USE_FUNCTION_TABLE
60
61/** Determine which tagged-TLB flush handler to use. */
62#define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0
63#define HMVMX_FLUSH_TAGGED_TLB_EPT 1
64#define HMVMX_FLUSH_TAGGED_TLB_VPID 2
65#define HMVMX_FLUSH_TAGGED_TLB_NONE 3
66
67/** @name Updated-guest-state flags.
68 * @{ */
69#define HMVMX_UPDATED_GUEST_RIP RT_BIT(0)
70#define HMVMX_UPDATED_GUEST_RSP RT_BIT(1)
71#define HMVMX_UPDATED_GUEST_RFLAGS RT_BIT(2)
72#define HMVMX_UPDATED_GUEST_CR0 RT_BIT(3)
73#define HMVMX_UPDATED_GUEST_CR3 RT_BIT(4)
74#define HMVMX_UPDATED_GUEST_CR4 RT_BIT(5)
75#define HMVMX_UPDATED_GUEST_GDTR RT_BIT(6)
76#define HMVMX_UPDATED_GUEST_IDTR RT_BIT(7)
77#define HMVMX_UPDATED_GUEST_LDTR RT_BIT(8)
78#define HMVMX_UPDATED_GUEST_TR RT_BIT(9)
79#define HMVMX_UPDATED_GUEST_SEGMENT_REGS RT_BIT(10)
80#define HMVMX_UPDATED_GUEST_DEBUG RT_BIT(11)
81#define HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR RT_BIT(12)
82#define HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR RT_BIT(13)
83#define HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR RT_BIT(14)
84#define HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS RT_BIT(15)
85#define HMVMX_UPDATED_GUEST_LAZY_MSRS RT_BIT(16)
86#define HMVMX_UPDATED_GUEST_ACTIVITY_STATE RT_BIT(17)
87#define HMVMX_UPDATED_GUEST_INTR_STATE RT_BIT(18)
88#define HMVMX_UPDATED_GUEST_APIC_STATE RT_BIT(19)
89#define HMVMX_UPDATED_GUEST_ALL ( HMVMX_UPDATED_GUEST_RIP \
90 | HMVMX_UPDATED_GUEST_RSP \
91 | HMVMX_UPDATED_GUEST_RFLAGS \
92 | HMVMX_UPDATED_GUEST_CR0 \
93 | HMVMX_UPDATED_GUEST_CR3 \
94 | HMVMX_UPDATED_GUEST_CR4 \
95 | HMVMX_UPDATED_GUEST_GDTR \
96 | HMVMX_UPDATED_GUEST_IDTR \
97 | HMVMX_UPDATED_GUEST_LDTR \
98 | HMVMX_UPDATED_GUEST_TR \
99 | HMVMX_UPDATED_GUEST_SEGMENT_REGS \
100 | HMVMX_UPDATED_GUEST_DEBUG \
101 | HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR \
102 | HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR \
103 | HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR \
104 | HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS \
105 | HMVMX_UPDATED_GUEST_LAZY_MSRS \
106 | HMVMX_UPDATED_GUEST_ACTIVITY_STATE \
107 | HMVMX_UPDATED_GUEST_INTR_STATE \
108 | HMVMX_UPDATED_GUEST_APIC_STATE)
109/** @} */
110
111/** @name
112 * Flags to skip redundant reads of some common VMCS fields that are not part of
113 * the guest-CPU state but are in the transient structure.
114 * @{ */
115#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO RT_BIT(0)
116#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE RT_BIT(1)
117#define HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION RT_BIT(2)
118#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN RT_BIT(3)
119#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO RT_BIT(4)
120#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE RT_BIT(5)
121#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO RT_BIT(6)
122/** @} */
123
124/** @name
125 * States of the VMCS.
126 *
127 * This does not reflect all possible VMCS states but currently only those
128 * needed for maintaining the VMCS consistently even when thread-context hooks
129 * are used. Maybe later this can be extended (i.e. Nested Virtualization).
130 * @{ */
131#define HMVMX_VMCS_STATE_CLEAR RT_BIT(0)
132#define HMVMX_VMCS_STATE_ACTIVE RT_BIT(1)
133#define HMVMX_VMCS_STATE_LAUNCHED RT_BIT(2)
134/** @} */
135
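/*
 * Illustrative sketch (the function and local below are made up, not members of
 * this file): the state bits above are meant to mirror the hardware VMCS
 * lifecycle, roughly VMCLEAR -> CLEAR, VMPTRLD -> ACTIVE (current on this CPU),
 * VMLAUNCH -> LAUNCHED (after which VMRESUME must be used).
 */
#if 0 /* Not built; sketch only. */
static void hmR0VmxVmcsStateSketch(void)
{
    uint32_t fVmcsState = HMVMX_VMCS_STATE_CLEAR;      /* After VMCLEAR: not active, safe to load elsewhere. */
    fVmcsState = HMVMX_VMCS_STATE_ACTIVE;              /* After VMPTRLD: active and current on this CPU. */
    fVmcsState |= HMVMX_VMCS_STATE_LAUNCHED;           /* After a successful VMLAUNCH: resume with VMRESUME. */
    NOREF(fVmcsState);
}
#endif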
136/**
137 * Exception bitmap mask for real-mode guests (real-on-v86).
138 *
139 * We need to intercept all exceptions manually except:
140 * - #NM, #MF handled in hmR0VmxLoadSharedCR0().
141 * - #DB handled in hmR0VmxLoadSharedDebugState().
142 * - #PF need not be intercepted even in real-mode if we have Nested Paging
143 * support.
144 */
145#define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) /* RT_BIT(X86_XCPT_DB) */ | RT_BIT(X86_XCPT_NMI) \
146 | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \
147 | RT_BIT(X86_XCPT_UD) /* RT_BIT(X86_XCPT_NM) */ | RT_BIT(X86_XCPT_DF) \
148 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \
149 | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \
150 /* RT_BIT(X86_XCPT_MF) */ | RT_BIT(X86_XCPT_AC) | RT_BIT(X86_XCPT_MC) \
151 | RT_BIT(X86_XCPT_XF))
152
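/*
 * A minimal sketch of how this mask would typically be folded into the guest's
 * exception bitmap for real-on-v86 mode. The sketch function name is made up;
 * the u32XcptBitmap field and the VMCS field constant are assumed from the
 * surrounding code.
 */
#if 0 /* Not built; sketch only. */
static int hmR0VmxRealModeXcptMaskSketch(PVMCPU pVCpu)
{
    uint32_t u32XcptBitmap = pVCpu->hm.s.vmx.u32XcptBitmap;
    u32XcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK;   /* Trap everything except #NM, #MF, #DB and (with nested paging) #PF. */
    return VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
}
#endif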
153/**
154 * Exception bitmap mask for all contributory exceptions.
155 *
156 * Page fault is deliberately excluded here as it's conditional as to whether
157 * it's contributory or benign. Page faults are handled separately.
158 */
159#define HMVMX_CONTRIBUTORY_XCPT_MASK ( RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_TS) \
160 | RT_BIT(X86_XCPT_DE))
161
162/** Maximum VM-instruction error number. */
163#define HMVMX_INSTR_ERROR_MAX 28
164
165/** Profiling macro. */
166#ifdef HM_PROFILE_EXIT_DISPATCH
167# define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed)
168# define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed)
169#else
170# define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0)
171# define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0)
172#endif
173
174/** Assert that preemption is disabled or covered by thread-context hooks. */
175#define HMVMX_ASSERT_PREEMPT_SAFE() Assert( VMMR0ThreadCtxHookIsEnabled(pVCpu) \
176 || !RTThreadPreemptIsEnabled(NIL_RTTHREAD));
177
178/** Assert that we haven't migrated CPUs when thread-context hooks are not
179 * used. */
180#define HMVMX_ASSERT_CPU_SAFE() AssertMsg( VMMR0ThreadCtxHookIsEnabled(pVCpu) \
181 || pVCpu->hm.s.idEnteredCpu == RTMpCpuId(), \
182 ("Illegal migration! Entered on CPU %u Current %u\n", \
183 pVCpu->hm.s.idEnteredCpu, RTMpCpuId())); \
184
185/** Helper macro for VM-exit handlers called unexpectedly. */
186#define HMVMX_RETURN_UNEXPECTED_EXIT() \
187 do { \
188 pVCpu->hm.s.u32HMError = pVmxTransient->uExitReason; \
189 return VERR_VMX_UNEXPECTED_EXIT; \
190 } while (0)
191
192
193/*********************************************************************************************************************************
194* Structures and Typedefs *
195*********************************************************************************************************************************/
196/**
197 * VMX transient state.
198 *
199 * A state structure for holding miscellaneous information across
200 * VMX non-root operation and restored after the transition.
201 */
202typedef struct VMXTRANSIENT
203{
204 /** The host's rflags/eflags. */
205 RTCCUINTREG fEFlags;
206#if HC_ARCH_BITS == 32
207 uint32_t u32Alignment0;
208#endif
209 /** The guest's TPR value used for TPR shadowing. */
210 uint8_t u8GuestTpr;
211 /** Alignment. */
212 uint8_t abAlignment0[7];
213
214 /** The basic VM-exit reason. */
215 uint16_t uExitReason;
216 /** Alignment. */
217 uint16_t u16Alignment0;
218 /** The VM-exit interruption error code. */
219 uint32_t uExitIntErrorCode;
220 /** The VM-exit qualification. */
221 uint64_t uExitQualification;
222
223 /** The VM-exit interruption-information field. */
224 uint32_t uExitIntInfo;
225 /** The VM-exit instruction-length field. */
226 uint32_t cbInstr;
227 /** The VM-exit instruction-information field. */
228 union
229 {
230 /** Plain unsigned int representation. */
231 uint32_t u;
232 /** INS and OUTS information. */
233 struct
234 {
235 uint32_t u6Reserved0 : 7;
236 /** The address size; 0=16-bit, 1=32-bit, 2=64-bit, rest undefined. */
237 uint32_t u3AddrSize : 3;
238 uint32_t u5Reserved1 : 5;
239 /** The segment register (X86_SREG_XXX). */
240 uint32_t iSegReg : 3;
241 uint32_t uReserved2 : 14;
242 } StrIo;
243 } ExitInstrInfo;
244 /** Whether the VM-entry failed or not. */
245 bool fVMEntryFailed;
246 /** Alignment. */
247 uint8_t abAlignment1[3];
248
249 /** The VM-entry interruption-information field. */
250 uint32_t uEntryIntInfo;
251 /** The VM-entry exception error code field. */
252 uint32_t uEntryXcptErrorCode;
253 /** The VM-entry instruction length field. */
254 uint32_t cbEntryInstr;
255
256 /** IDT-vectoring information field. */
257 uint32_t uIdtVectoringInfo;
258 /** IDT-vectoring error code. */
259 uint32_t uIdtVectoringErrorCode;
260
261 /** Mask of currently read VMCS fields; HMVMX_UPDATED_TRANSIENT_*. */
262 uint32_t fVmcsFieldsRead;
263
264 /** Whether the guest FPU was active at the time of VM-exit. */
265 bool fWasGuestFPUStateActive;
266 /** Whether the guest debug state was active at the time of VM-exit. */
267 bool fWasGuestDebugStateActive;
268 /** Whether the hyper debug state was active at the time of VM-exit. */
269 bool fWasHyperDebugStateActive;
270 /** Whether TSC-offsetting should be set up before VM-entry. */
271 bool fUpdateTscOffsettingAndPreemptTimer;
272 /** Whether the VM-exit was caused by a page-fault during delivery of a
273 * contributory exception or a page-fault. */
274 bool fVectoringDoublePF;
275 /** Whether the VM-exit was caused by a page-fault during delivery of an
276 * external interrupt or NMI. */
277 bool fVectoringPF;
278} VMXTRANSIENT;
279AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, sizeof(uint64_t));
280AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntInfo, sizeof(uint64_t));
281AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntInfo, sizeof(uint64_t));
282AssertCompileMemberAlignment(VMXTRANSIENT, fWasGuestFPUStateActive, sizeof(uint64_t));
283AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t));
284/** Pointer to VMX transient state. */
285typedef VMXTRANSIENT *PVMXTRANSIENT;
286
287
288/**
289 * MSR-bitmap read permissions.
290 */
291typedef enum VMXMSREXITREAD
292{
293 /** Reading this MSR causes a VM-exit. */
294 VMXMSREXIT_INTERCEPT_READ = 0xb,
295 /** Reading this MSR does not cause a VM-exit. */
296 VMXMSREXIT_PASSTHRU_READ
297} VMXMSREXITREAD;
298/** Pointer to MSR-bitmap read permissions. */
299typedef VMXMSREXITREAD* PVMXMSREXITREAD;
300
301/**
302 * MSR-bitmap write permissions.
303 */
304typedef enum VMXMSREXITWRITE
305{
306 /** Writing to this MSR causes a VM-exit. */
307 VMXMSREXIT_INTERCEPT_WRITE = 0xd,
308 /** Writing to this MSR does not cause a VM-exit. */
309 VMXMSREXIT_PASSTHRU_WRITE
310} VMXMSREXITWRITE;
311/** Pointer to MSR-bitmap write permissions. */
312typedef VMXMSREXITWRITE* PVMXMSREXITWRITE;
313
314
315/**
316 * VMX VM-exit handler.
317 *
318 * @returns VBox status code.
319 * @param pVCpu Pointer to the VMCPU.
320 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
321 * out-of-sync. Make sure to update the required
322 * fields before using them.
323 * @param pVmxTransient Pointer to the VMX-transient structure.
324 */
325#ifndef HMVMX_USE_FUNCTION_TABLE
326typedef int FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
327#else
328typedef DECLCALLBACK(int) FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
329/** Pointer to VM-exit handler. */
330typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER;
331#endif
332
333
334/*********************************************************************************************************************************
335* Internal Functions *
336*********************************************************************************************************************************/
337static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMXFLUSHEPT enmFlush);
338static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMXFLUSHVPID enmFlush, RTGCPTR GCPtr);
339static void hmR0VmxClearIntNmiWindowsVmcs(PVMCPU pVCpu);
340static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntInfo, uint32_t cbInstr,
341 uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress,
342 bool fStepping, uint32_t *puIntState);
343#if HC_ARCH_BITS == 32
344static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu);
345#endif
346#ifndef HMVMX_USE_FUNCTION_TABLE
347DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason);
348# define HMVMX_EXIT_DECL static int
349#else
350# define HMVMX_EXIT_DECL static DECLCALLBACK(int)
351#endif
352DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExitStep(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient,
353 uint32_t uExitReason, uint16_t uCsStart, uint64_t uRipStart);
354
355/** @name VM-exit handlers.
356 * @{
357 */
358static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi;
359static FNVMXEXITHANDLER hmR0VmxExitExtInt;
360static FNVMXEXITHANDLER hmR0VmxExitTripleFault;
361static FNVMXEXITHANDLER hmR0VmxExitInitSignal;
362static FNVMXEXITHANDLER hmR0VmxExitSipi;
363static FNVMXEXITHANDLER hmR0VmxExitIoSmi;
364static FNVMXEXITHANDLER hmR0VmxExitSmi;
365static FNVMXEXITHANDLER hmR0VmxExitIntWindow;
366static FNVMXEXITHANDLER hmR0VmxExitNmiWindow;
367static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch;
368static FNVMXEXITHANDLER hmR0VmxExitCpuid;
369static FNVMXEXITHANDLER hmR0VmxExitGetsec;
370static FNVMXEXITHANDLER hmR0VmxExitHlt;
371static FNVMXEXITHANDLER hmR0VmxExitInvd;
372static FNVMXEXITHANDLER hmR0VmxExitInvlpg;
373static FNVMXEXITHANDLER hmR0VmxExitRdpmc;
374static FNVMXEXITHANDLER hmR0VmxExitVmcall;
375static FNVMXEXITHANDLER hmR0VmxExitRdtsc;
376static FNVMXEXITHANDLER hmR0VmxExitRsm;
377static FNVMXEXITHANDLER hmR0VmxExitSetPendingXcptUD;
378static FNVMXEXITHANDLER hmR0VmxExitMovCRx;
379static FNVMXEXITHANDLER hmR0VmxExitMovDRx;
380static FNVMXEXITHANDLER hmR0VmxExitIoInstr;
381static FNVMXEXITHANDLER hmR0VmxExitRdmsr;
382static FNVMXEXITHANDLER hmR0VmxExitWrmsr;
383static FNVMXEXITHANDLER hmR0VmxExitErrInvalidGuestState;
384static FNVMXEXITHANDLER hmR0VmxExitErrMsrLoad;
385static FNVMXEXITHANDLER hmR0VmxExitErrUndefined;
386static FNVMXEXITHANDLER hmR0VmxExitMwait;
387static FNVMXEXITHANDLER hmR0VmxExitMtf;
388static FNVMXEXITHANDLER hmR0VmxExitMonitor;
389static FNVMXEXITHANDLER hmR0VmxExitPause;
390static FNVMXEXITHANDLER hmR0VmxExitErrMachineCheck;
391static FNVMXEXITHANDLER hmR0VmxExitTprBelowThreshold;
392static FNVMXEXITHANDLER hmR0VmxExitApicAccess;
393static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess;
394static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess;
395static FNVMXEXITHANDLER hmR0VmxExitEptViolation;
396static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig;
397static FNVMXEXITHANDLER hmR0VmxExitRdtscp;
398static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer;
399static FNVMXEXITHANDLER hmR0VmxExitWbinvd;
400static FNVMXEXITHANDLER hmR0VmxExitXsetbv;
401static FNVMXEXITHANDLER hmR0VmxExitRdrand;
402static FNVMXEXITHANDLER hmR0VmxExitInvpcid;
403/** @} */
404
405static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
406static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
407static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
408static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
409static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
410static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
411#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
412static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
413#endif
414static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
415
416
417/*********************************************************************************************************************************
418* Global Variables *
419*********************************************************************************************************************************/
420#ifdef HMVMX_USE_FUNCTION_TABLE
421
422/**
423 * VMX_EXIT dispatch table.
424 */
425static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] =
426{
427 /* 00 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi,
428 /* 01 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt,
429 /* 02 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault,
430 /* 03 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitInitSignal,
431 /* 04 VMX_EXIT_SIPI */ hmR0VmxExitSipi,
432 /* 05 VMX_EXIT_IO_SMI */ hmR0VmxExitIoSmi,
433 /* 06 VMX_EXIT_SMI */ hmR0VmxExitSmi,
434 /* 07 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow,
435 /* 08 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow,
436 /* 09 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch,
437 /* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid,
438 /* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec,
439 /* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt,
440 /* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd,
441 /* 14 VMX_EXIT_INVLPG */ hmR0VmxExitInvlpg,
442 /* 15 VMX_EXIT_RDPMC */ hmR0VmxExitRdpmc,
443 /* 16 VMX_EXIT_RDTSC */ hmR0VmxExitRdtsc,
444 /* 17 VMX_EXIT_RSM */ hmR0VmxExitRsm,
445 /* 18 VMX_EXIT_VMCALL */ hmR0VmxExitVmcall,
446 /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitSetPendingXcptUD,
447 /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitSetPendingXcptUD,
448 /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitSetPendingXcptUD,
449 /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitSetPendingXcptUD,
450 /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitSetPendingXcptUD,
451 /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitSetPendingXcptUD,
452 /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitSetPendingXcptUD,
453 /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitSetPendingXcptUD,
454 /* 27 VMX_EXIT_VMXON */ hmR0VmxExitSetPendingXcptUD,
455 /* 28 VMX_EXIT_MOV_CRX */ hmR0VmxExitMovCRx,
456 /* 29 VMX_EXIT_MOV_DRX */ hmR0VmxExitMovDRx,
457 /* 30 VMX_EXIT_IO_INSTR */ hmR0VmxExitIoInstr,
458 /* 31 VMX_EXIT_RDMSR */ hmR0VmxExitRdmsr,
459 /* 32 VMX_EXIT_WRMSR */ hmR0VmxExitWrmsr,
460 /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ hmR0VmxExitErrInvalidGuestState,
461 /* 34 VMX_EXIT_ERR_MSR_LOAD */ hmR0VmxExitErrMsrLoad,
462 /* 35 UNDEFINED */ hmR0VmxExitErrUndefined,
463 /* 36 VMX_EXIT_MWAIT */ hmR0VmxExitMwait,
464 /* 37 VMX_EXIT_MTF */ hmR0VmxExitMtf,
465 /* 38 UNDEFINED */ hmR0VmxExitErrUndefined,
466 /* 39 VMX_EXIT_MONITOR */ hmR0VmxExitMonitor,
467 /* 40 VMX_EXIT_PAUSE */ hmR0VmxExitPause,
468 /* 41 VMX_EXIT_ERR_MACHINE_CHECK */ hmR0VmxExitErrMachineCheck,
469 /* 42 UNDEFINED */ hmR0VmxExitErrUndefined,
470 /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ hmR0VmxExitTprBelowThreshold,
471 /* 44 VMX_EXIT_APIC_ACCESS */ hmR0VmxExitApicAccess,
472 /* 45 UNDEFINED */ hmR0VmxExitErrUndefined,
473 /* 46 VMX_EXIT_XDTR_ACCESS */ hmR0VmxExitXdtrAccess,
474 /* 47 VMX_EXIT_TR_ACCESS */ hmR0VmxExitXdtrAccess,
475 /* 48 VMX_EXIT_EPT_VIOLATION */ hmR0VmxExitEptViolation,
476 /* 49 VMX_EXIT_EPT_MISCONFIG */ hmR0VmxExitEptMisconfig,
477 /* 50 VMX_EXIT_INVEPT */ hmR0VmxExitSetPendingXcptUD,
478 /* 51 VMX_EXIT_RDTSCP */ hmR0VmxExitRdtscp,
479 /* 52 VMX_EXIT_PREEMPT_TIMER */ hmR0VmxExitPreemptTimer,
480 /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitSetPendingXcptUD,
481 /* 54 VMX_EXIT_WBINVD */ hmR0VmxExitWbinvd,
482 /* 55 VMX_EXIT_XSETBV */ hmR0VmxExitXsetbv,
483 /* 56 UNDEFINED */ hmR0VmxExitErrUndefined,
484 /* 57 VMX_EXIT_RDRAND */ hmR0VmxExitRdrand,
485 /* 58 VMX_EXIT_INVPCID */ hmR0VmxExitInvpcid,
486 /* 59 VMX_EXIT_VMFUNC */ hmR0VmxExitSetPendingXcptUD,
487 /* 60 VMX_EXIT_RESERVED_60 */ hmR0VmxExitErrUndefined,
488 /* 61 VMX_EXIT_RDSEED */ hmR0VmxExitErrUndefined, /* only spurious exits, so undefined */
489 /* 62 VMX_EXIT_RESERVED_62 */ hmR0VmxExitErrUndefined,
490 /* 63 VMX_EXIT_XSAVES */ hmR0VmxExitSetPendingXcptUD,
491 /* 64 VMX_EXIT_XRSTORS */ hmR0VmxExitSetPendingXcptUD,
492};
493#endif /* HMVMX_USE_FUNCTION_TABLE */
494
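/*
 * A minimal sketch of how the table above is meant to be indexed by the basic
 * VM-exit reason. The sketch function name is made up; the real dispatch site
 * is elsewhere in this file.
 */
#if 0 /* Not built; sketch only. */
static int hmR0VmxDispatchSketch(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
{
    uint16_t const uExitReason = pVmxTransient->uExitReason;   /* Basic exit reason, 0..VMX_EXIT_MAX. */
    AssertReturn(uExitReason <= VMX_EXIT_MAX, VERR_VMX_UNEXPECTED_EXIT);
    return g_apfnVMExitHandlers[uExitReason](pVCpu, pMixedCtx, pVmxTransient);
}
#endif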
495#ifdef VBOX_STRICT
496static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] =
497{
498 /* 0 */ "(Not Used)",
499 /* 1 */ "VMCALL executed in VMX root operation.",
500 /* 2 */ "VMCLEAR with invalid physical address.",
501 /* 3 */ "VMCLEAR with VMXON pointer.",
502 /* 4 */ "VMLAUNCH with non-clear VMCS.",
503 /* 5 */ "VMRESUME with non-launched VMCS.",
504 /* 6 */ "VMRESUME after VMXOFF.",
505 /* 7 */ "VM-entry with invalid control fields.",
506 /* 8 */ "VM-entry with invalid host state fields.",
507 /* 9 */ "VMPTRLD with invalid physical address.",
508 /* 10 */ "VMPTRLD with VMXON pointer.",
509 /* 11 */ "VMPTRLD with incorrect revision identifier.",
510 /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.",
511 /* 13 */ "VMWRITE to read-only VMCS component.",
512 /* 14 */ "(Not Used)",
513 /* 15 */ "VMXON executed in VMX root operation.",
514 /* 16 */ "VM-entry with invalid executive-VMCS pointer.",
515 /* 17 */ "VM-entry with non-launched executive VMCS.",
516 /* 18 */ "VM-entry with executive-VMCS pointer not VMXON pointer.",
517 /* 19 */ "VMCALL with non-clear VMCS.",
518 /* 20 */ "VMCALL with invalid VM-exit control fields.",
519 /* 21 */ "(Not Used)",
520 /* 22 */ "VMCALL with incorrect MSEG revision identifier.",
521 /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.",
522 /* 24 */ "VMCALL with invalid SMM-monitor features.",
523 /* 25 */ "VM-entry with invalid VM-execution control fields in executive VMCS.",
524 /* 26 */ "VM-entry with events blocked by MOV SS.",
525 /* 27 */ "(Not Used)",
526 /* 28 */ "Invalid operand to INVEPT/INVVPID."
527};
528#endif /* VBOX_STRICT */
529
530
531
532/**
533 * Updates the VM's last error record. If there was a VMX instruction error,
534 * reads the error data from the VMCS and updates the VCPU's last error record as
535 * well.
536 *
537 * @param pVM Pointer to the VM.
538 * @param pVCpu Pointer to the VMCPU (can be NULL if @a rc is not
539 * VERR_VMX_UNABLE_TO_START_VM or
540 * VERR_VMX_INVALID_VMCS_FIELD).
541 * @param rc The error code.
542 */
543static void hmR0VmxUpdateErrorRecord(PVM pVM, PVMCPU pVCpu, int rc)
544{
545 AssertPtr(pVM);
546 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
547 || rc == VERR_VMX_UNABLE_TO_START_VM)
548 {
549 AssertPtrReturnVoid(pVCpu);
550 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
551 }
552 pVM->hm.s.lLastError = rc;
553}
554
555
556/**
557 * Reads the VM-entry interruption-information field from the VMCS into the VMX
558 * transient structure.
559 *
560 * @returns VBox status code.
561 * @param pVmxTransient Pointer to the VMX transient structure.
562 *
563 * @remarks No-long-jump zone!!!
564 */
565DECLINLINE(int) hmR0VmxReadEntryIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
566{
567 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntInfo);
568 AssertRCReturn(rc, rc);
569 return VINF_SUCCESS;
570}
571
572
573/**
574 * Reads the VM-entry exception error code field from the VMCS into
575 * the VMX transient structure.
576 *
577 * @returns VBox status code.
578 * @param pVmxTransient Pointer to the VMX transient structure.
579 *
580 * @remarks No-long-jump zone!!!
581 */
582DECLINLINE(int) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
583{
584 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode);
585 AssertRCReturn(rc, rc);
586 return VINF_SUCCESS;
587}
588
589
590/**
591 * Reads the VM-entry instruction length field from the VMCS into the VMX
592 * transient structure.
593 *
594 * @returns VBox status code.
595 * @param pVmxTransient Pointer to the VMX transient structure.
596 *
597 * @remarks No-long-jump zone!!!
598 */
599DECLINLINE(int) hmR0VmxReadEntryInstrLenVmcs(PVMXTRANSIENT pVmxTransient)
600{
601 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr);
602 AssertRCReturn(rc, rc);
603 return VINF_SUCCESS;
604}
605
606
607/**
608 * Reads the VM-exit interruption-information field from the VMCS into the VMX
609 * transient structure.
610 *
611 * @returns VBox status code.
612 * @param pVmxTransient Pointer to the VMX transient structure.
613 */
614DECLINLINE(int) hmR0VmxReadExitIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
615{
616 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO))
617 {
618 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo);
619 AssertRCReturn(rc, rc);
620 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO;
621 }
622 return VINF_SUCCESS;
623}
624
625
626/**
627 * Reads the VM-exit interruption error code from the VMCS into the VMX
628 * transient structure.
629 *
630 * @returns VBox status code.
631 * @param pVmxTransient Pointer to the VMX transient structure.
632 */
633DECLINLINE(int) hmR0VmxReadExitIntErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
634{
635 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE))
636 {
637 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode);
638 AssertRCReturn(rc, rc);
639 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE;
640 }
641 return VINF_SUCCESS;
642}
643
644
645/**
646 * Reads the VM-exit instruction length field from the VMCS into the VMX
647 * transient structure.
648 *
649 * @returns VBox status code.
651 * @param pVmxTransient Pointer to the VMX transient structure.
652 */
653DECLINLINE(int) hmR0VmxReadExitInstrLenVmcs(PVMXTRANSIENT pVmxTransient)
654{
655 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN))
656 {
657 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbInstr);
658 AssertRCReturn(rc, rc);
659 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN;
660 }
661 return VINF_SUCCESS;
662}
663
664
665/**
666 * Reads the VM-exit instruction-information field from the VMCS into
667 * the VMX transient structure.
668 *
669 * @returns VBox status code.
670 * @param pVmxTransient Pointer to the VMX transient structure.
671 */
672DECLINLINE(int) hmR0VmxReadExitInstrInfoVmcs(PVMXTRANSIENT pVmxTransient)
673{
674 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO))
675 {
676 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u);
677 AssertRCReturn(rc, rc);
678 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO;
679 }
680 return VINF_SUCCESS;
681}
682
683
684/**
685 * Reads the exit qualification from the VMCS into the VMX transient
686 * structure.
687 *
688 * @returns VBox status code.
689 * @param pVCpu Pointer to the VMCPU (required for the VMCS cache
690 * case).
691 * @param pVmxTransient Pointer to the VMX transient structure.
692 */
693DECLINLINE(int) hmR0VmxReadExitQualificationVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
694{
695 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION))
696 {
697 int rc = VMXReadVmcsGstN(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQualification); NOREF(pVCpu);
698 AssertRCReturn(rc, rc);
699 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION;
700 }
701 return VINF_SUCCESS;
702}
703
704
705/**
706 * Reads the IDT-vectoring information field from the VMCS into the VMX
707 * transient structure.
708 *
709 * @returns VBox status code.
710 * @param pVmxTransient Pointer to the VMX transient structure.
711 *
712 * @remarks No-long-jump zone!!!
713 */
714DECLINLINE(int) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient)
715{
716 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO))
717 {
718 int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_INFO, &pVmxTransient->uIdtVectoringInfo);
719 AssertRCReturn(rc, rc);
720 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO;
721 }
722 return VINF_SUCCESS;
723}
724
725
726/**
727 * Reads the IDT-vectoring error code from the VMCS into the VMX
728 * transient structure.
729 *
730 * @returns VBox status code.
731 * @param pVmxTransient Pointer to the VMX transient structure.
732 */
733DECLINLINE(int) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
734{
735 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE))
736 {
737 int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode);
738 AssertRCReturn(rc, rc);
739 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE;
740 }
741 return VINF_SUCCESS;
742}
743
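/*
 * A minimal sketch of how a VM-exit handler typically uses the read helpers
 * above; thanks to fVmcsFieldsRead each VMCS field is read at most once per
 * VM-exit. The sketch function name is made up.
 */
#if 0 /* Not built; sketch only. */
static int hmR0VmxReadHelpersSketch(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
{
    int rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);            /* First call reads the VMCS field... */
    AssertRCReturn(rc, rc);
    rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);                /* ...subsequent calls are no-ops. */
    AssertRCReturn(rc, rc);
    rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
    AssertRCReturn(rc, rc);
    return VINF_SUCCESS;
}
#endif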
744
745/**
746 * Enters VMX root mode operation on the current CPU.
747 *
748 * @returns VBox status code.
749 * @param pVM Pointer to the VM (optional, can be NULL after
750 * a resume).
751 * @param HCPhysCpuPage Physical address of the VMXON region.
752 * @param pvCpuPage Pointer to the VMXON region.
753 */
754static int hmR0VmxEnterRootMode(PVM pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
755{
756 Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS);
757 Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage);
758 Assert(pvCpuPage);
759 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
760
761 if (pVM)
762 {
763 /* Write the VMCS revision dword to the VMXON region. */
764 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo);
765 }
766
767 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
768 RTCCUINTREG fEFlags = ASMIntDisableFlags();
769
770 /* Enable the VMX bit in CR4 if necessary. */
771 RTCCUINTREG uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, ~0);
772
773 /* Enter VMX root mode. */
774 int rc = VMXEnable(HCPhysCpuPage);
775 if (RT_FAILURE(rc))
776 {
777 if (!(uOldCr4 & X86_CR4_VMXE))
778 SUPR0ChangeCR4(0, ~X86_CR4_VMXE);
779
780 if (pVM)
781 pVM->hm.s.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
782 }
783
784 /* Restore interrupts. */
785 ASMSetFlags(fEFlags);
786 return rc;
787}
788
789
790/**
791 * Exits VMX root mode operation on the current CPU.
792 *
793 * @returns VBox status code.
794 */
795static int hmR0VmxLeaveRootMode(void)
796{
797 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
798
799 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
800 RTCCUINTREG fEFlags = ASMIntDisableFlags();
801
802 /* If we're for some reason not in VMX root mode, then don't leave it. */
803 RTCCUINTREG uHostCR4 = ASMGetCR4();
804
805 int rc;
806 if (uHostCR4 & X86_CR4_VMXE)
807 {
808 /* Exit VMX root mode and clear the VMX bit in CR4. */
809 VMXDisable();
810 SUPR0ChangeCR4(0, ~X86_CR4_VMXE);
811 rc = VINF_SUCCESS;
812 }
813 else
814 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
815
816 /* Restore interrupts. */
817 ASMSetFlags(fEFlags);
818 return rc;
819}
820
821
822/**
823 * Allocates and maps one physically contiguous page. The allocated page is
824 * zeroed out. (Used by various VT-x structures).
825 *
826 * @returns IPRT status code.
827 * @param pMemObj Pointer to the ring-0 memory object.
828 * @param ppVirt Where to store the virtual address of the
829 * allocation.
830 * @param pHCPhys Where to store the physical address of the
831 * allocation.
832 */
833DECLINLINE(int) hmR0VmxPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
834{
835 AssertPtrReturn(pMemObj, VERR_INVALID_PARAMETER);
836 AssertPtrReturn(ppVirt, VERR_INVALID_PARAMETER);
837 AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
838
839 int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */);
840 if (RT_FAILURE(rc))
841 return rc;
842 *ppVirt = RTR0MemObjAddress(*pMemObj);
843 *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */);
844 ASMMemZero32(*ppVirt, PAGE_SIZE);
845 return VINF_SUCCESS;
846}
847
848
849/**
850 * Frees and unmaps an allocated physical page.
851 *
852 * @param pMemObj Pointer to the ring-0 memory object.
853 * @param ppVirt Where to re-initialize the virtual address of the
854 * allocation as 0.
855 * @param pHCPhys Where to re-initialize the physical address of the
856 * allocation as 0.
857 */
858DECLINLINE(void) hmR0VmxPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
859{
860 AssertPtr(pMemObj);
861 AssertPtr(ppVirt);
862 AssertPtr(pHCPhys);
863 if (*pMemObj != NIL_RTR0MEMOBJ)
864 {
865 int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */);
866 AssertRC(rc);
867 *pMemObj = NIL_RTR0MEMOBJ;
868 *ppVirt = 0;
869 *pHCPhys = 0;
870 }
871}
872
873
874/**
875 * Worker function to free VT-x related structures.
876 *
878 * @param pVM Pointer to the VM.
879 */
880static void hmR0VmxStructsFree(PVM pVM)
881{
882 for (VMCPUID i = 0; i < pVM->cCpus; i++)
883 {
884 PVMCPU pVCpu = &pVM->aCpus[i];
885 AssertPtr(pVCpu);
886
887 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
888 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
889
890 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
891 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, &pVCpu->hm.s.vmx.HCPhysMsrBitmap);
892
893 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic, &pVCpu->hm.s.vmx.HCPhysVirtApic);
894 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
895 }
896
897 hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, &pVM->hm.s.vmx.HCPhysApicAccess);
898#ifdef VBOX_WITH_CRASHDUMP_MAGIC
899 hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch);
900#endif
901}
902
903
904/**
905 * Worker function to allocate VT-x related VM structures.
906 *
907 * @returns IPRT status code.
908 * @param pVM Pointer to the VM.
909 */
910static int hmR0VmxStructsAlloc(PVM pVM)
911{
912 /*
913 * Initialize members up-front so we can cleanup properly on allocation failure.
914 */
915#define VMXLOCAL_INIT_VM_MEMOBJ(a_Name, a_VirtPrefix) \
916 pVM->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \
917 pVM->hm.s.vmx.a_VirtPrefix##a_Name = 0; \
918 pVM->hm.s.vmx.HCPhys##a_Name = 0;
919
920#define VMXLOCAL_INIT_VMCPU_MEMOBJ(a_Name, a_VirtPrefix) \
921 pVCpu->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \
922 pVCpu->hm.s.vmx.a_VirtPrefix##a_Name = 0; \
923 pVCpu->hm.s.vmx.HCPhys##a_Name = 0;
924
925#ifdef VBOX_WITH_CRASHDUMP_MAGIC
926 VMXLOCAL_INIT_VM_MEMOBJ(Scratch, pv);
927#endif
928 VMXLOCAL_INIT_VM_MEMOBJ(ApicAccess, pb);
929
930 AssertCompile(sizeof(VMCPUID) == sizeof(pVM->cCpus));
931 for (VMCPUID i = 0; i < pVM->cCpus; i++)
932 {
933 PVMCPU pVCpu = &pVM->aCpus[i];
934 VMXLOCAL_INIT_VMCPU_MEMOBJ(Vmcs, pv);
935 VMXLOCAL_INIT_VMCPU_MEMOBJ(VirtApic, pb);
936 VMXLOCAL_INIT_VMCPU_MEMOBJ(MsrBitmap, pv);
937 VMXLOCAL_INIT_VMCPU_MEMOBJ(GuestMsr, pv);
938 VMXLOCAL_INIT_VMCPU_MEMOBJ(HostMsr, pv);
939 }
940#undef VMXLOCAL_INIT_VMCPU_MEMOBJ
941#undef VMXLOCAL_INIT_VM_MEMOBJ
942
943 /* The VMCS size cannot be more than 4096 bytes. See Intel spec. Appendix A.1 "Basic VMX Information". */
944 AssertReturnStmt(MSR_IA32_VMX_BASIC_INFO_VMCS_SIZE(pVM->hm.s.vmx.Msrs.u64BasicInfo) <= PAGE_SIZE,
945 (&pVM->aCpus[0])->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE,
946 VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO);
947
948 /*
949 * Allocate all the VT-x structures.
950 */
951 int rc = VINF_SUCCESS;
952#ifdef VBOX_WITH_CRASHDUMP_MAGIC
953 rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch);
954 if (RT_FAILURE(rc))
955 goto cleanup;
956 strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
957 *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
958#endif
959
960 /* Allocate the APIC-access page for trapping APIC accesses from the guest. */
961 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
962 {
963 rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess,
964 &pVM->hm.s.vmx.HCPhysApicAccess);
965 if (RT_FAILURE(rc))
966 goto cleanup;
967 }
968
969 /*
970 * Initialize per-VCPU VT-x structures.
971 */
972 for (VMCPUID i = 0; i < pVM->cCpus; i++)
973 {
974 PVMCPU pVCpu = &pVM->aCpus[i];
975 AssertPtr(pVCpu);
976
977 /* Allocate the VM control structure (VMCS). */
978 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
979 if (RT_FAILURE(rc))
980 goto cleanup;
981
982 /* Allocate the Virtual-APIC page for transparent TPR accesses. */
983 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
984 {
985 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic,
986 &pVCpu->hm.s.vmx.HCPhysVirtApic);
987 if (RT_FAILURE(rc))
988 goto cleanup;
989 }
990
991 /*
992 * Allocate the MSR-bitmap if supported by the CPU. The MSR-bitmap is for
993 * transparent accesses of specific MSRs.
994 *
995 * If the condition for enabling MSR bitmaps changes here, don't forget to
996 * update HMAreMsrBitmapsAvailable().
997 */
998 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
999 {
1000 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap,
1001 &pVCpu->hm.s.vmx.HCPhysMsrBitmap);
1002 if (RT_FAILURE(rc))
1003 goto cleanup;
1004 ASMMemFill32(pVCpu->hm.s.vmx.pvMsrBitmap, PAGE_SIZE, UINT32_C(0xffffffff));
1005 }
1006
1007 /* Allocate the VM-entry MSR-load and VM-exit MSR-store page for the guest MSRs. */
1008 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
1009 if (RT_FAILURE(rc))
1010 goto cleanup;
1011
1012 /* Allocate the VM-exit MSR-load page for the host MSRs. */
1013 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
1014 if (RT_FAILURE(rc))
1015 goto cleanup;
1016 }
1017
1018 return VINF_SUCCESS;
1019
1020cleanup:
1021 hmR0VmxStructsFree(pVM);
1022 return rc;
1023}
1024
1025
1026/**
1027 * Does global VT-x initialization (called during module initialization).
1028 *
1029 * @returns VBox status code.
1030 */
1031VMMR0DECL(int) VMXR0GlobalInit(void)
1032{
1033#ifdef HMVMX_USE_FUNCTION_TABLE
1034 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_apfnVMExitHandlers));
1035# ifdef VBOX_STRICT
1036 for (unsigned i = 0; i < RT_ELEMENTS(g_apfnVMExitHandlers); i++)
1037 Assert(g_apfnVMExitHandlers[i]);
1038# endif
1039#endif
1040 return VINF_SUCCESS;
1041}
1042
1043
1044/**
1045 * Does global VT-x termination (called during module termination).
1046 */
1047VMMR0DECL(void) VMXR0GlobalTerm()
1048{
1049 /* Nothing to do currently. */
1050}
1051
1052
1053/**
1054 * Sets up and activates VT-x on the current CPU.
1055 *
1056 * @returns VBox status code.
1057 * @param pCpu Pointer to the global CPU info struct.
1058 * @param pVM Pointer to the VM (can be NULL after a host resume
1059 * operation).
1060 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
1061 * fEnabledByHost is true).
1062 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
1063 * @a fEnabledByHost is true).
1064 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
1065 * enable VT-x on the host.
1066 * @param pvMsrs Opaque pointer to VMXMSRS struct.
1067 */
1068VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
1069 void *pvMsrs)
1070{
1071 Assert(pCpu);
1072 Assert(pvMsrs);
1073 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1074
1075 /* Enable VT-x if it's not already enabled by the host. */
1076 if (!fEnabledByHost)
1077 {
1078 int rc = hmR0VmxEnterRootMode(pVM, HCPhysCpuPage, pvCpuPage);
1079 if (RT_FAILURE(rc))
1080 return rc;
1081 }
1082
1083 /*
1084 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been using EPTPs) so
1085 * we don't retain any stale guest-physical mappings which won't get invalidated when flushing by VPID.
1086 */
1087 PVMXMSRS pMsrs = (PVMXMSRS)pvMsrs;
1088 if (pMsrs->u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
1089 {
1090 hmR0VmxFlushEpt(NULL /* pVCpu */, VMXFLUSHEPT_ALL_CONTEXTS);
1091 pCpu->fFlushAsidBeforeUse = false;
1092 }
1093 else
1094 pCpu->fFlushAsidBeforeUse = true;
1095
1096 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
1097 ++pCpu->cTlbFlushes;
1098
1099 return VINF_SUCCESS;
1100}
1101
1102
1103/**
1104 * Deactivates VT-x on the current CPU.
1105 *
1106 * @returns VBox status code.
1107 * @param pCpu Pointer to the global CPU info struct.
1108 * @param pvCpuPage Pointer to the VMXON region.
1109 * @param HCPhysCpuPage Physical address of the VMXON region.
1110 *
1111 * @remarks This function should never be called when SUPR0EnableVTx() or
1112 * similar was used to enable VT-x on the host.
1113 */
1114VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBALCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
1115{
1116 NOREF(pCpu);
1117 NOREF(pvCpuPage);
1118 NOREF(HCPhysCpuPage);
1119
1120 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1121 return hmR0VmxLeaveRootMode();
1122}
1123
1124
1125/**
1126 * Sets the permission bits for the specified MSR in the MSR bitmap.
1127 *
1128 * @param pVCpu Pointer to the VMCPU.
1129 * @param uMsr The MSR value.
1130 * @param enmRead Whether reading this MSR causes a VM-exit.
1131 * @param enmWrite Whether writing this MSR causes a VM-exit.
1132 */
1133static void hmR0VmxSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, VMXMSREXITREAD enmRead, VMXMSREXITWRITE enmWrite)
1134{
1135 int32_t iBit;
1136 uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
1137
1138 /*
1139 * Layout:
1140 * 0x000 - 0x3ff - Low MSR read bits
1141 * 0x400 - 0x7ff - High MSR read bits
1142 * 0x800 - 0xbff - Low MSR write bits
1143 * 0xc00 - 0xfff - High MSR write bits
1144 */
1145 if (uMsr <= 0x00001FFF)
1146 iBit = uMsr;
1147 else if ( uMsr >= 0xC0000000
1148 && uMsr <= 0xC0001FFF)
1149 {
1150 iBit = (uMsr - 0xC0000000);
1151 pbMsrBitmap += 0x400;
1152 }
1153 else
1154 AssertMsgFailedReturnVoid(("hmR0VmxSetMsrPermission: Invalid MSR %#RX32\n", uMsr));
1155
1156 Assert(iBit <= 0x1fff);
1157 if (enmRead == VMXMSREXIT_INTERCEPT_READ)
1158 ASMBitSet(pbMsrBitmap, iBit);
1159 else
1160 ASMBitClear(pbMsrBitmap, iBit);
1161
1162 if (enmWrite == VMXMSREXIT_INTERCEPT_WRITE)
1163 ASMBitSet(pbMsrBitmap + 0x800, iBit);
1164 else
1165 ASMBitClear(pbMsrBitmap + 0x800, iBit);
1166}
1167
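/*
 * Worked example of the layout above for a "high" MSR, using MSR_K6_EFER
 * (0xC0000080): its bit index is 0x80, the read bit lives in the 0x400-0x7ff
 * region and the matching write bit 0x800 bytes further up (0xc00-0xfff).
 * The sketch function name is made up.
 */
#if 0 /* Not built; sketch only. */
static void hmR0VmxMsrBitmapOffsetSketch(uint8_t *pbMsrBitmap)
{
    uint32_t const uMsr = 0xC0000080;                    /* MSR_K6_EFER. */
    int32_t  const iBit = uMsr - UINT32_C(0xC0000000);   /* Bit 0x80 within the high-MSR range. */
    uint8_t *pbHighRead = pbMsrBitmap + 0x400;           /* High MSR read bits: 0x400 - 0x7ff. */
    ASMBitSet(pbHighRead, iBit);                         /* Intercept reads of EFER. */
    ASMBitSet(pbHighRead + 0x800, iBit);                 /* Intercept writes of EFER (0xc00 - 0xfff). */
}
#endif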
1168
1169#ifdef VBOX_STRICT
1170/**
1171 * Gets the permission bits for the specified MSR in the MSR bitmap.
1172 *
1173 * @returns VBox status code.
1174 * @retval VINF_SUCCESS if the specified MSR is found.
1175 * @retval VERR_NOT_FOUND if the specified MSR is not found.
1176 * @retval VERR_NOT_SUPPORTED if VT-x doesn't allow the MSR.
1177 *
1178 * @param pVCpu Pointer to the VMCPU.
1179 * @param uMsr The MSR.
1180 * @param penmRead Where to store the read permissions.
1181 * @param penmWrite Where to store the write permissions.
1182 */
1183static int hmR0VmxGetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, PVMXMSREXITREAD penmRead, PVMXMSREXITWRITE penmWrite)
1184{
1185 AssertPtrReturn(penmRead, VERR_INVALID_PARAMETER);
1186 AssertPtrReturn(penmWrite, VERR_INVALID_PARAMETER);
1187 int32_t iBit;
1188 uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
1189
1190 /* See hmR0VmxSetMsrPermission() for the layout. */
1191 if (uMsr <= 0x00001FFF)
1192 iBit = uMsr;
1193 else if ( uMsr >= 0xC0000000
1194 && uMsr <= 0xC0001FFF)
1195 {
1196 iBit = (uMsr - 0xC0000000);
1197 pbMsrBitmap += 0x400;
1198 }
1199 else
1200 AssertMsgFailedReturn(("hmR0VmxGetMsrPermission: Invalid MSR %#RX32\n", uMsr), VERR_NOT_SUPPORTED);
1201
1202 Assert(iBit <= 0x1fff);
1203 if (ASMBitTest(pbMsrBitmap, iBit))
1204 *penmRead = VMXMSREXIT_INTERCEPT_READ;
1205 else
1206 *penmRead = VMXMSREXIT_PASSTHRU_READ;
1207
1208 if (ASMBitTest(pbMsrBitmap + 0x800, iBit))
1209 *penmWrite = VMXMSREXIT_INTERCEPT_WRITE;
1210 else
1211 *penmWrite = VMXMSREXIT_PASSTHRU_WRITE;
1212 return VINF_SUCCESS;
1213}
1214#endif /* VBOX_STRICT */
1215
1216
1217/**
1218 * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR
1219 * area.
1220 *
1221 * @returns VBox status code.
1222 * @param pVCpu Pointer to the VMCPU.
1223 * @param cMsrs The number of MSRs.
1224 */
1225DECLINLINE(int) hmR0VmxSetAutoLoadStoreMsrCount(PVMCPU pVCpu, uint32_t cMsrs)
1226{
1227 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
1228 uint32_t const cMaxSupportedMsrs = MSR_IA32_VMX_MISC_MAX_MSR(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.u64Misc);
1229 if (RT_UNLIKELY(cMsrs > cMaxSupportedMsrs))
1230 {
1231 LogRel(("CPU auto-load/store MSR count in VMCS exceeded cMsrs=%u Supported=%u.\n", cMsrs, cMaxSupportedMsrs));
1232 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
1233 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1234 }
1235
1236 /* Update number of guest MSRs to load/store across the world-switch. */
1237 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); AssertRCReturn(rc, rc);
1238 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); AssertRCReturn(rc, rc);
1239
1240 /* Update number of host MSRs to load after the world-switch. Identical to guest-MSR count as it's always paired. */
1241 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRCReturn(rc, rc);
1242
1243 /* Update the VCPU's copy of the MSR count. */
1244 pVCpu->hm.s.vmx.cMsrs = cMsrs;
1245
1246 return VINF_SUCCESS;
1247}
1248
1249
1250/**
1251 * Adds a new (or updates the value of an existing) guest/host MSR
1252 * pair to be swapped during the world-switch as part of the
1253 * auto-load/store MSR area in the VMCS.
1254 *
1255 * @returns VBox status code.
1256 * @param pVCpu Pointer to the VMCPU.
1257 * @param uMsr The MSR.
1258 * @param uGuestMsrValue Value of the guest MSR.
1259 * @param fUpdateHostMsr Whether to update the value of the host MSR if
1260 * necessary.
1261 * @param pfAddedAndUpdated Where to store whether the MSR was added -and-
1262 * its value was updated. Optional, can be NULL.
1263 */
1264static int hmR0VmxAddAutoLoadStoreMsr(PVMCPU pVCpu, uint32_t uMsr, uint64_t uGuestMsrValue, bool fUpdateHostMsr,
1265 bool *pfAddedAndUpdated)
1266{
1267 PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
1268 uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs;
1269 uint32_t i;
1270 for (i = 0; i < cMsrs; i++)
1271 {
1272 if (pGuestMsr->u32Msr == uMsr)
1273 break;
1274 pGuestMsr++;
1275 }
1276
1277 bool fAdded = false;
1278 if (i == cMsrs)
1279 {
1280 ++cMsrs;
1281 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, cMsrs);
1282 AssertMsgRCReturn(rc, ("hmR0VmxAddAutoLoadStoreMsr: Insufficient space to add MSR %u\n", uMsr), rc);
1283
1284 /* Now that we're swapping MSRs during the world-switch, allow the guest to read/write them without causing VM-exits. */
1285 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
1286 hmR0VmxSetMsrPermission(pVCpu, uMsr, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1287
1288 fAdded = true;
1289 }
1290
1291 /* Update the MSR values in the auto-load/store MSR area. */
1292 pGuestMsr->u32Msr = uMsr;
1293 pGuestMsr->u64Value = uGuestMsrValue;
1294
1295 /* Create/update the MSR slot in the host MSR area. */
1296 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
1297 pHostMsr += i;
1298 pHostMsr->u32Msr = uMsr;
1299
1300 /*
1301 * Update the host MSR only when requested by the caller AND when we're
1302 * adding it to the auto-load/store area. Otherwise, it would have been
1303 * updated by hmR0VmxSaveHostMsrs(). We do this for performance reasons.
1304 */
1305 bool fUpdatedMsrValue = false;
1306 if ( fAdded
1307 && fUpdateHostMsr)
1308 {
1309 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1310 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1311 pHostMsr->u64Value = ASMRdMsr(pHostMsr->u32Msr);
1312 fUpdatedMsrValue = true;
1313 }
1314
1315 if (pfAddedAndUpdated)
1316 *pfAddedAndUpdated = fUpdatedMsrValue;
1317 return VINF_SUCCESS;
1318}
1319
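/*
 * A minimal sketch of how a caller would add and later remove an MSR pair.
 * The sketch function name is made up and MSR_K8_TSC_AUX is merely an example
 * MSR; the real callers live elsewhere in this file.
 */
#if 0 /* Not built; sketch only. */
static int hmR0VmxAutoLoadStoreSketch(PVMCPU pVCpu, uint64_t uGuestTscAux)
{
    bool fAddedAndUpdated;
    int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_TSC_AUX, uGuestTscAux, true /* fUpdateHostMsr */, &fAddedAndUpdated);
    AssertRCReturn(rc, rc);
    /* ... the CPU now loads the guest value on VM-entry and the host value on VM-exit ... */
    return hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, MSR_K8_TSC_AUX);
}
#endif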
1320
1321/**
1322 * Removes a guest/host MSR pair to be swapped during the world-switch from the
1323 * auto-load/store MSR area in the VMCS.
1324 *
1325 * @returns VBox status code.
1326 * @param pVCpu Pointer to the VMCPU.
1327 * @param uMsr The MSR.
1328 */
1329static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPU pVCpu, uint32_t uMsr)
1330{
1331 PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
1332 uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs;
1333 for (uint32_t i = 0; i < cMsrs; i++)
1334 {
1335 /* Find the MSR. */
1336 if (pGuestMsr->u32Msr == uMsr)
1337 {
1338 /* If it's the last MSR, simply reduce the count. */
1339 if (i == cMsrs - 1)
1340 {
1341 --cMsrs;
1342 break;
1343 }
1344
1345 /* Remove it by swapping the last MSR in place of it, and reducing the count. */
1346 PVMXAUTOMSR pLastGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
1347 pLastGuestMsr += cMsrs - 1;
1348 pGuestMsr->u32Msr = pLastGuestMsr->u32Msr;
1349 pGuestMsr->u64Value = pLastGuestMsr->u64Value;
1350
1351 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
1352 PVMXAUTOMSR pLastHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
1353 pLastHostMsr += cMsrs - 1;
1354 pHostMsr->u32Msr = pLastHostMsr->u32Msr;
1355 pHostMsr->u64Value = pLastHostMsr->u64Value;
1356 --cMsrs;
1357 break;
1358 }
1359 pGuestMsr++;
1360 }
1361
1362 /* Update the VMCS if the count changed (meaning the MSR was found). */
1363 if (cMsrs != pVCpu->hm.s.vmx.cMsrs)
1364 {
1365 int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, cMsrs);
1366 AssertRCReturn(rc, rc);
1367
1368 /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */
1369 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
1370 hmR0VmxSetMsrPermission(pVCpu, uMsr, VMXMSREXIT_INTERCEPT_READ, VMXMSREXIT_INTERCEPT_WRITE);
1371
1372 Log4(("Removed MSR %#RX32 new cMsrs=%u\n", uMsr, pVCpu->hm.s.vmx.cMsrs));
1373 return VINF_SUCCESS;
1374 }
1375
1376 return VERR_NOT_FOUND;
1377}
1378
1379
1380/**
1381 * Checks if the specified guest MSR is part of the auto-load/store area in
1382 * the VMCS.
1383 *
1384 * @returns true if found, false otherwise.
1385 * @param pVCpu Pointer to the VMCPU.
1386 * @param uMsr The MSR to find.
1387 */
1388static bool hmR0VmxIsAutoLoadStoreGuestMsr(PVMCPU pVCpu, uint32_t uMsr)
1389{
1390 PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
1391 uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs;
1392
1393 for (uint32_t i = 0; i < cMsrs; i++, pGuestMsr++)
1394 {
1395 if (pGuestMsr->u32Msr == uMsr)
1396 return true;
1397 }
1398 return false;
1399}
1400
1401
1402/**
1403 * Updates the value of all host MSRs in the auto-load/store area in the VMCS.
1404 *
1405 * @param pVCpu Pointer to the VMCPU.
1406 *
1407 * @remarks No-long-jump zone!!!
1408 */
1409static void hmR0VmxUpdateAutoLoadStoreHostMsrs(PVMCPU pVCpu)
1410{
1411 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1412 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
1413 PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
1414 uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs;
1415
1416 for (uint32_t i = 0; i < cMsrs; i++, pHostMsr++, pGuestMsr++)
1417 {
1418 AssertReturnVoid(pHostMsr->u32Msr == pGuestMsr->u32Msr);
1419
1420 /*
1421 * Performance hack for the host EFER MSR. We use the cached value rather than re-read it.
1422 * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}.
1423 */
1424 if (pHostMsr->u32Msr == MSR_K6_EFER)
1425 pHostMsr->u64Value = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.u64HostEfer;
1426 else
1427 pHostMsr->u64Value = ASMRdMsr(pHostMsr->u32Msr);
1428 }
1429
1430 pVCpu->hm.s.vmx.fUpdatedHostMsrs = true;
1431}
1432
1433
1434#if HC_ARCH_BITS == 64
1435/**
1436 * Saves a set of host MSRs to allow read/write passthru access to the guest and
1437 * perform lazy restoration of the host MSRs while leaving VT-x.
1438 *
1439 * @param pVCpu Pointer to the VMCPU.
1440 *
1441 * @remarks No-long-jump zone!!!
1442 */
1443static void hmR0VmxLazySaveHostMsrs(PVMCPU pVCpu)
1444{
1445 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1446
1447 /*
1448 * Note: If you're adding MSRs here, make sure to update the MSR-bitmap permissions in hmR0VmxSetupProcCtls().
1449 */
1450 if (!(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
1451 {
1452 pVCpu->hm.s.vmx.u64HostLStarMsr = ASMRdMsr(MSR_K8_LSTAR);
1453 pVCpu->hm.s.vmx.u64HostStarMsr = ASMRdMsr(MSR_K6_STAR);
1454 pVCpu->hm.s.vmx.u64HostSFMaskMsr = ASMRdMsr(MSR_K8_SF_MASK);
1455 pVCpu->hm.s.vmx.u64HostKernelGSBaseMsr = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1456 pVCpu->hm.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST;
1457 }
1458}
1459
1460
1461/**
1462 * Checks whether the MSR belongs to the set of guest MSRs that we restore
1463 * lazily while leaving VT-x.
1464 *
1465 * @returns true if it does, false otherwise.
1466 * @param pVCpu Pointer to the VMCPU.
1467 * @param uMsr The MSR to check.
1468 */
1469static bool hmR0VmxIsLazyGuestMsr(PVMCPU pVCpu, uint32_t uMsr)
1470{
1471 NOREF(pVCpu);
1472 switch (uMsr)
1473 {
1474 case MSR_K8_LSTAR:
1475 case MSR_K6_STAR:
1476 case MSR_K8_SF_MASK:
1477 case MSR_K8_KERNEL_GS_BASE:
1478 return true;
1479 }
1480 return false;
1481}
1482
1483
1484/**
1485 * Saves a set of guest MSRs back into the guest-CPU context.
1486 *
1487 * @param pVCpu Pointer to the VMCPU.
1488 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
1489 * out-of-sync. Make sure to update the required fields
1490 * before using them.
1491 *
1492 * @remarks No-long-jump zone!!!
1493 */
1494static void hmR0VmxLazySaveGuestMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
1495{
1496 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1497 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1498
1499 if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
1500 {
1501 Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
1502 pMixedCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
1503 pMixedCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR);
1504 pMixedCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK);
1505 pMixedCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE);
1506 }
1507}
1508
1509
1510/**
1511 * Loads a set of guest MSRs to allow read/write passthru to the guest.
1512 *
1513 * The name of this function is slightly confusing. This function does NOT
1514 * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a
1515 * common prefix for functions dealing with "lazy restoration" of the shared
1516 * MSRs.
1517 *
1518 * @param pVCpu Pointer to the VMCPU.
1519 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
1520 * out-of-sync. Make sure to update the required fields
1521 * before using them.
1522 *
1523 * @remarks No-long-jump zone!!!
1524 */
1525static void hmR0VmxLazyLoadGuestMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
1526{
1527 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1528 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1529
1530#define VMXLOCAL_LAZY_LOAD_GUEST_MSR(uMsr, a_GuestMsr, a_HostMsr) \
1531 do { \
1532 if (pMixedCtx->msr##a_GuestMsr != pVCpu->hm.s.vmx.u64Host##a_HostMsr##Msr) \
1533 ASMWrMsr(uMsr, pMixedCtx->msr##a_GuestMsr); \
1534 else \
1535 Assert(ASMRdMsr(uMsr) == pVCpu->hm.s.vmx.u64Host##a_HostMsr##Msr); \
1536 } while (0)
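    /*
     * For illustration, VMXLOCAL_LAZY_LOAD_GUEST_MSR(MSR_K8_LSTAR, LSTAR, LStar) expands to:
     *     if (pMixedCtx->msrLSTAR != pVCpu->hm.s.vmx.u64HostLStarMsr)
     *         ASMWrMsr(MSR_K8_LSTAR, pMixedCtx->msrLSTAR);
     *     else
     *         Assert(ASMRdMsr(MSR_K8_LSTAR) == pVCpu->hm.s.vmx.u64HostLStarMsr);
     * i.e. the WRMSR is skipped when the guest value matches the cached host value.
     */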
1537
1538 Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
1539 if (!(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST))
1540 {
1541 VMXLOCAL_LAZY_LOAD_GUEST_MSR(MSR_K8_LSTAR, LSTAR, LStar);
1542 VMXLOCAL_LAZY_LOAD_GUEST_MSR(MSR_K6_STAR, STAR, Star);
1543 VMXLOCAL_LAZY_LOAD_GUEST_MSR(MSR_K8_SF_MASK, SFMASK, SFMask);
1544 VMXLOCAL_LAZY_LOAD_GUEST_MSR(MSR_K8_KERNEL_GS_BASE, KERNELGSBASE, KernelGSBase);
1545 pVCpu->hm.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST;
1546 }
1547 else
1548 {
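        /* The guest values are already active on the CPU; the context values may have changed since
           they were loaded, so write them unconditionally. */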
1549 ASMWrMsr(MSR_K8_LSTAR, pMixedCtx->msrLSTAR);
1550 ASMWrMsr(MSR_K6_STAR, pMixedCtx->msrSTAR);
1551 ASMWrMsr(MSR_K8_SF_MASK, pMixedCtx->msrSFMASK);
1552 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pMixedCtx->msrKERNELGSBASE);
1553 }
1554
1555#undef VMXLOCAL_LAZY_LOAD_GUEST_MSR
1556}
1557
1558
1559/**
1560 * Performs lazy restoration of the set of host MSRs if they were previously
1561 * loaded with guest MSR values.
1562 *
1563 * @param pVCpu Pointer to the VMCPU.
1564 *
1565 * @remarks No-long-jump zone!!!
1566 * @remarks The guest MSRs should have been saved back into the guest-CPU
1567 * context by hmR0VmxSaveGuestLazyMsrs()!!!
1568 */
1569static void hmR0VmxLazyRestoreHostMsrs(PVMCPU pVCpu)
1570{
1571 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1572 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1573
1574 if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)
1575 {
1576 Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST);
1577 ASMWrMsr(MSR_K8_LSTAR, pVCpu->hm.s.vmx.u64HostLStarMsr);
1578 ASMWrMsr(MSR_K6_STAR, pVCpu->hm.s.vmx.u64HostStarMsr);
1579 ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hm.s.vmx.u64HostSFMaskMsr);
1580 ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hm.s.vmx.u64HostKernelGSBaseMsr);
1581 }
1582 pVCpu->hm.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST);
1583}
1584#endif /* HC_ARCH_BITS == 64 */
1585
1586
1587/**
1588 * Verifies that our cached values of the VMCS controls are all
1589 * consistent with what's actually present in the VMCS.
1590 *
1591 * @returns VBox status code.
1592 * @param pVCpu Pointer to the VMCPU.
1593 */
1594static int hmR0VmxCheckVmcsCtls(PVMCPU pVCpu)
1595{
1596 uint32_t u32Val;
1597 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
1598 AssertRCReturn(rc, rc);
1599 AssertMsgReturn(pVCpu->hm.s.vmx.u32EntryCtls == u32Val, ("Cache=%#RX32 VMCS=%#RX32", pVCpu->hm.s.vmx.u32EntryCtls, u32Val),
1600 VERR_VMX_ENTRY_CTLS_CACHE_INVALID);
1601
1602 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val);
1603 AssertRCReturn(rc, rc);
1604 AssertMsgReturn(pVCpu->hm.s.vmx.u32ExitCtls == u32Val, ("Cache=%#RX32 VMCS=%#RX32", pVCpu->hm.s.vmx.u32ExitCtls, u32Val),
1605 VERR_VMX_EXIT_CTLS_CACHE_INVALID);
1606
1607 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val);
1608 AssertRCReturn(rc, rc);
1609 AssertMsgReturn(pVCpu->hm.s.vmx.u32PinCtls == u32Val, ("Cache=%#RX32 VMCS=%#RX32", pVCpu->hm.s.vmx.u32PinCtls, u32Val),
1610 VERR_VMX_PIN_EXEC_CTLS_CACHE_INVALID);
1611
1612 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val);
1613 AssertRCReturn(rc, rc);
1614 AssertMsgReturn(pVCpu->hm.s.vmx.u32ProcCtls == u32Val, ("Cache=%#RX32 VMCS=%#RX32", pVCpu->hm.s.vmx.u32ProcCtls, u32Val),
1615 VERR_VMX_PROC_EXEC_CTLS_CACHE_INVALID);
1616
1617 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
1618 {
1619 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val);
1620 AssertRCReturn(rc, rc);
1621 AssertMsgReturn(pVCpu->hm.s.vmx.u32ProcCtls2 == u32Val,
1622 ("Cache=%#RX32 VMCS=%#RX32", pVCpu->hm.s.vmx.u32ProcCtls2, u32Val),
1623 VERR_VMX_PROC_EXEC2_CTLS_CACHE_INVALID);
1624 }
1625
1626 return VINF_SUCCESS;
1627}
1628
1629
1630#ifdef VBOX_STRICT
1631/**
1632 * Verifies that our cached host EFER value has not changed
1633 * since we cached it.
1634 *
1635 * @param pVCpu Pointer to the VMCPU.
1636 */
1637static void hmR0VmxCheckHostEferMsr(PVMCPU pVCpu)
1638{
1639 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1640
1641 if (pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_LOAD_HOST_EFER_MSR)
1642 {
1643 uint64_t u64Val;
1644 int rc = VMXReadVmcs64(VMX_VMCS64_HOST_FIELD_EFER_FULL, &u64Val);
1645 AssertRC(rc);
1646
1647 uint64_t u64HostEferMsr = ASMRdMsr(MSR_K6_EFER);
1648 AssertMsgReturnVoid(u64HostEferMsr == u64Val, ("u64HostEferMsr=%#RX64 u64Val=%#RX64\n", u64HostEferMsr, u64Val));
1649 }
1650}
1651
1652
1653/**
1654 * Verifies whether the guest/host MSR pairs in the auto-load/store area in the
1655 * VMCS are correct.
1656 *
1657 * @param pVCpu Pointer to the VMCPU.
1658 */
1659static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPU pVCpu)
1660{
1661 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1662
1663    /* Verify MSR counts in the VMCS are what we think they should be. */
1664 uint32_t cMsrs;
1665 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cMsrs); AssertRC(rc);
1666 Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs);
1667
1668 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cMsrs); AssertRC(rc);
1669 Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs);
1670
1671 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cMsrs); AssertRC(rc);
1672 Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs);
1673
1674 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
1675 PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
1676 for (uint32_t i = 0; i < cMsrs; i++, pHostMsr++, pGuestMsr++)
1677 {
1678 /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */
1679 AssertMsgReturnVoid(pHostMsr->u32Msr == pGuestMsr->u32Msr, ("HostMsr=%#RX32 GuestMsr=%#RX32 cMsrs=%u\n", pHostMsr->u32Msr,
1680 pGuestMsr->u32Msr, cMsrs));
1681
1682 uint64_t u64Msr = ASMRdMsr(pHostMsr->u32Msr);
1683 AssertMsgReturnVoid(pHostMsr->u64Value == u64Msr, ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n",
1684 pHostMsr->u32Msr, pHostMsr->u64Value, u64Msr, cMsrs));
1685
1686 /* Verify that the permissions are as expected in the MSR bitmap. */
1687 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
1688 {
1689 VMXMSREXITREAD enmRead;
1690 VMXMSREXITWRITE enmWrite;
1691 rc = hmR0VmxGetMsrPermission(pVCpu, pGuestMsr->u32Msr, &enmRead, &enmWrite);
1692            AssertMsgReturnVoid(rc == VINF_SUCCESS, ("hmR0VmxGetMsrPermission failed. rc=%Rrc\n", rc));
1693 if (pGuestMsr->u32Msr == MSR_K6_EFER)
1694 {
1695 AssertMsgReturnVoid(enmRead == VMXMSREXIT_INTERCEPT_READ, ("Passthru read for EFER!?\n"));
1696 AssertMsgReturnVoid(enmWrite == VMXMSREXIT_INTERCEPT_WRITE, ("Passthru write for EFER!?\n"));
1697 }
1698 else
1699 {
1700 AssertMsgReturnVoid(enmRead == VMXMSREXIT_PASSTHRU_READ, ("u32Msr=%#RX32 cMsrs=%u No passthru read!\n",
1701 pGuestMsr->u32Msr, cMsrs));
1702 AssertMsgReturnVoid(enmWrite == VMXMSREXIT_PASSTHRU_WRITE, ("u32Msr=%#RX32 cMsrs=%u No passthru write!\n",
1703 pGuestMsr->u32Msr, cMsrs));
1704 }
1705 }
1706 }
1707}
1708#endif /* VBOX_STRICT */
1709
1710
1711/**
1712 * Flushes the TLB using EPT.
1713 *
1714 * @returns VBox status code.
1715 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
1716 * enmFlush).
1717 * @param enmFlush Type of flush.
1718 *
1719 * @remarks Caller is responsible for making sure this function is called only
1720 * when NestedPaging is supported and providing @a enmFlush that is
1721 * supported by the CPU.
1722 * @remarks Can be called with interrupts disabled.
1723 */
1724static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMXFLUSHEPT enmFlush)
1725{
1726 uint64_t au64Descriptor[2];
1727 if (enmFlush == VMXFLUSHEPT_ALL_CONTEXTS)
1728 au64Descriptor[0] = 0;
1729 else
1730 {
1731 Assert(pVCpu);
1732 au64Descriptor[0] = pVCpu->hm.s.vmx.HCPhysEPTP;
1733 }
1734 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1735
1736 int rc = VMXR0InvEPT(enmFlush, &au64Descriptor[0]);
1737 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %RGv failed with %Rrc\n", enmFlush, pVCpu ? pVCpu->hm.s.vmx.HCPhysEPTP : 0,
1738 rc));
1739 if ( RT_SUCCESS(rc)
1740 && pVCpu)
1741 {
1742 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1743 }
1744}
1745
1746
1747/**
1748 * Flushes the TLB using VPID.
1749 *
1750 * @returns VBox status code.
1751 * @param pVM Pointer to the VM.
1752 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
1753 * enmFlush).
1754 * @param enmFlush Type of flush.
1755 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1756 * on @a enmFlush).
1757 *
1758 * @remarks Can be called with interrupts disabled.
1759 */
1760static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMXFLUSHVPID enmFlush, RTGCPTR GCPtr)
1761{
1762 NOREF(pVM);
1763 AssertPtr(pVM);
1764 Assert(pVM->hm.s.vmx.fVpid);
1765
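    /* INVVPID descriptor layout: bits 15:0 hold the VPID, bits 63:16 are reserved (MBZ) and
       bits 127:64 hold the linear address (used only for individual-address flushes). */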
1766 uint64_t au64Descriptor[2];
1767 if (enmFlush == VMXFLUSHVPID_ALL_CONTEXTS)
1768 {
1769 au64Descriptor[0] = 0;
1770 au64Descriptor[1] = 0;
1771 }
1772 else
1773 {
1774 AssertPtr(pVCpu);
1775 AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
1776 AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
1777 au64Descriptor[0] = pVCpu->hm.s.uCurrentAsid;
1778 au64Descriptor[1] = GCPtr;
1779 }
1780
1781 int rc = VMXR0InvVPID(enmFlush, &au64Descriptor[0]); NOREF(rc);
1782 AssertMsg(rc == VINF_SUCCESS,
1783 ("VMXR0InvVPID %#x %u %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
1784 if ( RT_SUCCESS(rc)
1785 && pVCpu)
1786 {
1787 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1788 }
1789}
1790
1791
1792/**
1793 * Invalidates a guest page by guest virtual address. Only relevant for
1794 * EPT/VPID, otherwise there is nothing really to invalidate.
1795 *
1796 * @returns VBox status code.
1797 * @param pVM Pointer to the VM.
1798 * @param pVCpu Pointer to the VMCPU.
1799 * @param GCVirt Guest virtual address of the page to invalidate.
1800 */
1801VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
1802{
1803 AssertPtr(pVM);
1804 AssertPtr(pVCpu);
1805 LogFlowFunc(("pVM=%p pVCpu=%p GCVirt=%RGv\n", pVM, pVCpu, GCVirt));
1806
1807 bool fFlushPending = VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_FLUSH);
1808 if (!fFlushPending)
1809 {
1810 /*
1811         * We must invalidate the guest TLB entry in either case; we cannot ignore it even for the EPT case.
1812 * See @bugref{6043} and @bugref{6177}.
1813 *
1814 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*() as this
1815         * function may be called in a loop with individual addresses.
1816 */
1817 if (pVM->hm.s.vmx.fVpid)
1818 {
1819 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1820 {
1821 hmR0VmxFlushVpid(pVM, pVCpu, VMXFLUSHVPID_INDIV_ADDR, GCVirt);
1822 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1823 }
1824 else
1825 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1826 }
1827 else if (pVM->hm.s.fNestedPaging)
1828 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1829 }
1830
1831 return VINF_SUCCESS;
1832}
1833
1834
1835/**
1836 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
1837 * otherwise there is nothing really to invalidate.
1838 *
1839 * @returns VBox status code.
1840 * @param pVM Pointer to the VM.
1841 * @param pVCpu Pointer to the VMCPU.
1842 * @param GCPhys Guest physical address of the page to invalidate.
1843 */
1844VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
1845{
1846 NOREF(pVM); NOREF(GCPhys);
1847 LogFlowFunc(("%RGp\n", GCPhys));
1848
1849 /*
1850     * We cannot flush a page by guest-physical address: INVVPID takes only a linear address, while INVEPT flushes
1851     * only by EPT context, not by individual addresses. We set the force flag here and flush before the next VM-entry
1852     * in hmR0VmxFlushTLB*() as this function might be called in a loop; with EPT in use this causes a flush-by-EPT. See @bugref{6568}.
1853 */
1854 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1855 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgPhys);
1856 return VINF_SUCCESS;
1857}
1858
1859
1860/**
1861 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1862 * case where neither EPT nor VPID is supported by the CPU.
1863 *
1864 * @param pVM Pointer to the VM.
1865 * @param pVCpu Pointer to the VMCPU.
1866 * @param pCpu Pointer to the global HM struct.
1867 *
1868 * @remarks Called with interrupts disabled.
1869 */
1870static void hmR0VmxFlushTaggedTlbNone(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1871{
1872 AssertPtr(pVCpu);
1873 AssertPtr(pCpu);
1874 NOREF(pVM);
1875
1876 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1877
1878 Assert(pCpu->idCpu != NIL_RTCPUID);
1879 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1880 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1881 pVCpu->hm.s.fForceTLBFlush = false;
1882 return;
1883}
1884
1885
1886/**
1887 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1888 *
1889 * @param pVM Pointer to the VM.
1890 * @param pVCpu Pointer to the VMCPU.
1891 * @param pCpu Pointer to the global HM CPU struct.
1892 * @remarks All references to "ASID" in this function pertain to "VPID" in
1893 *          Intel's nomenclature. The reason is to avoid confusion in compare
1894 *          statements, since the host-CPU copies are named "ASID".
1895 *
1896 * @remarks Called with interrupts disabled.
1897 */
1898static void hmR0VmxFlushTaggedTlbBoth(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1899{
1900#ifdef VBOX_WITH_STATISTICS
1901 bool fTlbFlushed = false;
1902# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1903# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1904 if (!fTlbFlushed) \
1905 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1906 } while (0)
1907#else
1908# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1909# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1910#endif
1911
1912 AssertPtr(pVM);
1913 AssertPtr(pCpu);
1914 AssertPtr(pVCpu);
1915 Assert(pCpu->idCpu != NIL_RTCPUID);
1916
1917 AssertMsg(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid,
1918 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1919 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hm.s.fNestedPaging, pVM->hm.s.vmx.fVpid));
1920
1921 /*
1922 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1923 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
1924 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
1925 */
1926 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
1927 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1928 {
1929 ++pCpu->uCurrentAsid;
1930 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
1931 {
1932 pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1933 pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1934 pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1935 }
1936
1937 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
1938 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1939 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1940
1941 /*
1942 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1943 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1944 */
1945 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1946 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1947 HMVMX_SET_TAGGED_TLB_FLUSHED();
1948 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); /* Already flushed-by-EPT, skip doing it again below. */
1949 }
1950
1951 /* Check for explicit TLB flushes. */
1952 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1953 {
1954 /*
1955         * Changes to the EPT paging structures by the VMM require flushing by EPT as the CPU creates
1956 * guest-physical (only EPT-tagged) mappings while traversing the EPT tables when EPT is in use.
1957 * Flushing by VPID will only flush linear (only VPID-tagged) and combined (EPT+VPID tagged) mappings
1958 * but not guest-physical mappings.
1959 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information". See @bugref{6568}.
1960 */
1961 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1962 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1963 HMVMX_SET_TAGGED_TLB_FLUSHED();
1964 }
1965
1966 pVCpu->hm.s.fForceTLBFlush = false;
1967 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1968
1969 Assert(pVCpu->hm.s.idLastCpu == pCpu->idCpu);
1970 Assert(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes);
1971 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
1972 ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
1973 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
1974 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pCpu->idCpu,
1975 pCpu->uCurrentAsid, pCpu->cTlbFlushes, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.cTlbFlushes));
1976 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
1977 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
1978
1979 /* Update VMCS with the VPID. */
1980 int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
1981 AssertRC(rc);
1982
1983#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1984}
1985
1986
1987/**
1988 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1989 *
1990 * @returns VBox status code.
1991 * @param pVM Pointer to the VM.
1992 * @param pVCpu Pointer to the VMCPU.
1993 * @param pCpu Pointer to the global HM CPU struct.
1994 *
1995 * @remarks Called with interrupts disabled.
1996 */
1997static void hmR0VmxFlushTaggedTlbEpt(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1998{
1999 AssertPtr(pVM);
2000 AssertPtr(pVCpu);
2001 AssertPtr(pCpu);
2002 Assert(pCpu->idCpu != NIL_RTCPUID);
2003 AssertMsg(pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with NestedPaging disabled."));
2004 AssertMsg(!pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID enabled."));
2005
2006 /*
2007 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
2008 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
2009 */
2010 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2011 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2012 {
2013 pVCpu->hm.s.fForceTLBFlush = true;
2014 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2015 }
2016
2017 /* Check for explicit TLB flushes. */
2018 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2019 {
2020 pVCpu->hm.s.fForceTLBFlush = true;
2021 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
2022 }
2023
2024 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2025 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2026
2027 if (pVCpu->hm.s.fForceTLBFlush)
2028 {
2029 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2030 pVCpu->hm.s.fForceTLBFlush = false;
2031 }
2032}
2033
2034
2035/**
2036 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
2037 *
2038 * @returns VBox status code.
2039 * @param pVM Pointer to the VM.
2040 * @param pVCpu Pointer to the VMCPU.
2041 * @param pCpu Pointer to the global HM CPU struct.
2042 *
2043 * @remarks Called with interrupts disabled.
2044 */
2045static void hmR0VmxFlushTaggedTlbVpid(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
2046{
2047 AssertPtr(pVM);
2048 AssertPtr(pVCpu);
2049 AssertPtr(pCpu);
2050 Assert(pCpu->idCpu != NIL_RTCPUID);
2051    AssertMsg(pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbVpid cannot be invoked with VPID disabled."));
2052    AssertMsg(!pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbVpid cannot be invoked with NestedPaging enabled."));
2053
2054 /*
2055 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2056 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
2057 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2058 */
2059 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2060 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2061 {
2062 pVCpu->hm.s.fForceTLBFlush = true;
2063 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2064 }
2065
2066 /* Check for explicit TLB flushes. */
2067 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2068 {
2069 /*
2070 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see hmR0VmxSetupTaggedTlb())
2071 * we would need to explicitly flush in this case (add an fExplicitFlush = true here and change the
2072 * pCpu->fFlushAsidBeforeUse check below to include fExplicitFlush's too) - an obscure corner case.
2073 */
2074 pVCpu->hm.s.fForceTLBFlush = true;
2075 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
2076 }
2077
2078 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2079 if (pVCpu->hm.s.fForceTLBFlush)
2080 {
2081 ++pCpu->uCurrentAsid;
2082 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2083 {
2084 pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
2085 pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
2086 pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
2087 }
2088
2089 pVCpu->hm.s.fForceTLBFlush = false;
2090 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2091 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2092 if (pCpu->fFlushAsidBeforeUse)
2093 {
2094 if (pVM->hm.s.vmx.enmFlushVpid == VMXFLUSHVPID_SINGLE_CONTEXT)
2095 hmR0VmxFlushVpid(pVM, pVCpu, VMXFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2096 else if (pVM->hm.s.vmx.enmFlushVpid == VMXFLUSHVPID_ALL_CONTEXTS)
2097 {
2098 hmR0VmxFlushVpid(pVM, pVCpu, VMXFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */);
2099 pCpu->fFlushAsidBeforeUse = false;
2100 }
2101 else
2102 {
2103 /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */
2104 AssertMsgFailed(("Unsupported VPID-flush context type.\n"));
2105 }
2106 }
2107 }
2108
2109 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2110 ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2111 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2112 ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pCpu->idCpu,
2113 pCpu->uCurrentAsid, pCpu->cTlbFlushes, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.cTlbFlushes));
2114 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2115 ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2116
2117 int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2118 AssertRC(rc);
2119}
2120
2121
2122/**
2123 * Flushes the guest TLB entry based on CPU capabilities.
2124 *
2125 * @param pVCpu Pointer to the VMCPU.
2126 * @param pCpu Pointer to the global HM CPU struct.
2127 */
2128DECLINLINE(void) hmR0VmxFlushTaggedTlb(PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
2129{
2130#ifdef HMVMX_ALWAYS_FLUSH_TLB
2131 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2132#endif
2133 PVM pVM = pVCpu->CTX_SUFF(pVM);
2134 switch (pVM->hm.s.vmx.uFlushTaggedTlb)
2135 {
2136 case HMVMX_FLUSH_TAGGED_TLB_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pVM, pVCpu, pCpu); break;
2137 case HMVMX_FLUSH_TAGGED_TLB_EPT: hmR0VmxFlushTaggedTlbEpt(pVM, pVCpu, pCpu); break;
2138 case HMVMX_FLUSH_TAGGED_TLB_VPID: hmR0VmxFlushTaggedTlbVpid(pVM, pVCpu, pCpu); break;
2139 case HMVMX_FLUSH_TAGGED_TLB_NONE: hmR0VmxFlushTaggedTlbNone(pVM, pVCpu, pCpu); break;
2140 default:
2141 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
2142 break;
2143 }
2144
2145 /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */
2146}
2147
2148
2149/**
2150 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
2151 * TLB entries from the host TLB before VM-entry.
2152 *
2153 * @returns VBox status code.
2154 * @param pVM Pointer to the VM.
2155 */
2156static int hmR0VmxSetupTaggedTlb(PVM pVM)
2157{
2158 /*
2159 * Determine optimal flush type for Nested Paging.
2160     * We cannot ignore EPT if no suitable flush-type is supported by the CPU, as we've already set up unrestricted
2161 * guest execution (see hmR3InitFinalizeR0()).
2162 */
2163 if (pVM->hm.s.fNestedPaging)
2164 {
2165 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
2166 {
2167 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
2168 pVM->hm.s.vmx.enmFlushEpt = VMXFLUSHEPT_SINGLE_CONTEXT;
2169 else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
2170 pVM->hm.s.vmx.enmFlushEpt = VMXFLUSHEPT_ALL_CONTEXTS;
2171 else
2172 {
2173                /* Shouldn't happen. EPT is supported but no suitable flush type is supported. */
2174 pVM->hm.s.vmx.enmFlushEpt = VMXFLUSHEPT_NOT_SUPPORTED;
2175 pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED;
2176 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2177 }
2178
2179 /* Make sure the write-back cacheable memory type for EPT is supported. */
2180 if (RT_UNLIKELY(!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB)))
2181 {
2182 pVM->hm.s.vmx.enmFlushEpt = VMXFLUSHEPT_NOT_SUPPORTED;
2183 pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB;
2184 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2185 }
2186
2187 /* EPT requires a page-walk length of 4. */
2188 if (RT_UNLIKELY(!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4)))
2189 {
2190 pVM->hm.s.vmx.enmFlushEpt = VMXFLUSHEPT_NOT_SUPPORTED;
2191 pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED;
2192 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2193 }
2194 }
2195 else
2196 {
2197 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
2198 pVM->hm.s.vmx.enmFlushEpt = VMXFLUSHEPT_NOT_SUPPORTED;
2199 pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE;
2200 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2201 }
2202 }
2203
2204 /*
2205 * Determine optimal flush type for VPID.
2206 */
2207 if (pVM->hm.s.vmx.fVpid)
2208 {
2209 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
2210 {
2211 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2212 pVM->hm.s.vmx.enmFlushVpid = VMXFLUSHVPID_SINGLE_CONTEXT;
2213 else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
2214 pVM->hm.s.vmx.enmFlushVpid = VMXFLUSHVPID_ALL_CONTEXTS;
2215 else
2216 {
2217                /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore VPID capability. */
2218 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2219 LogRel(("hmR0VmxSetupTaggedTlb: Only INDIV_ADDR supported. Ignoring VPID.\n"));
2220 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
2221 LogRel(("hmR0VmxSetupTaggedTlb: Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
2222 pVM->hm.s.vmx.enmFlushVpid = VMXFLUSHVPID_NOT_SUPPORTED;
2223 pVM->hm.s.vmx.fVpid = false;
2224 }
2225 }
2226 else
2227 {
2228 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
2229            Log4(("hmR0VmxSetupTaggedTlb: VPID supported without INVVPID support. Ignoring VPID.\n"));
2230 pVM->hm.s.vmx.enmFlushVpid = VMXFLUSHVPID_NOT_SUPPORTED;
2231 pVM->hm.s.vmx.fVpid = false;
2232 }
2233 }
2234
2235 /*
2236 * Setup the handler for flushing tagged-TLBs.
2237 */
2238 if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
2239 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT_VPID;
2240 else if (pVM->hm.s.fNestedPaging)
2241 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT;
2242 else if (pVM->hm.s.vmx.fVpid)
2243 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_VPID;
2244 else
2245 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_NONE;
2246 return VINF_SUCCESS;
2247}
2248
2249
2250/**
2251 * Sets up pin-based VM-execution controls in the VMCS.
2252 *
2253 * @returns VBox status code.
2254 * @param pVM Pointer to the VM.
2255 * @param pVCpu Pointer to the VMCPU.
2256 */
2257static int hmR0VmxSetupPinCtls(PVM pVM, PVMCPU pVCpu)
2258{
2259 AssertPtr(pVM);
2260 AssertPtr(pVCpu);
2261
2262 uint32_t val = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0; /* Bits set here must always be set. */
2263 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
2264
2265 val |= VMX_VMCS_CTRL_PIN_EXEC_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
2266 | VMX_VMCS_CTRL_PIN_EXEC_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
2267
2268 if (pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1 & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI)
2269 val |= VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */
2270
2271 /* Enable the VMX preemption timer. */
2272 if (pVM->hm.s.vmx.fUsePreemptTimer)
2273 {
2274 Assert(pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1 & VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER);
2275 val |= VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER;
2276 }
2277
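    /* Sanity check: 'val' was seeded with the must-be-one bits (disallowed0) and 'zap' holds the
       may-be-one bits (allowed1); if masking 'val' with 'zap' changes it, we tried to set a control
       the CPU does not allow. */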
2278 if ((val & zap) != val)
2279 {
2280 LogRel(("hmR0VmxSetupPinCtls: Invalid pin-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n",
2281 pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0, val, zap));
2282 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
2283 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2284 }
2285
2286 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, val);
2287 AssertRCReturn(rc, rc);
2288
2289 pVCpu->hm.s.vmx.u32PinCtls = val;
2290 return rc;
2291}
2292
2293
2294/**
2295 * Sets up processor-based VM-execution controls in the VMCS.
2296 *
2297 * @returns VBox status code.
2298 * @param pVM Pointer to the VM.
2299 * @param pVCpu       Pointer to the VMCPU.
2300 */
2301static int hmR0VmxSetupProcCtls(PVM pVM, PVMCPU pVCpu)
2302{
2303 AssertPtr(pVM);
2304 AssertPtr(pVCpu);
2305
2306 int rc = VERR_INTERNAL_ERROR_5;
2307 uint32_t val = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0; /* Bits set here must be set in the VMCS. */
2308 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2309
2310 val |= VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT /* HLT causes a VM-exit. */
2311 | VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
2312 | VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
2313 | VMX_VMCS_CTRL_PROC_EXEC_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
2314 | VMX_VMCS_CTRL_PROC_EXEC_RDPMC_EXIT /* RDPMC causes a VM-exit. */
2315 | VMX_VMCS_CTRL_PROC_EXEC_MONITOR_EXIT /* MONITOR causes a VM-exit. */
2316 | VMX_VMCS_CTRL_PROC_EXEC_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
2317
2318    /* We toggle VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT later; make sure it is neither unavailable nor always forced to 1. */
2319 if ( !(pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT)
2320 || (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT))
2321 {
2322 LogRel(("hmR0VmxSetupProcCtls: Unsupported VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT combo!"));
2323 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
2324 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2325 }
2326
2327 /* Without Nested Paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
2328 if (!pVM->hm.s.fNestedPaging)
2329 {
2330 Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); /* Paranoia. */
2331 val |= VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT
2332 | VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
2333 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
2334 }
2335
2336 /* Use TPR shadowing if supported by the CPU. */
2337 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
2338 {
2339 Assert(pVCpu->hm.s.vmx.HCPhysVirtApic);
2340 Assert(!(pVCpu->hm.s.vmx.HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
2341 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0);
2342 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic);
2343 AssertRCReturn(rc, rc);
2344
2345 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
2346 /* CR8 writes cause a VM-exit based on TPR threshold. */
2347 Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT));
2348 Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT));
2349 }
2350 else
2351 {
2352 /*
2353 * Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is invalid on 32-bit Intel CPUs.
2354 * Set this control only for 64-bit guests.
2355 */
2356 if (pVM->hm.s.fAllow64BitGuests)
2357 {
2358 val |= VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
2359 | VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
2360 }
2361 }
2362
2363 /* Use MSR-bitmaps if supported by the CPU. */
2364 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
2365 {
2366 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS;
2367
2368 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
2369 Assert(!(pVCpu->hm.s.vmx.HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
2370 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap);
2371 AssertRCReturn(rc, rc);
2372
2373 /*
2374 * The guest can access the following MSRs (read, write) without causing VM-exits; they are loaded/stored
2375 * automatically using dedicated fields in the VMCS.
2376 */
2377 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2378 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2379 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2380 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2381 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_FS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2382
2383#if HC_ARCH_BITS == 64
2384 /*
2385 * Set passthru permissions for the following MSRs (mandatory for VT-x) required for 64-bit guests.
2386 */
2387 if (pVM->hm.s.fAllow64BitGuests)
2388 {
2389 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_LSTAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2390 hmR0VmxSetMsrPermission(pVCpu, MSR_K6_STAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2391 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_SF_MASK, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2392 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
2393 }
2394#endif
2395 }
2396
2397 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
2398 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
2399 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
2400
2401 if ((val & zap) != val)
2402 {
2403 LogRel(("hmR0VmxSetupProcCtls: Invalid processor-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n",
2404 pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0, val, zap));
2405 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
2406 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2407 }
2408
2409 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, val);
2410 AssertRCReturn(rc, rc);
2411
2412 pVCpu->hm.s.vmx.u32ProcCtls = val;
2413
2414 /*
2415 * Secondary processor-based VM-execution controls.
2416 */
2417 if (RT_LIKELY(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL))
2418 {
2419 val = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0; /* Bits set here must be set in the VMCS. */
2420 zap = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2421
2422 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT)
2423 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT; /* WBINVD causes a VM-exit. */
2424
2425 if (pVM->hm.s.fNestedPaging)
2426 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT; /* Enable EPT. */
2427 else
2428 {
2429 /*
2430 * Without Nested Paging, INVPCID should cause a VM-exit. Enabling this bit causes the CPU to refer to
2431 * VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT when INVPCID is executed by the guest.
2432 * See Intel spec. 25.4 "Changes to instruction behaviour in VMX non-root operation".
2433 */
2434 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_INVPCID)
2435 val |= VMX_VMCS_CTRL_PROC_EXEC2_INVPCID;
2436 }
2437
2438 if (pVM->hm.s.vmx.fVpid)
2439 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID; /* Enable VPID. */
2440
2441 if (pVM->hm.s.vmx.fUnrestrictedGuest)
2442 val |= VMX_VMCS_CTRL_PROC_EXEC2_UNRESTRICTED_GUEST; /* Enable Unrestricted Execution. */
2443
2444 /* Enable Virtual-APIC page accesses if supported by the CPU. This is essentially where the TPR shadow resides. */
2445 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
2446 * done dynamically. */
2447 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2448 {
2449 Assert(pVM->hm.s.vmx.HCPhysApicAccess);
2450 Assert(!(pVM->hm.s.vmx.HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
2451 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC; /* Virtualize APIC accesses. */
2452 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess);
2453 AssertRCReturn(rc, rc);
2454 }
2455
2456 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
2457 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP; /* Enable RDTSCP support. */
2458
2459 if ( pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_PAUSE_LOOP_EXIT
2460 && pVM->hm.s.vmx.cPleGapTicks
2461 && pVM->hm.s.vmx.cPleWindowTicks)
2462 {
2463 val |= VMX_VMCS_CTRL_PROC_EXEC2_PAUSE_LOOP_EXIT; /* Enable pause-loop exiting. */
2464
2465 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks);
2466 AssertRCReturn(rc, rc);
2467
2468 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks);
2469 AssertRCReturn(rc, rc);
2470 }
2471
2472 if ((val & zap) != val)
2473 {
2474 LogRel(("hmR0VmxSetupProcCtls: Invalid secondary processor-based VM-execution controls combo! "
2475 "cpu=%#RX64 val=%#RX64 zap=%#RX64\n", pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0, val, zap));
2476 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2;
2477 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2478 }
2479
2480 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, val);
2481 AssertRCReturn(rc, rc);
2482
2483 pVCpu->hm.s.vmx.u32ProcCtls2 = val;
2484 }
2485 else if (RT_UNLIKELY(pVM->hm.s.vmx.fUnrestrictedGuest))
2486 {
2487        LogRel(("hmR0VmxSetupProcCtls: Unrestricted Guest enabled but secondary processor-based VM-execution controls are not "
2488                "available\n"));
2489 pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO;
2490 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2491 }
2492
2493 return VINF_SUCCESS;
2494}
2495
2496
2497/**
2498 * Sets up miscellaneous (everything other than Pin & Processor-based
2499 * VM-execution) control fields in the VMCS.
2500 *
2501 * @returns VBox status code.
2502 * @param pVM Pointer to the VM.
2503 * @param pVCpu Pointer to the VMCPU.
2504 */
2505static int hmR0VmxSetupMiscCtls(PVM pVM, PVMCPU pVCpu)
2506{
2507 NOREF(pVM);
2508 AssertPtr(pVM);
2509 AssertPtr(pVCpu);
2510
2511 int rc = VERR_GENERAL_FAILURE;
2512
2513 /* All fields are zero-initialized during allocation; but don't remove the commented block below. */
2514#if 0
2515 /* All CR3 accesses cause VM-exits. Later we optimize CR3 accesses (see hmR0VmxLoadGuestCR3AndCR4())*/
2516 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0); AssertRCReturn(rc, rc);
2517 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0); AssertRCReturn(rc, rc);
2518
2519 /*
2520 * Set MASK & MATCH to 0. VMX checks if GuestPFErrCode & MASK == MATCH. If equal (in our case it always is)
2521 * and if the X86_XCPT_PF bit in the exception bitmap is set it causes a VM-exit, if clear doesn't cause an exit.
2522 * We thus use the exception bitmap to control it rather than use both.
2523 */
2524 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0); AssertRCReturn(rc, rc);
2525 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0); AssertRCReturn(rc, rc);
2526
2527 /** @todo Explore possibility of using IO-bitmaps. */
2528 /* All IO & IOIO instructions cause VM-exits. */
2529 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0); AssertRCReturn(rc, rc);
2530 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0); AssertRCReturn(rc, rc);
2531
2532 /* Initialize the MSR-bitmap area. */
2533 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc);
2534 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0); AssertRCReturn(rc, rc);
2535 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc);
2536#endif
2537
2538 /* Setup MSR auto-load/store area. */
2539 Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr);
2540 Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf)); /* Lower 4 bits MBZ. */
2541 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
2542 AssertRCReturn(rc, rc);
2543 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
2544 AssertRCReturn(rc, rc);
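    /* Note: the same physical page doubles as the VM-entry MSR-load area and the VM-exit MSR-store
       area, so guest MSR values stored on VM-exit land in the very slots they are loaded from on
       VM-entry. */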
2545
2546 Assert(pVCpu->hm.s.vmx.HCPhysHostMsr);
2547 Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf)); /* Lower 4 bits MBZ. */
2548 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr);
2549 AssertRCReturn(rc, rc);
2550
2551 /* Set VMCS link pointer. Reserved for future use, must be -1. Intel spec. 24.4 "Guest-State Area". */
2552 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, UINT64_C(0xffffffffffffffff));
2553 AssertRCReturn(rc, rc);
2554
2555 /* All fields are zero-initialized during allocation; but don't remove the commented block below. */
2556#if 0
2557 /* Setup debug controls */
2558 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0); /** @todo We don't support IA32_DEBUGCTL MSR. Should we? */
2559 AssertRCReturn(rc, rc);
2560 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0);
2561 AssertRCReturn(rc, rc);
2562#endif
2563
2564 return rc;
2565}
2566
2567
2568/**
2569 * Sets up the initial exception bitmap in the VMCS based on static conditions.
2570 *
2571 * @returns VBox status code.
2572 * @param pVM Pointer to the VM.
2573 * @param pVCpu Pointer to the VMCPU.
2574 */
2575static int hmR0VmxInitXcptBitmap(PVM pVM, PVMCPU pVCpu)
2576{
2577 AssertPtr(pVM);
2578 AssertPtr(pVCpu);
2579
2580 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
2581
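    /* Intercept #UD when the configured GIM provider requests it (typically so the provider can
       handle hypercall instructions that would otherwise raise #UD in the guest). */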
2582 uint32_t u32XcptBitmap = pVCpu->hm.s.fGIMTrapXcptUD ? RT_BIT(X86_XCPT_UD) : 0;
2583
2584 /* Without Nested Paging, #PF must cause a VM-exit so we can sync our shadow page tables. */
2585 if (!pVM->hm.s.fNestedPaging)
2586 u32XcptBitmap |= RT_BIT(X86_XCPT_PF);
2587
2588 pVCpu->hm.s.vmx.u32XcptBitmap = u32XcptBitmap;
2589 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
2590 AssertRCReturn(rc, rc);
2591 return rc;
2592}
2593
2594
2595/**
2596 * Sets up the initial guest-state mask. The guest-state mask is consulted
2597 * before reading guest-state fields from the VMCS as VMREADs can be expensive
2598 * for the nested virtualization case (as it would cause a VM-exit).
2599 *
2600 * @param pVCpu Pointer to the VMCPU.
2601 */
2602static int hmR0VmxInitUpdatedGuestStateMask(PVMCPU pVCpu)
2603{
2604 /* Initially the guest-state is up-to-date as there is nothing in the VMCS. */
2605 HMVMXCPU_GST_RESET_TO(pVCpu, HMVMX_UPDATED_GUEST_ALL);
2606 return VINF_SUCCESS;
2607}
2608
2609
2610/**
2611 * Does per-VM VT-x initialization.
2612 *
2613 * @returns VBox status code.
2614 * @param pVM Pointer to the VM.
2615 */
2616VMMR0DECL(int) VMXR0InitVM(PVM pVM)
2617{
2618 LogFlowFunc(("pVM=%p\n", pVM));
2619
2620 int rc = hmR0VmxStructsAlloc(pVM);
2621 if (RT_FAILURE(rc))
2622 {
2623 LogRel(("VMXR0InitVM: hmR0VmxStructsAlloc failed! rc=%Rrc\n", rc));
2624 return rc;
2625 }
2626
2627 return VINF_SUCCESS;
2628}
2629
2630
2631/**
2632 * Does per-VM VT-x termination.
2633 *
2634 * @returns VBox status code.
2635 * @param pVM Pointer to the VM.
2636 */
2637VMMR0DECL(int) VMXR0TermVM(PVM pVM)
2638{
2639 LogFlowFunc(("pVM=%p\n", pVM));
2640
2641#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2642 if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
2643 ASMMemZero32(pVM->hm.s.vmx.pvScratch, PAGE_SIZE);
2644#endif
2645 hmR0VmxStructsFree(pVM);
2646 return VINF_SUCCESS;
2647}
2648
2649
2650/**
2651 * Sets up the VM for execution under VT-x.
2652 * This function is only called once per-VM during initialization.
2653 *
2654 * @returns VBox status code.
2655 * @param pVM Pointer to the VM.
2656 */
2657VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
2658{
2659 AssertPtrReturn(pVM, VERR_INVALID_PARAMETER);
2660 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2661
2662 LogFlowFunc(("pVM=%p\n", pVM));
2663
2664 /*
2665 * Without UnrestrictedGuest, pRealModeTSS and pNonPagingModeEPTPageTable *must* always be allocated.
2666 * We no longer support the highly unlikely case of UnrestrictedGuest without pRealModeTSS. See hmR3InitFinalizeR0Intel().
2667 */
2668 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
2669 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
2670 || !pVM->hm.s.vmx.pRealModeTSS))
2671 {
2672 LogRel(("VMXR0SetupVM: Invalid real-on-v86 state.\n"));
2673 return VERR_INTERNAL_ERROR;
2674 }
2675
2676 /* Initialize these always, see hmR3InitFinalizeR0().*/
2677 pVM->hm.s.vmx.enmFlushEpt = VMXFLUSHEPT_NONE;
2678 pVM->hm.s.vmx.enmFlushVpid = VMXFLUSHVPID_NONE;
2679
2680 /* Setup the tagged-TLB flush handlers. */
2681 int rc = hmR0VmxSetupTaggedTlb(pVM);
2682 if (RT_FAILURE(rc))
2683 {
2684 LogRel(("VMXR0SetupVM: hmR0VmxSetupTaggedTlb failed! rc=%Rrc\n", rc));
2685 return rc;
2686 }
2687
2688 /* Check if we can use the VMCS controls for swapping the EFER MSR. */
2689 Assert(!pVM->hm.s.vmx.fSupportsVmcsEfer);
2690#if HC_ARCH_BITS == 64
2691 if ( (pVM->hm.s.vmx.Msrs.VmxEntry.n.allowed1 & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR)
2692 && (pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1 & VMX_VMCS_CTRL_EXIT_LOAD_HOST_EFER_MSR)
2693 && (pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1 & VMX_VMCS_CTRL_EXIT_SAVE_GUEST_EFER_MSR))
2694 {
2695 pVM->hm.s.vmx.fSupportsVmcsEfer = true;
2696 }
2697#endif
2698
2699 for (VMCPUID i = 0; i < pVM->cCpus; i++)
2700 {
2701 PVMCPU pVCpu = &pVM->aCpus[i];
2702 AssertPtr(pVCpu);
2703 AssertPtr(pVCpu->hm.s.vmx.pvVmcs);
2704
2705 /* Log the VCPU pointers, useful for debugging SMP VMs. */
2706 Log4(("VMXR0SetupVM: pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
2707
2708 /* Initialize the VM-exit history array with end-of-array markers (UINT16_MAX). */
2709 Assert(!pVCpu->hm.s.idxExitHistoryFree);
2710 HMCPU_EXIT_HISTORY_RESET(pVCpu);
2711
2712 /* Set revision dword at the beginning of the VMCS structure. */
2713 *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo);
2714
2715 /* Initialize our VMCS region in memory, set the VMCS launch state to "clear". */
2716 rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2717 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2718 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2719
2720 /* Load this VMCS as the current VMCS. */
2721 rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2722 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXActivateVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2723 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2724
2725 rc = hmR0VmxSetupPinCtls(pVM, pVCpu);
2726 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupPinCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2727 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2728
2729 rc = hmR0VmxSetupProcCtls(pVM, pVCpu);
2730 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupProcCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2731 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2732
2733 rc = hmR0VmxSetupMiscCtls(pVM, pVCpu);
2734 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupMiscCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2735 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2736
2737 rc = hmR0VmxInitXcptBitmap(pVM, pVCpu);
2738 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitXcptBitmap failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2739 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2740
2741 rc = hmR0VmxInitUpdatedGuestStateMask(pVCpu);
2742 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitUpdatedGuestStateMask failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2743 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2744
2745#if HC_ARCH_BITS == 32
2746 rc = hmR0VmxInitVmcsReadCache(pVM, pVCpu);
2747 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitVmcsReadCache failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2748 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2749#endif
2750
2751 /* Re-sync the CPU's internal data into our VMCS memory region & reset the launch state to "clear". */
2752 rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2753 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs(2) failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2754 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2755
2756 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
2757
2758 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc);
2759 }
2760
2761 return VINF_SUCCESS;
2762}
2763
2764
2765/**
2766 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
2767 * the VMCS.
2768 *
2769 * @returns VBox status code.
2770 * @param pVM Pointer to the VM.
2771 * @param pVCpu Pointer to the VMCPU.
2772 */
2773DECLINLINE(int) hmR0VmxSaveHostControlRegs(PVM pVM, PVMCPU pVCpu)
2774{
2775 NOREF(pVM); NOREF(pVCpu);
2776
2777 RTCCUINTREG uReg = ASMGetCR0();
2778 int rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR0, uReg);
2779 AssertRCReturn(rc, rc);
2780
2781 uReg = ASMGetCR3();
2782 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR3, uReg);
2783 AssertRCReturn(rc, rc);
2784
2785 uReg = ASMGetCR4();
2786 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR4, uReg);
2787 AssertRCReturn(rc, rc);
2788 return rc;
2789}
2790
2791
2792#if HC_ARCH_BITS == 64
2793/**
2794 * Macro for adjusting host segment selectors to satisfy VT-x's VM-entry
2795 * requirements. See hmR0VmxSaveHostSegmentRegs().
2796 */
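/*
 * Background: VM-entry requires the host DS/ES/FS/GS selector fields to have RPL = 0 and TI = 0
 * (see Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers"). When the live
 * selector does not satisfy this, the macro loads 0 for VM-entry and, if the original selector is
 * usable, records it so that it can be restored on the way back to ring-3.
 */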
2797# define VMXLOCAL_ADJUST_HOST_SEG(seg, selValue) \
2798 if ((selValue) & (X86_SEL_RPL | X86_SEL_LDT)) \
2799 { \
2800 bool fValidSelector = true; \
2801 if ((selValue) & X86_SEL_LDT) \
2802 { \
2803 uint32_t uAttr = ASMGetSegAttr((selValue)); \
2804 fValidSelector = RT_BOOL(uAttr != UINT32_MAX && (uAttr & X86_DESC_P)); \
2805 } \
2806 if (fValidSelector) \
2807 { \
2808 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##seg; \
2809 pVCpu->hm.s.vmx.RestoreHost.uHostSel##seg = (selValue); \
2810 } \
2811 (selValue) = 0; \
2812 }
2813#endif
2814
2815
2816/**
2817 * Saves the host segment registers, GDTR, IDTR and the TR, FS and GS bases into
2818 * the host-state area in the VMCS.
2819 *
2820 * @returns VBox status code.
2821 * @param pVM Pointer to the VM.
2822 * @param pVCpu Pointer to the VMCPU.
2823 */
2824DECLINLINE(int) hmR0VmxSaveHostSegmentRegs(PVM pVM, PVMCPU pVCpu)
2825{
2826 int rc = VERR_INTERNAL_ERROR_5;
2827
2828#if HC_ARCH_BITS == 64
2829 /*
2830 * If we've executed guest code using VT-x, the host-state bits will be messed up. We
2831 * should -not- save the messed up state without restoring the original host-state. See @bugref{7240}.
2832 */
2833 AssertMsgReturn(!(pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED),
2834 ("Re-saving host-state after executing guest code without leaving VT-x!\n"), VERR_WRONG_ORDER);
2835#endif
2836
2837 /*
2838 * Host DS, ES, FS and GS segment registers.
2839 */
2840#if HC_ARCH_BITS == 64
2841 RTSEL uSelDS = ASMGetDS();
2842 RTSEL uSelES = ASMGetES();
2843 RTSEL uSelFS = ASMGetFS();
2844 RTSEL uSelGS = ASMGetGS();
2845#else
2846 RTSEL uSelDS = 0;
2847 RTSEL uSelES = 0;
2848 RTSEL uSelFS = 0;
2849 RTSEL uSelGS = 0;
2850#endif
2851
2852 /* Recalculate which host-state bits need to be manually restored. */
2853 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
2854
2855 /*
2856 * Host CS and SS segment registers.
2857 */
2858 RTSEL uSelCS = ASMGetCS();
2859 RTSEL uSelSS = ASMGetSS();
2860
2861 /*
2862 * Host TR segment register.
2863 */
2864 RTSEL uSelTR = ASMGetTR();
2865
2866#if HC_ARCH_BITS == 64
2867 /*
2868 * Determine if the host segment registers are suitable for VT-x; otherwise load zero to satisfy the VM-entry checks and restore them
2869 * before we get preempted. See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
2870 */
2871 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
2872 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
2873 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
2874 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
2875# undef VMXLOCAL_ADJUST_HOST_SEG
2876#endif
2877
2878 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
2879 Assert(!(uSelCS & X86_SEL_RPL)); Assert(!(uSelCS & X86_SEL_LDT));
2880 Assert(!(uSelSS & X86_SEL_RPL)); Assert(!(uSelSS & X86_SEL_LDT));
2881 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
2882 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
2883 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
2884 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
2885 Assert(!(uSelTR & X86_SEL_RPL)); Assert(!(uSelTR & X86_SEL_LDT));
2886 Assert(uSelCS);
2887 Assert(uSelTR);
2888
2889    /* The assertion below is correct, but u32ExitCtls has not been updated at this point yet. */
2890#if 0
2891 if (!(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE))
2892 Assert(uSelSS != 0);
2893#endif
2894
2895 /* Write these host selector fields into the host-state area in the VMCS. */
2896 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_CS, uSelCS); AssertRCReturn(rc, rc);
2897 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_SS, uSelSS); AssertRCReturn(rc, rc);
2898#if HC_ARCH_BITS == 64
2899 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_DS, uSelDS); AssertRCReturn(rc, rc);
2900 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_ES, uSelES); AssertRCReturn(rc, rc);
2901 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_FS, uSelFS); AssertRCReturn(rc, rc);
2902 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_GS, uSelGS); AssertRCReturn(rc, rc);
2903#else
2904 NOREF(uSelDS);
2905 NOREF(uSelES);
2906 NOREF(uSelFS);
2907 NOREF(uSelGS);
2908#endif
2909 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_TR, uSelTR); AssertRCReturn(rc, rc);
2910
2911 /*
2912 * Host GDTR and IDTR.
2913 */
2914 RTGDTR Gdtr;
2915 RTIDTR Idtr;
2916 RT_ZERO(Gdtr);
2917 RT_ZERO(Idtr);
2918 ASMGetGDTR(&Gdtr);
2919 ASMGetIDTR(&Idtr);
2920 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, Gdtr.pGdt); AssertRCReturn(rc, rc);
2921 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, Idtr.pIdt); AssertRCReturn(rc, rc);
2922
2923#if HC_ARCH_BITS == 64
2924 /*
2925     * Determine if we need to manually restore the GDTR and IDTR limits as VT-x zaps them to the
2926 * maximum limit (0xffff) on every VM-exit.
2927 */
2928 if (Gdtr.cbGdt != 0xffff)
2929 {
2930 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
2931 AssertCompile(sizeof(Gdtr) == sizeof(X86XDTR64));
2932 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64));
2933 }
2934
2935 /*
2936 * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT"
2937 * and Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit as 0xfff, VT-x
2938     * bloating the limit to 0xffff shouldn't cause any different CPU behavior. However, several hosts either insist
2939     * on 0xfff being the limit (Windows Patch Guard) or use the limit for other purposes (darwin puts the CPU ID in there
2940 * but botches sidt alignment in at least one consumer). So, we're only allowing IDTR.LIMIT to be left at 0xffff on
2941 * hosts where we are pretty sure it won't cause trouble.
2942 */
2943# if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS)
2944 if (Idtr.cbIdt < 0x0fff)
2945# else
2946 if (Idtr.cbIdt != 0xffff)
2947# endif
2948 {
2949 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
2950 AssertCompile(sizeof(Idtr) == sizeof(X86XDTR64));
2951 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostIdtr, &Idtr, sizeof(X86XDTR64));
2952 }
2953#endif
2954
2955 /*
2956 * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI and RPL bits
2957 * is effectively what the CPU does for "scaling by 8". TI is always 0 and RPL should be too in most cases.
2958 */
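    /* Note: OR'ing X86_SEL_RPL_LDT (the TI and RPL bits) into the selector gives the offset of the last byte of an
       8-byte descriptor, and cbGdt holds the GDT limit (last valid byte offset), so the check below ensures the
       TR descriptor lies within the GDT. */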
2959 AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= Gdtr.cbGdt,
2960 ("hmR0VmxSaveHostSegmentRegs: TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, Gdtr.cbGdt),
2961 VERR_VMX_INVALID_HOST_STATE);
2962
2963 PCX86DESCHC pDesc = (PCX86DESCHC)(Gdtr.pGdt + (uSelTR & X86_SEL_MASK));
2964#if HC_ARCH_BITS == 64
2965 uintptr_t uTRBase = X86DESC64_BASE(pDesc);
2966
2967 /*
2968 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on all VM-exits.
2969 * The type is the same for 64-bit busy TSS[1]. The limit needs manual restoration if the host has something else.
2970 * Task switching is not supported in 64-bit mode[2], but the limit still matters as IOPM is supported in 64-bit mode.
2971 * Restoring the limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
2972 *
2973 * [1] See Intel spec. 3.5 "System Descriptor Types".
2974 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
2975 */
2976 Assert(pDesc->System.u4Type == 11);
2977 if ( pDesc->System.u16LimitLow != 0x67
2978 || pDesc->System.u4LimitHigh)
2979 {
2980 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
2981 /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */
2982 if (pVM->hm.s.fHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY)
2983 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY;
2984 pVCpu->hm.s.vmx.RestoreHost.uHostSelTR = uSelTR;
2985
2986 /* Store the GDTR here as we need it while restoring TR. */
2987 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64));
2988 }
2989#else
2990 NOREF(pVM);
2991 uintptr_t uTRBase = X86DESC_BASE(pDesc);
2992#endif
2993 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_TR_BASE, uTRBase);
2994 AssertRCReturn(rc, rc);
2995
2996 /*
2997 * Host FS base and GS base.
2998 */
2999#if HC_ARCH_BITS == 64
3000 uint64_t u64FSBase = ASMRdMsr(MSR_K8_FS_BASE);
3001 uint64_t u64GSBase = ASMRdMsr(MSR_K8_GS_BASE);
3002 rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, u64FSBase); AssertRCReturn(rc, rc);
3003 rc = VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, u64GSBase); AssertRCReturn(rc, rc);
3004
3005 /* Store the base if we have to restore FS or GS manually as we need to restore the base as well. */
3006 if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_FS)
3007 pVCpu->hm.s.vmx.RestoreHost.uHostFSBase = u64FSBase;
3008 if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_GS)
3009 pVCpu->hm.s.vmx.RestoreHost.uHostGSBase = u64GSBase;
3010#endif
3011 return rc;
3012}
3013
3014
3015/**
3016 * Saves certain host MSRs in the VM-Exit MSR-load area and some in the
3017 * host-state area of the VMCS. These MSRs will be automatically restored on
3018 * the host after every successful VM-exit.
3019 *
3020 * @returns VBox status code.
3021 * @param pVM Pointer to the VM.
3022 * @param pVCpu Pointer to the VMCPU.
3023 *
3024 * @remarks No-long-jump zone!!!
3025 */
3026DECLINLINE(int) hmR0VmxSaveHostMsrs(PVM pVM, PVMCPU pVCpu)
3027{
3028 NOREF(pVM);
3029
3030 AssertPtr(pVCpu);
3031 AssertPtr(pVCpu->hm.s.vmx.pvHostMsr);
3032
3033 int rc = VINF_SUCCESS;
3034#if HC_ARCH_BITS == 64
3035 if (pVM->hm.s.fAllow64BitGuests)
3036 hmR0VmxLazySaveHostMsrs(pVCpu);
3037#endif
3038
3039 /*
3040 * Host Sysenter MSRs.
3041 */
3042 rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
3043 AssertRCReturn(rc, rc);
3044#if HC_ARCH_BITS == 32
3045 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
3046 AssertRCReturn(rc, rc);
3047 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
3048#else
3049 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
3050 AssertRCReturn(rc, rc);
3051 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
3052#endif
3053 AssertRCReturn(rc, rc);
3054
3055 /*
3056 * Host EFER MSR.
3057     * If the CPU supports the newer VMCS controls for managing EFER, use them.
3058 * Otherwise it's done as part of auto-load/store MSR area in the VMCS, see hmR0VmxLoadGuestMsrs().
3059 */
3060 if (pVM->hm.s.vmx.fSupportsVmcsEfer)
3061 {
3062 rc = VMXWriteVmcs64(VMX_VMCS64_HOST_FIELD_EFER_FULL, pVM->hm.s.vmx.u64HostEfer);
3063 AssertRCReturn(rc, rc);
3064 }
3065
3066 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see
3067 * hmR0VmxLoadGuestExitCtls() !! */
3068
3069 return rc;
3070}
3071
3072
3073/**
3074 * Figures out if we need to swap the EFER MSR which is
3075 * particularly expensive.
3076 *
3077 * We check all relevant bits. For now, that's everything
3078 * besides LMA/LME, as these two bits are handled by VM-entry,
3079 * see hmR0VmxLoadGuestExitCtls() and
3080 * hmR0VmxLoadGuestEntryCtls().
3081 *
3082 * @returns true if we need to load guest EFER, false otherwise.
3083 * @param pVCpu Pointer to the VMCPU.
3084 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3085 * out-of-sync. Make sure to update the required fields
3086 * before using them.
3087 *
3088 * @remarks Requires EFER, CR4.
3089 * @remarks No-long-jump zone!!!
3090 */
3091static bool hmR0VmxShouldSwapEferMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3092{
3093#ifdef HMVMX_ALWAYS_SWAP_EFER
3094 return true;
3095#endif
3096
3097#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
3098 /* For 32-bit hosts running 64-bit guests, we always swap EFER in the world-switcher. Nothing to do here. */
3099 if (CPUMIsGuestInLongMode(pVCpu))
3100 return false;
3101#endif
3102
3103 PVM pVM = pVCpu->CTX_SUFF(pVM);
3104 uint64_t u64HostEfer = pVM->hm.s.vmx.u64HostEfer;
3105 uint64_t u64GuestEfer = pMixedCtx->msrEFER;
3106
3107 /*
3108 * For 64-bit guests, if EFER.SCE bit differs, we need to swap to ensure that the
3109 * guest's SYSCALL behaviour isn't screwed. See @bugref{7386}.
3110 */
3111 if ( CPUMIsGuestInLongMode(pVCpu)
3112 && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE))
3113 {
3114 return true;
3115 }
3116
3117 /*
3118 * If the guest uses PAE and EFER.NXE bit differs, we need to swap EFER as it
3119 * affects guest paging. 64-bit paging implies CR4.PAE as well.
3120 * See Intel spec. 4.5 "IA-32e Paging" and Intel spec. 4.1.1 "Three Paging Modes".
3121 */
3122 if ( (pMixedCtx->cr4 & X86_CR4_PAE)
3123 && (pMixedCtx->cr0 & X86_CR0_PG)
3124 && (u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE))
3125 {
3126        /* Assert that the host is NX capable. */
3127 Assert(pVM->hm.s.cpuid.u32AMDFeatureEDX & X86_CPUID_EXT_FEATURE_EDX_NX);
3128 return true;
3129 }
3130
3131 /** @todo Check the latest Intel spec. for any other bits,
3132 * like SMEP/SMAP? */
3133 return false;
3134}
3135
3136
3137/**
3138 * Sets up VM-entry controls in the VMCS. These controls can affect things done
3139 * on VM-exit; e.g. "load debug controls", see Intel spec. 24.8.1 "VM-entry
3140 * controls".
3141 *
3142 * @returns VBox status code.
3143 * @param pVCpu Pointer to the VMCPU.
3144 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3145 * out-of-sync. Make sure to update the required fields
3146 * before using them.
3147 *
3148 * @remarks Requires EFER.
3149 * @remarks No-long-jump zone!!!
3150 */
3151DECLINLINE(int) hmR0VmxLoadGuestEntryCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3152{
3153 int rc = VINF_SUCCESS;
3154 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_ENTRY_CTLS))
3155 {
3156 PVM pVM = pVCpu->CTX_SUFF(pVM);
3157 uint32_t val = pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0; /* Bits set here must be set in the VMCS. */
3158 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxEntry.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
3159
3160        /* Load debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x capable CPUs only supported the 1-setting of this bit. */
3161 val |= VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG;
3162
3163 /* Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry. */
3164 if (CPUMIsGuestInLongModeEx(pMixedCtx))
3165 {
3166 val |= VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST;
3167 Log4(("Load[%RU32]: VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST\n", pVCpu->idCpu));
3168 }
3169 else
3170 Assert(!(val & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST));
3171
3172        /* If the CPU supports the newer VMCS controls for managing guest/host EFER, use them. */
3173 if ( pVM->hm.s.vmx.fSupportsVmcsEfer
3174 && hmR0VmxShouldSwapEferMsr(pVCpu, pMixedCtx))
3175 {
3176 val |= VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR;
3177 Log4(("Load[%RU32]: VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR\n", pVCpu->idCpu));
3178 }
3179
3180 /*
3181 * The following should -not- be set (since we're not in SMM mode):
3182 * - VMX_VMCS_CTRL_ENTRY_ENTRY_SMM
3183 * - VMX_VMCS_CTRL_ENTRY_DEACTIVATE_DUALMON
3184 */
3185
3186 /** @todo VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR,
3187 * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR. */
3188
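        /* Sanity check below: 'val' must be a subset of 'zap' (the allowed-1 mask); any requested control
           the CPU doesn't support trips this. */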
3189 if ((val & zap) != val)
3190 {
3191 LogRel(("hmR0VmxLoadGuestEntryCtls: Invalid VM-entry controls combo! cpu=%RX64 val=%RX64 zap=%RX64\n",
3192 pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0, val, zap));
3193 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY;
3194 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3195 }
3196
3197 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, val);
3198 AssertRCReturn(rc, rc);
3199
3200 pVCpu->hm.s.vmx.u32EntryCtls = val;
3201 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_ENTRY_CTLS);
3202 }
3203 return rc;
3204}
3205
3206
3207/**
3208 * Sets up the VM-exit controls in the VMCS.
3209 *
3210 * @returns VBox status code.
3212 * @param pVCpu Pointer to the VMCPU.
3213 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3214 * out-of-sync. Make sure to update the required fields
3215 * before using them.
3216 *
3217 * @remarks Requires EFER.
3218 */
3219DECLINLINE(int) hmR0VmxLoadGuestExitCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3220{
3221 NOREF(pMixedCtx);
3222
3223 int rc = VINF_SUCCESS;
3224 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_EXIT_CTLS))
3225 {
3226 PVM pVM = pVCpu->CTX_SUFF(pVM);
3227 uint32_t val = pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0; /* Bits set here must be set in the VMCS. */
3228 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
3229
3230 /* Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only supported the 1-setting of this bit. */
3231 val |= VMX_VMCS_CTRL_EXIT_SAVE_DEBUG;
3232
3233 /*
3234 * Set the host long mode active (EFER.LMA) bit (which Intel calls "Host address-space size") if necessary.
3235 * On VM-exit, VT-x sets both the host EFER.LMA and EFER.LME bit to this value. See assertion in hmR0VmxSaveHostMsrs().
3236 */
3237#if HC_ARCH_BITS == 64
3238 val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE;
3239 Log4(("Load[%RU32]: VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE\n", pVCpu->idCpu));
3240#else
3241 if (CPUMIsGuestInLongModeEx(pMixedCtx))
3242 {
3243 /* The switcher returns to long mode, EFER is managed by the switcher. */
3244 val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE;
3245 Log4(("Load[%RU32]: VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE\n", pVCpu->idCpu));
3246 }
3247 else
3248 Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE));
3249#endif
3250
3251        /* If the newer VMCS fields for managing EFER exist, use them. */
3252 if ( pVM->hm.s.vmx.fSupportsVmcsEfer
3253 && hmR0VmxShouldSwapEferMsr(pVCpu, pMixedCtx))
3254 {
3255 val |= VMX_VMCS_CTRL_EXIT_SAVE_GUEST_EFER_MSR
3256 | VMX_VMCS_CTRL_EXIT_LOAD_HOST_EFER_MSR;
3257 Log4(("Load[%RU32]: VMX_VMCS_CTRL_EXIT_SAVE_GUEST_EFER_MSR, VMX_VMCS_CTRL_EXIT_LOAD_HOST_EFER_MSR\n", pVCpu->idCpu));
3258 }
3259
3260 /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */
3261 Assert(!(val & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT));
3262
3263 /** @todo VMX_VMCS_CTRL_EXIT_LOAD_PERF_MSR,
3264 * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_PAT_MSR,
3265 * VMX_VMCS_CTRL_EXIT_LOAD_HOST_PAT_MSR. */
3266
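        /* If available and enabled, have the CPU save the VMX-preemption timer value on VM-exit so the
           remaining countdown is preserved across exits. */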
3267 if ( pVM->hm.s.vmx.fUsePreemptTimer
3268 && (pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1 & VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER))
3269 val |= VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER;
3270
3271 if ((val & zap) != val)
3272 {
3273            LogRel(("hmR0VmxLoadGuestExitCtls: Invalid VM-exit controls combo! cpu=%RX64 val=%RX64 zap=%RX64\n",
3274 pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0, val, zap));
3275 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT;
3276 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
3277 }
3278
3279 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, val);
3280 AssertRCReturn(rc, rc);
3281
3282 pVCpu->hm.s.vmx.u32ExitCtls = val;
3283 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_EXIT_CTLS);
3284 }
3285 return rc;
3286}
3287
3288
3289/**
3290 * Loads the guest APIC and related state.
3291 *
3292 * @returns VBox status code.
3294 * @param pVCpu Pointer to the VMCPU.
3295 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3296 * out-of-sync. Make sure to update the required fields
3297 * before using them.
3298 */
3299DECLINLINE(int) hmR0VmxLoadGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3300{
3301 NOREF(pMixedCtx);
3302
3303 int rc = VINF_SUCCESS;
3304 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE))
3305 {
3306 /* Setup TPR shadowing. Also setup TPR patching for 32-bit guests. */
3307 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
3308 {
3309 Assert(pVCpu->hm.s.vmx.HCPhysVirtApic);
3310
3311 bool fPendingIntr = false;
3312 uint8_t u8Tpr = 0;
3313 uint8_t u8PendingIntr = 0;
3314 rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr);
3315 AssertRCReturn(rc, rc);
3316
3317 /*
3318 * If there are external interrupts pending but masked by the TPR value, instruct VT-x to cause a VM-exit when
3319 * the guest lowers its TPR below the highest-priority pending interrupt and we can deliver the interrupt.
3320 * If there are no external interrupts pending, set threshold to 0 to not cause a VM-exit. We will eventually deliver
3321 * the interrupt when we VM-exit for other reasons.
3322 */
3323 pVCpu->hm.s.vmx.pbVirtApic[0x80] = u8Tpr; /* Offset 0x80 is TPR in the APIC MMIO range. */
3324 uint32_t u32TprThreshold = 0;
3325 if (fPendingIntr)
3326 {
3327 /* Bits 3:0 of the TPR threshold field correspond to bits 7:4 of the TPR (which is the Task-Priority Class). */
3328 const uint8_t u8PendingPriority = (u8PendingIntr >> 4) & 0xf;
3329 const uint8_t u8TprPriority = (u8Tpr >> 4) & 0xf;
3330 if (u8PendingPriority <= u8TprPriority)
3331 u32TprThreshold = u8PendingPriority;
3332 else
3333 u32TprThreshold = u8TprPriority; /* Required for Vista 64-bit guest, see @bugref{6398}. */
3334 }
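            /* Illustrative example of the mapping above: a pending vector 0x51 (priority class 5) while the guest
               TPR is 0x80 (class 8) gives u32TprThreshold = 5, so the guest lowering its TPR below 0x50 causes a
               TPR-below-threshold VM-exit and lets us deliver the interrupt. */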
3335 Assert(!(u32TprThreshold & 0xfffffff0)); /* Bits 31:4 MBZ. */
3336
3337 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
3338 AssertRCReturn(rc, rc);
3339 }
3340
3341 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
3342 }
3343 return rc;
3344}
3345
3346
3347/**
3348 * Gets the guest's interruptibility-state ("interrupt shadow" as AMD calls it).
3349 *
3350 * @returns Guest's interruptibility-state.
3351 * @param pVCpu Pointer to the VMCPU.
3352 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3353 * out-of-sync. Make sure to update the required fields
3354 * before using them.
3355 *
3356 * @remarks No-long-jump zone!!!
3357 */
3358DECLINLINE(uint32_t) hmR0VmxGetGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3359{
3360 /*
3361 * Check if we should inhibit interrupt delivery due to instructions like STI and MOV SS.
3362 */
3363 uint32_t uIntrState = 0;
3364 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
3365 {
3366 /* If inhibition is active, RIP & RFLAGS should've been accessed (i.e. read previously from the VMCS or from ring-3). */
3367 AssertMsg(HMVMXCPU_GST_IS_SET(pVCpu, HMVMX_UPDATED_GUEST_RIP | HMVMX_UPDATED_GUEST_RFLAGS),
3368 ("%#x\n", HMVMXCPU_GST_VALUE(pVCpu)));
3369 if (pMixedCtx->rip == EMGetInhibitInterruptsPC(pVCpu))
3370 {
3371 if (pMixedCtx->eflags.Bits.u1IF)
3372 uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI;
3373 else
3374 uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS;
3375 }
3376 /* else: Although we can clear the force-flag here, let's keep this side-effects free. */
3377 }
3378
3379 /*
3380 * NMIs to the guest are blocked after an NMI is injected until the guest executes an IRET. We only
3381 * bother with virtual-NMI blocking when we have support for virtual NMIs in the CPU, otherwise
3382 * setting this would block host-NMIs and IRET will not clear the blocking.
3383 *
3384 * See Intel spec. 26.6.1 "Interruptibility state". See @bugref{7445}.
3385 */
3386 if ( VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS)
3387 && (pVCpu->hm.s.vmx.u32PinCtls & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI))
3388 {
3389 uIntrState |= VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI;
3390 }
3391
3392 return uIntrState;
3393}
3394
3395
3396/**
3397 * Loads the guest's interruptibility-state into the guest-state area in the
3398 * VMCS.
3399 *
3400 * @returns VBox status code.
3401 * @param pVCpu Pointer to the VMCPU.
3402 * @param uIntrState The interruptibility-state to set.
3403 */
3404static int hmR0VmxLoadGuestIntrState(PVMCPU pVCpu, uint32_t uIntrState)
3405{
3406 NOREF(pVCpu);
3407 AssertMsg(!(uIntrState & 0xfffffff0), ("%#x\n", uIntrState)); /* Bits 31:4 MBZ. */
3408 Assert((uIntrState & 0x3) != 0x3); /* Block-by-STI and MOV SS cannot be simultaneously set. */
3409 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, uIntrState);
3410 AssertRCReturn(rc, rc);
3411 return rc;
3412}
3413
3414
3415/**
3416 * Loads the exception intercepts required for guest execution in the VMCS.
3417 *
3418 * @returns VBox status code.
3419 * @param pVCpu Pointer to the VMCPU.
3420 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3421 * out-of-sync. Make sure to update the required fields
3422 * before using them.
3423 */
3424static int hmR0VmxLoadGuestXcptIntercepts(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3425{
3426 NOREF(pMixedCtx);
3427 int rc = VINF_SUCCESS;
3428 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS))
3429 {
3430 /* The remaining exception intercepts are handled elsewhere, e.g. in hmR0VmxLoadSharedCR0(). */
3431 if (pVCpu->hm.s.fGIMTrapXcptUD)
3432 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_UD);
3433 else
3434 {
3435#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
3436 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_UD);
3437#endif
3438 }
3439
3440 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
3441 AssertRCReturn(rc, rc);
3442
3443 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS);
3444 Log4(("Load[%RU32]: VMX_VMCS32_CTRL_EXCEPTION_BITMAP=%#RX64 fContextUseFlags=%#RX32\n", pVCpu->idCpu,
3445 pVCpu->hm.s.vmx.u32XcptBitmap, HMCPU_CF_VALUE(pVCpu)));
3446 }
3447 return rc;
3448}
3449
3450
3451/**
3452 * Loads the guest's RIP into the guest-state area in the VMCS.
3453 *
3454 * @returns VBox status code.
3455 * @param pVCpu Pointer to the VMCPU.
3456 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3457 * out-of-sync. Make sure to update the required fields
3458 * before using them.
3459 *
3460 * @remarks No-long-jump zone!!!
3461 */
3462static int hmR0VmxLoadGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3463{
3464 int rc = VINF_SUCCESS;
3465 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RIP))
3466 {
3467 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RIP, pMixedCtx->rip);
3468 AssertRCReturn(rc, rc);
3469
3470 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RIP);
3471 Log4(("Load[%RU32]: VMX_VMCS_GUEST_RIP=%#RX64 fContextUseFlags=%#RX32\n", pVCpu->idCpu, pMixedCtx->rip,
3472 HMCPU_CF_VALUE(pVCpu)));
3473 }
3474 return rc;
3475}
3476
3477
3478/**
3479 * Loads the guest's RSP into the guest-state area in the VMCS.
3480 *
3481 * @returns VBox status code.
3482 * @param pVCpu Pointer to the VMCPU.
3483 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3484 * out-of-sync. Make sure to update the required fields
3485 * before using them.
3486 *
3487 * @remarks No-long-jump zone!!!
3488 */
3489static int hmR0VmxLoadGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3490{
3491 int rc = VINF_SUCCESS;
3492 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RSP))
3493 {
3494 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RSP, pMixedCtx->rsp);
3495 AssertRCReturn(rc, rc);
3496
3497 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RSP);
3498 Log4(("Load[%RU32]: VMX_VMCS_GUEST_RSP=%#RX64\n", pVCpu->idCpu, pMixedCtx->rsp));
3499 }
3500 return rc;
3501}
3502
3503
3504/**
3505 * Loads the guest's RFLAGS into the guest-state area in the VMCS.
3506 *
3507 * @returns VBox status code.
3508 * @param pVCpu Pointer to the VMCPU.
3509 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3510 * out-of-sync. Make sure to update the required fields
3511 * before using them.
3512 *
3513 * @remarks No-long-jump zone!!!
3514 */
3515static int hmR0VmxLoadGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3516{
3517 int rc = VINF_SUCCESS;
3518 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RFLAGS))
3519 {
3520 /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ).
3521 Let us assert it as such and use 32-bit VMWRITE. */
3522 Assert(!(pMixedCtx->rflags.u64 >> 32));
3523 X86EFLAGS Eflags = pMixedCtx->eflags;
3524 /** @todo r=bird: There shall be no need to OR in X86_EFL_1 here, nor
3525 * shall there be any reason for clearing bits 63:22, 15, 5 and 3.
3526 * These will never be cleared/set, unless some other part of the VMM
3527         * code is buggy - in which case we're better off finding and fixing
3528 * those bugs than hiding them. */
3529 Assert(Eflags.u32 & X86_EFL_RA1_MASK);
3530 Assert(!(Eflags.u32 & ~(X86_EFL_1 | X86_EFL_LIVE_MASK)));
3531 Eflags.u32 &= VMX_EFLAGS_RESERVED_0; /* Bits 22-31, 15, 5 & 3 MBZ. */
3532 Eflags.u32 |= VMX_EFLAGS_RESERVED_1; /* Bit 1 MB1. */
3533
3534 /*
3535 * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so we can restore them on VM-exit.
3536 * Modify the real-mode guest's eflags so that VT-x can run the real-mode guest code under Virtual 8086 mode.
3537 */
3538 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3539 {
3540 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
3541 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
3542 pVCpu->hm.s.vmx.RealMode.Eflags.u32 = Eflags.u32; /* Save the original eflags of the real-mode guest. */
3543 Eflags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */
3544 Eflags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. */
3545 }
3546
3547 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_RFLAGS, Eflags.u32);
3548 AssertRCReturn(rc, rc);
3549
3550 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_RFLAGS);
3551 Log4(("Load[%RU32]: VMX_VMCS_GUEST_RFLAGS=%#RX32\n", pVCpu->idCpu, Eflags.u32));
3552 }
3553 return rc;
3554}
3555
3556
3557/**
3558 * Loads the guest RIP, RSP and RFLAGS into the guest-state area in the VMCS.
3559 *
3560 * @returns VBox status code.
3561 * @param pVCpu Pointer to the VMCPU.
3562 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3563 * out-of-sync. Make sure to update the required fields
3564 * before using them.
3565 *
3566 * @remarks No-long-jump zone!!!
3567 */
3568DECLINLINE(int) hmR0VmxLoadGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3569{
3570 int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx);
3571 AssertRCReturn(rc, rc);
3572 rc = hmR0VmxLoadGuestRsp(pVCpu, pMixedCtx);
3573 AssertRCReturn(rc, rc);
3574 rc = hmR0VmxLoadGuestRflags(pVCpu, pMixedCtx);
3575 AssertRCReturn(rc, rc);
3576 return rc;
3577}
3578
3579
3580/**
3581 * Loads the guest CR0 control register into the guest-state area in the VMCS.
3582 * CR0 is partially shared with the host and we have to consider the FPU bits.
3583 *
3584 * @returns VBox status code.
3586 * @param pVCpu Pointer to the VMCPU.
3587 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3588 * out-of-sync. Make sure to update the required fields
3589 * before using them.
3590 *
3591 * @remarks No-long-jump zone!!!
3592 */
3593static int hmR0VmxLoadSharedCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3594{
3595 /*
3596 * Guest CR0.
3597 * Guest FPU.
3598 */
3599 int rc = VINF_SUCCESS;
3600 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0))
3601 {
3602 Assert(!(pMixedCtx->cr0 >> 32));
3603 uint32_t u32GuestCR0 = pMixedCtx->cr0;
3604 PVM pVM = pVCpu->CTX_SUFF(pVM);
3605
3606 /* The guest's view (read access) of its CR0 is unblemished. */
3607 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, u32GuestCR0);
3608 AssertRCReturn(rc, rc);
3609 Log4(("Load[%RU32]: VMX_VMCS_CTRL_CR0_READ_SHADOW=%#RX32\n", pVCpu->idCpu, u32GuestCR0));
3610
3611 /* Setup VT-x's view of the guest CR0. */
3612 /* Minimize VM-exits due to CR3 changes when we have NestedPaging. */
3613 if (pVM->hm.s.fNestedPaging)
3614 {
3615 if (CPUMIsGuestPagingEnabledEx(pMixedCtx))
3616 {
3617 /* The guest has paging enabled, let it access CR3 without causing a VM-exit if supported. */
3618 pVCpu->hm.s.vmx.u32ProcCtls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
3619 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT);
3620 }
3621 else
3622 {
3623 /* The guest doesn't have paging enabled, make CR3 access cause a VM-exit to update our shadow. */
3624 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
3625 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
3626 }
3627
3628 /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */
3629 if (pVM->hm.s.vmx.fUnrestrictedGuest)
3630 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
3631
3632 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
3633 AssertRCReturn(rc, rc);
3634 }
3635 else
3636 u32GuestCR0 |= X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */
3637
3638 /*
3639 * Guest FPU bits.
3640 * Intel spec. 23.8 "Restrictions on VMX operation" mentions that CR0.NE bit must always be set on the first
3641         * CPUs to support VT-x, and the VM-entry checks make no mention of relaxing this with regards to UX (unrestricted execution).
3642 */
3643 u32GuestCR0 |= X86_CR0_NE;
3644 bool fInterceptNM = false;
3645 if (CPUMIsGuestFPUStateActive(pVCpu))
3646 {
3647 fInterceptNM = false; /* Guest FPU active, no need to VM-exit on #NM. */
3648            /* The guest should still get #NM exceptions when it expects them, so we should not clear TS & MP bits here.
3649 We're only concerned about -us- not intercepting #NMs when the guest-FPU is active. Not the guest itself! */
3650 }
3651 else
3652 {
3653 fInterceptNM = true; /* Guest FPU inactive, VM-exit on #NM for lazy FPU loading. */
3654 u32GuestCR0 |= X86_CR0_TS /* Guest can task switch quickly and do lazy FPU syncing. */
3655 | X86_CR0_MP; /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */
3656 }
3657
3658 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
3659 bool fInterceptMF = false;
3660 if (!(pMixedCtx->cr0 & X86_CR0_NE))
3661 fInterceptMF = true;
3662
3663 /* Finally, intercept all exceptions as we cannot directly inject them in real-mode, see hmR0VmxInjectEventVmcs(). */
3664 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3665 {
3666 Assert(PDMVmmDevHeapIsEnabled(pVM));
3667 Assert(pVM->hm.s.vmx.pRealModeTSS);
3668 pVCpu->hm.s.vmx.u32XcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK;
3669 fInterceptNM = true;
3670 fInterceptMF = true;
3671 }
3672 else
3673 {
3674 /* For now, cleared here as mode-switches can happen outside HM/VT-x. See @bugref{7626#c11}. */
3675 pVCpu->hm.s.vmx.u32XcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK;
3676 }
3677 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS);
3678
3679 if (fInterceptNM)
3680 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_NM);
3681 else
3682 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_NM);
3683
3684 if (fInterceptMF)
3685 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_MF);
3686 else
3687 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_MF);
3688
3689 /* Additional intercepts for debugging, define these yourself explicitly. */
3690#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
3691 pVCpu->hm.s.vmx.u32XcptBitmap |= 0
3692 | RT_BIT(X86_XCPT_BP)
3693 | RT_BIT(X86_XCPT_DB)
3694 | RT_BIT(X86_XCPT_DE)
3695 | RT_BIT(X86_XCPT_NM)
3696 | RT_BIT(X86_XCPT_TS)
3697 | RT_BIT(X86_XCPT_UD)
3698 | RT_BIT(X86_XCPT_NP)
3699 | RT_BIT(X86_XCPT_SS)
3700 | RT_BIT(X86_XCPT_GP)
3701 | RT_BIT(X86_XCPT_PF)
3702 | RT_BIT(X86_XCPT_MF)
3703 ;
3704#elif defined(HMVMX_ALWAYS_TRAP_PF)
3705 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_PF);
3706#endif
3707
3708 Assert(pVM->hm.s.fNestedPaging || (pVCpu->hm.s.vmx.u32XcptBitmap & RT_BIT(X86_XCPT_PF)));
3709
3710 /* Set/clear the CR0 specific bits along with their exceptions (PE, PG, CD, NW). */
3711 uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
3712 uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
3713 if (pVM->hm.s.vmx.fUnrestrictedGuest) /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). */
3714 uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG);
3715 else
3716 Assert((uSetCR0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG));
3717
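        /* uSetCR0 holds the bits that must be 1 (set in both fixed MSRs) and uZapCR0 the bits that may be 1; the
           guest CR0 is clamped into that range below. On typical CPUs Cr0Fixed0 is 0x80000021, i.e. PE, NE and PG
           are forced to 1 unless unrestricted guest relaxed PE/PG above (illustrative value, it varies by CPU). */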
3718 u32GuestCR0 |= uSetCR0;
3719 u32GuestCR0 &= uZapCR0;
3720 u32GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW); /* Always enable caching. */
3721
3722 /* Write VT-x's view of the guest CR0 into the VMCS. */
3723 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR0, u32GuestCR0);
3724 AssertRCReturn(rc, rc);
3725 Log4(("Load[%RU32]: VMX_VMCS_GUEST_CR0=%#RX32 (uSetCR0=%#RX32 uZapCR0=%#RX32)\n", pVCpu->idCpu, u32GuestCR0, uSetCR0,
3726 uZapCR0));
3727
3728 /*
3729 * CR0 is shared between host and guest along with a CR0 read shadow. Therefore, certain bits must not be changed
3730 * by the guest because VT-x ignores saving/restoring them (namely CD, ET, NW) and for certain other bits
3731 * we want to be notified immediately of guest CR0 changes (e.g. PG to update our shadow page tables).
3732 */
3733 uint32_t u32CR0Mask = 0;
3734 u32CR0Mask = X86_CR0_PE
3735 | X86_CR0_NE
3736 | X86_CR0_WP
3737 | X86_CR0_PG
3738 | X86_CR0_ET /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.ET */
3739 | X86_CR0_CD /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.CD */
3740 | X86_CR0_NW; /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.NW */
3741
3742 /** @todo Avoid intercepting CR0.PE with unrestricted guests. Fix PGM
3743 * enmGuestMode to be in-sync with the current mode. See @bugref{6398}
3744 * and @bugref{6944}. */
3745#if 0
3746 if (pVM->hm.s.vmx.fUnrestrictedGuest)
3747 u32CR0Mask &= ~X86_CR0_PE;
3748#endif
3749 if (pVM->hm.s.fNestedPaging)
3750 u32CR0Mask &= ~X86_CR0_WP;
3751
3752        /* If the guest FPU state is active, we don't need to VM-exit on writes to FPU-related bits in CR0; otherwise intercept them. */
3753 if (fInterceptNM)
3754 {
3755 u32CR0Mask |= X86_CR0_TS
3756 | X86_CR0_MP;
3757 }
3758
3759 /* Write the CR0 mask into the VMCS and update the VCPU's copy of the current CR0 mask. */
3760 pVCpu->hm.s.vmx.u32CR0Mask = u32CR0Mask;
3761 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, u32CR0Mask);
3762 AssertRCReturn(rc, rc);
3763 Log4(("Load[%RU32]: VMX_VMCS_CTRL_CR0_MASK=%#RX32\n", pVCpu->idCpu, u32CR0Mask));
3764
3765 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR0);
3766 }
3767 return rc;
3768}
3769
3770
3771/**
3772 * Loads the guest control registers (CR3, CR4) into the guest-state area
3773 * in the VMCS.
3774 *
3775 * @returns VBox status code.
3777 * @param pVCpu Pointer to the VMCPU.
3778 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3779 * out-of-sync. Make sure to update the required fields
3780 * before using them.
3781 *
3782 * @remarks No-long-jump zone!!!
3783 */
3784static int hmR0VmxLoadGuestCR3AndCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3785{
3786 int rc = VINF_SUCCESS;
3787 PVM pVM = pVCpu->CTX_SUFF(pVM);
3788
3789 /*
3790 * Guest CR2.
3791 * It's always loaded in the assembler code. Nothing to do here.
3792 */
3793
3794 /*
3795 * Guest CR3.
3796 */
3797 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR3))
3798 {
3799 RTGCPHYS GCPhysGuestCR3 = NIL_RTGCPHYS;
3800 if (pVM->hm.s.fNestedPaging)
3801 {
3802 pVCpu->hm.s.vmx.HCPhysEPTP = PGMGetHyperCR3(pVCpu);
3803
3804 /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */
3805 Assert(pVCpu->hm.s.vmx.HCPhysEPTP);
3806 Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & UINT64_C(0xfff0000000000000)));
3807 Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & 0xfff));
3808
3809 /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */
3810 pVCpu->hm.s.vmx.HCPhysEPTP |= VMX_EPT_MEMTYPE_WB
3811 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
3812
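            /* Illustration: with WB memory type (6) and the default 4-level page walk (encoded as length-1 = 3 in
               bits 5:3), the bits OR'ed in above are 0x1e, so the EPTP becomes <4K-aligned EPT PML4 address> | 0x1e
               (illustrative; the exact value depends on the VMX_EPT_* definitions). */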
3813 /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */
3814 AssertMsg( ((pVCpu->hm.s.vmx.HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */
3815 && ((pVCpu->hm.s.vmx.HCPhysEPTP >> 7) & 0x1f) == 0, /* Bits 7:11 MBZ. */
3816 ("EPTP %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP));
3817 AssertMsg( !((pVCpu->hm.s.vmx.HCPhysEPTP >> 6) & 0x01) /* Bit 6 (EPT accessed & dirty bit). */
3818 || (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EPT_ACCESS_DIRTY),
3819 ("EPTP accessed/dirty bit not supported by CPU but set %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP));
3820
3821 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.HCPhysEPTP);
3822 AssertRCReturn(rc, rc);
3823 Log4(("Load[%RU32]: VMX_VMCS64_CTRL_EPTP_FULL=%#RX64\n", pVCpu->idCpu, pVCpu->hm.s.vmx.HCPhysEPTP));
3824
3825 if ( pVM->hm.s.vmx.fUnrestrictedGuest
3826 || CPUMIsGuestPagingEnabledEx(pMixedCtx))
3827 {
3828 /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. */
3829 if (CPUMIsGuestInPAEModeEx(pMixedCtx))
3830 {
3831 rc = PGMGstGetPaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); AssertRCReturn(rc, rc);
3832 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc);
3833 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc);
3834 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc);
3835 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc);
3836 }
3837
3838 /* The guest's view of its CR3 is unblemished with Nested Paging when the guest is using paging or we
3839 have Unrestricted Execution to handle the guest when it's not using paging. */
3840 GCPhysGuestCR3 = pMixedCtx->cr3;
3841 }
3842 else
3843 {
3844 /*
3845 * The guest is not using paging, but the CPU (VT-x) has to. While the guest thinks it accesses physical memory
3846 * directly, we use our identity-mapped page table to map guest-linear to guest-physical addresses.
3847 * EPT takes care of translating it to host-physical addresses.
3848 */
3849 RTGCPHYS GCPhys;
3850 Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable);
3851 Assert(PDMVmmDevHeapIsEnabled(pVM));
3852
3853 /* We obtain it here every time as the guest could have relocated this PCI region. */
3854 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
3855 AssertRCReturn(rc, rc);
3856
3857 GCPhysGuestCR3 = GCPhys;
3858 }
3859
3860 Log4(("Load[%RU32]: VMX_VMCS_GUEST_CR3=%#RGv (GstN)\n", pVCpu->idCpu, GCPhysGuestCR3));
3861 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_CR3, GCPhysGuestCR3);
3862 }
3863 else
3864 {
3865 /* Non-nested paging case, just use the hypervisor's CR3. */
3866 RTHCPHYS HCPhysGuestCR3 = PGMGetHyperCR3(pVCpu);
3867
3868 Log4(("Load[%RU32]: VMX_VMCS_GUEST_CR3=%#RHv (HstN)\n", pVCpu->idCpu, HCPhysGuestCR3));
3869 rc = VMXWriteVmcsHstN(VMX_VMCS_GUEST_CR3, HCPhysGuestCR3);
3870 }
3871 AssertRCReturn(rc, rc);
3872
3873 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR3);
3874 }
3875
3876 /*
3877 * Guest CR4.
3878     * ASSUMES this is done every time we get in from ring-3! (XCR0)
3879 */
3880 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR4))
3881 {
3882 Assert(!(pMixedCtx->cr4 >> 32));
3883 uint32_t u32GuestCR4 = pMixedCtx->cr4;
3884
3885 /* The guest's view of its CR4 is unblemished. */
3886 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, u32GuestCR4);
3887 AssertRCReturn(rc, rc);
3888 Log4(("Load[%RU32]: VMX_VMCS_CTRL_CR4_READ_SHADOW=%#RX32\n", pVCpu->idCpu, u32GuestCR4));
3889
3890 /* Setup VT-x's view of the guest CR4. */
3891 /*
3892 * If we're emulating real-mode using virtual-8086 mode, we want to redirect software interrupts to the 8086 program
3893 * interrupt handler. Clear the VME bit (the interrupt redirection bitmap is already all 0, see hmR3InitFinalizeR0())
3894 * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode".
3895 */
3896 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3897 {
3898 Assert(pVM->hm.s.vmx.pRealModeTSS);
3899 Assert(PDMVmmDevHeapIsEnabled(pVM));
3900 u32GuestCR4 &= ~X86_CR4_VME;
3901 }
3902
3903 if (pVM->hm.s.fNestedPaging)
3904 {
3905 if ( !CPUMIsGuestPagingEnabledEx(pMixedCtx)
3906 && !pVM->hm.s.vmx.fUnrestrictedGuest)
3907 {
3908 /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */
3909 u32GuestCR4 |= X86_CR4_PSE;
3910 /* Our identity mapping is a 32-bit page directory. */
3911 u32GuestCR4 &= ~X86_CR4_PAE;
3912 }
3913 /* else use guest CR4.*/
3914 }
3915 else
3916 {
3917 /*
3918             * The shadow paging mode and the guest paging mode can differ; the shadow follows the host paging mode,
3919             * so we need to adjust VT-x's view of CR4 according to our shadow page tables.
3920 */
3921 switch (pVCpu->hm.s.enmShadowMode)
3922 {
3923 case PGMMODE_REAL: /* Real-mode. */
3924 case PGMMODE_PROTECTED: /* Protected mode without paging. */
3925 case PGMMODE_32_BIT: /* 32-bit paging. */
3926 {
3927 u32GuestCR4 &= ~X86_CR4_PAE;
3928 break;
3929 }
3930
3931 case PGMMODE_PAE: /* PAE paging. */
3932 case PGMMODE_PAE_NX: /* PAE paging with NX. */
3933 {
3934 u32GuestCR4 |= X86_CR4_PAE;
3935 break;
3936 }
3937
3938 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
3939 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
3940#ifdef VBOX_ENABLE_64_BITS_GUESTS
3941 break;
3942#endif
3943 default:
3944 AssertFailed();
3945 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
3946 }
3947 }
3948
3949 /* We need to set and clear the CR4 specific bits here (mainly the X86_CR4_VMXE bit). */
3950 uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
3951 uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
3952 u32GuestCR4 |= uSetCR4;
3953 u32GuestCR4 &= uZapCR4;
3954
3955 /* Write VT-x's view of the guest CR4 into the VMCS. */
3956 Log4(("Load[%RU32]: VMX_VMCS_GUEST_CR4=%#RX32 (Set=%#RX32 Zap=%#RX32)\n", pVCpu->idCpu, u32GuestCR4, uSetCR4, uZapCR4));
3957 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR4, u32GuestCR4);
3958 AssertRCReturn(rc, rc);
3959
3960        /* Set up the CR4 mask. These CR4 flags are owned by the host; if the guest attempts to change them, a VM-exit occurs. */
3961 uint32_t u32CR4Mask = X86_CR4_VME
3962 | X86_CR4_PAE
3963 | X86_CR4_PGE
3964 | X86_CR4_PSE
3965 | X86_CR4_VMXE;
3966 if (pVM->cpum.ro.HostFeatures.fXSaveRstor)
3967 u32CR4Mask |= X86_CR4_OSXSAVE;
3968 pVCpu->hm.s.vmx.u32CR4Mask = u32CR4Mask;
3969 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, u32CR4Mask);
3970 AssertRCReturn(rc, rc);
3971
3972 /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+guest XCR0. */
3973 pVCpu->hm.s.fLoadSaveGuestXcr0 = (pMixedCtx->cr4 & X86_CR4_OSXSAVE) && pMixedCtx->aXcr[0] != ASMGetXcr0();
3974
3975 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR4);
3976 }
3977 return rc;
3978}
3979
3980
3981/**
3982 * Loads the guest debug registers into the guest-state area in the VMCS.
3983 * This also sets up whether #DB and MOV DRx accesses cause VM-exits.
3984 *
3985 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3986 *
3987 * @returns VBox status code.
3988 * @param pVCpu Pointer to the VMCPU.
3989 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3990 * out-of-sync. Make sure to update the required fields
3991 * before using them.
3992 *
3993 * @remarks No-long-jump zone!!!
3994 */
3995static int hmR0VmxLoadSharedDebugState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3996{
3997 if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG))
3998 return VINF_SUCCESS;
3999
4000#ifdef VBOX_STRICT
4001 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
4002 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
4003 {
4004 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
4005 Assert((pMixedCtx->dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0); /* Bits 63:32, 15, 14, 12, 11 are reserved. */
4006 Assert((pMixedCtx->dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); /* Bit 10 is reserved (RA1). */
4007 }
4008#endif
4009
4010 int rc;
4011 PVM pVM = pVCpu->CTX_SUFF(pVM);
4012 bool fInterceptDB = false;
4013 bool fInterceptMovDRx = false;
4014 if ( pVCpu->hm.s.fSingleInstruction
4015 || DBGFIsStepping(pVCpu))
4016 {
4017 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
4018 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG)
4019 {
4020 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG;
4021 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
4022 AssertRCReturn(rc, rc);
4023 Assert(fInterceptDB == false);
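            /* The monitor trap flag makes the CPU VM-exit after every guest instruction, so neither a #DB intercept
               nor setting EFLAGS.TF is needed for single-stepping here. */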
4024 }
4025 else
4026 {
4027 pMixedCtx->eflags.u32 |= X86_EFL_TF;
4028 pVCpu->hm.s.fClearTrapFlag = true;
4029 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RFLAGS);
4030 fInterceptDB = true;
4031 }
4032 }
4033
4034 if ( fInterceptDB
4035 || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
4036 {
4037 /*
4038 * Use the combined guest and host DRx values found in the hypervisor
4039 * register set because the debugger has breakpoints active or someone
4040 * is single stepping on the host side without a monitor trap flag.
4041 *
4042 * Note! DBGF expects a clean DR6 state before executing guest code.
4043 */
4044#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
4045 if ( CPUMIsGuestInLongModeEx(pMixedCtx)
4046 && !CPUMIsHyperDebugStateActivePending(pVCpu))
4047 {
4048 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
4049 Assert(CPUMIsHyperDebugStateActivePending(pVCpu));
4050 Assert(!CPUMIsGuestDebugStateActivePending(pVCpu));
4051 }
4052 else
4053#endif
4054 if (!CPUMIsHyperDebugStateActive(pVCpu))
4055 {
4056 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
4057 Assert(CPUMIsHyperDebugStateActive(pVCpu));
4058 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
4059 }
4060
4061 /* Update DR7. (The other DRx values are handled by CPUM one way or the other.) */
4062 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)CPUMGetHyperDR7(pVCpu));
4063 AssertRCReturn(rc, rc);
4064
4065 pVCpu->hm.s.fUsingHyperDR7 = true;
4066 fInterceptDB = true;
4067 fInterceptMovDRx = true;
4068 }
4069 else
4070 {
4071 /*
4072 * If the guest has enabled debug registers, we need to load them prior to
4073 * executing guest code so they'll trigger at the right time.
4074 */
4075 if (pMixedCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) /** @todo Why GD? */
4076 {
4077#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
4078 if ( CPUMIsGuestInLongModeEx(pMixedCtx)
4079 && !CPUMIsGuestDebugStateActivePending(pVCpu))
4080 {
4081 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
4082 Assert(CPUMIsGuestDebugStateActivePending(pVCpu));
4083 Assert(!CPUMIsHyperDebugStateActivePending(pVCpu));
4084 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
4085 }
4086 else
4087#endif
4088 if (!CPUMIsGuestDebugStateActive(pVCpu))
4089 {
4090 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
4091 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4092 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
4093 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
4094 }
4095 Assert(!fInterceptDB);
4096 Assert(!fInterceptMovDRx);
4097 }
4098 /*
4099         * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
4100 * must intercept #DB in order to maintain a correct DR6 guest value.
4101 */
4102#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
4103 else if ( !CPUMIsGuestDebugStateActivePending(pVCpu)
4104 && !CPUMIsGuestDebugStateActive(pVCpu))
4105#else
4106 else if (!CPUMIsGuestDebugStateActive(pVCpu))
4107#endif
4108 {
4109 fInterceptMovDRx = true;
4110 fInterceptDB = true;
4111 }
4112
4113 /* Update guest DR7. */
4114 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, pMixedCtx->dr[7]);
4115 AssertRCReturn(rc, rc);
4116
4117 pVCpu->hm.s.fUsingHyperDR7 = false;
4118 }
4119
4120 /*
4121 * Update the exception bitmap regarding intercepting #DB generated by the guest.
4122 */
4123 if ( fInterceptDB
4124 || pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
4125 {
4126 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_DB);
4127 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS);
4128 }
4129 else
4130 {
4131#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
4132 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB);
4133 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS);
4134#endif
4135 }
4136
4137 /*
4138 * Update the processor-based VM-execution controls regarding intercepting MOV DRx instructions.
4139 */
4140 if (fInterceptMovDRx)
4141 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
4142 else
4143 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
4144 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
4145 AssertRCReturn(rc, rc);
4146
4147 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_DEBUG);
4148 return VINF_SUCCESS;
4149}
4150
4151
4152#ifdef VBOX_STRICT
4153/**
4154 * Strict function to validate segment registers.
4155 *
4156 * @remarks ASSUMES CR0 is up to date.
4157 */
4158static void hmR0VmxValidateSegmentRegs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4159{
4160 /* Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". */
4161    /* NOTE: The reason we check for attribute value 0 and not just the unusable bit here is that hmR0VmxWriteSegmentReg()
4162 * only updates the VMCS' copy of the value with the unusable bit and doesn't change the guest-context value. */
4163 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
4164 && ( !CPUMIsGuestInRealModeEx(pCtx)
4165 && !CPUMIsGuestInV86ModeEx(pCtx)))
4166 {
4167 /* Protected mode checks */
4168 /* CS */
4169 Assert(pCtx->cs.Attr.n.u1Present);
4170 Assert(!(pCtx->cs.Attr.u & 0xf00));
4171 Assert(!(pCtx->cs.Attr.u & 0xfffe0000));
4172 Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff
4173 || !(pCtx->cs.Attr.n.u1Granularity));
4174 Assert( !(pCtx->cs.u32Limit & 0xfff00000)
4175 || (pCtx->cs.Attr.n.u1Granularity));
4176 /* CS cannot be loaded with NULL in protected mode. */
4177 Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS? */
4178 if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
4179 Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl);
4180 else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
4181 Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl);
4182 else
4183            AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u4Type));
4184 /* SS */
4185 Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
4186 Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL));
4187 if ( !(pCtx->cr0 & X86_CR0_PE)
4188 || pCtx->cs.Attr.n.u4Type == 3)
4189 {
4190 Assert(!pCtx->ss.Attr.n.u2Dpl);
4191 }
4192 if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
4193 {
4194 Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
4195 Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7);
4196 Assert(pCtx->ss.Attr.n.u1Present);
4197 Assert(!(pCtx->ss.Attr.u & 0xf00));
4198 Assert(!(pCtx->ss.Attr.u & 0xfffe0000));
4199 Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff
4200 || !(pCtx->ss.Attr.n.u1Granularity));
4201 Assert( !(pCtx->ss.u32Limit & 0xfff00000)
4202 || (pCtx->ss.Attr.n.u1Granularity));
4203 }
4204 /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). */
4205 if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
4206 {
4207 Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
4208 Assert(pCtx->ds.Attr.n.u1Present);
4209 Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL));
4210 Assert(!(pCtx->ds.Attr.u & 0xf00));
4211 Assert(!(pCtx->ds.Attr.u & 0xfffe0000));
4212 Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff
4213 || !(pCtx->ds.Attr.n.u1Granularity));
4214 Assert( !(pCtx->ds.u32Limit & 0xfff00000)
4215 || (pCtx->ds.Attr.n.u1Granularity));
4216 Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
4217 || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ));
4218 }
4219 if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
4220 {
4221 Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
4222 Assert(pCtx->es.Attr.n.u1Present);
4223 Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL));
4224 Assert(!(pCtx->es.Attr.u & 0xf00));
4225 Assert(!(pCtx->es.Attr.u & 0xfffe0000));
4226 Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff
4227 || !(pCtx->es.Attr.n.u1Granularity));
4228 Assert( !(pCtx->es.u32Limit & 0xfff00000)
4229 || (pCtx->es.Attr.n.u1Granularity));
4230 Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
4231 || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ));
4232 }
4233 if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
4234 {
4235 Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
4236 Assert(pCtx->fs.Attr.n.u1Present);
4237 Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL));
4238 Assert(!(pCtx->fs.Attr.u & 0xf00));
4239 Assert(!(pCtx->fs.Attr.u & 0xfffe0000));
4240 Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff
4241 || !(pCtx->fs.Attr.n.u1Granularity));
4242 Assert( !(pCtx->fs.u32Limit & 0xfff00000)
4243 || (pCtx->fs.Attr.n.u1Granularity));
4244 Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
4245 || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ));
4246 }
4247 if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
4248 {
4249 Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
4250 Assert(pCtx->gs.Attr.n.u1Present);
4251 Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL));
4252 Assert(!(pCtx->gs.Attr.u & 0xf00));
4253 Assert(!(pCtx->gs.Attr.u & 0xfffe0000));
4254 Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff
4255 || !(pCtx->gs.Attr.n.u1Granularity));
4256 Assert( !(pCtx->gs.u32Limit & 0xfff00000)
4257 || (pCtx->gs.Attr.n.u1Granularity));
4258 Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
4259 || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ));
4260 }
4261 /* 64-bit capable CPUs. */
4262# if HC_ARCH_BITS == 64
4263 Assert(!(pCtx->cs.u64Base >> 32));
4264 Assert(!pCtx->ss.Attr.u || !(pCtx->ss.u64Base >> 32));
4265 Assert(!pCtx->ds.Attr.u || !(pCtx->ds.u64Base >> 32));
4266 Assert(!pCtx->es.Attr.u || !(pCtx->es.u64Base >> 32));
4267# endif
4268 }
4269 else if ( CPUMIsGuestInV86ModeEx(pCtx)
4270 || ( CPUMIsGuestInRealModeEx(pCtx)
4271 && !pVM->hm.s.vmx.fUnrestrictedGuest))
4272 {
4273 /* Real and v86 mode checks. */
4274        /* hmR0VmxWriteSegmentReg() writes the modified attributes into the VMCS. We want what we're feeding to VT-x. */
4275 uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
4276 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
4277 {
4278 u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3; u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3;
4279 }
4280 else
4281 {
4282 u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u;
4283 u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
4284 }
4285
4286 /* CS */
4287 AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel));
4288 Assert(pCtx->cs.u32Limit == 0xffff);
4289 Assert(u32CSAttr == 0xf3);
4290 /* SS */
4291 Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4);
4292 Assert(pCtx->ss.u32Limit == 0xffff);
4293 Assert(u32SSAttr == 0xf3);
4294 /* DS */
4295 Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4);
4296 Assert(pCtx->ds.u32Limit == 0xffff);
4297 Assert(u32DSAttr == 0xf3);
4298 /* ES */
4299 Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4);
4300 Assert(pCtx->es.u32Limit == 0xffff);
4301 Assert(u32ESAttr == 0xf3);
4302 /* FS */
4303 Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4);
4304 Assert(pCtx->fs.u32Limit == 0xffff);
4305 Assert(u32FSAttr == 0xf3);
4306 /* GS */
4307 Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4);
4308 Assert(pCtx->gs.u32Limit == 0xffff);
4309 Assert(u32GSAttr == 0xf3);
4310 /* 64-bit capable CPUs. */
4311# if HC_ARCH_BITS == 64
4312 Assert(!(pCtx->cs.u64Base >> 32));
4313 Assert(!u32SSAttr || !(pCtx->ss.u64Base >> 32));
4314 Assert(!u32DSAttr || !(pCtx->ds.u64Base >> 32));
4315 Assert(!u32ESAttr || !(pCtx->es.u64Base >> 32));
4316# endif
4317 }
4318}
4319#endif /* VBOX_STRICT */
4320
4321
4322/**
4323 * Writes a guest segment register into the guest-state area in the VMCS.
4324 *
4325 * @returns VBox status code.
4326 * @param pVCpu Pointer to the VMCPU.
4327 * @param idxSel Index of the selector in the VMCS.
4328 * @param idxLimit Index of the segment limit in the VMCS.
4329 * @param idxBase Index of the segment base in the VMCS.
4330 * @param idxAccess Index of the access rights of the segment in the VMCS.
4331 * @param pSelReg Pointer to the segment selector.
4332 *
4333 * @remarks No-long-jump zone!!!
4334 */
4335static int hmR0VmxWriteSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase,
4336 uint32_t idxAccess, PCPUMSELREG pSelReg)
4337{
4338 int rc = VMXWriteVmcs32(idxSel, pSelReg->Sel); /* 16-bit guest selector field. */
4339 AssertRCReturn(rc, rc);
4340 rc = VMXWriteVmcs32(idxLimit, pSelReg->u32Limit); /* 32-bit guest segment limit field. */
4341 AssertRCReturn(rc, rc);
4342 rc = VMXWriteVmcsGstN(idxBase, pSelReg->u64Base); /* Natural width guest segment base field.*/
4343 AssertRCReturn(rc, rc);
4344
4345 uint32_t u32Access = pSelReg->Attr.u;
4346 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
4347 {
4348 /* VT-x requires our real-using-v86 mode hack to override the segment access-right bits. */
4349 u32Access = 0xf3;
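        /* 0xf3 = present, DPL 3, S=1, type 3 (accessed read/write data segment) - the attributes virtual-8086 mode expects. */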
4350 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
4351 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
4352 }
4353 else
4354 {
4355 /*
4356         * The way to differentiate between a real null selector and a selector that was merely loaded with 0 in
4357         * real-mode is the segment attributes. A selector loaded in real-mode with the value 0 is valid and usable in
4358         * protected-mode, and we should -not- mark it as an unusable segment. Both the recompiler & VT-x ensure that NULL
4359         * selectors loaded in protected-mode have their attributes set to 0.
4360 */
4361 if (!u32Access)
4362 u32Access = X86DESCATTR_UNUSABLE;
4363 }
4364
4365 /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */
4366 AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED),
4367 ("Access bit not set for usable segment. idx=%#x sel=%#x attr %#x\n", idxBase, pSelReg, pSelReg->Attr.u));
4368
4369 rc = VMXWriteVmcs32(idxAccess, u32Access); /* 32-bit guest segment access-rights field. */
4370 AssertRCReturn(rc, rc);
4371 return rc;
4372}
4373
4374
4375/**
4376 * Loads the guest segment registers, GDTR, IDTR, LDTR, (TR, FS and GS bases)
4377 * into the guest-state area in the VMCS.
4378 *
4379 * @returns VBox status code.
4381 * @param pVCpu Pointer to the VMCPU.
4382 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4383 * out-of-sync. Make sure to update the required fields
4384 * before using them.
4385 *
4386 * @remarks ASSUMES pMixedCtx->cr0 is up to date (strict builds validation).
4387 * @remarks No-long-jump zone!!!
4388 */
4389static int hmR0VmxLoadGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4390{
4391 int rc = VERR_INTERNAL_ERROR_5;
4392 PVM pVM = pVCpu->CTX_SUFF(pVM);
4393
4394 /*
4395 * Guest Segment registers: CS, SS, DS, ES, FS, GS.
4396 */
4397 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS))
4398 {
4399 /* Save the segment attributes for real-on-v86 mode hack, so we can restore them on VM-exit. */
4400 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
4401 {
4402 pVCpu->hm.s.vmx.RealMode.AttrCS.u = pMixedCtx->cs.Attr.u;
4403 pVCpu->hm.s.vmx.RealMode.AttrSS.u = pMixedCtx->ss.Attr.u;
4404 pVCpu->hm.s.vmx.RealMode.AttrDS.u = pMixedCtx->ds.Attr.u;
4405 pVCpu->hm.s.vmx.RealMode.AttrES.u = pMixedCtx->es.Attr.u;
4406 pVCpu->hm.s.vmx.RealMode.AttrFS.u = pMixedCtx->fs.Attr.u;
4407 pVCpu->hm.s.vmx.RealMode.AttrGS.u = pMixedCtx->gs.Attr.u;
4408 }
4409
4410#ifdef VBOX_WITH_REM
4411 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
4412 {
4413 Assert(pVM->hm.s.vmx.pRealModeTSS);
4414 AssertCompile(PGMMODE_REAL < PGMMODE_PROTECTED);
4415 if ( pVCpu->hm.s.vmx.fWasInRealMode
4416 && PGMGetGuestMode(pVCpu) >= PGMMODE_PROTECTED)
4417 {
4418 /* Signal that the recompiler must flush its code-cache as the guest -may- rewrite code it will later execute
4419 in real-mode (e.g. OpenBSD 4.0) */
4420 REMFlushTBs(pVM);
4421 Log4(("Load[%RU32]: Switch to protected mode detected!\n", pVCpu->idCpu));
4422 pVCpu->hm.s.vmx.fWasInRealMode = false;
4423 }
4424 }
4425#endif
4426 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_CS, VMX_VMCS32_GUEST_CS_LIMIT, VMX_VMCS_GUEST_CS_BASE,
4427 VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS, &pMixedCtx->cs);
4428 AssertRCReturn(rc, rc);
4429 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_SS, VMX_VMCS32_GUEST_SS_LIMIT, VMX_VMCS_GUEST_SS_BASE,
4430 VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS, &pMixedCtx->ss);
4431 AssertRCReturn(rc, rc);
4432 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_DS, VMX_VMCS32_GUEST_DS_LIMIT, VMX_VMCS_GUEST_DS_BASE,
4433 VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS, &pMixedCtx->ds);
4434 AssertRCReturn(rc, rc);
4435 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_ES, VMX_VMCS32_GUEST_ES_LIMIT, VMX_VMCS_GUEST_ES_BASE,
4436 VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS, &pMixedCtx->es);
4437 AssertRCReturn(rc, rc);
4438 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_FS, VMX_VMCS32_GUEST_FS_LIMIT, VMX_VMCS_GUEST_FS_BASE,
4439 VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS, &pMixedCtx->fs);
4440 AssertRCReturn(rc, rc);
4441 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_GS, VMX_VMCS32_GUEST_GS_LIMIT, VMX_VMCS_GUEST_GS_BASE,
4442 VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS, &pMixedCtx->gs);
4443 AssertRCReturn(rc, rc);
4444
4445#ifdef VBOX_STRICT
4446 /* Validate. */
4447 hmR0VmxValidateSegmentRegs(pVM, pVCpu, pMixedCtx);
4448#endif
4449
4450 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS);
4451 Log4(("Load[%RU32]: CS=%#RX16 Base=%#RX64 Limit=%#RX32 Attr=%#RX32\n", pVCpu->idCpu, pMixedCtx->cs.Sel,
4452 pMixedCtx->cs.u64Base, pMixedCtx->cs.u32Limit, pMixedCtx->cs.Attr.u));
4453 }
4454
4455 /*
4456 * Guest TR.
4457 */
4458 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_TR))
4459 {
4460 /*
4461 * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is achieved
4462 * using the interrupt redirection bitmap (all bits cleared to let the guest handle INT-n's) in the TSS.
4463 * See hmR3InitFinalizeR0() to see how pRealModeTSS is setup.
4464 */
4465 uint16_t u16Sel = 0;
4466 uint32_t u32Limit = 0;
4467 uint64_t u64Base = 0;
4468 uint32_t u32AccessRights = 0;
4469
4470 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
4471 {
4472 u16Sel = pMixedCtx->tr.Sel;
4473 u32Limit = pMixedCtx->tr.u32Limit;
4474 u64Base = pMixedCtx->tr.u64Base;
4475 u32AccessRights = pMixedCtx->tr.Attr.u;
4476 }
4477 else
4478 {
4479 Assert(pVM->hm.s.vmx.pRealModeTSS);
4480 Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMR3CanExecuteGuest() -XXX- what about inner loop changes? */
4481
4482 /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */
4483 RTGCPHYS GCPhys;
4484 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
4485 AssertRCReturn(rc, rc);
4486
4487 X86DESCATTR DescAttr;
4488 DescAttr.u = 0;
4489 DescAttr.n.u1Present = 1;
4490 DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
4491
4492 u16Sel = 0;
4493 u32Limit = HM_VTX_TSS_SIZE;
4494 u64Base = GCPhys; /* in real-mode phys = virt. */
4495 u32AccessRights = DescAttr.u;
4496 }
4497
4498 /* Validate. */
4499 Assert(!(u16Sel & RT_BIT(2)));
4500 AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY
4501 || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights));
4502 AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights));
4503 Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/
4504 Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/
4505 Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. */
4506 Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */
4507 Assert( (u32Limit & 0xfff) == 0xfff
4508 || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */
4509 Assert( !(pMixedCtx->tr.u32Limit & 0xfff00000)
4510 || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */
4511
4512 rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_TR, u16Sel); AssertRCReturn(rc, rc);
4513 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit); AssertRCReturn(rc, rc);
4514 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_TR_BASE, u64Base); AssertRCReturn(rc, rc);
4515 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights); AssertRCReturn(rc, rc);
4516
4517 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_TR);
4518 Log4(("Load[%RU32]: VMX_VMCS_GUEST_TR_BASE=%#RX64\n", pVCpu->idCpu, u64Base));
4519 }
4520
4521 /*
4522 * Guest GDTR.
4523 */
4524 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_GDTR))
4525 {
4526 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pMixedCtx->gdtr.cbGdt); AssertRCReturn(rc, rc);
4527 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, pMixedCtx->gdtr.pGdt); AssertRCReturn(rc, rc);
4528
4529 /* Validate. */
4530 Assert(!(pMixedCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */
4531
4532 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_GDTR);
4533 Log4(("Load[%RU32]: VMX_VMCS_GUEST_GDTR_BASE=%#RX64\n", pVCpu->idCpu, pMixedCtx->gdtr.pGdt));
4534 }
4535
4536 /*
4537 * Guest LDTR.
4538 */
4539 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_LDTR))
4540 {
4541 /* The unusable bit is specific to VT-x; if it's a null selector, mark it as an unusable segment. */
4542 uint32_t u32Access = 0;
4543 if (!pMixedCtx->ldtr.Attr.u)
4544 u32Access = X86DESCATTR_UNUSABLE;
4545 else
4546 u32Access = pMixedCtx->ldtr.Attr.u;
4547
4548 rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_LDTR, pMixedCtx->ldtr.Sel); AssertRCReturn(rc, rc);
4549 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pMixedCtx->ldtr.u32Limit); AssertRCReturn(rc, rc);
4550 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_LDTR_BASE, pMixedCtx->ldtr.u64Base); AssertRCReturn(rc, rc);
4551 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access); AssertRCReturn(rc, rc);
4552
4553 /* Validate. */
4554 if (!(u32Access & X86DESCATTR_UNUSABLE))
4555 {
4556 Assert(!(pMixedCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */
4557 Assert(pMixedCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */
4558 Assert(!pMixedCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */
4559 Assert(pMixedCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */
4560 Assert(!pMixedCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */
4561 Assert(!(pMixedCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */
4562 Assert( (pMixedCtx->ldtr.u32Limit & 0xfff) == 0xfff
4563 || !pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */
4564 Assert( !(pMixedCtx->ldtr.u32Limit & 0xfff00000)
4565 || pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */
4566 }
4567
4568 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_LDTR);
4569 Log4(("Load[%RU32]: VMX_VMCS_GUEST_LDTR_BASE=%#RX64\n", pVCpu->idCpu, pMixedCtx->ldtr.u64Base));
4570 }
4571
4572 /*
4573 * Guest IDTR.
4574 */
4575 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_IDTR))
4576 {
4577 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pMixedCtx->idtr.cbIdt); AssertRCReturn(rc, rc);
4578 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, pMixedCtx->idtr.pIdt); AssertRCReturn(rc, rc);
4579
4580 /* Validate. */
4581 Assert(!(pMixedCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */
4582
4583 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_IDTR);
4584 Log4(("Load[%RU32]: VMX_VMCS_GUEST_IDTR_BASE=%#RX64\n", pVCpu->idCpu, pMixedCtx->idtr.pIdt));
4585 }
4586
4587 return VINF_SUCCESS;
4588}
4589
4590
4591/**
4592 * Loads certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
4593 * areas.
4594 *
4595 * These MSRs will automatically be loaded to the host CPU on every successful
4596 * VM-entry and stored from the host CPU on every successful VM-exit. This also
4597 * creates/updates MSR slots for the host MSRs. The actual host MSR values are
4598 * -not- updated here for performance reasons. See hmR0VmxSaveHostMsrs().
4599 *
4600 * Also loads the sysenter MSRs into the guest-state area in the VMCS.
4601 *
4602 * @returns VBox status code.
4603 * @param pVCpu Pointer to the VMCPU.
4604 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4605 * out-of-sync. Make sure to update the required fields
4606 * before using them.
4607 *
4608 * @remarks No-long-jump zone!!!
4609 */
4610static int hmR0VmxLoadGuestMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4611{
4612 AssertPtr(pVCpu);
4613 AssertPtr(pVCpu->hm.s.vmx.pvGuestMsr);
4614
4615 /*
4616 * MSRs for which we use the auto-load/store MSR area in the VMCS.
4617 */
4618 PVM pVM = pVCpu->CTX_SUFF(pVM);
4619 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS))
4620 {
4621 /* For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(). */
4622#if HC_ARCH_BITS == 32
4623 if (pVM->hm.s.fAllow64BitGuests)
4624 {
4625 int rc = VINF_SUCCESS;
4626 rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_LSTAR, pMixedCtx->msrLSTAR, false, NULL);
4627 rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K6_STAR, pMixedCtx->msrSTAR, false, NULL);
4628 rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_SF_MASK, pMixedCtx->msrSFMASK, false, NULL);
4629 rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_KERNEL_GS_BASE, pMixedCtx->msrKERNELGSBASE, false, NULL);
4630 AssertRCReturn(rc, rc);
4631# ifdef LOG_ENABLED
4632 PVMXAUTOMSR pMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
4633 for (uint32_t i = 0; i < pVCpu->hm.s.vmx.cMsrs; i++, pMsr++)
4634 {
4635 Log4(("Load[%RU32]: MSR[%RU32]: u32Msr=%#RX32 u64Value=%#RX64\n", pVCpu->idCpu, i, pMsr->u32Msr,
4636 pMsr->u64Value));
4637 }
4638# endif
4639 }
4640#endif
4641 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS);
4642 }
4643
4644 /*
4645 * Guest Sysenter MSRs.
4646 * These flags are only set when MSR-bitmaps are not supported by the CPU and we cause
4647 * VM-exits on WRMSRs for these MSRs.
4648 */
4649 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR))
4650 {
4651 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pMixedCtx->SysEnter.cs); AssertRCReturn(rc, rc);
4652 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR);
4653 }
4654
4655 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR))
4656 {
4657 int rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, pMixedCtx->SysEnter.eip); AssertRCReturn(rc, rc);
4658 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
4659 }
4660
4661 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR))
4662 {
4663 int rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, pMixedCtx->SysEnter.esp); AssertRCReturn(rc, rc);
4664 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4665 }
4666
4667 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_EFER_MSR))
4668 {
4669 if (hmR0VmxShouldSwapEferMsr(pVCpu, pMixedCtx))
4670 {
4671 /*
4672 * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option
4673 * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}.
4674 */
4675 if (pVM->hm.s.vmx.fSupportsVmcsEfer)
4676 {
4677 int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, pMixedCtx->msrEFER);
4678 AssertRCReturn(rc, rc);
4679 Log4(("Load[%RU32]: VMX_VMCS64_GUEST_EFER_FULL=%#RX64\n", pVCpu->idCpu, pMixedCtx->msrEFER));
4680 }
4681 else
4682 {
4683 int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K6_EFER, pMixedCtx->msrEFER, false /* fUpdateHostMsr */,
4684 NULL /* pfAddedAndUpdated */);
4685 AssertRCReturn(rc, rc);
4686
4687 /* We need to intercept reads too, see @bugref{7386#c16}. */
4688 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
4689 hmR0VmxSetMsrPermission(pVCpu, MSR_K6_EFER, VMXMSREXIT_INTERCEPT_READ, VMXMSREXIT_INTERCEPT_WRITE);
4690 Log4(("Load[%RU32]: MSR[--]: u32Msr=%#RX32 u64Value=%#RX64 cMsrs=%u\n", pVCpu->idCpu, MSR_K6_EFER,
4691 pMixedCtx->msrEFER, pVCpu->hm.s.vmx.cMsrs));
4692 }
4693 }
4694 else if (!pVM->hm.s.vmx.fSupportsVmcsEfer)
4695 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, MSR_K6_EFER);
4696 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_EFER_MSR);
4697 }
4698
4699 return VINF_SUCCESS;
4700}
4701
4702
4703/**
4704 * Loads the guest activity state into the guest-state area in the VMCS.
4705 *
4706 * @returns VBox status code.
4707 * @param pVCpu Pointer to the VMCPU.
4708 * @param pCtx Pointer to the guest-CPU context. The data may be
4709 * out-of-sync. Make sure to update the required fields
4710 * before using them.
4711 *
4712 * @remarks No-long-jump zone!!!
4713 */
4714static int hmR0VmxLoadGuestActivityState(PVMCPU pVCpu, PCPUMCTX pCtx)
4715{
4716 NOREF(pCtx);
4717 /** @todo See if we can make use of other states, e.g.
4718 * VMX_VMCS_GUEST_ACTIVITY_SHUTDOWN or HLT. */
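 /* For reference: besides "active" (0), the architecture defines the HLT (1), shutdown (2) and
    wait-for-SIPI (3) activity states; only the active state is used below. */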
4719 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_ACTIVITY_STATE))
4720 {
4721 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_VMCS_GUEST_ACTIVITY_ACTIVE);
4722 AssertRCReturn(rc, rc);
4723
4724 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_ACTIVITY_STATE);
4725 }
4726 return VINF_SUCCESS;
4727}
4728
4729
4730/**
4731 * Sets up the appropriate function to run guest code.
4732 *
4733 * @returns VBox status code.
4734 * @param pVCpu Pointer to the VMCPU.
4735 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4736 * out-of-sync. Make sure to update the required fields
4737 * before using them.
4738 *
4739 * @remarks No-long-jump zone!!!
4740 */
4741static int hmR0VmxSetupVMRunHandler(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4742{
4743 if (CPUMIsGuestInLongModeEx(pMixedCtx))
4744 {
4745#ifndef VBOX_ENABLE_64_BITS_GUESTS
4746 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
4747#endif
4748 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */
4749#if HC_ARCH_BITS == 32
4750 /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */
4751 if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64)
4752 {
4753 if (pVCpu->hm.s.vmx.pfnStartVM != NULL) /* Very first entry would have saved host-state already, ignore it. */
4754 {
4755 /* Currently, all mode changes send us back to ring-3, so these should be set. See @bugref{6944}. */
4756 AssertMsg(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_VMX_EXIT_CTLS
4757 | HM_CHANGED_VMX_ENTRY_CTLS
4758 | HM_CHANGED_GUEST_EFER_MSR), ("flags=%#x\n", HMCPU_CF_VALUE(pVCpu)));
4759 }
4760 pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
4761 }
4762#else
4763 /* 64-bit host. */
4764 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64;
4765#endif
4766 }
4767 else
4768 {
4769 /* Guest is not in long mode, use the 32-bit handler. */
4770#if HC_ARCH_BITS == 32
4771 if ( pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32
4772 && pVCpu->hm.s.vmx.pfnStartVM != NULL) /* Very first entry would have saved host-state already, ignore it. */
4773 {
4774 /* Currently, all mode changes send us back to ring-3, so these should be set. See @bugref{6944}. */
4775 AssertMsg(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_VMX_EXIT_CTLS
4776 | HM_CHANGED_VMX_ENTRY_CTLS
4777 | HM_CHANGED_GUEST_EFER_MSR), ("flags=%#x\n", HMCPU_CF_VALUE(pVCpu)));
4778 }
4779#endif
4780 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
4781 }
4782 Assert(pVCpu->hm.s.vmx.pfnStartVM);
4783 return VINF_SUCCESS;
4784}
4785
4786
4787/**
4788 * Wrapper for running the guest code in VT-x.
4789 *
4790 * @returns VBox strict status code.
4791 * @param pVM Pointer to the VM.
4792 * @param pVCpu Pointer to the VMCPU.
4793 * @param pCtx Pointer to the guest-CPU context.
4794 *
4795 * @remarks No-long-jump zone!!!
4796 */
4797DECLINLINE(int) hmR0VmxRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4798{
4799 /*
4800 * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses floating-point operations
4801 * using SSE instructions. Some XMM registers (XMM6-XMM15) are callee-saved, hence the need for this XMM wrapper.
4802 * Refer to the MSDN docs. "Configuring Programs for 64-bit / x64 Software Conventions / Register Usage" for details.
4803 */
4804 bool const fResumeVM = RT_BOOL(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED);
4805 /** @todo Add stats for resume vs launch. */
4806#ifdef VBOX_WITH_KERNEL_USING_XMM
4807 return HMR0VMXStartVMWrapXMM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM);
4808#else
4809 return pVCpu->hm.s.vmx.pfnStartVM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu);
4810#endif
4811}
4812
4813
4814/**
4815 * Reports world-switch error and dumps some useful debug info.
4816 *
4817 * @param pVM Pointer to the VM.
4818 * @param pVCpu Pointer to the VMCPU.
4819 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4820 * @param pCtx Pointer to the guest-CPU context.
4821 * @param pVmxTransient Pointer to the VMX transient structure (only
4822 * exitReason updated).
4823 */
4824static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rcVMRun, PCPUMCTX pCtx, PVMXTRANSIENT pVmxTransient)
4825{
4826 Assert(pVM);
4827 Assert(pVCpu);
4828 Assert(pCtx);
4829 Assert(pVmxTransient);
4830 HMVMX_ASSERT_PREEMPT_SAFE();
4831
4832 Log4(("VM-entry failure: %Rrc\n", rcVMRun));
4833 switch (rcVMRun)
4834 {
4835 case VERR_VMX_INVALID_VMXON_PTR:
4836 AssertFailed();
4837 break;
4838 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4839 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4840 {
4841 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4842 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4843 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
4844 AssertRC(rc);
4845
4846 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
4847 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4848 Cannot do it here as we may have been long preempted. */
4849
4850#ifdef VBOX_STRICT
4851 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4852 pVmxTransient->uExitReason));
4853 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQualification));
4854 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4855 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4856 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4857 else
4858 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4859 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4860 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4861
4862 /* VMX control bits. */
4863 uint32_t u32Val;
4864 uint64_t u64Val;
4865 RTHCUINTREG uHCReg;
4866 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val); AssertRC(rc);
4867 Log4(("VMX_VMCS32_CTRL_PIN_EXEC %#RX32\n", u32Val));
4868 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val); AssertRC(rc);
4869 Log4(("VMX_VMCS32_CTRL_PROC_EXEC %#RX32\n", u32Val));
4870 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val); AssertRC(rc);
4871 Log4(("VMX_VMCS32_CTRL_PROC_EXEC2 %#RX32\n", u32Val));
4872 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRC(rc);
4873 Log4(("VMX_VMCS32_CTRL_ENTRY %#RX32\n", u32Val));
4874 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val); AssertRC(rc);
4875 Log4(("VMX_VMCS32_CTRL_EXIT %#RX32\n", u32Val));
4876 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, &u32Val); AssertRC(rc);
4877 Log4(("VMX_VMCS32_CTRL_CR3_TARGET_COUNT %#RX32\n", u32Val));
4878 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32Val); AssertRC(rc);
4879 Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", u32Val));
4880 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &u32Val); AssertRC(rc);
4881 Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", u32Val));
4882 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &u32Val); AssertRC(rc);
4883 Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %u\n", u32Val));
4884 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, &u32Val); AssertRC(rc);
4885 Log4(("VMX_VMCS32_CTRL_TPR_THRESHOLD %u\n", u32Val));
4886 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &u32Val); AssertRC(rc);
4887 Log4(("VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT %u (guest MSRs)\n", u32Val));
4888 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &u32Val); AssertRC(rc);
4889 Log4(("VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT %u (host MSRs)\n", u32Val));
4890 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &u32Val); AssertRC(rc);
4891 Log4(("VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT %u (guest MSRs)\n", u32Val));
4892 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val); AssertRC(rc);
4893 Log4(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP %#RX32\n", u32Val));
4894 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, &u32Val); AssertRC(rc);
4895 Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK %#RX32\n", u32Val));
4896 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, &u32Val); AssertRC(rc);
4897 Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH %#RX32\n", u32Val));
4898 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc);
4899 Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg));
4900 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc);
4901 Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
4902 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc);
4903 Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg));
4904 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc);
4905 Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
4906 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
4907 Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
4908
4909 /* Guest bits. */
4910 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); AssertRC(rc);
4911 Log4(("Old Guest Rip %#RX64 New %#RX64\n", pCtx->rip, u64Val));
4912 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); AssertRC(rc);
4913 Log4(("Old Guest Rsp %#RX64 New %#RX64\n", pCtx->rsp, u64Val));
4914 rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val); AssertRC(rc);
4915 Log4(("Old Guest Rflags %#RX32 New %#RX32\n", pCtx->eflags.u32, u32Val));
4916 rc = VMXReadVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, &u32Val); AssertRC(rc);
4917 Log4(("VMX_VMCS16_GUEST_FIELD_VPID %u\n", u32Val));
4918
4919 /* Host bits. */
4920 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR0, &uHCReg); AssertRC(rc);
4921 Log4(("Host CR0 %#RHr\n", uHCReg));
4922 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR3, &uHCReg); AssertRC(rc);
4923 Log4(("Host CR3 %#RHr\n", uHCReg));
4924 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR4, &uHCReg); AssertRC(rc);
4925 Log4(("Host CR4 %#RHr\n", uHCReg));
4926
4927 RTGDTR HostGdtr;
4928 PCX86DESCHC pDesc;
4929 ASMGetGDTR(&HostGdtr);
4930 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_CS, &u32Val); AssertRC(rc);
4931 Log4(("Host CS %#08x\n", u32Val));
4932 if (u32Val < HostGdtr.cbGdt)
4933 {
4934 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4935 HMR0DumpDescriptor(pDesc, u32Val, "CS: ");
4936 }
4937
4938 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_DS, &u32Val); AssertRC(rc);
4939 Log4(("Host DS %#08x\n", u32Val));
4940 if (u32Val < HostGdtr.cbGdt)
4941 {
4942 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4943 HMR0DumpDescriptor(pDesc, u32Val, "DS: ");
4944 }
4945
4946 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_ES, &u32Val); AssertRC(rc);
4947 Log4(("Host ES %#08x\n", u32Val));
4948 if (u32Val < HostGdtr.cbGdt)
4949 {
4950 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4951 HMR0DumpDescriptor(pDesc, u32Val, "ES: ");
4952 }
4953
4954 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_FS, &u32Val); AssertRC(rc);
4955 Log4(("Host FS %#08x\n", u32Val));
4956 if (u32Val < HostGdtr.cbGdt)
4957 {
4958 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4959 HMR0DumpDescriptor(pDesc, u32Val, "FS: ");
4960 }
4961
4962 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_GS, &u32Val); AssertRC(rc);
4963 Log4(("Host GS %#08x\n", u32Val));
4964 if (u32Val < HostGdtr.cbGdt)
4965 {
4966 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4967 HMR0DumpDescriptor(pDesc, u32Val, "GS: ");
4968 }
4969
4970 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_SS, &u32Val); AssertRC(rc);
4971 Log4(("Host SS %#08x\n", u32Val));
4972 if (u32Val < HostGdtr.cbGdt)
4973 {
4974 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4975 HMR0DumpDescriptor(pDesc, u32Val, "SS: ");
4976 }
4977
4978 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_TR, &u32Val); AssertRC(rc);
4979 Log4(("Host TR %#08x\n", u32Val));
4980 if (u32Val < HostGdtr.cbGdt)
4981 {
4982 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4983 HMR0DumpDescriptor(pDesc, u32Val, "TR: ");
4984 }
4985
4986 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_TR_BASE, &uHCReg); AssertRC(rc);
4987 Log4(("Host TR Base %#RHv\n", uHCReg));
4988 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, &uHCReg); AssertRC(rc);
4989 Log4(("Host GDTR Base %#RHv\n", uHCReg));
4990 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, &uHCReg); AssertRC(rc);
4991 Log4(("Host IDTR Base %#RHv\n", uHCReg));
4992 rc = VMXReadVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, &u32Val); AssertRC(rc);
4993 Log4(("Host SYSENTER CS %#08x\n", u32Val));
4994 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_EIP, &uHCReg); AssertRC(rc);
4995 Log4(("Host SYSENTER EIP %#RHv\n", uHCReg));
4996 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_ESP, &uHCReg); AssertRC(rc);
4997 Log4(("Host SYSENTER ESP %#RHv\n", uHCReg));
4998 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RSP, &uHCReg); AssertRC(rc);
4999 Log4(("Host RSP %#RHv\n", uHCReg));
5000 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RIP, &uHCReg); AssertRC(rc);
5001 Log4(("Host RIP %#RHv\n", uHCReg));
5002# if HC_ARCH_BITS == 64
5003 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
5004 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5005 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5006 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
5007 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5008 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
5009# endif
5010#endif /* VBOX_STRICT */
5011 break;
5012 }
5013
5014 default:
5015 /* Impossible */
5016 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
5017 break;
5018 }
5019 NOREF(pVM); NOREF(pCtx);
5020}
5021
5022
5023#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
5024#ifndef VMX_USE_CACHED_VMCS_ACCESSES
5025# error "VMX_USE_CACHED_VMCS_ACCESSES not defined when it should be!"
5026#endif
5027#ifdef VBOX_STRICT
5028static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5029{
5030 switch (idxField)
5031 {
5032 case VMX_VMCS_GUEST_RIP:
5033 case VMX_VMCS_GUEST_RSP:
5034 case VMX_VMCS_GUEST_SYSENTER_EIP:
5035 case VMX_VMCS_GUEST_SYSENTER_ESP:
5036 case VMX_VMCS_GUEST_GDTR_BASE:
5037 case VMX_VMCS_GUEST_IDTR_BASE:
5038 case VMX_VMCS_GUEST_CS_BASE:
5039 case VMX_VMCS_GUEST_DS_BASE:
5040 case VMX_VMCS_GUEST_ES_BASE:
5041 case VMX_VMCS_GUEST_FS_BASE:
5042 case VMX_VMCS_GUEST_GS_BASE:
5043 case VMX_VMCS_GUEST_SS_BASE:
5044 case VMX_VMCS_GUEST_LDTR_BASE:
5045 case VMX_VMCS_GUEST_TR_BASE:
5046 case VMX_VMCS_GUEST_CR3:
5047 return true;
5048 }
5049 return false;
5050}
5051
5052static bool hmR0VmxIsValidReadField(uint32_t idxField)
5053{
5054 switch (idxField)
5055 {
5056 /* Read-only fields. */
5057 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5058 return true;
5059 }
5060 /* Remaining readable fields should also be writable. */
5061 return hmR0VmxIsValidWriteField(idxField);
5062}
5063#endif /* VBOX_STRICT */
5064
5065
5066/**
5067 * Executes the specified handler in 64-bit mode.
5068 *
5069 * @returns VBox status code.
5070 * @param pVM Pointer to the VM.
5071 * @param pVCpu Pointer to the VMCPU.
5072 * @param pCtx Pointer to the guest CPU context.
5073 * @param enmOp The operation to perform.
5074 * @param cParams Number of parameters.
5075 * @param paParam Array of 32-bit parameters.
5076 */
5077VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp,
5078 uint32_t cParams, uint32_t *paParam)
5079{
5080 NOREF(pCtx);
5081
5082 AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5083 Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END);
5084 Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField));
5085 Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField));
5086
5087#ifdef VBOX_STRICT
5088 for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries; i++)
5089 Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i]));
5090
5091 for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries; i++)
5092 Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i]));
5093#endif
5094
5095 /* Disable interrupts. */
5096 RTCCUINTREG fOldEFlags = ASMIntDisableFlags();
5097
5098#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5099 RTCPUID idHostCpu = RTMpCpuId();
5100 CPUMR0SetLApic(pVCpu, idHostCpu);
5101#endif
5102
5103 PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu();
5104 RTHCPHYS HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5105
5106 /* Clear the VMCS: this marks it inactive, clears implementation-specific data and writes the VMCS data back to memory. */
5107 VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
5108
5109 /* Leave VMX Root Mode. */
5110 VMXDisable();
5111
5112 SUPR0ChangeCR4(0, ~X86_CR4_VMXE);
5113
5114 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5115 CPUMSetHyperEIP(pVCpu, enmOp);
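 /* The parameters are pushed in reverse order so that paParam[0] ends up on top of the hypervisor stack. */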
5116 for (int i = (int)cParams - 1; i >= 0; i--)
5117 CPUMPushHyper(pVCpu, paParam[i]);
5118
5119 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
5120
5121 /* Call the switcher. */
5122 int rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5123 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
5124
5125 /** @todo replace with hmR0VmxEnterRootMode() and hmR0VmxLeaveRootMode(). */
5126 /* Make sure the VMX instructions don't cause #UD faults. */
5127 SUPR0ChangeCR4(X86_CR4_VMXE, ~0);
5128
5129 /* Re-enter VMX Root Mode */
5130 int rc2 = VMXEnable(HCPhysCpuPage);
5131 if (RT_FAILURE(rc2))
5132 {
5133 SUPR0ChangeCR4(0, ~X86_CR4_VMXE);
5134 ASMSetFlags(fOldEFlags);
5135 pVM->hm.s.vmx.HCPhysVmxEnableError = HCPhysCpuPage;
5136 return rc2;
5137 }
5138
5139 rc2 = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
5140 AssertRC(rc2);
5141 Assert(!(ASMGetFlags() & X86_EFL_IF));
5142 ASMSetFlags(fOldEFlags);
5143 return rc;
5144}
5145
5146
5147/**
5148 * Prepares for and executes VMLAUNCH (64-bit guests) for 32-bit hosts
5149 * supporting 64-bit guests.
5150 *
5151 * @returns VBox status code.
5152 * @param fResume Whether to VMLAUNCH or VMRESUME.
5153 * @param pCtx Pointer to the guest-CPU context.
5154 * @param pCache Pointer to the VMCS cache.
5155 * @param pVM Pointer to the VM.
5156 * @param pVCpu Pointer to the VMCPU.
5157 */
5158DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5159{
5160 NOREF(fResume);
5161
5162 PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu();
5163 RTHCPHYS HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5164
5165#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5166 pCache->uPos = 1;
5167 pCache->interPD = PGMGetInterPaeCR3(pVM);
5168 pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0;
5169#endif
5170
5171#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES)
5172 pCache->TestIn.HCPhysCpuPage = 0;
5173 pCache->TestIn.HCPhysVmcs = 0;
5174 pCache->TestIn.pCache = 0;
5175 pCache->TestOut.HCPhysVmcs = 0;
5176 pCache->TestOut.pCache = 0;
5177 pCache->TestOut.pCtx = 0;
5178 pCache->TestOut.eflags = 0;
5179#else
5180 NOREF(pCache);
5181#endif
5182
5183 uint32_t aParam[10];
5184 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5185 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5186 aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */
5187 aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs >> 32); /* Param 2: VMCS physical address - Hi. */
5188 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache);
5189 aParam[5] = 0;
5190 aParam[6] = VM_RC_ADDR(pVM, pVM);
5191 aParam[7] = 0;
5192 aParam[8] = VM_RC_ADDR(pVM, pVCpu);
5193 aParam[9] = 0;
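 /* Note: aParam[4]/[6]/[8] are the raw-mode context addresses of the VMCS cache, the VM and the VMCPU
    respectively; the interleaved zeros presumably supply the upper 32 bits of each 64-bit parameter,
    mirroring the explicit Lo/Hi split used for the physical addresses above. */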
5194
5195#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5196 pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8;
5197 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1;
5198#endif
5199 int rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_VMXRCStartVM64, RT_ELEMENTS(aParam), &aParam[0]);
5200
5201#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5202 Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5);
5203 Assert(pCtx->dr[4] == 10);
5204 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff;
5205#endif
5206
5207#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES)
5208 AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5209 AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
5210 pVCpu->hm.s.vmx.HCPhysVmcs));
5211 AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
5212 pCache->TestOut.HCPhysVmcs));
5213 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5214 pCache->TestOut.pCache));
5215 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache),
5216 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache)));
5217 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5218 pCache->TestOut.pCtx));
5219 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5220#endif
5221 return rc;
5222}
5223
5224
5225/**
5226 * Initializes the VMCS read cache.
5227 *
5228 * The VMCS cache is used for 32-bit hosts running 64-bit guests (except 32-bit
5229 * Darwin which runs with 64-bit paging in 32-bit mode) for 64-bit fields that
5230 * cannot be accessed in 32-bit mode. Some 64-bit fields -can- be accessed
5231 * (those that have a 32-bit FULL & HIGH part).
5232 *
5233 * @returns VBox status code.
5234 * @param pVM Pointer to the VM.
5235 * @param pVCpu Pointer to the VMCPU.
5236 */
5237static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu)
5238{
5239#define VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, idxField) \
5240{ \
5241 Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \
5242 pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \
5243 pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \
5244 ++cReadFields; \
5245}
5246
5247 AssertPtr(pVM);
5248 AssertPtr(pVCpu);
5249 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
5250 uint32_t cReadFields = 0;
5251
5252 /*
5253 * Don't remove the #if 0'd fields in this code. They're listed here for consistency
5254 * and serve to indicate exceptions to the rules.
5255 */
5256
5257 /* Guest-natural selector base fields. */
5258#if 0
5259 /* These are 32-bit in practice. See Intel spec. 2.5 "Control Registers". */
5260 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR0);
5261 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR4);
5262#endif
5263 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_ES_BASE);
5264 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CS_BASE);
5265 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SS_BASE);
5266 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_DS_BASE);
5267 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_FS_BASE);
5268 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GS_BASE);
5269 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_LDTR_BASE);
5270 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_TR_BASE);
5271 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GDTR_BASE);
5272 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_IDTR_BASE);
5273 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RSP);
5274 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RIP);
5275#if 0
5276 /* Unused natural width guest-state fields. */
5277 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS);
5278 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); /* Handled in Nested Paging case */
5279#endif
5280 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_ESP);
5281 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_EIP);
5282
5283 /* 64-bit guest-state fields; unused as we use two 32-bit VMREADs for these 64-bit fields (using "FULL" and "HIGH" fields). */
5284#if 0
5285 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL);
5286 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_DEBUGCTL_FULL);
5287 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PAT_FULL);
5288 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_EFER_FULL);
5289 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL);
5290 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE0_FULL);
5291 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE1_FULL);
5292 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE2_FULL);
5293 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE3_FULL);
5294#endif
5295
5296 /* Natural width guest-state fields. */
5297 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
5298#if 0
5299 /* Currently unused field. */
5300 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_GUEST_LINEAR_ADDR);
5301#endif
5302
5303 if (pVM->hm.s.fNestedPaging)
5304 {
5305 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3);
5306 AssertMsg(cReadFields == VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields,
5307 VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX));
5308 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
5309 }
5310 else
5311 {
5312 AssertMsg(cReadFields == VMX_VMCS_MAX_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, VMX_VMCS_MAX_CACHE_IDX));
5313 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
5314 }
5315
5316#undef VMXLOCAL_INIT_READ_CACHE_FIELD
5317 return VINF_SUCCESS;
5318}
5319
5320
5321/**
5322 * Writes a field into the VMCS. This can either directly invoke a VMWRITE or
5323 * queue up the VMWRITE by using the VMCS write cache (on 32-bit hosts, except
5324 * darwin, running 64-bit guests).
5325 *
5326 * @returns VBox status code.
5327 * @param pVCpu Pointer to the VMCPU.
5328 * @param idxField The VMCS field encoding.
5329 * @param u64Val 16, 32 or 64-bit value.
5330 */
5331VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5332{
5333 int rc;
5334 switch (idxField)
5335 {
5336 /*
5337 * These fields consist of a "FULL" and a "HIGH" part which can be written to individually.
5338 */
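 /* For reference: in the VMCS field-encoding scheme the "HIGH" access to a 64-bit field is the "FULL"
    encoding with the access-type bit (bit 0) set, i.e. idxField + 1 -- hence the two 32-bit VMWRITEs
    (to idxField and idxField + 1) issued below. */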
5339 /* 64-bit Control fields. */
5340 case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL:
5341 case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL:
5342 case VMX_VMCS64_CTRL_MSR_BITMAP_FULL:
5343 case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL:
5344 case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL:
5345 case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL:
5346 case VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL:
5347 case VMX_VMCS64_CTRL_TSC_OFFSET_FULL:
5348 case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL:
5349 case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL:
5350 case VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL:
5351 case VMX_VMCS64_CTRL_EPTP_FULL:
5352 case VMX_VMCS64_CTRL_EPTP_LIST_FULL:
5353 /* 64-bit Guest-state fields. */
5354 case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL:
5355 case VMX_VMCS64_GUEST_DEBUGCTL_FULL:
5356 case VMX_VMCS64_GUEST_PAT_FULL:
5357 case VMX_VMCS64_GUEST_EFER_FULL:
5358 case VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL:
5359 case VMX_VMCS64_GUEST_PDPTE0_FULL:
5360 case VMX_VMCS64_GUEST_PDPTE1_FULL:
5361 case VMX_VMCS64_GUEST_PDPTE2_FULL:
5362 case VMX_VMCS64_GUEST_PDPTE3_FULL:
5363 /* 64-bit Host-state fields. */
5364 case VMX_VMCS64_HOST_FIELD_PAT_FULL:
5365 case VMX_VMCS64_HOST_FIELD_EFER_FULL:
5366 case VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL:
5367 {
5368 rc = VMXWriteVmcs32(idxField, u64Val);
5369 rc |= VMXWriteVmcs32(idxField + 1, (uint32_t)(u64Val >> 32));
5370 break;
5371 }
5372
5373 /*
5374 * These fields do not have high and low parts. Queue up the VMWRITE by using the VMCS write-cache (for 64-bit
5375 * values). When we switch the host to 64-bit mode for running 64-bit guests, these queued VMWRITEs are then executed.
5376 */
5377 /* Natural-width Guest-state fields. */
5378 case VMX_VMCS_GUEST_CR3:
5379 case VMX_VMCS_GUEST_ES_BASE:
5380 case VMX_VMCS_GUEST_CS_BASE:
5381 case VMX_VMCS_GUEST_SS_BASE:
5382 case VMX_VMCS_GUEST_DS_BASE:
5383 case VMX_VMCS_GUEST_FS_BASE:
5384 case VMX_VMCS_GUEST_GS_BASE:
5385 case VMX_VMCS_GUEST_LDTR_BASE:
5386 case VMX_VMCS_GUEST_TR_BASE:
5387 case VMX_VMCS_GUEST_GDTR_BASE:
5388 case VMX_VMCS_GUEST_IDTR_BASE:
5389 case VMX_VMCS_GUEST_RSP:
5390 case VMX_VMCS_GUEST_RIP:
5391 case VMX_VMCS_GUEST_SYSENTER_ESP:
5392 case VMX_VMCS_GUEST_SYSENTER_EIP:
5393 {
5394 if (!(u64Val >> 32))
5395 {
5396 /* If this field is 64-bit, VT-x will zero out the top bits. */
5397 rc = VMXWriteVmcs32(idxField, (uint32_t)u64Val);
5398 }
5399 else
5400 {
5401 /* Assert that only the 32->64 switcher case should ever come here. */
5402 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests);
5403 rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val);
5404 }
5405 break;
5406 }
5407
5408 default:
5409 {
5410 AssertMsgFailed(("VMXWriteVmcs64Ex: Invalid field %#RX32 (pVCpu=%p u64Val=%#RX64)\n", idxField, pVCpu, u64Val));
5411 rc = VERR_INVALID_PARAMETER;
5412 break;
5413 }
5414 }
5415 AssertRCReturn(rc, rc);
5416 return rc;
5417}
5418
5419
5420/**
5421 * Queues up a VMWRITE by using the VMCS write cache.
5422 * This is only used on 32-bit hosts (except Darwin) for 64-bit guests.
5423 *
5424 * @param pVCpu Pointer to the VMCPU.
5425 * @param idxField The VMCS field encoding.
5426 * @param u64Val 16, 32 or 64-bit value.
5427 */
5428VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5429{
5430 AssertPtr(pVCpu);
5431 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
5432
5433 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5434 ("entries=%u\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5435
5436 /* Make sure there are no duplicates. */
5437 for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++)
5438 {
5439 if (pCache->Write.aField[i] == idxField)
5440 {
5441 pCache->Write.aFieldVal[i] = u64Val;
5442 return VINF_SUCCESS;
5443 }
5444 }
5445
5446 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5447 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5448 pCache->Write.cValidEntries++;
5449 return VINF_SUCCESS;
5450}
5451#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */
5452
5453
5454/**
5455 * Sets up the usage of TSC-offsetting and updates the VMCS.
5456 *
5457 * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the
5458 * VMX preemption timer.
5459 *
5460 * @returns VBox status code.
5461 * @param pVM Pointer to the cross context VM structure.
5462 * @param pVCpu Pointer to the VMCPU.
5463 *
5464 * @remarks No-long-jump zone!!!
5465 */
5466static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVM pVM, PVMCPU pVCpu)
5467{
5468 int rc;
5469 bool fOffsettedTsc;
5470 bool fParavirtTsc;
5471 if (pVM->hm.s.vmx.fUsePreemptTimer)
5472 {
5473 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset,
5474 &fOffsettedTsc, &fParavirtTsc);
5475
5476 /* Make sure the returned values have sane upper and lower boundaries. */
5477 uint64_t u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet);
5478 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second */
5479 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
5480 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
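 /* The VMX-preemption timer counts down at a rate proportional to the TSC: it decrements once every
    2^X TSC ticks, where X is reported in IA32_VMX_MISC[4:0]. cPreemptTimerShift presumably caches that
    value, so the shift above converts TSC ticks into preemption-timer ticks. */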
5481
5482 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
5483 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_PREEMPT_TIMER_VALUE, cPreemptionTickCount); AssertRC(rc);
5484 }
5485 else
5486 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset, &fParavirtTsc);
5487
5488 /** @todo later optimize this to be done elsewhere and not before every
5489 * VM-entry. */
5490 if (fParavirtTsc)
5491 {
5492 /* Currently neither Hyper-V nor KVM needs to update its paravirtualized TSC
5493 information before every VM-entry, so we disable this for performance's sake. */
5494#if 0
5495 rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */);
5496 AssertRC(rc);
5497#endif
5498 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt);
5499 }
5500
5501 if (fOffsettedTsc)
5502 {
5503 /* Note: VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
5504 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset); AssertRC(rc);
5505
5506 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT;
5507 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc);
5508 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
5509 }
5510 else
5511 {
5512 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
5513 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT;
5514 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc);
5515 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
5516 }
5517}
5518
5519
5520/**
5521 * Determines if an exception is a contributory exception.
5522 *
5523 * Contributory exceptions are ones which can cause double-faults unless the
5524 * original exception was a benign exception. Page-fault is intentionally not
5525 * included here as it's a conditional contributory exception.
5526 *
5527 * @returns true if the exception is contributory, false otherwise.
5528 * @param uVector The exception vector.
5529 */
5530DECLINLINE(bool) hmR0VmxIsContributoryXcpt(const uint32_t uVector)
5531{
5532 switch (uVector)
5533 {
5534 case X86_XCPT_GP:
5535 case X86_XCPT_SS:
5536 case X86_XCPT_NP:
5537 case X86_XCPT_TS:
5538 case X86_XCPT_DE:
5539 return true;
5540 default:
5541 break;
5542 }
5543 return false;
5544}
5545
5546
5547/**
5548 * Sets an event as a pending event to be injected into the guest.
5549 *
5550 * @param pVCpu Pointer to the VMCPU.
5551 * @param u32IntInfo The VM-entry interruption-information field.
5552 * @param cbInstr The VM-entry instruction length in bytes (for software
5553 * interrupts, exceptions and privileged software
5554 * exceptions).
5555 * @param u32ErrCode The VM-entry exception error code.
5556 * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
5557 * page-fault.
5558 *
5559 * @remarks Statistics counter assumes this is a guest event being injected or
5560 * re-injected into the guest, i.e. 'StatInjectPendingReflect' is
5561 * always incremented.
5562 */
5563DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPU pVCpu, uint32_t u32IntInfo, uint32_t cbInstr, uint32_t u32ErrCode,
5564 RTGCUINTPTR GCPtrFaultAddress)
5565{
5566 Assert(!pVCpu->hm.s.Event.fPending);
5567 pVCpu->hm.s.Event.fPending = true;
5568 pVCpu->hm.s.Event.u64IntInfo = u32IntInfo;
5569 pVCpu->hm.s.Event.u32ErrCode = u32ErrCode;
5570 pVCpu->hm.s.Event.cbInstr = cbInstr;
5571 pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
5572
5573 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect);
5574}
5575
5576
5577/**
5578 * Sets a double-fault (#DF) exception as pending-for-injection into the VM.
5579 *
5580 * @param pVCpu Pointer to the VMCPU.
5581 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5582 * out-of-sync. Make sure to update the required fields
5583 * before using them.
5584 */
5585DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5586{
5587 NOREF(pMixedCtx);
5588 uint32_t u32IntInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID;
5589 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
5590 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
5591 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
5592}
5593
5594
5595/**
5596 * Handles a condition that occurred while delivering an event through the guest
5597 * IDT.
5598 *
5599 * @returns VBox status code (informational error codes included).
5600 * @retval VINF_SUCCESS if we should continue handling the VM-exit.
5601 * @retval VINF_HM_DOUBLE_FAULT if a #DF condition was detected and we ought to
5602 * continue execution of the guest which will deliver the #DF.
5603 * @retval VINF_EM_RESET if we detected a triple-fault condition.
5604 *
5605 * @param pVCpu Pointer to the VMCPU.
5606 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5607 * out-of-sync. Make sure to update the required fields
5608 * before using them.
5609 * @param pVmxTransient Pointer to the VMX transient structure.
5610 *
5611 * @remarks No-long-jump zone!!!
5612 */
5613static int hmR0VmxCheckExitDueToEventDelivery(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
5614{
5615 uint32_t uExitVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVmxTransient->uExitIntInfo);
5616
5617 int rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
5618 AssertRCReturn(rc, rc);
5619 rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
5620 AssertRCReturn(rc, rc);
5621
5622 if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo))
5623 {
5624 uint32_t uIdtVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo);
5625 uint32_t uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo);
5626
5627 typedef enum
5628 {
5629 VMXREFLECTXCPT_XCPT, /* Reflect the exception to the guest or for further evaluation by VMM. */
5630 VMXREFLECTXCPT_DF, /* Reflect the exception as a double-fault to the guest. */
5631 VMXREFLECTXCPT_TF, /* Indicate a triple faulted state to the VMM. */
5632 VMXREFLECTXCPT_NONE /* Nothing to reflect. */
5633 } VMXREFLECTXCPT;
5634
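 /*
  * Roughly, the logic below implements the following (summary for reference; the authoritative rules are
  * in the Intel spec.):
  *   - #PF while delivering a #PF                                      -> note a vectoring double #PF (handled by the #PF handler).
  *   - contributory xcpt while delivering a contributory xcpt or a #PF -> reflect a #DF to the guest.
  *   - any exception while delivering a #DF                            -> triple fault, reset the VM.
  *   - exception while delivering an ext. interrupt/NMI                -> re-deliver the original interrupt/NMI.
  *   - VM-exit without exit-interruption info (e.g. EPT violation)
  *     while delivering an event                                       -> simply re-deliver the original event.
  */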
5635 /* See Intel spec. 30.7.1.1 "Reflecting Exceptions to Guest Software". */
5636 VMXREFLECTXCPT enmReflect = VMXREFLECTXCPT_NONE;
5637 if (VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo))
5638 {
5639 if (uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT)
5640 {
5641 enmReflect = VMXREFLECTXCPT_XCPT;
5642#ifdef VBOX_STRICT
5643 if ( hmR0VmxIsContributoryXcpt(uIdtVector)
5644 && uExitVector == X86_XCPT_PF)
5645 {
5646 Log4(("IDT: vcpu[%RU32] Contributory #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2));
5647 }
5648#endif
5649 if ( uExitVector == X86_XCPT_PF
5650 && uIdtVector == X86_XCPT_PF)
5651 {
5652 pVmxTransient->fVectoringDoublePF = true;
5653 Log4(("IDT: vcpu[%RU32] Vectoring Double #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2));
5654 }
5655 else if ( (pVCpu->hm.s.vmx.u32XcptBitmap & HMVMX_CONTRIBUTORY_XCPT_MASK)
5656 && hmR0VmxIsContributoryXcpt(uExitVector)
5657 && ( hmR0VmxIsContributoryXcpt(uIdtVector)
5658 || uIdtVector == X86_XCPT_PF))
5659 {
5660 enmReflect = VMXREFLECTXCPT_DF;
5661 }
5662 else if (uIdtVector == X86_XCPT_DF)
5663 enmReflect = VMXREFLECTXCPT_TF;
5664 }
5665 else if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT
5666 || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI)
5667 {
5668 /*
5669 * Ignore software interrupts (INT n), software exceptions (#BP, #OF) and
5670 * privileged software exception (#DB from ICEBP) as they reoccur when restarting the instruction.
5671 */
5672 enmReflect = VMXREFLECTXCPT_XCPT;
5673
5674 if (uExitVector == X86_XCPT_PF)
5675 {
5676 pVmxTransient->fVectoringPF = true;
5677 Log4(("IDT: vcpu[%RU32] Vectoring #PF due to Ext-Int/NMI. uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2));
5678 }
5679 }
5680 }
5681 else if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
5682 || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT
5683 || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI)
5684 {
5685 /*
5686 * If event delivery caused an EPT violation/misconfig or APIC access VM-exit, then the VM-exit
5687 * interruption-information will not be valid as it's not an exception and we end up here. In such cases,
5688 * it is sufficient to reflect the original exception to the guest after handling the VM-exit.
5689 */
5690 enmReflect = VMXREFLECTXCPT_XCPT;
5691 }
5692
5693 /*
5694 * On CPUs that support Virtual NMIs, if this VM-exit (be it an exception or EPT violation/misconfig etc.) occurred
5695 * while delivering the NMI, we need to clear the block-by-NMI field in the guest interruptibility-state before
5696 * re-delivering the NMI after handling the VM-exit. Otherwise the subsequent VM-entry would fail.
5697 *
5698 * See Intel spec. 30.7.1.2 "Resuming Guest Software after Handling an Exception". See @bugref{7445}.
5699 */
5700 if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI
5701 && enmReflect == VMXREFLECTXCPT_XCPT
5702 && (pVCpu->hm.s.vmx.u32PinCtls & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI)
5703 && VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS))
5704 {
5705 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
5706 }
5707
5708 switch (enmReflect)
5709 {
5710 case VMXREFLECTXCPT_XCPT:
5711 {
5712 Assert( uIdtVectorType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT
5713 && uIdtVectorType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
5714 && uIdtVectorType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT);
5715
5716 uint32_t u32ErrCode = 0;
5717 if (VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo))
5718 {
5719 rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
5720 AssertRCReturn(rc, rc);
5721 u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
5722 }
5723
5724 /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF. See hmR0VmxExitXcptPF(). */
5725 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo),
5726 0 /* cbInstr */, u32ErrCode, pMixedCtx->cr2);
5727 rc = VINF_SUCCESS;
5728 Log4(("IDT: vcpu[%RU32] Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->idCpu,
5729 pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.u32ErrCode));
5730
5731 break;
5732 }
5733
5734 case VMXREFLECTXCPT_DF:
5735 {
5736 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
5737 rc = VINF_HM_DOUBLE_FAULT;
5738 Log4(("IDT: vcpu[%RU32] Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->idCpu,
5739 pVCpu->hm.s.Event.u64IntInfo, uIdtVector, uExitVector));
5740
5741 break;
5742 }
5743
5744 case VMXREFLECTXCPT_TF:
5745 {
5746 rc = VINF_EM_RESET;
5747 Log4(("IDT: vcpu[%RU32] Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", pVCpu->idCpu, uIdtVector,
5748 uExitVector));
5749 break;
5750 }
5751
5752 default:
5753 Assert(rc == VINF_SUCCESS);
5754 break;
5755 }
5756 }
5757 else if ( VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo)
5758 && VMX_EXIT_INTERRUPTION_INFO_NMI_UNBLOCK_IRET(pVmxTransient->uExitIntInfo)
5759 && uExitVector != X86_XCPT_DF
5760 && (pVCpu->hm.s.vmx.u32PinCtls & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI))
5761 {
5762 /*
5763 * Execution of IRET caused this fault when NMI blocking was in effect (i.e we're in the guest NMI handler).
5764 * We need to set the block-by-NMI field so that NMIs remain blocked until the IRET execution is restarted.
5765 * See Intel spec. 30.7.1.2 "Resuming guest software after handling an exception".
5766 */
5767 if (!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS))
5768 {
5769 Log4(("hmR0VmxCheckExitDueToEventDelivery: vcpu[%RU32] Setting VMCPU_FF_BLOCK_NMIS. Valid=%RTbool uExitReason=%u\n",
5770 pVCpu->idCpu, VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo), pVmxTransient->uExitReason));
5771 VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
5772 }
5773 }
5774
5775 Assert(rc == VINF_SUCCESS || rc == VINF_HM_DOUBLE_FAULT || rc == VINF_EM_RESET);
5776 return rc;
5777}
5778
5779
5780/**
5781 * Saves the guest's CR0 register from the VMCS into the guest-CPU context.
5782 *
5783 * @returns VBox status code.
5784 * @param pVCpu Pointer to the VMCPU.
5785 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5786 * out-of-sync. Make sure to update the required fields
5787 * before using them.
5788 *
5789 * @remarks No-long-jump zone!!!
5790 */
5791static int hmR0VmxSaveGuestCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5792{
5793 NOREF(pMixedCtx);
5794
5795 /*
5796 * While in the middle of saving guest-CR0, we could get preempted and re-invoked from the preemption hook,
5797 * see hmR0VmxLeave(). Safer to just make this code non-preemptible.
5798 */
5799 VMMRZCallRing3Disable(pVCpu);
5800 HM_DISABLE_PREEMPT();
5801
5802 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0))
5803 {
5804 uint32_t uVal = 0;
5805 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &uVal);
5806 AssertRCReturn(rc, rc);
5807
5808 uint32_t uShadow = 0;
5809 rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uShadow);
5810 AssertRCReturn(rc, rc);
5811
5812 uVal = (uShadow & pVCpu->hm.s.vmx.u32CR0Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR0Mask);
5813 CPUMSetGuestCR0(pVCpu, uVal);
5814 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0);
5815 }
5816
5817 HM_RESTORE_PREEMPT();
5818 VMMRZCallRing3Enable(pVCpu);
5819 return VINF_SUCCESS;
5820}
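/*
 * Illustrative sketch of the CR0 blend performed above: bits covered by the CR0
 * guest/host mask are owned by the host, so the guest reads them from the CR0
 * read shadow, while the remaining bits come straight from the guest CR0 field
 * in the VMCS. The helper name and standalone form below are assumptions for
 * illustration only; the block is kept out of the build.
 */
#if 0
static uint32_t vmxExampleBlendCr0(uint32_t uGuestCr0, uint32_t uReadShadow, uint32_t uCr0Mask)
{
    /* Host-owned (masked) bits from the read shadow, guest-owned bits from the real CR0. */
    return (uReadShadow & uCr0Mask) | (uGuestCr0 & ~uCr0Mask);
}
#endif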
5821
5822
5823/**
5824 * Saves the guest's CR4 register from the VMCS into the guest-CPU context.
5825 *
5826 * @returns VBox status code.
5827 * @param pVCpu Pointer to the VMCPU.
5828 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5829 * out-of-sync. Make sure to update the required fields
5830 * before using them.
5831 *
5832 * @remarks No-long-jump zone!!!
5833 */
5834static int hmR0VmxSaveGuestCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5835{
5836 NOREF(pMixedCtx);
5837
5838 int rc = VINF_SUCCESS;
5839 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4))
5840 {
5841 uint32_t uVal = 0;
5842 uint32_t uShadow = 0;
5843 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &uVal);
5844 AssertRCReturn(rc, rc);
5845 rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uShadow);
5846 AssertRCReturn(rc, rc);
5847
5848 uVal = (uShadow & pVCpu->hm.s.vmx.u32CR4Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR4Mask);
5849 CPUMSetGuestCR4(pVCpu, uVal);
5850 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4);
5851 }
5852 return rc;
5853}
5854
5855
5856/**
5857 * Saves the guest's RIP register from the VMCS into the guest-CPU context.
5858 *
5859 * @returns VBox status code.
5860 * @param pVCpu Pointer to the VMCPU.
5861 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5862 * out-of-sync. Make sure to update the required fields
5863 * before using them.
5864 *
5865 * @remarks No-long-jump zone!!!
5866 */
5867static int hmR0VmxSaveGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5868{
5869 int rc = VINF_SUCCESS;
5870 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP))
5871 {
5872 uint64_t u64Val = 0;
5873 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val);
5874 AssertRCReturn(rc, rc);
5875
5876 pMixedCtx->rip = u64Val;
5877 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP);
5878 }
5879 return rc;
5880}
5881
5882
5883/**
5884 * Saves the guest's RSP register from the VMCS into the guest-CPU context.
5885 *
5886 * @returns VBox status code.
5887 * @param pVCpu Pointer to the VMCPU.
5888 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5889 * out-of-sync. Make sure to update the required fields
5890 * before using them.
5891 *
5892 * @remarks No-long-jump zone!!!
5893 */
5894static int hmR0VmxSaveGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5895{
5896 int rc = VINF_SUCCESS;
5897 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RSP))
5898 {
5899 uint64_t u64Val = 0;
5900 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val);
5901 AssertRCReturn(rc, rc);
5902
5903 pMixedCtx->rsp = u64Val;
5904 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RSP);
5905 }
5906 return rc;
5907}
5908
5909
5910/**
5911 * Saves the guest's RFLAGS from the VMCS into the guest-CPU context.
5912 *
5913 * @returns VBox status code.
5914 * @param pVCpu Pointer to the VMCPU.
5915 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5916 * out-of-sync. Make sure to update the required fields
5917 * before using them.
5918 *
5919 * @remarks No-long-jump zone!!!
5920 */
5921static int hmR0VmxSaveGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5922{
5923 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS))
5924 {
5925 uint32_t uVal = 0;
5926 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &uVal);
5927 AssertRCReturn(rc, rc);
5928
5929 pMixedCtx->eflags.u32 = uVal;
5930 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) /* Undo our real-on-v86-mode changes to eflags if necessary. */
5931 {
5932 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
5933 Log4(("Saving real-mode EFLAGS VT-x view=%#RX32\n", pMixedCtx->eflags.u32));
5934
5935 pMixedCtx->eflags.Bits.u1VM = 0;
5936 pMixedCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.Eflags.Bits.u2IOPL;
5937 }
5938
5939 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS);
5940 }
5941 return VINF_SUCCESS;
5942}
5943
5944
5945/**
5946 * Wrapper for saving the guest's RIP, RSP and RFLAGS from the VMCS into the
5947 * guest-CPU context.
5948 */
5949DECLINLINE(int) hmR0VmxSaveGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5950{
5951 int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
5952 rc |= hmR0VmxSaveGuestRsp(pVCpu, pMixedCtx);
5953 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
5954 return rc;
5955}
5956
5957
5958/**
5959 * Saves the guest's interruptibility-state ("interrupt shadow" as AMD calls it)
5960 * from the guest-state area in the VMCS.
5961 *
5962 * @param pVCpu Pointer to the VMCPU.
5963 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5964 * out-of-sync. Make sure to update the required fields
5965 * before using them.
5966 *
5967 * @remarks No-long-jump zone!!!
5968 */
5969static void hmR0VmxSaveGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5970{
5971 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_INTR_STATE))
5972 {
5973 uint32_t uIntrState = 0;
5974 int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState);
5975 AssertRC(rc);
5976
5977 if (!uIntrState)
5978 {
5979 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
5980 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
5981
5982 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS))
5983 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
5984 }
5985 else
5986 {
5987 if (uIntrState & ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS
5988 | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI))
5989 {
5990 rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
5991 AssertRC(rc);
5992 rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* for hmR0VmxGetGuestIntrState(). */
5993 AssertRC(rc);
5994
5995 EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip);
5996 Assert(VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
5997 }
5998 else if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
5999 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
6000
6001 if (uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI)
6002 {
6003 if (!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS))
6004 VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS);
6005 }
6006 else if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS))
6007 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS);
6008 }
6009
6010 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_INTR_STATE);
6011 }
6012}
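/*
 * Illustrative sketch of the interruptibility-state decoding done above: STI and
 * MOV SS blocking map to VMCPU_FF_INHIBIT_INTERRUPTS, NMI blocking maps to
 * VMCPU_FF_BLOCK_NMIS. Only constants already used above are referenced; the
 * helper name and out-parameters are assumptions for illustration only and the
 * block is kept out of the build.
 */
#if 0
static void vmxExampleDecodeIntrState(uint32_t uIntrState, bool *pfInhibitInt, bool *pfBlockNmi)
{
    /* STI and MOV SS each create a one-instruction interrupt shadow. */
    *pfInhibitInt = RT_BOOL(uIntrState & ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
                                         | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS));
    /* NMI delivery stays blocked until the guest's NMI handler executes IRET. */
    *pfBlockNmi = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI);
}
#endif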
6013
6014
6015/**
6016 * Saves the guest's activity state.
6017 *
6018 * @returns VBox status code.
6019 * @param pVCpu Pointer to the VMCPU.
6020 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6021 * out-of-sync. Make sure to update the required fields
6022 * before using them.
6023 *
6024 * @remarks No-long-jump zone!!!
6025 */
6026static int hmR0VmxSaveGuestActivityState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6027{
6028 NOREF(pMixedCtx);
6029 /* Nothing to do for now until we make use of different guest-CPU activity state. Just update the flag. */
6030 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_ACTIVITY_STATE);
6031 return VINF_SUCCESS;
6032}
6033
6034
6035/**
6036 * Saves the guest SYSENTER MSRs (SYSENTER_CS, SYSENTER_EIP, SYSENTER_ESP) from
6037 * the current VMCS into the guest-CPU context.
6038 *
6039 * @returns VBox status code.
6040 * @param pVCpu Pointer to the VMCPU.
6041 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6042 * out-of-sync. Make sure to update the required fields
6043 * before using them.
6044 *
6045 * @remarks No-long-jump zone!!!
6046 */
6047static int hmR0VmxSaveGuestSysenterMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6048{
6049 int rc = VINF_SUCCESS;
6050 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR))
6051 {
6052 uint32_t u32Val = 0;
6053 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRCReturn(rc, rc);
6054 pMixedCtx->SysEnter.cs = u32Val;
6055 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR);
6056 }
6057
6058 uint64_t u64Val = 0;
6059 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR))
6060 {
6061 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, &u64Val); AssertRCReturn(rc, rc);
6062 pMixedCtx->SysEnter.eip = u64Val;
6063 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR);
6064 }
6065 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR))
6066 {
6067 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, &u64Val); AssertRCReturn(rc, rc);
6068 pMixedCtx->SysEnter.esp = u64Val;
6069 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR);
6070 }
6071 return rc;
6072}
6073
6074
6075/**
6076 * Saves the set of guest MSRs (that we restore lazily while leaving VT-x) from
6077 * the CPU back into the guest-CPU context.
6078 *
6079 * @returns VBox status code.
6080 * @param pVCpu Pointer to the VMCPU.
6081 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6082 * out-of-sync. Make sure to update the required fields
6083 * before using them.
6084 *
6085 * @remarks No-long-jump zone!!!
6086 */
6087static int hmR0VmxSaveGuestLazyMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6088{
6089#if HC_ARCH_BITS == 64
6090 if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests)
6091 {
6092 /* Since this can be called from our preemption hook it's safer to make the guest-MSRs update non-preemptible. */
6093 VMMRZCallRing3Disable(pVCpu);
6094 HM_DISABLE_PREEMPT();
6095
6096 /* Doing the check here ensures we don't overwrite already-saved guest MSRs from a preemption hook. */
6097 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LAZY_MSRS))
6098 {
6099 hmR0VmxLazySaveGuestMsrs(pVCpu, pMixedCtx);
6100 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LAZY_MSRS);
6101 }
6102
6103 HM_RESTORE_PREEMPT();
6104 VMMRZCallRing3Enable(pVCpu);
6105 }
6106 else
6107 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LAZY_MSRS);
6108#else
6109 NOREF(pMixedCtx);
6110 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LAZY_MSRS);
6111#endif
6112
6113 return VINF_SUCCESS;
6114}
6115
6116
6117/**
6118 * Saves the auto load/store'd guest MSRs from the current VMCS into
6119 * the guest-CPU context.
6120 *
6121 * @returns VBox status code.
6122 * @param pVCpu Pointer to the VMCPU.
6123 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6124 * out-of-sync. Make sure to update the required fields
6125 * before using them.
6126 *
6127 * @remarks No-long-jump zone!!!
6128 */
6129static int hmR0VmxSaveGuestAutoLoadStoreMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6130{
6131 if (HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS))
6132 return VINF_SUCCESS;
6133
6134 PVMXAUTOMSR pMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
6135 uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs;
6136 Log4(("hmR0VmxSaveGuestAutoLoadStoreMsrs: cMsrs=%u\n", cMsrs));
6137 for (uint32_t i = 0; i < cMsrs; i++, pMsr++)
6138 {
6139 switch (pMsr->u32Msr)
6140 {
6141 case MSR_K8_TSC_AUX: CPUMR0SetGuestTscAux(pVCpu, pMsr->u64Value); break;
6142 case MSR_K8_LSTAR: pMixedCtx->msrLSTAR = pMsr->u64Value; break;
6143 case MSR_K6_STAR: pMixedCtx->msrSTAR = pMsr->u64Value; break;
6144 case MSR_K8_SF_MASK: pMixedCtx->msrSFMASK = pMsr->u64Value; break;
6145 case MSR_K8_KERNEL_GS_BASE: pMixedCtx->msrKERNELGSBASE = pMsr->u64Value; break;
6146 case MSR_K6_EFER: /* Nothing to do here since we intercept writes, see hmR0VmxLoadGuestMsrs(). */
6147 break;
6148
6149 default:
6150 {
6151 AssertMsgFailed(("Unexpected MSR in auto-load/store area. uMsr=%#RX32 cMsrs=%u\n", pMsr->u32Msr, cMsrs));
6152 pVCpu->hm.s.u32HMError = pMsr->u32Msr;
6153 return VERR_HM_UNEXPECTED_LD_ST_MSR;
6154 }
6155 }
6156 }
6157
6158 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS);
6159 return VINF_SUCCESS;
6160}
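/*
 * Illustrative sketch of the layout walked above: each auto-load/store entry is a
 * 16-byte record of MSR index, reserved dword and 64-bit value, mirroring the
 * VMXAUTOMSR type used by the code. The struct name below is hypothetical and the
 * block is kept out of the build.
 */
#if 0
typedef struct EXAMPLEVMXAUTOMSR
{
    uint32_t u32Msr;      /* MSR index, e.g. MSR_K8_LSTAR. */
    uint32_t u32Reserved; /* Must be zero. */
    uint64_t u64Value;    /* Value the CPU stores on VM-exit and loads on VM-entry. */
} EXAMPLEVMXAUTOMSR;
#endif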
6161
6162
6163/**
6164 * Saves the guest control registers from the current VMCS into the guest-CPU
6165 * context.
6166 *
6167 * @returns VBox status code.
6168 * @param pVCpu Pointer to the VMCPU.
6169 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6170 * out-of-sync. Make sure to update the required fields
6171 * before using them.
6172 *
6173 * @remarks No-long-jump zone!!!
6174 */
6175static int hmR0VmxSaveGuestControlRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6176{
6177 /* Guest CR0. Guest FPU. */
6178 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
6179 AssertRCReturn(rc, rc);
6180
6181 /* Guest CR4. */
6182 rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx);
6183 AssertRCReturn(rc, rc);
6184
6185 /* Guest CR2 - updated always during the world-switch or in #PF. */
6186 /* Guest CR3. Only changes with Nested Paging. This must be done -after- saving CR0 and CR4 from the guest! */
6187 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR3))
6188 {
6189 Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0));
6190 Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR4));
6191
6192 PVM pVM = pVCpu->CTX_SUFF(pVM);
6193 if ( pVM->hm.s.vmx.fUnrestrictedGuest
6194 || ( pVM->hm.s.fNestedPaging
6195 && CPUMIsGuestPagingEnabledEx(pMixedCtx)))
6196 {
6197 uint64_t u64Val = 0;
6198 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_CR3, &u64Val);
6199 if (pMixedCtx->cr3 != u64Val)
6200 {
6201 CPUMSetGuestCR3(pVCpu, u64Val);
6202 if (VMMRZCallRing3IsEnabled(pVCpu))
6203 {
6204 PGMUpdateCR3(pVCpu, u64Val);
6205 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6206 }
6207 else
6208 {
6209 /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMUpdateCR3().*/
6210 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
6211 }
6212 }
6213
6214 /* If the guest is in PAE mode, sync back the PDPE's into the guest state. */
6215 if (CPUMIsGuestInPAEModeEx(pMixedCtx)) /* Reads CR0, CR4 and EFER MSR (EFER is always up-to-date). */
6216 {
6217 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc);
6218 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc);
6219 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc);
6220 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc);
6221
6222 if (VMMRZCallRing3IsEnabled(pVCpu))
6223 {
6224 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
6225 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
6226 }
6227 else
6228 {
6229 /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMGstUpdatePaePdpes(). */
6230 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES);
6231 }
6232 }
6233 }
6234
6235 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR3);
6236 }
6237
6238 /*
6239 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> hmR0VmxCallRing3Callback()
6240 * -> VMMRZCallRing3Disable() -> hmR0VmxSaveGuestState() -> Set VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
6241 * -> continue with VM-exit handling -> hmR0VmxSaveGuestControlRegs() and here we are.
6242 *
6243 * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
6244 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
6245 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
6246 * -NOT- check if HMVMX_UPDATED_GUEST_CR3 is already set or not!
6247 *
6248 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
6249 */
6250 if (VMMRZCallRing3IsEnabled(pVCpu))
6251 {
6252 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6253 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
6254
6255 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
6256 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
6257
6258 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6259 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
6260 }
6261
6262 return rc;
6263}
6264
6265
6266/**
6267 * Reads a guest segment register from the current VMCS into the guest-CPU
6268 * context.
6269 *
6270 * @returns VBox status code.
6271 * @param pVCpu Pointer to the VMCPU.
6272 * @param idxSel Index of the selector in the VMCS.
6273 * @param idxLimit Index of the segment limit in the VMCS.
6274 * @param idxBase Index of the segment base in the VMCS.
6275 * @param idxAccess Index of the access rights of the segment in the VMCS.
6276 * @param pSelReg Pointer to the segment selector.
6277 *
6278 * @remarks No-long-jump zone!!!
6279 * @remarks Never call this function directly!!! Use the VMXLOCAL_READ_SEG()
6280 * macro as that takes care of whether to read from the VMCS cache or
6281 * not.
6282 */
6283DECLINLINE(int) hmR0VmxReadSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess,
6284 PCPUMSELREG pSelReg)
6285{
6286 NOREF(pVCpu);
6287
6288 uint32_t u32Val = 0;
6289 int rc = VMXReadVmcs32(idxSel, &u32Val);
6290 AssertRCReturn(rc, rc);
6291 pSelReg->Sel = (uint16_t)u32Val;
6292 pSelReg->ValidSel = (uint16_t)u32Val;
6293 pSelReg->fFlags = CPUMSELREG_FLAGS_VALID;
6294
6295 rc = VMXReadVmcs32(idxLimit, &u32Val);
6296 AssertRCReturn(rc, rc);
6297 pSelReg->u32Limit = u32Val;
6298
6299 uint64_t u64Val = 0;
6300 rc = VMXReadVmcsGstNByIdxVal(idxBase, &u64Val);
6301 AssertRCReturn(rc, rc);
6302 pSelReg->u64Base = u64Val;
6303
6304 rc = VMXReadVmcs32(idxAccess, &u32Val);
6305 AssertRCReturn(rc, rc);
6306 pSelReg->Attr.u = u32Val;
6307
6308 /*
6309 * If VT-x marks the segment as unusable, most other bits remain undefined:
6310 * - For CS the L, D and G bits have meaning.
6311 * - For SS the DPL has meaning (it -is- the CPL for Intel and VBox).
6312 * - For the remaining data segments no bits are defined.
6313 *
6314 * The present bit and the unusable bit have been observed to be set at the
6315 * same time (the selector was supposed to be invalid as we started executing
6316 * a V8086 interrupt in ring-0).
6317 *
6318 * What should be important for the rest of the VBox code is that the P bit is
6319 * cleared. Some of the other VBox code recognizes the unusable bit, but
6320 * AMD-V certainly doesn't, and REM doesn't really either. So, to be on the
6321 * safe side here, we'll strip off P and other bits we don't care about. If
6322 * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed.
6323 *
6324 * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers".
6325 */
6326 if (pSelReg->Attr.u & X86DESCATTR_UNUSABLE)
6327 {
6328 Assert(idxSel != VMX_VMCS16_GUEST_FIELD_TR); /* TR is the only selector that can never be unusable. */
6329
6330 /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */
6331 pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G
6332 | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT;
6333
6334 Log4(("hmR0VmxReadSegmentReg: Unusable idxSel=%#x attr=%#x -> %#x\n", idxSel, u32Val, pSelReg->Attr.u));
6335#ifdef DEBUG_bird
6336 AssertMsg((u32Val & ~X86DESCATTR_P) == pSelReg->Attr.u,
6337 ("%#x: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n",
6338 idxSel, u32Val, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit));
6339#endif
6340 }
6341 return VINF_SUCCESS;
6342}
6343
6344
6345#ifdef VMX_USE_CACHED_VMCS_ACCESSES
6346# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
6347 hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
6348 VMX_VMCS_GUEST_##Sel##_BASE_CACHE_IDX, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
6349#else
6350# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
6351 hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
6352 VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
6353#endif
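/*
 * For reference, with the non-cached variant above VMXLOCAL_READ_SEG(CS, cs)
 * expands to roughly the call below; the cached variant merely substitutes the
 * *_BASE_CACHE_IDX field index for the base. Shown for illustration only and
 * kept out of the build.
 */
#if 0
 rc = hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_CS, VMX_VMCS32_GUEST_CS_LIMIT,
                            VMX_VMCS_GUEST_CS_BASE, VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS, &pMixedCtx->cs);
#endif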
6354
6355
6356/**
6357 * Saves the guest segment registers from the current VMCS into the guest-CPU
6358 * context.
6359 *
6360 * @returns VBox status code.
6361 * @param pVCpu Pointer to the VMCPU.
6362 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6363 * out-of-sync. Make sure to update the required fields
6364 * before using them.
6365 *
6366 * @remarks No-long-jump zone!!!
6367 */
6368static int hmR0VmxSaveGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6369{
6370 /* Guest segment registers. */
6371 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SEGMENT_REGS))
6372 {
6373 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); AssertRCReturn(rc, rc);
6374 rc = VMXLOCAL_READ_SEG(CS, cs); AssertRCReturn(rc, rc);
6375 rc = VMXLOCAL_READ_SEG(SS, ss); AssertRCReturn(rc, rc);
6376 rc = VMXLOCAL_READ_SEG(DS, ds); AssertRCReturn(rc, rc);
6377 rc = VMXLOCAL_READ_SEG(ES, es); AssertRCReturn(rc, rc);
6378 rc = VMXLOCAL_READ_SEG(FS, fs); AssertRCReturn(rc, rc);
6379 rc = VMXLOCAL_READ_SEG(GS, gs); AssertRCReturn(rc, rc);
6380
6381 /* Restore segment attributes for real-on-v86 mode hack. */
6382 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
6383 {
6384 pMixedCtx->cs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrCS.u;
6385 pMixedCtx->ss.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrSS.u;
6386 pMixedCtx->ds.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrDS.u;
6387 pMixedCtx->es.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrES.u;
6388 pMixedCtx->fs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrFS.u;
6389 pMixedCtx->gs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrGS.u;
6390 }
6391 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_SEGMENT_REGS);
6392 }
6393
6394 return VINF_SUCCESS;
6395}
6396
6397
6398/**
6399 * Saves the guest descriptor table registers and task register from the current
6400 * VMCS into the guest-CPU context.
6401 *
6402 * @returns VBox status code.
6403 * @param pVCpu Pointer to the VMCPU.
6404 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6405 * out-of-sync. Make sure to update the required fields
6406 * before using them.
6407 *
6408 * @remarks No-long-jump zone!!!
6409 */
6410static int hmR0VmxSaveGuestTableRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6411{
6412 int rc = VINF_SUCCESS;
6413
6414 /* Guest LDTR. */
6415 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LDTR))
6416 {
6417 rc = VMXLOCAL_READ_SEG(LDTR, ldtr);
6418 AssertRCReturn(rc, rc);
6419 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LDTR);
6420 }
6421
6422 /* Guest GDTR. */
6423 uint64_t u64Val = 0;
6424 uint32_t u32Val = 0;
6425 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GDTR))
6426 {
6427 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); AssertRCReturn(rc, rc);
6428 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc);
6429 pMixedCtx->gdtr.pGdt = u64Val;
6430 pMixedCtx->gdtr.cbGdt = u32Val;
6431 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_GDTR);
6432 }
6433
6434 /* Guest IDTR. */
6435 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_IDTR))
6436 {
6437 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); AssertRCReturn(rc, rc);
6438 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc);
6439 pMixedCtx->idtr.pIdt = u64Val;
6440 pMixedCtx->idtr.cbIdt = u32Val;
6441 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_IDTR);
6442 }
6443
6444 /* Guest TR. */
6445 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_TR))
6446 {
6447 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
6448 AssertRCReturn(rc, rc);
6449
6450 /* For real-mode emulation using virtual-8086 mode we have the fake TSS (pRealModeTSS) in TR, don't save the fake one. */
6451 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
6452 {
6453 rc = VMXLOCAL_READ_SEG(TR, tr);
6454 AssertRCReturn(rc, rc);
6455 }
6456 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_TR);
6457 }
6458 return rc;
6459}
6460
6461#undef VMXLOCAL_READ_SEG
6462
6463
6464/**
6465 * Saves the guest debug-register DR7 from the current VMCS into the guest-CPU
6466 * context.
6467 *
6468 * @returns VBox status code.
6469 * @param pVCpu Pointer to the VMCPU.
6470 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6471 * out-of-sync. Make sure to update the required fields
6472 * before using them.
6473 *
6474 * @remarks No-long-jump zone!!!
6475 */
6476static int hmR0VmxSaveGuestDR7(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6477{
6478 if (!HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_DEBUG))
6479 {
6480 if (!pVCpu->hm.s.fUsingHyperDR7)
6481 {
6482 /* Upper 32-bits are always zero. See Intel spec. 2.7.3 "Loading and Storing Debug Registers". */
6483 uint32_t u32Val;
6484 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_DR7, &u32Val); AssertRCReturn(rc, rc);
6485 pMixedCtx->dr[7] = u32Val;
6486 }
6487
6488 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_DEBUG);
6489 }
6490 return VINF_SUCCESS;
6491}
6492
6493
6494/**
6495 * Saves the guest APIC state from the current VMCS into the guest-CPU context.
6496 *
6497 * @returns VBox status code.
6498 * @param pVCpu Pointer to the VMCPU.
6499 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6500 * out-of-sync. Make sure to update the required fields
6501 * before using them.
6502 *
6503 * @remarks No-long-jump zone!!!
6504 */
6505static int hmR0VmxSaveGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6506{
6507 NOREF(pMixedCtx);
6508
6509 /* Updating TPR is already done in hmR0VmxPostRunGuest(). Just update the flag. */
6510 HMVMXCPU_GST_SET_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_APIC_STATE);
6511 return VINF_SUCCESS;
6512}
6513
6514
6515/**
6516 * Saves the entire guest state from the currently active VMCS into the
6517 * guest-CPU context.
6518 *
6519 * This essentially VMREADs all guest-data.
6520 *
6521 * @returns VBox status code.
6522 * @param pVCpu Pointer to the VMCPU.
6523 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6524 * out-of-sync. Make sure to update the required fields
6525 * before using them.
6526 */
6527static int hmR0VmxSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6528{
6529 Assert(pVCpu);
6530 Assert(pMixedCtx);
6531
6532 if (HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL)
6533 return VINF_SUCCESS;
6534
6535 /* Though we can longjmp to ring-3 due to log-flushes here and get recalled
6536 again on the ring-3 callback path, there is no real need to. */
6537 if (VMMRZCallRing3IsEnabled(pVCpu))
6538 VMMR0LogFlushDisable(pVCpu);
6539 else
6540 Assert(VMMR0IsLogFlushDisabled(pVCpu));
6541 Log4Func(("vcpu[%RU32]\n", pVCpu->idCpu));
6542
6543 int rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
6544 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestRipRspRflags failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6545
6546 rc = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
6547 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestControlRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6548
6549 rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
6550 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSegmentRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6551
6552 rc = hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx);
6553 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestTableRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6554
6555 rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
6556 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestDR7 failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6557
6558 rc = hmR0VmxSaveGuestSysenterMsrs(pVCpu, pMixedCtx);
6559 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSysenterMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6560
6561 rc = hmR0VmxSaveGuestLazyMsrs(pVCpu, pMixedCtx);
6562 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestLazyMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6563
6564 rc = hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx);
6565 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestAutoLoadStoreMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6566
6567 rc = hmR0VmxSaveGuestActivityState(pVCpu, pMixedCtx);
6568 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestActivityState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6569
6570 rc = hmR0VmxSaveGuestApicState(pVCpu, pMixedCtx);
6571 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestApicState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
6572
6573 AssertMsg(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL,
6574 ("Missed guest state bits while saving state; residue %RX32\n", HMVMXCPU_GST_VALUE(pVCpu)));
6575
6576 if (VMMRZCallRing3IsEnabled(pVCpu))
6577 VMMR0LogFlushEnable(pVCpu);
6578
6579 return VINF_SUCCESS;
6580}
6581
6582
6583/**
6584 * Saves basic guest registers needed for IEM instruction execution.
6585 *
6586 * @returns VBox status code (OR-able).
6587 * @param pVCpu Pointer to the cross context CPU data for the calling
6588 * EMT.
6589 * @param pMixedCtx Pointer to the CPU context of the guest.
6590 * @param fMemory Whether the instruction being executed operates on
6591 * memory or not. Only CR0 is synced up if clear.
6592 * @param fNeedRsp Need RSP (any instruction working on GPRs or stack).
6593 */
6594static int hmR0VmxSaveGuestRegsForIemExec(PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fMemory, bool fNeedRsp)
6595{
6596 /*
6597 * We assume all general purpose registers other than RSP are available.
6598 *
6599 * RIP is a must, as it will be incremented or otherwise changed.
6600 *
6601 * RFLAGS are always required to figure the CPL.
6602 *
6603 * RSP isn't always required, however it's a GPR, so frequently required.
6604 *
6605 * SS and CS are the only segment registers needed if IEM doesn't do memory
6606 * access (CPL + 16/32/64-bit mode), but we can only fetch all segment registers at once.
6607 *
6608 * CR0 is always required by IEM for the CPL, while CR3 and CR4 will only
6609 * be required for memory accesses.
6610 *
6611 * Note! Before IEM dispatches an exception, it will call us to sync in everything.
6612 */
6613 int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
6614 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
6615 if (fNeedRsp)
6616 rc |= hmR0VmxSaveGuestRsp(pVCpu, pMixedCtx);
6617 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
6618 if (!fMemory)
6619 rc |= hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
6620 else
6621 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
6622 return rc;
6623}
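/*
 * Illustrative usage sketch: a VM-exit handler that hands a memory-touching
 * instruction to the interpreter would first sync the basic register set,
 * roughly as below. The IEMExecOne() call is an assumption about the
 * interpreter entry point and the error handling is simplified; the block is
 * kept out of the build.
 */
#if 0
 int rc2 = hmR0VmxSaveGuestRegsForIemExec(pVCpu, pMixedCtx, true /* fMemory */, true /* fNeedRsp */);
 AssertRCReturn(rc2, rc2);
 VBOXSTRICTRC rcStrict2 = IEMExecOne(pVCpu); /* interpret the instruction at the current RIP */
#endif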
6624
6625
6626/**
6627 * Ensures that we've got a complete basic guest-context.
6628 *
6629 * This excludes the FPU, SSE, AVX, and similar extended state. The interface
6630 * is for the interpreter.
6631 *
6632 * @returns VBox status code.
6633 * @param pVCpu Pointer to the VMCPU of the calling EMT.
6634 * @param pMixedCtx Pointer to the guest-CPU context which may have data
6635 * needing to be synced in.
6636 * @thread EMT(pVCpu)
6637 */
6638VMMR0_INT_DECL(int) HMR0EnsureCompleteBasicContext(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6639{
6640 /* Note! Since this is only applicable to VT-x, the implementation is placed
6641 in the VT-x part of the sources instead of the generic stuff. */
6642 if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported)
6643 return hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
6644 return VINF_SUCCESS;
6645}
6646
6647
6648/**
6649 * Check per-VM and per-VCPU force flag actions that require us to go back to
6650 * ring-3 for one reason or another.
6651 *
6652 * @returns VBox status code (information status code included).
6653 * @retval VINF_SUCCESS if we don't have any actions that require going back to
6654 * ring-3.
6655 * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
6656 * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
6657 * interrupts)
6658 * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
6659 * all EMTs to be in ring-3.
6660 * @retval VINF_EM_RAW_TO_R3 if there is pending DMA requests.
6661 * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return
6662 * to the EM loop.
6663 *
6664 * @param pVM Pointer to the VM.
6665 * @param pVCpu Pointer to the VMCPU.
6666 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6667 * out-of-sync. Make sure to update the required fields
6668 * before using them.
6669 */
6670static int hmR0VmxCheckForceFlags(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6671{
6672 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6673
6674 if ( VM_FF_IS_PENDING(pVM, !pVCpu->hm.s.fSingleInstruction
6675 ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK)
6676 || VMCPU_FF_IS_PENDING(pVCpu, !pVCpu->hm.s.fSingleInstruction
6677 ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
6678 {
6679 /* We need the control registers now, make sure the guest-CPU context is updated. */
6680 int rc3 = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
6681 AssertRCReturn(rc3, rc3);
6682
6683 /* Pending HM CR3 sync. */
6684 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6685 {
6686 int rc2 = PGMUpdateCR3(pVCpu, pMixedCtx->cr3);
6687 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
6688 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
6689 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6690 }
6691
6692 /* Pending HM PAE PDPEs. */
6693 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
6694 {
6695 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
6696 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
6697 }
6698
6699 /* Pending PGM CR3 sync. */
6700 if (VMCPU_FF_IS_PENDING(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
6701 {
6702 int rc2 = PGMSyncCR3(pVCpu, pMixedCtx->cr0, pMixedCtx->cr3, pMixedCtx->cr4,
6703 VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
6704 if (rc2 != VINF_SUCCESS)
6705 {
6706 AssertRC(rc2);
6707 Log4(("hmR0VmxCheckForceFlags: PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", rc2));
6708 return rc2;
6709 }
6710 }
6711
6712 /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
6713 if ( VM_FF_IS_PENDING(pVM, VM_FF_HM_TO_R3_MASK)
6714 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6715 {
6716 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6717 int rc2 = RT_UNLIKELY(VM_FF_IS_PENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
6718 Log4(("hmR0VmxCheckForceFlags: HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc2));
6719 return rc2;
6720 }
6721
6722 /* Pending VM request packets, such as hardware interrupts. */
6723 if ( VM_FF_IS_PENDING(pVM, VM_FF_REQUEST)
6724 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_REQUEST))
6725 {
6726 Log4(("hmR0VmxCheckForceFlags: Pending VM request forcing us back to ring-3\n"));
6727 return VINF_EM_PENDING_REQUEST;
6728 }
6729
6730 /* Pending PGM pool flushes. */
6731 if (VM_FF_IS_PENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
6732 {
6733 Log4(("hmR0VmxCheckForceFlags: PGM pool flush pending forcing us back to ring-3\n"));
6734 return VINF_PGM_POOL_FLUSH_PENDING;
6735 }
6736
6737 /* Pending DMA requests. */
6738 if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_DMA))
6739 {
6740 Log4(("hmR0VmxCheckForceFlags: Pending DMA request forcing us back to ring-3\n"));
6741 return VINF_EM_RAW_TO_R3;
6742 }
6743 }
6744
6745 return VINF_SUCCESS;
6746}
6747
6748
6749/**
6750 * Converts any TRPM trap into a pending HM event. This is typically used when
6751 * entering from ring-3 (not longjmp returns).
6752 *
6753 * @param pVCpu Pointer to the VMCPU.
6754 */
6755static void hmR0VmxTrpmTrapToPendingEvent(PVMCPU pVCpu)
6756{
6757 Assert(TRPMHasTrap(pVCpu));
6758 Assert(!pVCpu->hm.s.Event.fPending);
6759
6760 uint8_t uVector;
6761 TRPMEVENT enmTrpmEvent;
6762 RTGCUINT uErrCode;
6763 RTGCUINTPTR GCPtrFaultAddress;
6764 uint8_t cbInstr;
6765
6766 int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr);
6767 AssertRC(rc);
6768
6769 /* Refer Intel spec. 24.8.3 "VM-entry Controls for Event Injection" for the format of u32IntInfo. */
6770 uint32_t u32IntInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID;
6771 if (enmTrpmEvent == TRPM_TRAP)
6772 {
6773 switch (uVector)
6774 {
6775 case X86_XCPT_NMI:
6776 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6777 break;
6778
6779 case X86_XCPT_BP:
6780 case X86_XCPT_OF:
6781 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6782 break;
6783
6784 case X86_XCPT_PF:
6785 case X86_XCPT_DF:
6786 case X86_XCPT_TS:
6787 case X86_XCPT_NP:
6788 case X86_XCPT_SS:
6789 case X86_XCPT_GP:
6790 case X86_XCPT_AC:
6791 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
6792 /* no break! */
6793 default:
6794 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6795 break;
6796 }
6797 }
6798 else if (enmTrpmEvent == TRPM_HARDWARE_INT)
6799 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6800 else if (enmTrpmEvent == TRPM_SOFTWARE_INT)
6801 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6802 else
6803 AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent));
6804
6805 rc = TRPMResetTrap(pVCpu);
6806 AssertRC(rc);
6807 Log4(("TRPM->HM event: u32IntInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n",
6808 u32IntInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress));
6809
6810 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, uErrCode, GCPtrFaultAddress);
6811 STAM_COUNTER_DEC(&pVCpu->hm.s.StatInjectPendingReflect);
6812}
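/*
 * Illustrative sketch of the interruption-info format assembled above, shown for
 * a #GP raised as a hardware exception with an error code. Only constants already
 * used in this file are referenced; the helper name is hypothetical and the block
 * is kept out of the build.
 */
#if 0
static uint32_t vmxExampleMakeGpIntInfo(void)
{
    uint32_t u32IntInfo = X86_XCPT_GP | VMX_EXIT_INTERRUPTION_INFO_VALID;                             /* vector + valid bit */
    u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT); /* hardware exception */
    u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;                                        /* #GP pushes an error code */
    return u32IntInfo;
}
#endif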
6813
6814
6815/**
6816 * Converts the pending HM event into a TRPM trap.
6817 *
6818 * @param pVCpu Pointer to the VMCPU.
6819 */
6820static void hmR0VmxPendingEventToTrpmTrap(PVMCPU pVCpu)
6821{
6822 Assert(pVCpu->hm.s.Event.fPending);
6823
6824 uint32_t uVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo);
6825 uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVCpu->hm.s.Event.u64IntInfo);
6826 bool fErrorCodeValid = VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntInfo);
6827 uint32_t uErrorCode = pVCpu->hm.s.Event.u32ErrCode;
6828
6829 /* If a trap was already pending, we did something wrong! */
6830 Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
6831
6832 TRPMEVENT enmTrapType;
6833 switch (uVectorType)
6834 {
6835 case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT:
6836 enmTrapType = TRPM_HARDWARE_INT;
6837 break;
6838
6839 case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
6840 enmTrapType = TRPM_SOFTWARE_INT;
6841 break;
6842
6843 case VMX_IDT_VECTORING_INFO_TYPE_NMI:
6844 case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
6845 case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT: /* #BP and #OF */
6846 case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT:
6847 enmTrapType = TRPM_TRAP;
6848 break;
6849
6850 default:
6851 AssertMsgFailed(("Invalid trap type %#x\n", uVectorType));
6852 enmTrapType = TRPM_32BIT_HACK;
6853 break;
6854 }
6855
6856 Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType));
6857
6858 int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
6859 AssertRC(rc);
6860
6861 if (fErrorCodeValid)
6862 TRPMSetErrorCode(pVCpu, uErrorCode);
6863
6864 if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
6865 && uVector == X86_XCPT_PF)
6866 {
6867 TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
6868 }
6869 else if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
6870 || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
6871 || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
6872 {
6873 AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
6874 || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF),
6875 ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType));
6876 TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
6877 }
6878
6879 /* Clear any pending events from the VMCS. */
6880 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); AssertRC(rc);
6881 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0); AssertRC(rc);
6882
6883 /* We're now done converting the pending event. */
6884 pVCpu->hm.s.Event.fPending = false;
6885}
6886
6887
6888/**
6889 * Does the necessary state syncing before returning to ring-3 for any reason
6890 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
6891 *
6892 * @returns VBox status code.
6893 * @param pVM Pointer to the VM.
6894 * @param pVCpu Pointer to the VMCPU.
6895 * @param pMixedCtx Pointer to the guest-CPU context. The data may
6896 * be out-of-sync. Make sure to update the required
6897 * fields before using them.
6898 * @param fSaveGuestState Whether to save the guest state or not.
6899 *
6900 * @remarks No-long-jmp zone!!!
6901 */
6902static int hmR0VmxLeave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fSaveGuestState)
6903{
6904 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6905 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6906
6907 RTCPUID idCpu = RTMpCpuId();
6908 Log4Func(("HostCpuId=%u\n", idCpu));
6909
6910 /*
6911 * !!! IMPORTANT !!!
6912 * If you modify code here, check whether hmR0VmxCallRing3Callback() needs to be updated too.
6913 */
6914
6915 /* Save the guest state if necessary. */
6916 if ( fSaveGuestState
6917 && HMVMXCPU_GST_VALUE(pVCpu) != HMVMX_UPDATED_GUEST_ALL)
6918 {
6919 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
6920 AssertRCReturn(rc, rc);
6921 Assert(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL);
6922 }
6923
6924 /* Restore host FPU state if necessary and resync on next R0 reentry. */
6925 if (CPUMIsGuestFPUStateActive(pVCpu))
6926 {
6927 /* We shouldn't reload CR0 without saving it first. */
6928 if (!fSaveGuestState)
6929 {
6930 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
6931 AssertRCReturn(rc, rc);
6932 }
6933 CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx);
6934 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
6935 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
6936 }
6937
6938 /* Restore host debug registers if necessary and resync on next R0 reentry. */
6939#ifdef VBOX_STRICT
6940 if (CPUMIsHyperDebugStateActive(pVCpu))
6941 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT);
6942#endif
6943 if (CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */))
6944 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG);
6945 Assert(!CPUMIsGuestDebugStateActive(pVCpu) && !CPUMIsGuestDebugStateActivePending(pVCpu));
6946 Assert(!CPUMIsHyperDebugStateActive(pVCpu) && !CPUMIsHyperDebugStateActivePending(pVCpu));
6947
6948#if HC_ARCH_BITS == 64
6949 /* Restore host-state bits that VT-x only restores partially. */
6950 if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
6951 && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
6952 {
6953 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, idCpu));
6954 VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
6955 }
6956 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
6957#endif
6958
6959#if HC_ARCH_BITS == 64
6960 /* Restore the lazy host MSRs as we're leaving VT-x context. */
6961 if ( pVM->hm.s.fAllow64BitGuests
6962 && pVCpu->hm.s.vmx.fLazyMsrs)
6963 {
6964 /* We shouldn't reload the guest MSRs without saving them first. */
6965 if (!fSaveGuestState)
6966 {
6967 int rc = hmR0VmxSaveGuestLazyMsrs(pVCpu, pMixedCtx);
6968 AssertRCReturn(rc, rc);
6969 }
6970 Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_LAZY_MSRS));
6971 hmR0VmxLazyRestoreHostMsrs(pVCpu);
6972 Assert(!pVCpu->hm.s.vmx.fLazyMsrs);
6973 }
6974#endif
6975
6976 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
6977 pVCpu->hm.s.vmx.fUpdatedHostMsrs = false;
6978
6979 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
6980 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatLoadGuestState);
6981 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1);
6982 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2);
6983 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
6984 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
6985 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
6986 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
6987
6988 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
6989
6990 /** @todo This partially defeats the purpose of having preemption hooks.
6991 * The problem is, deregistering the hooks should be moved to a place that
6992 * lasts until the EMT is about to be destroyed, not every time we leave HM
6993 * context.
6994 */
6995 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE)
6996 {
6997 int rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
6998 AssertRCReturn(rc, rc);
6999
7000 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
7001 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
7002 }
7003 Assert(!(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED));
7004 NOREF(idCpu);
7005
7006 return VINF_SUCCESS;
7007}
7008
7009
7010/**
7011 * Leaves the VT-x session.
7012 *
7013 * @returns VBox status code.
7014 * @param pVM Pointer to the VM.
7015 * @param pVCpu Pointer to the VMCPU.
7016 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7017 * out-of-sync. Make sure to update the required fields
7018 * before using them.
7019 *
7020 * @remarks No-long-jmp zone!!!
7021 */
7022DECLINLINE(int) hmR0VmxLeaveSession(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7023{
7024 HM_DISABLE_PREEMPT();
7025 HMVMX_ASSERT_CPU_SAFE();
7026 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
7027 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7028
7029 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
7030 and done this from the VMXR0ThreadCtxCallback(). */
7031 if (!pVCpu->hm.s.fLeaveDone)
7032 {
7033 int rc2 = hmR0VmxLeave(pVM, pVCpu, pMixedCtx, true /* fSaveGuestState */);
7034 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2);
7035 pVCpu->hm.s.fLeaveDone = true;
7036 }
7037 Assert(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL);
7038
7039 /*
7040 * !!! IMPORTANT !!!
7041 * If you modify code here, make sure to check whether hmR0VmxCallRing3Callback() needs to be updated too.
7042 */
7043
7044 /* Deregister hook now that we've left HM context before re-enabling preemption. */
7045 /** @todo Deregistering here means we need to VMCLEAR always
7046 * (longjmp/exit-to-r3) in VT-x which is not efficient. */
7047 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
7048 VMMR0ThreadCtxHookDisable(pVCpu);
7049
7050 /* Leave HM context. This takes care of local init (term). */
7051 int rc = HMR0LeaveCpu(pVCpu);
7052
7053 HM_RESTORE_PREEMPT();
7054 return rc;
7055}
7056
7057
7058/**
7059 * Does the necessary state syncing before doing a longjmp to ring-3.
7060 *
7061 * @returns VBox status code.
7062 * @param pVM Pointer to the VM.
7063 * @param pVCpu Pointer to the VMCPU.
7064 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7065 * out-of-sync. Make sure to update the required fields
7066 * before using them.
7067 *
7068 * @remarks No-long-jmp zone!!!
7069 */
7070DECLINLINE(int) hmR0VmxLongJmpToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7071{
7072 return hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx);
7073}
7074
7075
7076/**
7077 * Takes the necessary actions before going back to ring-3.
7078 *
7079 * An action requires us to go back to ring-3. This function does the necessary
7080 * steps before we can safely return to ring-3. This is not the same as a longjmp
7081 * to ring-3; this is voluntary and prepares the guest so it may continue
7082 * executing outside HM (recompiler/IEM).
7083 *
7084 * @returns VBox status code.
7085 * @param pVM Pointer to the VM.
7086 * @param pVCpu Pointer to the VMCPU.
7087 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7088 * out-of-sync. Make sure to update the required fields
7089 * before using them.
7090 * @param rcExit The reason for exiting to ring-3. Can be
7091 * VINF_VMM_UNKNOWN_RING3_CALL.
7092 */
7093static int hmR0VmxExitToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, int rcExit)
7094{
7095 Assert(pVM);
7096 Assert(pVCpu);
7097 Assert(pMixedCtx);
7098 HMVMX_ASSERT_PREEMPT_SAFE();
7099
7100 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
7101 {
7102 VMXGetActivatedVmcs(&pVCpu->hm.s.vmx.LastError.u64VMCSPhys);
7103 pVCpu->hm.s.vmx.LastError.u32VMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs;
7104 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
7105 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
7106 }
7107
7108 /* Please, no longjumps here (a log flush must not jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
7109 VMMRZCallRing3Disable(pVCpu);
7110 Log4(("hmR0VmxExitToRing3: pVCpu=%p idCpu=%RU32 rcExit=%d\n", pVCpu, pVCpu->idCpu, rcExit));
7111
7112 /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring3. */
7113 if (pVCpu->hm.s.Event.fPending)
7114 {
7115 hmR0VmxPendingEventToTrpmTrap(pVCpu);
7116 Assert(!pVCpu->hm.s.Event.fPending);
7117 }
7118
7119 /* Clear interrupt-window and NMI-window controls as we re-evaluate it when we return from ring-3. */
7120 hmR0VmxClearIntNmiWindowsVmcs(pVCpu);
7121
7122 /* If we're emulating an instruction, we shouldn't have any TRPM traps pending
7123 and if we're injecting an event we should have a TRPM trap pending. */
7124 AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", rcExit));
7125 AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", rcExit));
7126
7127 /* Save guest state and restore host state bits. */
7128 int rc = hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx);
7129 AssertRCReturn(rc, rc);
7130 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
7131 /* Thread-context hooks are unregistered at this point!!! */
7132
7133 /* Sync recompiler state. */
7134 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
7135 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
7136 | CPUM_CHANGED_LDTR
7137 | CPUM_CHANGED_GDTR
7138 | CPUM_CHANGED_IDTR
7139 | CPUM_CHANGED_TR
7140 | CPUM_CHANGED_HIDDEN_SEL_REGS);
7141 Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0));
7142 if ( pVM->hm.s.fNestedPaging
7143 && CPUMIsGuestPagingEnabledEx(pMixedCtx))
7144 {
7145 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
7146 }
7147
7148 Assert(!pVCpu->hm.s.fClearTrapFlag);
7149
7150 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
7151 if (rcExit != VINF_EM_RAW_INTERRUPT)
7152 HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
7153
7154 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
7155
7156 /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */
7157 VMMRZCallRing3RemoveNotification(pVCpu);
7158 VMMRZCallRing3Enable(pVCpu);
7159
7160 return rc;
7161}
7162
7163
7164/**
7165 * VMMRZCallRing3() callback wrapper which saves the guest state before we
7166 * longjump to ring-3 and possibly get preempted.
7167 *
7168 * @returns VBox status code.
7169 * @param pVCpu Pointer to the VMCPU.
7170 * @param enmOperation The operation causing the ring-3 longjump.
7171 * @param pvUser Opaque pointer to the guest-CPU context. The data
7172 * may be out-of-sync. Make sure to update the required
7173 * fields before using them.
7174 */
7175static DECLCALLBACK(int) hmR0VmxCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser)
7176{
7177 if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION)
7178 {
7179 /*
7180 * !!! IMPORTANT !!!
7181 * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too.
7182 * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions.
7183 */
7184 VMMRZCallRing3RemoveNotification(pVCpu);
7185 VMMRZCallRing3Disable(pVCpu);
7186 RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
7187 RTThreadPreemptDisable(&PreemptState);
7188
7189 PVM pVM = pVCpu->CTX_SUFF(pVM);
7190 if (CPUMIsGuestFPUStateActive(pVCpu))
7191 CPUMR0SaveGuestFPU(pVM, pVCpu, (PCPUMCTX)pvUser);
7192
7193 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
7194
7195#if HC_ARCH_BITS == 64
7196 /* Restore host-state bits that VT-x only restores partially. */
7197 if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
7198 && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
7199 VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
7200 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
7201
7202 /* Restore the lazy host MSRs as we're leaving VT-x context. */
7203 if ( pVM->hm.s.fAllow64BitGuests
7204 && pVCpu->hm.s.vmx.fLazyMsrs)
7205 hmR0VmxLazyRestoreHostMsrs(pVCpu);
7206#endif
7207 /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */
7208 pVCpu->hm.s.vmx.fUpdatedHostMsrs = false;
7209 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
7210 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE)
7211 {
7212 VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
7213 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
7214 }
7215
7216 /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */
7217 VMMR0ThreadCtxHookDisable(pVCpu);
7218 HMR0LeaveCpu(pVCpu);
7219 RTThreadPreemptRestore(&PreemptState);
7220 return VINF_SUCCESS;
7221 }
7222
7223 Assert(pVCpu);
7224 Assert(pvUser);
7225 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7226 HMVMX_ASSERT_PREEMPT_SAFE();
7227
7228 VMMRZCallRing3Disable(pVCpu);
7229 Assert(VMMR0IsLogFlushDisabled(pVCpu));
7230
7231 Log4(("hmR0VmxCallRing3Callback->hmR0VmxLongJmpToRing3 pVCpu=%p idCpu=%RU32 enmOperation=%d\n", pVCpu, pVCpu->idCpu,
7232 enmOperation));
7233
7234 int rc = hmR0VmxLongJmpToRing3(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser);
7235 AssertRCReturn(rc, rc);
7236
7237 VMMRZCallRing3Enable(pVCpu);
7238 return VINF_SUCCESS;
7239}
7240
7241
7242/**
7243 * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to
7244 * cause a VM-exit as soon as the guest is in a state to receive interrupts.
7245 *
7246 * @param pVCpu Pointer to the VMCPU.
7247 */
7248DECLINLINE(void) hmR0VmxSetIntWindowExitVmcs(PVMCPU pVCpu)
7249{
7250 if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT))
7251 {
7252 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT))
7253 {
7254 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
7255 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
7256 AssertRC(rc);
7257 Log4(("Setup interrupt-window exiting\n"));
7258 }
7259 } /* else we will deliver interrupts whenever the guest exits next and is in a state to receive events. */
7260}
7261
7262
7263/**
7264 * Clears the interrupt-window exiting control in the VMCS.
7265 *
7266 * @param pVCpu Pointer to the VMCPU.
7267 */
7268DECLINLINE(void) hmR0VmxClearIntWindowExitVmcs(PVMCPU pVCpu)
7269{
7270 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT);
7271 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
7272 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
7273 AssertRC(rc);
7274 Log4(("Cleared interrupt-window exiting\n"));
7275}
7276
7277
7278/**
7279 * Sets the NMI-window exiting control in the VMCS which instructs VT-x to
7280 * cause a VM-exit as soon as the guest is in a state to receive NMIs.
7281 *
7282 * @param pVCpu Pointer to the VMCPU.
7283 */
7284DECLINLINE(void) hmR0VmxSetNmiWindowExitVmcs(PVMCPU pVCpu)
7285{
7286 if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_NMI_WINDOW_EXIT))
7287 {
7288 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_NMI_WINDOW_EXIT))
7289 {
7290 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_NMI_WINDOW_EXIT;
7291 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
7292 AssertRC(rc);
7293 Log4(("Setup NMI-window exiting\n"));
7294 }
7295 } /* else we will deliver NMIs whenever we VM-exit next, even possibly nesting NMIs. Can't be helped on ancient CPUs. */
7296}
7297
7298
7299/**
7300 * Clears the NMI-window exiting control in the VMCS.
7301 *
7302 * @param pVCpu Pointer to the VMCPU.
7303 */
7304DECLINLINE(void) hmR0VmxClearNmiWindowExitVmcs(PVMCPU pVCpu)
7305{
7306 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_NMI_WINDOW_EXIT);
7307 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_NMI_WINDOW_EXIT;
7308 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
7309 AssertRC(rc);
7310 Log4(("Cleared NMI-window exiting\n"));
7311}
7312
7313
7314/**
7315 * Evaluates the event to be delivered to the guest and sets it as the pending
7316 * event.
7317 *
7318 * @param pVCpu Pointer to the VMCPU.
7319 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7320 * out-of-sync. Make sure to update the required fields
7321 * before using them.
7322 */
7323static void hmR0VmxEvaluatePendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7324{
7325 /* Get the current interruptibility-state of the guest and then figure out what can be injected. */
7326 uint32_t const uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx);
7327 bool const fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
7328 bool const fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
7329 bool const fBlockNmi = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI);
7330
7331 Assert(!fBlockSti || HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS));
7332 Assert(!(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI)); /* We don't support block-by-SMI yet.*/
7333 Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
7334 Assert(!TRPMHasTrap(pVCpu));
7335
7336 /*
7337 * Toggling of interrupt force-flags here is safe since we update TRPM on premature exits
7338 * to ring-3 before executing guest code, see hmR0VmxExitToRing3(). We must NOT restore these force-flags.
7339 */
7340 /** @todo SMI. SMIs take priority over NMIs. */
7341 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts. */
7342 {
7343 /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */
7344 if ( !pVCpu->hm.s.Event.fPending
7345 && !fBlockNmi
7346 && !fBlockSti
7347 && !fBlockMovSS)
7348 {
7349 Log4(("Pending NMI vcpu[%RU32]\n", pVCpu->idCpu));
7350 uint32_t u32IntInfo = X86_XCPT_NMI | VMX_EXIT_INTERRUPTION_INFO_VALID;
7351 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
7352
7353 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7354 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
7355 }
7356 else
7357 hmR0VmxSetNmiWindowExitVmcs(pVCpu);
7358 }
7359 /*
7360 * Check if the guest can receive external interrupts (PIC/APIC). Once we do PDMGetInterrupt() we -must- deliver
7361 * the interrupt ASAP. We must not execute any guest code until we inject the interrupt.
7362 */
7363 else if ( VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
7364 && !pVCpu->hm.s.fSingleInstruction)
7365 {
7366 int rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
7367 AssertRC(rc);
7368 bool const fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF);
7369 if ( !pVCpu->hm.s.Event.fPending
7370 && !fBlockInt
7371 && !fBlockSti
7372 && !fBlockMovSS)
7373 {
7374 uint8_t u8Interrupt;
7375 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
7376 if (RT_SUCCESS(rc))
7377 {
7378 Log4(("Pending interrupt vcpu[%RU32] u8Interrupt=%#x \n", pVCpu->idCpu, u8Interrupt));
7379 uint32_t u32IntInfo = u8Interrupt | VMX_EXIT_INTERRUPTION_INFO_VALID;
7380 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
7381
7382                 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7383 }
7384 else
7385 {
7386 /** @todo Does this actually happen? If not turn it into an assertion. */
7387 Assert(!VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)));
7388 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
7389 }
7390 }
7391 else
7392 hmR0VmxSetIntWindowExitVmcs(pVCpu);
7393 }
7394}
7395
7396
7397/**
7398 * Sets a pending-debug exception to be delivered to the guest if the guest is
7399 * single-stepping.
7400 *
7401 * @param pVCpu Pointer to the VMCPU.
7402 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7403 * out-of-sync. Make sure to update the required fields
7404 * before using them.
7405 */
7406DECLINLINE(void) hmR0VmxSetPendingDebugXcpt(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7407{
7408 Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS)); NOREF(pVCpu);
7409 if (pMixedCtx->eflags.Bits.u1TF) /* We don't have any IA32_DEBUGCTL MSR for guests. Treat as all bits 0. */
7410 {
7411 int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, VMX_VMCS_GUEST_DEBUG_EXCEPTIONS_BS);
7412 AssertRC(rc);
7413 }
7414}
7415
7416
7417/**
7418 * Injects any pending events into the guest if the guest is in a state to
7419 * receive them.
7420 *
7421 * @returns VBox status code (informational status codes included).
7422 * @param pVCpu Pointer to the VMCPU.
7423 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7424 * out-of-sync. Make sure to update the required fields
7425 * before using them.
7426 * @param fStepping Running in hmR0VmxRunGuestCodeStep() and we should
7427 * return VINF_EM_DBG_STEPPED if the event was
7428 * dispatched directly.
7429 */
7430static int hmR0VmxInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fStepping)
7431{
7432 HMVMX_ASSERT_PREEMPT_SAFE();
7433 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7434
7435 /* Get the current interruptibility-state of the guest and then figure out what can be injected. */
7436 uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx);
7437 bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
7438 bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
7439
7440 Assert(!fBlockSti || HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RFLAGS));
7441 Assert(!(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI)); /* We don't support block-by-SMI yet.*/
7442 Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
7443 Assert(!TRPMHasTrap(pVCpu));
7444
7445 int rc = VINF_SUCCESS;
7446 if (pVCpu->hm.s.Event.fPending)
7447 {
7448 /*
7449 * Do -not- clear any interrupt-window exiting control here. We might have an interrupt
7450 * pending even while injecting an event and in this case, we want a VM-exit as soon as
7451 * the guest is ready for the next interrupt, see @bugref{6208#c45}.
7452 *
7453 * See Intel spec. 26.6.5 "Interrupt-Window Exiting and Virtual-Interrupt Delivery".
7454 */
7455 uint32_t const uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo);
7456#ifdef VBOX_STRICT
7457 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
7458 {
7459 bool const fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF);
7460 Assert(!fBlockInt);
7461 Assert(!fBlockSti);
7462 Assert(!fBlockMovSS);
7463 }
7464 else if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
7465 {
7466 bool const fBlockNmi = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI);
7467 Assert(!fBlockSti);
7468 Assert(!fBlockMovSS);
7469 Assert(!fBlockNmi);
7470 }
7471#endif
7472 Log4(("Injecting pending event vcpu[%RU32] u64IntInfo=%#RX64 Type=%#x\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntInfo,
7473 (uint8_t)uIntType));
7474 rc = hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.cbInstr,
7475 pVCpu->hm.s.Event.u32ErrCode, pVCpu->hm.s.Event.GCPtrFaultAddress, fStepping, &uIntrState);
7476 AssertRCReturn(rc, rc);
7477
7478 /* Update the interruptibility-state as it could have been changed by
7479 hmR0VmxInjectEventVmcs() (e.g. real-on-v86 guest injecting software interrupts) */
7480 fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
7481 fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
7482
7483 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
7484 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt);
7485 else
7486 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt);
7487 }
7488
7489 /* Deliver pending debug exception if the guest is single-stepping. Evaluate and set the BS bit. */
7490 if ( fBlockSti
7491 || fBlockMovSS)
7492 {
7493 if ( !pVCpu->hm.s.fSingleInstruction
7494 && !DBGFIsStepping(pVCpu))
7495 {
7496 /*
7497 * The pending-debug exceptions field is cleared on all VM-exits except VMX_EXIT_TPR_BELOW_THRESHOLD,
7498 * VMX_EXIT_MTF, VMX_EXIT_APIC_WRITE and VMX_EXIT_VIRTUALIZED_EOI.
7499 * See Intel spec. 27.3.4 "Saving Non-Register State".
7500 */
7501 int rc2 = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
7502 AssertRCReturn(rc2, rc2);
7503 hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx);
7504 }
7505 else if (pMixedCtx->eflags.Bits.u1TF)
7506 {
7507 /*
7508 * We are single-stepping in the hypervisor debugger using EFLAGS.TF. Clear interrupt inhibition as setting the
7509 * BS bit would mean delivering a #DB to the guest upon VM-entry when it shouldn't be.
7510 */
7511 Assert(!(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG));
7512 uIntrState = 0;
7513 }
7514 }
7515
7516 /*
7517 * There's no need to clear the VM-entry interruption-information field here if we're not injecting anything.
7518 * VT-x clears the valid bit on every VM-exit. See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
7519 */
7520 int rc2 = hmR0VmxLoadGuestIntrState(pVCpu, uIntrState);
7521 AssertRC(rc2);
7522
7523 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET || (rc == VINF_EM_DBG_STEPPED && fStepping));
7524 NOREF(fBlockMovSS); NOREF(fBlockSti);
7525 return rc;
7526}
7527
7528
7529/**
7530 * Sets an invalid-opcode (#UD) exception as pending-for-injection into the VM.
7531 *
7532 * @param pVCpu Pointer to the VMCPU.
7533 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7534 * out-of-sync. Make sure to update the required fields
7535 * before using them.
7536 */
7537DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7538{
7539 NOREF(pMixedCtx);
7540 uint32_t u32IntInfo = X86_XCPT_UD | VMX_EXIT_INTERRUPTION_INFO_VALID;
7541 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7542}
7543
7544
7545/**
7546 * Injects a double-fault (#DF) exception into the VM.
7547 *
7548 * @returns VBox status code (informational status code included).
7549 * @param pVCpu Pointer to the VMCPU.
7550 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7551 * out-of-sync. Make sure to update the required fields
7552 * before using them.
7553 * @param fStepping Whether we're running in hmR0VmxRunGuestCodeStep()
7554 * and should return VINF_EM_DBG_STEPPED if the event
7555 * is injected directly (register modified by us, not
7556 * by hardware on VM-entry).
7557 * @param puIntrState Pointer to the current guest interruptibility-state.
7558 * This interruptibility-state will be updated if
7559 *                          necessary. This cannot be NULL.
7560 */
7561DECLINLINE(int) hmR0VmxInjectXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fStepping, uint32_t *puIntrState)
7562{
7563 uint32_t u32IntInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID;
7564 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
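    /* A #DF always delivers an error code and it is always zero (passed as u32ErrCode=0 below). */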
7565 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
7566 return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */,
7567 fStepping, puIntrState);
7568}
7569
7570
7571/**
7572 * Sets a debug (#DB) exception as pending-for-injection into the VM.
7573 *
7574 * @param pVCpu Pointer to the VMCPU.
7575 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7576 * out-of-sync. Make sure to update the required fields
7577 * before using them.
7578 */
7579DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7580{
7581 NOREF(pMixedCtx);
7582 uint32_t u32IntInfo = X86_XCPT_DB | VMX_EXIT_INTERRUPTION_INFO_VALID;
7583 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
7584 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7585}
7586
7587
7588/**
7589 * Sets an overflow (#OF) exception as pending-for-injection into the VM.
7590 *
7591 * @param pVCpu Pointer to the VMCPU.
7592 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7593 * out-of-sync. Make sure to update the required fields
7594 * before using them.
7595 * @param cbInstr The value of RIP that is to be pushed on the guest
7596 * stack.
7597 */
7598DECLINLINE(void) hmR0VmxSetPendingXcptOF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t cbInstr)
7599{
7600 NOREF(pMixedCtx);
7601 uint32_t u32IntInfo = X86_XCPT_OF | VMX_EXIT_INTERRUPTION_INFO_VALID;
7602 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
7603 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7604}
7605
7606
7607/**
7608 * Injects a general-protection (#GP) fault into the VM.
7609 *
7610 * @returns VBox status code (informational status code included).
7611 * @param pVCpu Pointer to the VMCPU.
7612 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7613 * out-of-sync. Make sure to update the required fields
7614 * before using them.
7615 * @param fErrorCodeValid Whether the error code is valid (depends on the CPU
7616 * mode, i.e. in real-mode it's not valid).
7617 * @param u32ErrorCode The error code associated with the #GP.
7618 * @param fStepping Whether we're running in
7619 * hmR0VmxRunGuestCodeStep() and should return
7620 * VINF_EM_DBG_STEPPED if the event is injected
7621 * directly (register modified by us, not by
7622 * hardware on VM-entry).
7623 * @param puIntrState Pointer to the current guest interruptibility-state.
7624 * This interruptibility-state will be updated if
7625 *                          necessary. This cannot be NULL.
7626 */
7627DECLINLINE(int) hmR0VmxInjectXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fErrorCodeValid, uint32_t u32ErrorCode,
7628 bool fStepping, uint32_t *puIntrState)
7629{
7630 uint32_t u32IntInfo = X86_XCPT_GP | VMX_EXIT_INTERRUPTION_INFO_VALID;
7631 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
7632 if (fErrorCodeValid)
7633 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
7634 return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntInfo, 0 /* cbInstr */, u32ErrorCode, 0 /* GCPtrFaultAddress */,
7635 fStepping, puIntrState);
7636}
7637
7638
7639/**
7640 * Sets a general-protection (#GP) exception as pending-for-injection into the
7641 * VM.
7642 *
7643 * @param pVCpu Pointer to the VMCPU.
7644 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7645 * out-of-sync. Make sure to update the required fields
7646 * before using them.
7647 * @param u32ErrorCode The error code associated with the #GP.
7648 */
7649DECLINLINE(void) hmR0VmxSetPendingXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t u32ErrorCode)
7650{
7651 NOREF(pMixedCtx);
7652 uint32_t u32IntInfo = X86_XCPT_GP | VMX_EXIT_INTERRUPTION_INFO_VALID;
7653 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
7654 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
7655 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrorCode, 0 /* GCPtrFaultAddress */);
7656}
7657
7658
7659/**
7660 * Sets a software interrupt (INTn) as pending-for-injection into the VM.
7661 *
7662 * @param pVCpu Pointer to the VMCPU.
7663 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7664 * out-of-sync. Make sure to update the required fields
7665 * before using them.
7666 * @param uVector The software interrupt vector number.
7667 * @param cbInstr The value of RIP that is to be pushed on the guest
7668 * stack.
7669 */
7670DECLINLINE(void) hmR0VmxSetPendingIntN(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint16_t uVector, uint32_t cbInstr)
7671{
7672 NOREF(pMixedCtx);
7673 uint32_t u32IntInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID;
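    /* INT3 (#BP) and INTO (#OF) are injected as software exceptions; all other INT n vectors as software interrupts. */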
7674 if ( uVector == X86_XCPT_BP
7675 || uVector == X86_XCPT_OF)
7676 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
7677 else
7678 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
7679 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
7680}
7681
7682
7683/**
7684 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
7685 * stack.
7686 *
7687 * @returns VBox status code (informational status code included).
7688 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
7689 * @param pVM Pointer to the VM.
7690 * @param pMixedCtx Pointer to the guest-CPU context.
7691 * @param uValue The value to push to the guest stack.
7692 */
7693DECLINLINE(int) hmR0VmxRealModeGuestStackPush(PVM pVM, PCPUMCTX pMixedCtx, uint16_t uValue)
7694{
7695 /*
7696 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
7697 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
7698 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
7699 */
7700 if (pMixedCtx->sp == 1)
7701 return VINF_EM_RESET;
7702 pMixedCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
7703 int rc = PGMPhysSimpleWriteGCPhys(pVM, pMixedCtx->ss.u64Base + pMixedCtx->sp, &uValue, sizeof(uint16_t));
7704 AssertRCReturn(rc, rc);
7705 return rc;
7706}
7707
7708
7709/**
7710 * Injects an event into the guest upon VM-entry by updating the relevant fields
7711 * in the VM-entry area in the VMCS.
7712 *
7713 * @returns VBox status code (informational error codes included).
7714 * @retval VINF_SUCCESS if the event is successfully injected into the VMCS.
7715 * @retval VINF_EM_RESET if event injection resulted in a triple-fault.
7716 *
7717 * @param pVCpu Pointer to the VMCPU.
7718 * @param pMixedCtx Pointer to the guest-CPU context. The data may
7719 * be out-of-sync. Make sure to update the required
7720 * fields before using them.
7721 * @param u64IntInfo The VM-entry interruption-information field.
7722 * @param cbInstr The VM-entry instruction length in bytes (for
7723 * software interrupts, exceptions and privileged
7724 * software exceptions).
7725 * @param u32ErrCode The VM-entry exception error code.
7726 * @param GCPtrFaultAddress The page-fault address for #PF exceptions.
7727 * @param puIntrState Pointer to the current guest interruptibility-state.
7728 * This interruptibility-state will be updated if
7729 *                          necessary. This cannot be NULL.
7730 * @param fStepping Whether we're running in
7731 * hmR0VmxRunGuestCodeStep() and should return
7732 * VINF_EM_DBG_STEPPED if the event is injected
7733 * directly (register modified by us, not by
7734 * hardware on VM-entry).
7735 *
7736 * @remarks Requires CR0!
7737 * @remarks No-long-jump zone!!!
7738 */
7739static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntInfo, uint32_t cbInstr,
7740 uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, bool fStepping, uint32_t *puIntrState)
7741{
7742 /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */
7743 AssertMsg(u64IntInfo >> 32 == 0, ("%#RX64\n", u64IntInfo));
7744 Assert(puIntrState);
7745 uint32_t u32IntInfo = (uint32_t)u64IntInfo;
7746
7747 uint32_t const uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(u32IntInfo);
7748 uint32_t const uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntInfo);
7749
7750#ifdef VBOX_STRICT
7751 /* Validate the error-code-valid bit for hardware exceptions. */
7752 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT)
7753 {
7754 switch (uVector)
7755 {
7756 case X86_XCPT_PF:
7757 case X86_XCPT_DF:
7758 case X86_XCPT_TS:
7759 case X86_XCPT_NP:
7760 case X86_XCPT_SS:
7761 case X86_XCPT_GP:
7762 case X86_XCPT_AC:
7763 AssertMsg(VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntInfo),
7764 ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector));
7765 /* fallthru */
7766 default:
7767 break;
7768 }
7769 }
7770#endif
7771
7772 /* Cannot inject an NMI when block-by-MOV SS is in effect. */
7773 Assert( uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI
7774 || !(*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS));
7775
7776 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[uVector & MASK_INJECT_IRQ_STAT]);
7777
7778 /* We require CR0 to check if the guest is in real-mode. */
7779 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
7780 AssertRCReturn(rc, rc);
7781
7782 /*
7783 * Hardware interrupts & exceptions cannot be delivered through the software interrupt redirection bitmap to the real
7784 * mode task in virtual-8086 mode. We must jump to the interrupt handler in the (real-mode) guest.
7785 * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode" for interrupt & exception classes.
7786 * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling.
7787 */
7788 if (CPUMIsGuestInRealModeEx(pMixedCtx))
7789 {
7790 PVM pVM = pVCpu->CTX_SUFF(pVM);
7791 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
7792 {
7793 Assert(PDMVmmDevHeapIsEnabled(pVM));
7794 Assert(pVM->hm.s.vmx.pRealModeTSS);
7795
7796 /* We require RIP, RSP, RFLAGS, CS, IDTR. Save the required ones from the VMCS. */
7797 rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
7798 rc |= hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx);
7799 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
7800 AssertRCReturn(rc, rc);
7801 Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_RIP));
7802
7803 /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */
7804 size_t const cbIdtEntry = sizeof(X86IDTR16);
7805 if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pMixedCtx->idtr.cbIdt)
7806 {
7807 /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */
7808 if (uVector == X86_XCPT_DF)
7809 return VINF_EM_RESET;
7810
7811 /* If we're injecting a #GP with no valid IDT entry, inject a double-fault. */
7812 if (uVector == X86_XCPT_GP)
7813 return hmR0VmxInjectXcptDF(pVCpu, pMixedCtx, fStepping, puIntrState);
7814
7815 /* If we're injecting an interrupt/exception with no valid IDT entry, inject a general-protection fault. */
7816 /* No error codes for exceptions in real-mode. See Intel spec. 20.1.4 "Interrupt and Exception Handling" */
7817             return hmR0VmxInjectXcptGP(pVCpu, pMixedCtx, false /* fErrorCodeValid */, 0 /* u32ErrCode */,
7818 fStepping, puIntrState);
7819 }
7820
7821 /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */
7822 uint16_t uGuestIp = pMixedCtx->ip;
7823 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT)
7824 {
7825 Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF);
7826 /* #BP and #OF are both benign traps, we need to resume the next instruction. */
7827 uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr;
7828 }
7829 else if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT)
7830 uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr;
7831
7832 /* Get the code segment selector and offset from the IDT entry for the interrupt handler. */
7833 X86IDTR16 IdtEntry;
7834 RTGCPHYS GCPhysIdtEntry = (RTGCPHYS)pMixedCtx->idtr.pIdt + uVector * cbIdtEntry;
7835 rc = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry);
7836 AssertRCReturn(rc, rc);
7837
7838 /* Construct the stack frame for the interrupt/exception handler. */
7839 rc = hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->eflags.u32);
7840 rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->cs.Sel);
7841 rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, uGuestIp);
7842 AssertRCReturn(rc, rc);
7843
7844 /* Clear the required eflag bits and jump to the interrupt/exception handler. */
7845 if (rc == VINF_SUCCESS)
7846 {
7847 pMixedCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
7848 pMixedCtx->rip = IdtEntry.offSel;
7849 pMixedCtx->cs.Sel = IdtEntry.uSel;
7850 pMixedCtx->cs.ValidSel = IdtEntry.uSel;
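                /* Real-mode code-segment base = selector << 4 (cbIdtEntry happens to equal 4 here). */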
7851 pMixedCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry;
7852 if ( uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT
7853 && uVector == X86_XCPT_PF)
7854 pMixedCtx->cr2 = GCPtrFaultAddress;
7855
7856 /* If any other guest-state bits are changed here, make sure to update
7857 hmR0VmxPreRunGuestCommitted() when thread-context hooks are used. */
7858 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS
7859 | HM_CHANGED_GUEST_RIP
7860 | HM_CHANGED_GUEST_RFLAGS
7861 | HM_CHANGED_GUEST_RSP);
7862
7863 /* We're clearing interrupts, which means no block-by-STI interrupt-inhibition. */
7864 if (*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
7865 {
7866 Assert( uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI
7867 && uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT);
7868 Log4(("Clearing inhibition due to STI.\n"));
7869 *puIntrState &= ~VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI;
7870 }
7871 Log4(("Injecting real-mode: u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x Eflags=%#x CS:EIP=%04x:%04x\n",
7872 u32IntInfo, u32ErrCode, cbInstr, pMixedCtx->eflags.u, pMixedCtx->cs.Sel, pMixedCtx->eip));
7873
7874 /* The event has been truly dispatched. Mark it as no longer pending so we don't attempt to 'undo'
7875 it, if we are returning to ring-3 before executing guest code. */
7876 pVCpu->hm.s.Event.fPending = false;
7877
7878 /* Make hmR0VmxPreRunGuest return if we're stepping since we've changed cs:rip. */
7879 if (fStepping)
7880 rc = VINF_EM_DBG_STEPPED;
7881 }
7882 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET || (rc == VINF_EM_DBG_STEPPED && fStepping));
7883 return rc;
7884 }
7885
7886 /*
7887 * For unrestricted execution enabled CPUs running real-mode guests, we must not set the deliver-error-code bit.
7888 * See Intel spec. 26.2.1.3 "VM-Entry Control Fields".
7889 */
7890 u32IntInfo &= ~VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
7891 }
7892
7893 /* Validate. */
7894 Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(u32IntInfo)); /* Bit 31 (Valid bit) must be set by caller. */
7895 Assert(!VMX_EXIT_INTERRUPTION_INFO_NMI_UNBLOCK_IRET(u32IntInfo)); /* Bit 12 MBZ. */
7896 Assert(!(u32IntInfo & 0x7ffff000)); /* Bits 30:12 MBZ. */
7897
7898 /* Inject. */
7899 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntInfo);
7900 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntInfo))
7901 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode);
7902 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
7903
7904 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT
7905 && uVector == X86_XCPT_PF)
7906 pMixedCtx->cr2 = GCPtrFaultAddress;
7907
7908 Log4(("Injecting vcpu[%RU32] u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x pMixedCtx->uCR2=%#RX64\n", pVCpu->idCpu,
7909 u32IntInfo, u32ErrCode, cbInstr, pMixedCtx->cr2));
7910
7911 AssertRCReturn(rc, rc);
7912 return rc;
7913}
7914
7915
7916/**
7917 * Clears the interrupt-window and NMI-window exiting controls in the VMCS
7918 * if they are currently set (i.e. while we are waiting for an interrupt or
7919 * NMI window to open).
7920 *
7921 * @param pVCpu Pointer to the VMCPU.
7922 *
7923 * @remarks Use this function only to clear events that have not yet been
7924 * delivered to the guest but are injected in the VMCS!
7925 * @remarks No-long-jump zone!!!
7926 */
7927static void hmR0VmxClearIntNmiWindowsVmcs(PVMCPU pVCpu)
7928{
7929 Log4Func(("vcpu[%d]\n", pVCpu->idCpu));
7930
7931 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT)
7932 hmR0VmxClearIntWindowExitVmcs(pVCpu);
7933
7934 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_NMI_WINDOW_EXIT)
7935 hmR0VmxClearNmiWindowExitVmcs(pVCpu);
7936}
7937
7938
7939/**
7940 * Enters the VT-x session.
7941 *
7942 * @returns VBox status code.
7943 * @param pVM Pointer to the VM.
7944 * @param pVCpu Pointer to the VMCPU.
7945 * @param pCpu Pointer to the CPU info struct.
7946 */
7947VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
7948{
7949 AssertPtr(pVM);
7950 AssertPtr(pVCpu);
7951 Assert(pVM->hm.s.vmx.fSupported);
7952 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7953 NOREF(pCpu); NOREF(pVM);
7954
7955 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
7956 Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE));
7957
7958#ifdef VBOX_STRICT
7959 /* Make sure we're in VMX root mode. */
7960 RTCCUINTREG u32HostCR4 = ASMGetCR4();
7961 if (!(u32HostCR4 & X86_CR4_VMXE))
7962 {
7963 LogRel(("VMXR0Enter: X86_CR4_VMXE bit in CR4 is not set!\n"));
7964 return VERR_VMX_X86_CR4_VMXE_CLEARED;
7965 }
7966#endif
7967
7968 /*
7969 * Load the VCPU's VMCS as the current (and active) one.
7970 */
7971 Assert(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR);
7972 int rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
7973 if (RT_FAILURE(rc))
7974 return rc;
7975
7976 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE;
7977 pVCpu->hm.s.fLeaveDone = false;
7978 Log4Func(("Activated Vmcs. HostCpuId=%u\n", RTMpCpuId()));
7979
7980 return VINF_SUCCESS;
7981}
7982
7983
7984/**
7985 * The thread-context callback (only on platforms which support it).
7986 *
7987 * @param enmEvent The thread-context event.
7988 * @param pVCpu Pointer to the VMCPU.
7989 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
7990 * @thread EMT(pVCpu)
7991 */
7992VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit)
7993{
7994 NOREF(fGlobalInit);
7995
7996 switch (enmEvent)
7997 {
7998 case RTTHREADCTXEVENT_OUT:
7999 {
8000 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
8001 Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
8002 VMCPU_ASSERT_EMT(pVCpu);
8003
8004 PVM pVM = pVCpu->CTX_SUFF(pVM);
8005 PCPUMCTX pMixedCtx = CPUMQueryGuestCtxPtr(pVCpu);
8006
8007 /* No longjmps (logger flushes, locks) in this fragile context. */
8008 VMMRZCallRing3Disable(pVCpu);
8009 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
8010
8011 /*
8012 * Restore host-state (FPU, debug etc.)
8013 */
8014 if (!pVCpu->hm.s.fLeaveDone)
8015 {
8016 /* Do -not- save guest-state here as we might already be in the middle of saving it (esp. bad if we are
8017                holding the PGM lock while saving the guest state, see hmR0VmxSaveGuestControlRegs()). */
8018 hmR0VmxLeave(pVM, pVCpu, pMixedCtx, false /* fSaveGuestState */);
8019 pVCpu->hm.s.fLeaveDone = true;
8020 }
8021
8022 /* Leave HM context, takes care of local init (term). */
8023 int rc = HMR0LeaveCpu(pVCpu);
8024 AssertRC(rc); NOREF(rc);
8025
8026 /* Restore longjmp state. */
8027 VMMRZCallRing3Enable(pVCpu);
8028 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt);
8029 break;
8030 }
8031
8032 case RTTHREADCTXEVENT_IN:
8033 {
8034 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
8035 Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu));
8036 VMCPU_ASSERT_EMT(pVCpu);
8037
8038 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
8039 VMMRZCallRing3Disable(pVCpu);
8040 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
8041
8042 /* Initialize the bare minimum state required for HM. This takes care of
8043 initializing VT-x if necessary (onlined CPUs, local init etc.) */
8044 int rc = HMR0EnterCpu(pVCpu);
8045 AssertRC(rc);
8046 Assert(HMCPU_CF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE));
8047
8048 /* Load the active VMCS as the current one. */
8049 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR)
8050 {
8051 rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
8052 AssertRC(rc); NOREF(rc);
8053 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE;
8054 Log4Func(("Resumed: Activated Vmcs. HostCpuId=%u\n", RTMpCpuId()));
8055 }
8056 pVCpu->hm.s.fLeaveDone = false;
8057
8058 /* Restore longjmp state. */
8059 VMMRZCallRing3Enable(pVCpu);
8060 break;
8061 }
8062
8063 default:
8064 break;
8065 }
8066}
8067
8068
8069/**
8070 * Saves the host state in the VMCS host-state.
8071 * Sets up the VM-exit MSR-load area.
8072 *
8073 * The CPU state will be loaded from these fields on every successful VM-exit.
8074 *
8075 * @returns VBox status code.
8076 * @param pVM Pointer to the VM.
8077 * @param pVCpu Pointer to the VMCPU.
8078 *
8079 * @remarks No-long-jump zone!!!
8080 */
8081static int hmR0VmxSaveHostState(PVM pVM, PVMCPU pVCpu)
8082{
8083 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
8084
8085 if (!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT))
8086 return VINF_SUCCESS;
8087
8088 int rc = hmR0VmxSaveHostControlRegs(pVM, pVCpu);
8089 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostControlRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8090
8091 rc = hmR0VmxSaveHostSegmentRegs(pVM, pVCpu);
8092 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostSegmentRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8093
8094 rc = hmR0VmxSaveHostMsrs(pVM, pVCpu);
8095 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostMsrs failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8096
8097 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_HOST_CONTEXT);
8098 return rc;
8099}
8100
8101
8102/**
8103 * Saves the host state in the VMCS host-state.
8104 *
8105 * @returns VBox status code.
8106 * @param pVM Pointer to the VM.
8107 * @param pVCpu Pointer to the VMCPU.
8108 *
8109 * @remarks No-long-jump zone!!!
8110 */
8111VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
8112{
8113 AssertPtr(pVM);
8114 AssertPtr(pVCpu);
8115
8116 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
8117
8118 /* Save the host state here while entering HM context. When thread-context hooks are used, we might get preempted
8119 and have to resave the host state but most of the time we won't be, so do it here before we disable interrupts. */
8120 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
8121 return hmR0VmxSaveHostState(pVM, pVCpu);
8122}
8123
8124
8125/**
8126 * Loads the guest state into the VMCS guest-state area.
8127 *
8128 * This will typically be done before VM-entry when the guest-CPU state and the
8129 * VMCS state may potentially be out of sync.
8130 *
8131 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the
8132 * VM-entry controls.
8133 * Sets up the appropriate VMX non-root function to execute guest code based on
8134 * the guest CPU mode.
8135 *
8136 * @returns VBox status code.
8137 * @param pVM Pointer to the VM.
8138 * @param pVCpu Pointer to the VMCPU.
8139 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
8140 * out-of-sync. Make sure to update the required fields
8141 * before using them.
8142 *
8143 * @remarks No-long-jump zone!!!
8144 */
8145static int hmR0VmxLoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
8146{
8147 AssertPtr(pVM);
8148 AssertPtr(pVCpu);
8149 AssertPtr(pMixedCtx);
8150 HMVMX_ASSERT_PREEMPT_SAFE();
8151
8152 VMMRZCallRing3Disable(pVCpu);
8153 Assert(VMMR0IsLogFlushDisabled(pVCpu));
8154
8155 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
8156
8157 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestState, x);
8158
8159 /* Determine real-on-v86 mode. */
8160 pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false;
8161 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
8162 && CPUMIsGuestInRealModeEx(pMixedCtx))
8163 {
8164 pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = true;
8165 }
8166
8167 /*
8168 * Load the guest-state into the VMCS.
8169 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
8170 * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it.
8171 */
8172 int rc = hmR0VmxSetupVMRunHandler(pVCpu, pMixedCtx);
8173 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupVMRunHandler! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8174
8175 /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-entry control updates. */
8176 rc = hmR0VmxLoadGuestEntryCtls(pVCpu, pMixedCtx);
8177 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestEntryCtls! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8178
8179 /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-exit control updates. */
8180 rc = hmR0VmxLoadGuestExitCtls(pVCpu, pMixedCtx);
8181 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupExitCtls failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8182
8183 rc = hmR0VmxLoadGuestActivityState(pVCpu, pMixedCtx);
8184 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestActivityState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8185
8186 rc = hmR0VmxLoadGuestCR3AndCR4(pVCpu, pMixedCtx);
8187 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestCR3AndCR4: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8188
8189 /* Assumes pMixedCtx->cr0 is up-to-date (strict builds require CR0 for segment register validation checks). */
8190 rc = hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx);
8191 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestSegmentRegs: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8192
8193 /* This needs to be done after hmR0VmxLoadGuestEntryCtls() and hmR0VmxLoadGuestExitCtls() as it may alter controls if we
8194 determine we don't have to swap EFER after all. */
8195 rc = hmR0VmxLoadGuestMsrs(pVCpu, pMixedCtx);
8196 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadSharedMsrs! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8197
8198 rc = hmR0VmxLoadGuestApicState(pVCpu, pMixedCtx);
8199 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestApicState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8200
8201 rc = hmR0VmxLoadGuestXcptIntercepts(pVCpu, pMixedCtx);
8202 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestXcptIntercepts! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8203
8204 /*
8205 * Loading Rflags here is fine, even though Rflags.TF might depend on guest debug state (which is not loaded here).
8206 * It is re-evaluated and updated if necessary in hmR0VmxLoadSharedState().
8207 */
8208 rc = hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx);
8209 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestRipRspRflags! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
8210
8211 /* Clear any unused and reserved bits. */
8212 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR2);
8213
8214 VMMRZCallRing3Enable(pVCpu);
8215
8216 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestState, x);
8217 return rc;
8218}
8219
8220
8221/**
8222 * Loads the state shared between the host and guest into the VMCS.
8223 *
8224 * @param pVM Pointer to the VM.
8225 * @param pVCpu Pointer to the VMCPU.
8226 * @param pCtx Pointer to the guest-CPU context.
8227 *
8228 * @remarks No-long-jump zone!!!
8229 */
8230static void hmR0VmxLoadSharedState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
8231{
8232 NOREF(pVM);
8233
8234 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
8235 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
8236
8237 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0))
8238 {
8239 int rc = hmR0VmxLoadSharedCR0(pVCpu, pCtx);
8240 AssertRC(rc);
8241 }
8242
8243 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG))
8244 {
8245 int rc = hmR0VmxLoadSharedDebugState(pVCpu, pCtx);
8246 AssertRC(rc);
8247
8248 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
8249 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RFLAGS))
8250 {
8251 rc = hmR0VmxLoadGuestRflags(pVCpu, pCtx);
8252 AssertRC(rc);
8253 }
8254 }
8255
8256 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_LAZY_MSRS))
8257 {
8258#if HC_ARCH_BITS == 64
8259 if (pVM->hm.s.fAllow64BitGuests)
8260 hmR0VmxLazyLoadGuestMsrs(pVCpu, pCtx);
8261#endif
8262 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_LAZY_MSRS);
8263 }
8264
8265 /* Loading CR0, debug state might have changed intercepts, update VMCS. */
8266 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS))
8267 {
8268 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
8269 AssertRC(rc);
8270 HMCPU_CF_CLEAR(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS);
8271 }
8272
8273 AssertMsg(!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE),
8274 ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu)));
8275}
8276
8277
8278/**
8279 * Worker for loading the guest-state bits in the inner VT-x execution loop.
8280 *
8281 * @param pVM Pointer to the VM.
8282 * @param pVCpu Pointer to the VMCPU.
8283 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
8284 * out-of-sync. Make sure to update the required fields
8285 * before using them.
8286 */
8287DECLINLINE(void) hmR0VmxLoadGuestStateOptimal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
8288{
8289 HMVMX_ASSERT_PREEMPT_SAFE();
8290
8291 Log5(("LoadFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu)));
8292#ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE
8293 HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
8294#endif
8295
8296 if (HMCPU_CF_IS_SET_ONLY(pVCpu, HM_CHANGED_GUEST_RIP))
8297 {
8298 int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx);
8299 AssertRC(rc);
8300 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal);
8301 }
8302 else if (HMCPU_CF_VALUE(pVCpu))
8303 {
8304 int rc = hmR0VmxLoadGuestState(pVM, pVCpu, pMixedCtx);
8305 AssertRC(rc);
8306 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
8307 }
8308
8309 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
8310 AssertMsg( !HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_ALL_GUEST)
8311 || HMCPU_CF_IS_PENDING_ONLY(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE),
8312 ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu)));
8313}
8314
8315
8316/**
8317 * Does the preparations before executing guest code in VT-x.
8318 *
8319 * This may cause longjmps to ring-3 and may even result in rescheduling to the
8320 * recompiler/IEM. We must be cautious what we do here regarding committing
8321 * guest-state information into the VMCS assuming we assuredly execute the
8322 * guest in VT-x mode.
8323 *
8324 * If we fall back to the recompiler/IEM after updating the VMCS and clearing
8325 * the common-state (TRPM/forceflags), we must undo those changes so that the
8326 * recompiler/IEM can (and should) use them when it resumes guest execution.
8327 * Otherwise such operations must be done when we can no longer exit to ring-3.
8328 *
8329 * @returns Strict VBox status code.
8330 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
8331 * have been disabled.
8332 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
8333 * double-fault into the guest.
8334 * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was
8335 * dispatched directly.
8336 * @retval VINF_* scheduling changes, we have to go back to ring-3.
8337 *
8338 * @param pVM Pointer to the VM.
8339 * @param pVCpu Pointer to the VMCPU.
8340 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
8341 * out-of-sync. Make sure to update the required fields
8342 * before using them.
8343 * @param pVmxTransient Pointer to the VMX transient structure.
8344 * @param fStepping Set if called from hmR0VmxRunGuestCodeStep(). Makes
8345 * us ignore some of the reasons for returning to
8346 * ring-3, and return VINF_EM_DBG_STEPPED if event
8347 * dispatching took place.
8348 */
8349static int hmR0VmxPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, bool fStepping)
8350{
8351 Assert(VMMRZCallRing3IsEnabled(pVCpu));
8352
8353#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
8354 PGMRZDynMapFlushAutoSet(pVCpu);
8355#endif
8356
8357 /* Check force flag actions that might require us to go back to ring-3. */
8358 int rc = hmR0VmxCheckForceFlags(pVM, pVCpu, pMixedCtx);
8359 if (rc != VINF_SUCCESS)
8360 return rc;
8361
8362#ifndef IEM_VERIFICATION_MODE_FULL
8363 /* Setup the Virtualized APIC accesses. pMixedCtx->msrApicBase is always up-to-date. It's not part of the VMCS. */
8364 if ( pVCpu->hm.s.vmx.u64MsrApicBase != pMixedCtx->msrApicBase
8365 && (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
8366 {
8367 Assert(pVM->hm.s.vmx.HCPhysApicAccess);
8368 RTGCPHYS GCPhysApicBase;
8369 GCPhysApicBase = pMixedCtx->msrApicBase;
8370 GCPhysApicBase &= PAGE_BASE_GC_MASK;
8371
8372 /* Unalias any existing mapping. */
8373 rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
8374 AssertRCReturn(rc, rc);
8375
8376 /* Map the HC APIC-access page into the GC space, this also updates the shadow page tables if necessary. */
8377 Log4(("Mapped HC APIC-access page into GC: GCPhysApicBase=%#RGv\n", GCPhysApicBase));
8378 rc = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
8379 AssertRCReturn(rc, rc);
8380
8381 pVCpu->hm.s.vmx.u64MsrApicBase = pMixedCtx->msrApicBase;
8382 }
8383#endif /* !IEM_VERIFICATION_MODE_FULL */
8384
8385 if (TRPMHasTrap(pVCpu))
8386 hmR0VmxTrpmTrapToPendingEvent(pVCpu);
8387 hmR0VmxEvaluatePendingEvent(pVCpu, pMixedCtx);
8388
8389 /*
8390 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus needs to be done with
8391 * longjmps or interrupts + preemption enabled. Event injection might also result in triple-faulting the VM.
8392 */
8393 rc = hmR0VmxInjectPendingEvent(pVCpu, pMixedCtx, fStepping);
8394 if (RT_UNLIKELY(rc != VINF_SUCCESS))
8395 {
8396 Assert(rc == VINF_EM_RESET || (rc == VINF_EM_DBG_STEPPED && fStepping));
8397 return rc;
8398 }
8399
8400 /*
8401 * Load the guest state bits, we can handle longjmps/getting preempted here.
8402 *
8403 * If we are injecting events to a real-on-v86 mode guest, we will have to update
8404 * RIP and some segment registers, i.e. hmR0VmxInjectPendingEvent()->hmR0VmxInjectEventVmcs().
8405 * Hence, this needs to be done -after- injection of events.
8406 */
8407 hmR0VmxLoadGuestStateOptimal(pVM, pVCpu, pMixedCtx);
8408
8409 /*
8410 * No longjmps to ring-3 from this point on!!!
8411 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
8412 * This also disables flushing of the R0-logger instance (if any).
8413 */
8414 VMMRZCallRing3Disable(pVCpu);
8415
8416 /*
8417 * We disable interrupts so that we don't miss any interrupts that would flag preemption (IPI/timers etc.)
8418 * when thread-context hooks aren't used and we've been running with preemption disabled for a while.
8419 *
8420     * We need to check for force-flags that could've possibly been altered since we last checked them (e.g.
8421 * by PDMGetInterrupt() leaving the PDM critical section, see @bugref{6398}).
8422 *
8423 * We also check a couple of other force-flags as a last opportunity to get the EMT back to ring-3 before
8424 * executing guest code.
8425 */
8426 pVmxTransient->fEFlags = ASMIntDisableFlags();
8427 if ( ( VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
8428 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
8429 && ( !fStepping /* Optimized for the non-stepping case, of course. */
8430 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) )
8431 {
8432 ASMSetFlags(pVmxTransient->fEFlags);
8433 VMMRZCallRing3Enable(pVCpu);
8434 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
8435 return VINF_EM_RAW_TO_R3;
8436 }
8437
8438 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
8439 {
8440 ASMSetFlags(pVmxTransient->fEFlags);
8441 VMMRZCallRing3Enable(pVCpu);
8442 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
8443 return VINF_EM_RAW_INTERRUPT;
8444 }
8445
8446 /* We've injected any pending events. This is really the point of no return (to ring-3). */
8447 pVCpu->hm.s.Event.fPending = false;
8448
8449 return VINF_SUCCESS;
8450}
8451
8452
8453/**
8454 * Prepares to run guest code in VT-x and we've committed to doing so. This
8455 * means there is no backing out to ring-3 or anywhere else at this
8456 * point.
8457 *
8458 * @param pVM Pointer to the VM.
8459 * @param pVCpu Pointer to the VMCPU.
8460 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
8461 * out-of-sync. Make sure to update the required fields
8462 * before using them.
8463 * @param pVmxTransient Pointer to the VMX transient structure.
8464 *
8465 * @remarks Called with preemption disabled.
8466 * @remarks No-long-jump zone!!!
8467 */
8468static void hmR0VmxPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8469{
8470 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
8471 Assert(VMMR0IsLogFlushDisabled(pVCpu));
8472 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
8473
8474 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
8475 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); /* Indicate the start of guest execution. */
8476
8477#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE
8478 if (!CPUMIsGuestFPUStateActive(pVCpu))
8479 CPUMR0LoadGuestFPU(pVM, pVCpu, pMixedCtx);
8480 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
8481#endif
8482
8483 if ( pVCpu->hm.s.fPreloadGuestFpu
8484 && !CPUMIsGuestFPUStateActive(pVCpu))
8485 {
8486 CPUMR0LoadGuestFPU(pVM, pVCpu, pMixedCtx);
8487 Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_CR0));
8488 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
8489 }
8490
8491 /*
8492 * Lazy-update of the host MSRs values in the auto-load/store MSR area.
8493 */
8494 if ( !pVCpu->hm.s.vmx.fUpdatedHostMsrs
8495 && pVCpu->hm.s.vmx.cMsrs > 0)
8496 {
8497 hmR0VmxUpdateAutoLoadStoreHostMsrs(pVCpu);
8498 }
8499
8500 /*
8501 * Load the host state bits as we may've been preempted (only happens when
8502 * thread-context hooks are used or when hmR0VmxSetupVMRunHandler() changes pfnStartVM).
8503 */
8504 /** @todo Why should hmR0VmxSetupVMRunHandler() changing pfnStartVM have
8505      *        any effect on the host state needing to be saved? */
8506 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT))
8507 {
8508 /* This ASSUMES that pfnStartVM has been set up already. */
8509 int rc = hmR0VmxSaveHostState(pVM, pVCpu);
8510 AssertRC(rc);
8511 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreemptSaveHostState);
8512 }
8513 Assert(!HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT));
8514
8515 /*
8516 * Load the state shared between host and guest (FPU, debug, lazy MSRs).
8517 */
8518 if (HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE))
8519 hmR0VmxLoadSharedState(pVM, pVCpu, pMixedCtx);
8520 AssertMsg(!HMCPU_CF_VALUE(pVCpu), ("fContextUseFlags=%#RX32\n", HMCPU_CF_VALUE(pVCpu)));
8521
8522 /* Store status of the shared guest-host state at the time of VM-entry. */
8523#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS)
8524 if (CPUMIsGuestInLongModeEx(pMixedCtx))
8525 {
8526 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu);
8527 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu);
8528 }
8529 else
8530#endif
8531 {
8532 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
8533 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
8534 }
8535 pVmxTransient->fWasGuestFPUStateActive = CPUMIsGuestFPUStateActive(pVCpu);
8536
8537 /*
8538 * Cache the TPR-shadow for checking on every VM-exit if it might have changed.
8539 */
8540 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
8541         pVmxTransient->u8GuestTpr = pVCpu->hm.s.vmx.pbVirtApic[0x80]; /* Offset 0x80 is the TPR field in the virtual-APIC page. */
8542
8543 PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu();
8544 RTCPUID idCurrentCpu = pCpu->idCpu;
8545 if ( pVmxTransient->fUpdateTscOffsettingAndPreemptTimer
8546 || idCurrentCpu != pVCpu->hm.s.idLastCpu)
8547 {
8548 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVM, pVCpu);
8549 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = false;
8550 }
8551
8552 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */
8553 hmR0VmxFlushTaggedTlb(pVCpu, pCpu); /* Invalidate the appropriate guest entries from the TLB. */
8554 Assert(idCurrentCpu == pVCpu->hm.s.idLastCpu);
8555 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Update the error reporting info. with the current host CPU. */
8556
8557 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
8558
8559 TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about
8560 to start executing. */
8561
8562 /*
8563 * Load the TSC_AUX MSR when we are not intercepting RDTSCP.
8564 */
8565 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
8566 {
8567 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT))
8568 {
8569 bool fMsrUpdated;
8570 int rc2 = hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx);
8571 AssertRC(rc2);
8572 Assert(HMVMXCPU_GST_IS_UPDATED(pVCpu, HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS));
8573
8574 rc2 = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_TSC_AUX, CPUMR0GetGuestTscAux(pVCpu), true /* fUpdateHostMsr */,
8575 &fMsrUpdated);
8576 AssertRC(rc2);
8577 Assert(fMsrUpdated || pVCpu->hm.s.vmx.fUpdatedHostMsrs);
8578
8579 /* Finally, mark that all host MSR values are updated so we don't redo it without leaving VT-x. See @bugref{6956}. */
8580 pVCpu->hm.s.vmx.fUpdatedHostMsrs = true;
8581 }
8582 else
8583 {
8584 hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, MSR_K8_TSC_AUX);
8585 Assert(!pVCpu->hm.s.vmx.cMsrs || pVCpu->hm.s.vmx.fUpdatedHostMsrs);
8586 }
8587 }
8588
8589#ifdef VBOX_STRICT
8590 hmR0VmxCheckAutoLoadStoreMsrs(pVCpu);
8591 hmR0VmxCheckHostEferMsr(pVCpu);
8592 AssertRC(hmR0VmxCheckVmcsCtls(pVCpu));
8593#endif
8594#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
8595 uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVM, pVCpu, pMixedCtx);
8596 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
8597 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
8598#endif
8599}
8600
8601
8602/**
8603 * Performs some essential restoration of state after running guest code in
8604 * VT-x.
8605 *
8606 * @param pVM Pointer to the VM.
8607 * @param pVCpu Pointer to the VMCPU.
8608 * @param pMixedCtx      Pointer to the guest-CPU context. The data may be
8609 * out-of-sync. Make sure to update the required fields
8610 * before using them.
8611 * @param pVmxTransient Pointer to the VMX transient structure.
8612 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
8613 *
8614 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
8615 *
8616 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
8617 * unconditionally when it is safe to do so.
8618 */
8619static void hmR0VmxPostRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, int rcVMRun)
8620{
8621 NOREF(pVM);
8622
8623 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
8624
8625 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */
8626 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */
8627 HMVMXCPU_GST_RESET_TO(pVCpu, 0); /* Exits/longjmps to ring-3 requires saving the guest state. */
8628 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
8629 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
8630 pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */
8631
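    /* When RDTSC isn't intercepted, the guest reads the host TSC plus our offset directly; record that value as the
       last TSC the guest could have seen (TMCpuTickSetLastSeen). */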
8632 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT))
8633 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVCpu->hm.s.vmx.u64TSCOffset);
8634
8635 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
8636 TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */
8637 Assert(!ASMIntAreEnabled());
8638 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
8639
8640#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE
8641 if (CPUMIsGuestFPUStateActive(pVCpu))
8642 {
8643 hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
8644 CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx);
8645 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
8646 }
8647#endif
8648
8649#if HC_ARCH_BITS == 64
8650 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Host state messed up by VT-x, we must restore. */
8651#endif
8652 pVCpu->hm.s.vmx.uVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
8653#ifdef VBOX_STRICT
8654 hmR0VmxCheckHostEferMsr(pVCpu); /* Verify that VMRUN/VMLAUNCH didn't modify host EFER. */
8655#endif
8656 ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */
8657 VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */
8658
8659 /* Save the basic VM-exit reason. Refer Intel spec. 24.9.1 "Basic VM-exit Information". */
8660 uint32_t uExitReason;
8661 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
8662 rc |= hmR0VmxReadEntryIntInfoVmcs(pVmxTransient);
8663 AssertRC(rc);
8664 pVmxTransient->uExitReason = (uint16_t)VMX_EXIT_REASON_BASIC(uExitReason);
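    /* A VM-entry interruption-info field that is still marked valid at this point is taken to mean the VM-entry itself failed. */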
8665 pVmxTransient->fVMEntryFailed = VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uEntryIntInfo);
8666
8667 /* Update the VM-exit history array. */
8668 HMCPU_EXIT_HISTORY_ADD(pVCpu, pVmxTransient->uExitReason);
8669
8670 /* If the VMLAUNCH/VMRESUME failed, we can bail out early. This does -not- cover VMX_EXIT_ERR_*. */
8671 if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS))
8672 {
8673 Log4(("VM-entry failure: pVCpu=%p idCpu=%RU32 rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", pVCpu, pVCpu->idCpu, rcVMRun,
8674 pVmxTransient->fVMEntryFailed));
8675 return;
8676 }
8677
8678 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
8679 {
8680 /** @todo We can optimize this by only syncing with our force-flags when
8681 * really needed and keeping the VMCS state as it is for most
8682 * VM-exits. */
8683 /* Update the guest interruptibility-state from the VMCS. */
8684 hmR0VmxSaveGuestIntrState(pVCpu, pMixedCtx);
8685
8686#if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE)
8687 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
8688 AssertRC(rc);
8689#elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS)
8690 rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
8691 AssertRC(rc);
8692#endif
8693
8694 /*
8695 * If the TPR was raised by the guest, it wouldn't cause a VM-exit immediately. Instead we sync the TPR lazily whenever
8696 * we eventually get a VM-exit for any reason. This may be expensive as PDMApicSetTPR() can longjmp to ring-3, which is
8697 * why it's done here: it's easier and no less efficient to deal with it here than making hmR0VmxSaveGuestState()
8698 * cope with longjmps safely (see VMCPU_FF_HM_UPDATE_CR3 handling).
8699 */
8700 if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
8701 && pVmxTransient->u8GuestTpr != pVCpu->hm.s.vmx.pbVirtApic[0x80])
8702 {
8703 rc = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[0x80]);
8704 AssertRC(rc);
8705 HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
8706 }
8707 }
8708}
8709
8710
8711/**
8712 * Runs the guest code using VT-x the normal way.
8713 *
8714 * @returns VBox status code.
8715 * @param pVM Pointer to the VM.
8716 * @param pVCpu Pointer to the VMCPU.
8717 * @param pCtx Pointer to the guest-CPU context.
8718 *
8719 * @note Mostly the same as hmR0VmxRunGuestCodeStep().
8720 */
8721static int hmR0VmxRunGuestCodeNormal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
8722{
8723 VMXTRANSIENT VmxTransient;
8724 VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true;
8725 int rc = VERR_INTERNAL_ERROR_5;
8726 uint32_t cLoops = 0;
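    /* The run loop: prepare the world switch (may return to ring-3), commit and run the guest, restore host state,
       then profile and dispatch the VM-exit, until an exit handler or an error forces us back to ring-3. */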
8727
8728 for (;; cLoops++)
8729 {
8730 Assert(!HMR0SuspendPending());
8731 HMVMX_ASSERT_CPU_SAFE();
8732
8733        /* Preparatory work for running guest code; this may force us to return
8734 to ring-3. This bugger disables interrupts on VINF_SUCCESS! */
8735 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
8736 rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient, false /* fStepping */);
8737 if (rc != VINF_SUCCESS)
8738 break;
8739
8740 hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient);
8741 rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx);
8742 /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */
8743
8744 /* Restore any residual host-state and save any bits shared between host
8745 and guest into the guest-CPU state. Re-enables interrupts! */
8746 hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc);
8747
8748 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
8749 if (RT_UNLIKELY(rc != VINF_SUCCESS))
8750 {
8751 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
8752 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient);
8753 return rc;
8754 }
8755
8756 /* Profile the VM-exit. */
8757 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
8758 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
8759 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
8760 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
8761 HMVMX_START_EXIT_DISPATCH_PROF();
8762
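        /* Fire the DTrace probes; the context-carrying probe only pays for reading the exit qualification and saving
           the full guest state when it is actually enabled. */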
8763 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, pCtx, VmxTransient.uExitReason);
8764 if (RT_UNLIKELY(VBOXVMM_R0_HMVMX_VMEXIT_ENABLED()))
8765 {
8766 hmR0VmxReadExitQualificationVmcs(pVCpu, &VmxTransient);
8767 hmR0VmxSaveGuestState(pVCpu, pCtx);
8768 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, VmxTransient.uExitReason, VmxTransient.uExitQualification);
8769 }
8770
8771 /* Handle the VM-exit. */
8772#ifdef HMVMX_USE_FUNCTION_TABLE
8773 rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient);
8774#else
8775 rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason);
8776#endif
8777 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
8778 if (rc != VINF_SUCCESS)
8779 break;
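        /* Cap the number of back-to-back resume loops before forcing a trip back to ring-3. */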
8780 if (cLoops > pVM->hm.s.cMaxResumeLoops)
8781 {
8782 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
8783 rc = VINF_EM_RAW_INTERRUPT;
8784 break;
8785 }
8786 }
8787
8788 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
8789 return rc;
8790}
8791
8792
8793/**
8794 * Single-steps guest code using VT-x.
8795 *
8796 * @returns VBox status code.
8797 * @param pVM Pointer to the VM.
8798 * @param pVCpu Pointer to the VMCPU.
8799 * @param pCtx Pointer to the guest-CPU context.
8800 *
8801 * @note Mostly the same as hmR0VmxRunGuestCodeNormal().
8802 */
8803static int hmR0VmxRunGuestCodeStep(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
8804{
8805 VMXTRANSIENT VmxTransient;
8806 VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true;
8807 VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5;
8808 uint32_t cLoops = 0;
8809 uint16_t uCsStart = pCtx->cs.Sel;
8810 uint64_t uRipStart = pCtx->rip;
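    /* Remember the CS:RIP at which single-stepping started so we can tell when the guest has moved past that instruction. */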
8811
8812 for (;; cLoops++)
8813 {
8814 Assert(!HMR0SuspendPending());
8815 HMVMX_ASSERT_CPU_SAFE();
8816
8817        /* Preparatory work for running guest code; this may force us to return
8818 to ring-3. This bugger disables interrupts on VINF_SUCCESS! */
8819 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
8820 rcStrict = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient, true /* fStepping */);
8821 if (rcStrict != VINF_SUCCESS)
8822 break;
8823
8824 hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient);
8825 rcStrict = hmR0VmxRunGuest(pVM, pVCpu, pCtx);
8826 /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */
8827
8828 /* Restore any residual host-state and save any bits shared between host
8829 and guest into the guest-CPU state. Re-enables interrupts! */
8830 hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, VBOXSTRICTRC_TODO(rcStrict));
8831
8832 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
8833 if (RT_UNLIKELY(rcStrict != VINF_SUCCESS))
8834 {
8835 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
8836 hmR0VmxReportWorldSwitchError(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict), pCtx, &VmxTransient);
8837 return VBOXSTRICTRC_TODO(rcStrict);
8838 }
8839
8840 /* Profile the VM-exit. */
8841 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
8842 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
8843 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
8844 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
8845 HMVMX_START_EXIT_DISPATCH_PROF();
8846
8847 VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, pCtx, VmxTransient.uExitReason);
8848 if (RT_UNLIKELY(VBOXVMM_R0_HMVMX_VMEXIT_ENABLED()))
8849 {
8850 hmR0VmxReadExitQualificationVmcs(pVCpu, &VmxTransient);
8851 hmR0VmxSaveGuestState(pVCpu, pCtx);
8852 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, VmxTransient.uExitReason, VmxTransient.uExitQualification);
8853 }
8854
8855 /* Handle the VM-exit - we quit earlier on certain VM-exits, see hmR0VmxHandleExitStep(). */
8856 rcStrict = hmR0VmxHandleExitStep(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason, uCsStart, uRipStart);
8857 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
8858 if (rcStrict != VINF_SUCCESS)
8859 break;
8860 if (cLoops > pVM->hm.s.cMaxResumeLoops)
8861 {
8862 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops);
8863 rcStrict = VINF_EM_RAW_INTERRUPT;
8864 break;
8865 }
8866
8867 /*
8868         * Did the RIP change? If so, consider it a single step.
8869 * Otherwise, make sure one of the TFs gets set.
8870 */
8871 int rc2 = hmR0VmxSaveGuestRip(pVCpu, pCtx);
8872 rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pCtx);
8873 AssertRCReturn(rc2, rc2);
8874 if ( pCtx->rip != uRipStart
8875 || pCtx->cs.Sel != uCsStart)
8876 {
8877 rcStrict = VINF_EM_DBG_STEPPED;
8878 break;
8879 }
8880 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG);
8881 }
8882
8883 /*
8884 * Clear the X86_EFL_TF if necessary.
8885 */
8886 if (pVCpu->hm.s.fClearTrapFlag)
8887 {
8888 int rc2 = hmR0VmxSaveGuestRflags(pVCpu, pCtx);
8889 AssertRCReturn(rc2, rc2);
8890 pVCpu->hm.s.fClearTrapFlag = false;
8891 pCtx->eflags.Bits.u1TF = 0;
8892 }
8893    /** @todo there seem to be issues with the resume flag when the monitor trap
8894     *        flag is pending without being used. Seen early in BIOS init when
8895     *        accessing the APIC page in protected mode. */
8896
8897 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
8898 return VBOXSTRICTRC_TODO(rcStrict);
8899}
8900
8901
8902/**
8903 * Runs the guest code using VT-x.
8904 *
8905 * @returns VBox status code.
8906 * @param pVM Pointer to the VM.
8907 * @param pVCpu Pointer to the VMCPU.
8908 * @param pCtx Pointer to the guest-CPU context.
8909 */
8910VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
8911{
8912 Assert(VMMRZCallRing3IsEnabled(pVCpu));
8913 Assert(HMVMXCPU_GST_VALUE(pVCpu) == HMVMX_UPDATED_GUEST_ALL);
8914 HMVMX_ASSERT_PREEMPT_SAFE();
8915
8916 VMMRZCallRing3SetNotification(pVCpu, hmR0VmxCallRing3Callback, pCtx);
8917
8918 int rc;
8919 if (!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu))
8920 rc = hmR0VmxRunGuestCodeNormal(pVM, pVCpu, pCtx);
8921 else
8922 rc = hmR0VmxRunGuestCodeStep(pVM, pVCpu, pCtx);
8923
8924 if (rc == VERR_EM_INTERPRETER)
8925 rc = VINF_EM_RAW_EMULATE_INSTR;
8926 else if (rc == VINF_EM_RESET)
8927 rc = VINF_EM_TRIPLE_FAULT;
8928
8929 int rc2 = hmR0VmxExitToRing3(pVM, pVCpu, pCtx, rc);
8930 if (RT_FAILURE(rc2))
8931 {
8932 pVCpu->hm.s.u32HMError = rc;
8933 rc = rc2;
8934 }
8935 Assert(!VMMRZCallRing3IsNotificationSet(pVCpu));
8936 return rc;
8937}
8938
8939
8940#ifndef HMVMX_USE_FUNCTION_TABLE
8941DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason)
8942{
8943#ifdef DEBUG_ramshankar
8944# define SVVMCS() do { int rc2 = hmR0VmxSaveGuestState(pVCpu, pMixedCtx); AssertRC(rc2); } while (0)
8945# define LDVMCS() do { HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST); } while (0)
8946#endif
8947 int rc;
8948 switch (rcReason)
8949 {
8950 case VMX_EXIT_EPT_MISCONFIG: /* SVVMCS(); */ rc = hmR0VmxExitEptMisconfig(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8951 case VMX_EXIT_EPT_VIOLATION: /* SVVMCS(); */ rc = hmR0VmxExitEptViolation(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8952 case VMX_EXIT_IO_INSTR: /* SVVMCS(); */ rc = hmR0VmxExitIoInstr(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8953 case VMX_EXIT_CPUID: /* SVVMCS(); */ rc = hmR0VmxExitCpuid(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8954 case VMX_EXIT_RDTSC: /* SVVMCS(); */ rc = hmR0VmxExitRdtsc(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8955 case VMX_EXIT_RDTSCP: /* SVVMCS(); */ rc = hmR0VmxExitRdtscp(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8956 case VMX_EXIT_APIC_ACCESS: /* SVVMCS(); */ rc = hmR0VmxExitApicAccess(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8957 case VMX_EXIT_XCPT_OR_NMI: /* SVVMCS(); */ rc = hmR0VmxExitXcptOrNmi(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8958 case VMX_EXIT_MOV_CRX: /* SVVMCS(); */ rc = hmR0VmxExitMovCRx(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8959 case VMX_EXIT_EXT_INT: /* SVVMCS(); */ rc = hmR0VmxExitExtInt(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8960 case VMX_EXIT_INT_WINDOW: /* SVVMCS(); */ rc = hmR0VmxExitIntWindow(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8961 case VMX_EXIT_MWAIT: /* SVVMCS(); */ rc = hmR0VmxExitMwait(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8962 case VMX_EXIT_MONITOR: /* SVVMCS(); */ rc = hmR0VmxExitMonitor(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8963 case VMX_EXIT_TASK_SWITCH: /* SVVMCS(); */ rc = hmR0VmxExitTaskSwitch(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8964 case VMX_EXIT_PREEMPT_TIMER: /* SVVMCS(); */ rc = hmR0VmxExitPreemptTimer(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8965 case VMX_EXIT_RDMSR: /* SVVMCS(); */ rc = hmR0VmxExitRdmsr(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8966 case VMX_EXIT_WRMSR: /* SVVMCS(); */ rc = hmR0VmxExitWrmsr(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8967 case VMX_EXIT_MOV_DRX: /* SVVMCS(); */ rc = hmR0VmxExitMovDRx(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8968 case VMX_EXIT_TPR_BELOW_THRESHOLD: /* SVVMCS(); */ rc = hmR0VmxExitTprBelowThreshold(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8969 case VMX_EXIT_HLT: /* SVVMCS(); */ rc = hmR0VmxExitHlt(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8970 case VMX_EXIT_INVD: /* SVVMCS(); */ rc = hmR0VmxExitInvd(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8971 case VMX_EXIT_INVLPG: /* SVVMCS(); */ rc = hmR0VmxExitInvlpg(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8972 case VMX_EXIT_RSM: /* SVVMCS(); */ rc = hmR0VmxExitRsm(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8973 case VMX_EXIT_MTF: /* SVVMCS(); */ rc = hmR0VmxExitMtf(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8974 case VMX_EXIT_PAUSE: /* SVVMCS(); */ rc = hmR0VmxExitPause(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8975 case VMX_EXIT_XDTR_ACCESS: /* SVVMCS(); */ rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8976 case VMX_EXIT_TR_ACCESS: /* SVVMCS(); */ rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8977 case VMX_EXIT_WBINVD: /* SVVMCS(); */ rc = hmR0VmxExitWbinvd(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8978 case VMX_EXIT_XSETBV: /* SVVMCS(); */ rc = hmR0VmxExitXsetbv(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8979 case VMX_EXIT_RDRAND: /* SVVMCS(); */ rc = hmR0VmxExitRdrand(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8980 case VMX_EXIT_INVPCID: /* SVVMCS(); */ rc = hmR0VmxExitInvpcid(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8981 case VMX_EXIT_GETSEC: /* SVVMCS(); */ rc = hmR0VmxExitGetsec(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8982 case VMX_EXIT_RDPMC: /* SVVMCS(); */ rc = hmR0VmxExitRdpmc(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8983 case VMX_EXIT_VMCALL: /* SVVMCS(); */ rc = hmR0VmxExitVmcall(pVCpu, pMixedCtx, pVmxTransient); /* LDVMCS(); */ break;
8984
8985 case VMX_EXIT_TRIPLE_FAULT: rc = hmR0VmxExitTripleFault(pVCpu, pMixedCtx, pVmxTransient); break;
8986 case VMX_EXIT_NMI_WINDOW: rc = hmR0VmxExitNmiWindow(pVCpu, pMixedCtx, pVmxTransient); break;
8987 case VMX_EXIT_INIT_SIGNAL: rc = hmR0VmxExitInitSignal(pVCpu, pMixedCtx, pVmxTransient); break;
8988 case VMX_EXIT_SIPI: rc = hmR0VmxExitSipi(pVCpu, pMixedCtx, pVmxTransient); break;
8989 case VMX_EXIT_IO_SMI: rc = hmR0VmxExitIoSmi(pVCpu, pMixedCtx, pVmxTransient); break;
8990 case VMX_EXIT_SMI: rc = hmR0VmxExitSmi(pVCpu, pMixedCtx, pVmxTransient); break;
8991 case VMX_EXIT_ERR_MSR_LOAD: rc = hmR0VmxExitErrMsrLoad(pVCpu, pMixedCtx, pVmxTransient); break;
8992 case VMX_EXIT_ERR_INVALID_GUEST_STATE: rc = hmR0VmxExitErrInvalidGuestState(pVCpu, pMixedCtx, pVmxTransient); break;
8993 case VMX_EXIT_ERR_MACHINE_CHECK: rc = hmR0VmxExitErrMachineCheck(pVCpu, pMixedCtx, pVmxTransient); break;
8994
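        /* Instructions we don't expose to the guest (VMX instructions, INVEPT/INVVPID, VMFUNC, XSAVES/XRSTORS): inject #UD. */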
8995 case VMX_EXIT_VMCLEAR:
8996 case VMX_EXIT_VMLAUNCH:
8997 case VMX_EXIT_VMPTRLD:
8998 case VMX_EXIT_VMPTRST:
8999 case VMX_EXIT_VMREAD:
9000 case VMX_EXIT_VMRESUME:
9001 case VMX_EXIT_VMWRITE:
9002 case VMX_EXIT_VMXOFF:
9003 case VMX_EXIT_VMXON:
9004 case VMX_EXIT_INVEPT:
9005 case VMX_EXIT_INVVPID:
9006 case VMX_EXIT_VMFUNC:
9007 case VMX_EXIT_XSAVES:
9008 case VMX_EXIT_XRSTORS:
9009 rc = hmR0VmxExitSetPendingXcptUD(pVCpu, pMixedCtx, pVmxTransient);
9010 break;
9011 case VMX_EXIT_RESERVED_60:
9012 case VMX_EXIT_RDSEED: /* only spurious exits, so undefined */
9013 case VMX_EXIT_RESERVED_62:
9014 default:
9015 rc = hmR0VmxExitErrUndefined(pVCpu, pMixedCtx, pVmxTransient);
9016 break;
9017 }
9018 return rc;
9019}
9020#endif /* !HMVMX_USE_FUNCTION_TABLE */
9021
9022
9023/**
9024 * Single-stepping VM-exit filtering.
9025 *
9026 * This preprocesses the VM-exits and decides whether we've gotten far enough
9027 * to return VINF_EM_DBG_STEPPED already. If not, normal VM-exit handling is
9028 * performed.
9029 *
9030 * @returns Strict VBox status code.
9031 * @param pVCpu The virtual CPU of the calling EMT.
9032 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
9033 * out-of-sync. Make sure to update the required
9034 * fields before using them.
9035 * @param pVmxTransient Pointer to the VMX-transient structure.
9036 * @param uExitReason The VM-exit reason.
9037 */
9038DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExitStep(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient,
9039 uint32_t uExitReason, uint16_t uCsStart, uint64_t uRipStart)
9040{
9041 switch (uExitReason)
9042 {
9043 case VMX_EXIT_XCPT_OR_NMI:
9044 {
9045 /* Check for host NMI. */
9046 int rc2 = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
9047 AssertRCReturn(rc2, rc2);
9048 uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVmxTransient->uExitIntInfo);
9049 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
9050 return hmR0VmxExitXcptOrNmi(pVCpu, pMixedCtx, pVmxTransient);
9051 /* fall thru */
9052 }
9053
9054 case VMX_EXIT_EPT_MISCONFIG:
9055 case VMX_EXIT_TRIPLE_FAULT:
9056 case VMX_EXIT_APIC_ACCESS:
9057 case VMX_EXIT_TPR_BELOW_THRESHOLD:
9058 case VMX_EXIT_TASK_SWITCH:
9059
9060 /* Instruction specific VM-exits: */
9061 case VMX_EXIT_IO_INSTR:
9062 case VMX_EXIT_CPUID:
9063 case VMX_EXIT_RDTSC:
9064 case VMX_EXIT_RDTSCP:
9065 case VMX_EXIT_MOV_CRX:
9066 case VMX_EXIT_MWAIT:
9067 case VMX_EXIT_MONITOR:
9068 case VMX_EXIT_RDMSR:
9069 case VMX_EXIT_WRMSR:
9070 case VMX_EXIT_MOV_DRX:
9071 case VMX_EXIT_HLT:
9072 case VMX_EXIT_INVD:
9073 case VMX_EXIT_INVLPG:
9074 case VMX_EXIT_RSM:
9075 case VMX_EXIT_PAUSE:
9076 case VMX_EXIT_XDTR_ACCESS:
9077 case VMX_EXIT_TR_ACCESS:
9078 case VMX_EXIT_WBINVD:
9079 case VMX_EXIT_XSETBV:
9080 case VMX_EXIT_RDRAND:
9081 case VMX_EXIT_INVPCID:
9082 case VMX_EXIT_GETSEC:
9083 case VMX_EXIT_RDPMC:
9084 case VMX_EXIT_VMCALL:
9085 case VMX_EXIT_VMCLEAR:
9086 case VMX_EXIT_VMLAUNCH:
9087 case VMX_EXIT_VMPTRLD:
9088 case VMX_EXIT_VMPTRST:
9089 case VMX_EXIT_VMREAD:
9090 case VMX_EXIT_VMRESUME:
9091 case VMX_EXIT_VMWRITE:
9092 case VMX_EXIT_VMXOFF:
9093 case VMX_EXIT_VMXON:
9094 case VMX_EXIT_INVEPT:
9095 case VMX_EXIT_INVVPID:
9096 case VMX_EXIT_VMFUNC:
9097 {
9098 int rc2 = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
9099 rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9100 AssertRCReturn(rc2, rc2);
9101 if ( pMixedCtx->rip != uRipStart
9102 || pMixedCtx->cs.Sel != uCsStart)
9103 return VINF_EM_DBG_STEPPED;
9104 break;
9105 }
9106 }
9107
9108 /*
9109 * Normal processing.
9110 */
9111#ifdef HMVMX_USE_FUNCTION_TABLE
9112 return g_apfnVMExitHandlers[uExitReason](pVCpu, pMixedCtx, pVmxTransient);
9113#else
9114 return hmR0VmxHandleExit(pVCpu, pMixedCtx, pVmxTransient, uExitReason);
9115#endif
9116}
9117
9118
9119#ifdef VBOX_STRICT
9120/* Is there some generic IPRT define for this that is not in Runtime/internal/\* ?? */
9121# define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \
9122 RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
9123
9124# define HMVMX_ASSERT_PREEMPT_CPUID() \
9125 do { \
9126 RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
9127 AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
9128 } while (0)
9129
9130# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() \
9131 do { \
9132 AssertPtr(pVCpu); \
9133 AssertPtr(pMixedCtx); \
9134 AssertPtr(pVmxTransient); \
9135 Assert(pVmxTransient->fVMEntryFailed == false); \
9136 Assert(ASMIntAreEnabled()); \
9137 HMVMX_ASSERT_PREEMPT_SAFE(); \
9138 HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \
9139 Log4Func(("vcpu[%RU32] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v\n", pVCpu->idCpu)); \
9140 HMVMX_ASSERT_PREEMPT_SAFE(); \
9141 if (VMMR0IsLogFlushDisabled(pVCpu)) \
9142 HMVMX_ASSERT_PREEMPT_CPUID(); \
9143 HMVMX_STOP_EXIT_DISPATCH_PROF(); \
9144 } while (0)
9145
9146# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() \
9147 do { \
9148 Log4Func(("\n")); \
9149 } while (0)
9150#else /* nonstrict builds: */
9151# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() \
9152 do { \
9153 HMVMX_STOP_EXIT_DISPATCH_PROF(); \
9154 NOREF(pVCpu); NOREF(pMixedCtx); NOREF(pVmxTransient); \
9155 } while (0)
9156# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() do { } while (0)
9157#endif
9158
9159
9160/**
9161 * Advances the guest RIP after reading it from the VMCS.
9162 *
9163 * @returns VBox status code.
9164 * @param pVCpu Pointer to the VMCPU.
9165 * @param   pMixedCtx       Pointer to the guest-CPU context. The data may be
9166 * out-of-sync. Make sure to update the required fields
9167 * before using them.
9168 * @param pVmxTransient Pointer to the VMX transient structure.
9169 *
9170 * @remarks No-long-jump zone!!!
9171 */
9172DECLINLINE(int) hmR0VmxAdvanceGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9173{
9174 int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
9175 rc |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
9176 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9177 AssertRCReturn(rc, rc);
9178
9179 pMixedCtx->rip += pVmxTransient->cbInstr;
9180 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
9181
9182 /*
9183 * Deliver a debug exception to the guest if it is single-stepping. Don't directly inject a #DB but use the
9184 * pending debug exception field as it takes care of priority of events.
9185 *
9186 * See Intel spec. 32.2.1 "Debug Exceptions".
9187 */
9188 hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx);
9189
9190 return rc;
9191}
9192
9193
9194/**
9195 * Tries to determine what part of the guest state VT-x has deemed invalid
9196 * and updates the error record fields accordingly.
9197 *
9198 * @return VMX_IGS_* return codes.
9199 * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything
9200 * wrong with the guest state.
9201 *
9202 * @param pVM Pointer to the VM.
9203 * @param pVCpu Pointer to the VMCPU.
9204 * @param pCtx Pointer to the guest-CPU state.
9205 *
9206 * @remarks This function assumes our cache of the VMCS controls
9207 *          is valid, i.e. hmR0VmxCheckVmcsCtls() succeeded.
9208 */
9209static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
9210{
9211#define HMVMX_ERROR_BREAK(err) { uError = (err); break; }
9212#define HMVMX_CHECK_BREAK(expr, err) if (!(expr)) { \
9213 uError = (err); \
9214 break; \
9215 } else do { } while (0)
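    /* These helpers record a failing VMX_IGS_* code in uError and break out of the do-while(0) scan below
       (HMVMX_CHECK_BREAK only breaks when its condition is false). */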
9216
9217 int rc;
9218 uint32_t uError = VMX_IGS_ERROR;
9219 uint32_t u32Val;
9220 bool fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuest;
9221
9222 do
9223 {
9224 /*
9225 * CR0.
9226 */
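        /* uSetCR0 is the set of bits that must be 1 in the guest CR0, uZapCR0 the mask of bits allowed to be 1;
           anything outside uZapCR0 must be 0 (both derived from the CR0_FIXED0/FIXED1 capability MSRs). */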
9227 uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
9228 uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
9229 /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG).
9230 See Intel spec. 26.3.1 "Checks on Guest Control Registers, Debug Registers and MSRs." */
9231 if (fUnrestrictedGuest)
9232 uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG);
9233
9234 uint32_t u32GuestCR0;
9235 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32GuestCR0);
9236 AssertRCBreak(rc);
9237 HMVMX_CHECK_BREAK((u32GuestCR0 & uSetCR0) == uSetCR0, VMX_IGS_CR0_FIXED1);
9238 HMVMX_CHECK_BREAK(!(u32GuestCR0 & ~uZapCR0), VMX_IGS_CR0_FIXED0);
9239 if ( !fUnrestrictedGuest
9240 && (u32GuestCR0 & X86_CR0_PG)
9241 && !(u32GuestCR0 & X86_CR0_PE))
9242 {
9243 HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO);
9244 }
9245
9246 /*
9247 * CR4.
9248 */
9249 uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
9250 uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
9251
9252 uint32_t u32GuestCR4;
9253 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32GuestCR4);
9254 AssertRCBreak(rc);
9255 HMVMX_CHECK_BREAK((u32GuestCR4 & uSetCR4) == uSetCR4, VMX_IGS_CR4_FIXED1);
9256 HMVMX_CHECK_BREAK(!(u32GuestCR4 & ~uZapCR4), VMX_IGS_CR4_FIXED0);
9257
9258 /*
9259 * IA32_DEBUGCTL MSR.
9260 */
9261 uint64_t u64Val;
9262 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val);
9263 AssertRCBreak(rc);
9264 if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
9265 && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. */
9266 {
9267 HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED);
9268 }
9269 uint64_t u64DebugCtlMsr = u64Val;
9270
9271#ifdef VBOX_STRICT
9272 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
9273 AssertRCBreak(rc);
9274 Assert(u32Val == pVCpu->hm.s.vmx.u32EntryCtls);
9275#endif
9276 bool const fLongModeGuest = RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST);
9277
9278 /*
9279 * RIP and RFLAGS.
9280 */
9281 uint32_t u32Eflags;
9282#if HC_ARCH_BITS == 64
9283 rc = VMXReadVmcs64(VMX_VMCS_GUEST_RIP, &u64Val);
9284 AssertRCBreak(rc);
9285 /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */
9286 if ( !fLongModeGuest
9287 || !pCtx->cs.Attr.n.u1Long)
9288 {
9289 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID);
9290 }
9291 /** @todo If the processor supports N < 64 linear-address bits, bits 63:N
9292 * must be identical if the "IA-32e mode guest" VM-entry
9293 * control is 1 and CS.L is 1. No check applies if the
9294 * CPU supports 64 linear-address bits. */
9295
9296 /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */
9297 rc = VMXReadVmcs64(VMX_VMCS_GUEST_RFLAGS, &u64Val);
9298 AssertRCBreak(rc);
9299 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */
9300 VMX_IGS_RFLAGS_RESERVED);
9301 HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
9302 u32Eflags = u64Val;
9303#else
9304 rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Eflags);
9305 AssertRCBreak(rc);
9306 HMVMX_CHECK_BREAK(!(u32Eflags & 0xffc08028), VMX_IGS_RFLAGS_RESERVED); /* Bit 31:22, Bit 15, 5, 3 MBZ. */
9307 HMVMX_CHECK_BREAK((u32Eflags & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
9308#endif
9309
9310 if ( fLongModeGuest
9311 || ( fUnrestrictedGuest
9312 && !(u32GuestCR0 & X86_CR0_PE)))
9313 {
9314 HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID);
9315 }
9316
9317 uint32_t u32EntryInfo;
9318 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo);
9319 AssertRCBreak(rc);
9320 if ( VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo)
9321 && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
9322 {
9323 HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID);
9324 }
9325
9326 /*
9327 * 64-bit checks.
9328 */
9329#if HC_ARCH_BITS == 64
9330 if (fLongModeGuest)
9331 {
9332 HMVMX_CHECK_BREAK(u32GuestCR0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE);
9333 HMVMX_CHECK_BREAK(u32GuestCR4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE);
9334 }
9335
9336 if ( !fLongModeGuest
9337 && (u32GuestCR4 & X86_CR4_PCIDE))
9338 {
9339 HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE);
9340 }
9341
9342 /** @todo CR3 field must be such that bits 63:52 and bits in the range
9343 * 51:32 beyond the processor's physical-address width are 0. */
9344
9345 if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
9346 && (pCtx->dr[7] & X86_DR7_MBZ_MASK))
9347 {
9348 HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED);
9349 }
9350
9351 rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, &u64Val);
9352 AssertRCBreak(rc);
9353 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL);
9354
9355 rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, &u64Val);
9356 AssertRCBreak(rc);
9357 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL);
9358#endif
9359
9360 /*
9361 * PERF_GLOBAL MSR.
9362 */
9363 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR)
9364 {
9365 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val);
9366 AssertRCBreak(rc);
9367 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)),
9368 VMX_IGS_PERF_GLOBAL_MSR_RESERVED); /* Bits 63:35, bits 31:2 MBZ. */
9369 }
9370
9371 /*
9372 * PAT MSR.
9373 */
9374 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR)
9375 {
9376 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val);
9377 AssertRCBreak(rc);
9378 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0x707070707070707)), VMX_IGS_PAT_MSR_RESERVED);
9379 for (unsigned i = 0; i < 8; i++)
9380 {
9381 uint8_t u8Val = (u64Val & 0xff);
9382 if ( u8Val != 0 /* UC */
9383 && u8Val != 1 /* WC */
9384 && u8Val != 4 /* WT */
9385 && u8Val != 5 /* WP */
9386 && u8Val != 6 /* WB */
9387 && u8Val != 7 /* UC- */)
9388 {
9389 HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID);
9390 }
9391 u64Val >>= 8;
9392 }
9393 }
9394
9395 /*
9396 * EFER MSR.
9397 */
9398 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR)
9399 {
9400 Assert(pVM->hm.s.vmx.fSupportsVmcsEfer);
9401 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val);
9402 AssertRCBreak(rc);
9403 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)),
9404 VMX_IGS_EFER_MSR_RESERVED); /* Bits 63:12, bit 9, bits 7:1 MBZ. */
9405 HMVMX_CHECK_BREAK(RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL( pVCpu->hm.s.vmx.u32EntryCtls
9406 & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST),
9407 VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH);
9408 HMVMX_CHECK_BREAK( fUnrestrictedGuest
9409 || !(u32GuestCR0 & X86_CR0_PG)
9410 || RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL(u64Val & MSR_K6_EFER_LME),
9411 VMX_IGS_EFER_LMA_LME_MISMATCH);
9412 }
9413
9414 /*
9415 * Segment registers.
9416 */
9417 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
9418 || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID);
9419 if (!(u32Eflags & X86_EFL_VM))
9420 {
9421 /* CS */
9422 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID);
9423 HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED);
9424 HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED);
9425 HMVMX_CHECK_BREAK( (pCtx->cs.u32Limit & 0xfff) == 0xfff
9426 || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
9427 HMVMX_CHECK_BREAK( !(pCtx->cs.u32Limit & 0xfff00000)
9428 || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
9429 /* CS cannot be loaded with NULL in protected mode. */
9430 HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE);
9431 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID);
9432 if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
9433 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL);
9434 else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
9435 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH);
9436 else if (pVM->hm.s.vmx.fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3)
9437 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID);
9438 else
9439 HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID);
9440
9441 /* SS */
9442 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
9443 || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL);
9444 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL);
9445 if ( !(pCtx->cr0 & X86_CR0_PE)
9446 || pCtx->cs.Attr.n.u4Type == 3)
9447 {
9448 HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID);
9449 }
9450 if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
9451 {
9452 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID);
9453 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID);
9454 HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED);
9455 HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED);
9456 HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff
9457 || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
9458 HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000)
9459 || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
9460 }
9461
9462 /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). */
9463 if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
9464 {
9465 HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID);
9466 HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID);
9467 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
9468 || pCtx->ds.Attr.n.u4Type > 11
9469 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
9470 HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED);
9471 HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED);
9472 HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff
9473 || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
9474 HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000)
9475 || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
9476 HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
9477 || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID);
9478 }
9479 if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
9480 {
9481 HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID);
9482 HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID);
9483 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
9484 || pCtx->es.Attr.n.u4Type > 11
9485 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
9486 HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED);
9487 HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED);
9488 HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff
9489 || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
9490 HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000)
9491 || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
9492 HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
9493 || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID);
9494 }
9495 if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
9496 {
9497 HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID);
9498 HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID);
9499 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
9500 || pCtx->fs.Attr.n.u4Type > 11
9501 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL);
9502 HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED);
9503 HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED);
9504 HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff
9505 || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
9506 HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000)
9507 || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
9508 HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
9509 || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID);
9510 }
9511 if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
9512 {
9513 HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID);
9514 HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID);
9515 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
9516 || pCtx->gs.Attr.n.u4Type > 11
9517 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL);
9518 HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED);
9519 HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED);
9520 HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff
9521 || !(pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
9522 HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000)
9523 || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
9524 HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
9525 || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID);
9526 }
9527 /* 64-bit capable CPUs. */
9528#if HC_ARCH_BITS == 64
9529 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
9530 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
9531 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
9532 || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
9533 HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID);
9534 HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32),
9535 VMX_IGS_LONGMODE_SS_BASE_INVALID);
9536 HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32),
9537 VMX_IGS_LONGMODE_DS_BASE_INVALID);
9538 HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32),
9539 VMX_IGS_LONGMODE_ES_BASE_INVALID);
9540#endif
9541 }
9542 else
9543 {
9544 /* V86 mode checks. */
9545 uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
9546 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
9547 {
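                /* Real-on-v86 segments are expected to have attribute 0xf3: present, DPL 3, accessed read/write data. */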
9548 u32CSAttr = 0xf3; u32SSAttr = 0xf3;
9549 u32DSAttr = 0xf3; u32ESAttr = 0xf3;
9550 u32FSAttr = 0xf3; u32GSAttr = 0xf3;
9551 }
9552 else
9553 {
9554 u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u;
9555 u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u;
9556 u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
9557 }
9558
9559 /* CS */
9560 HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID);
9561 HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID);
9562 HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID);
9563 /* SS */
9564 HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID);
9565 HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID);
9566 HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID);
9567 /* DS */
9568 HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID);
9569 HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID);
9570 HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID);
9571 /* ES */
9572 HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID);
9573 HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID);
9574 HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID);
9575 /* FS */
9576 HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID);
9577 HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID);
9578 HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID);
9579 /* GS */
9580 HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID);
9581 HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID);
9582 HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID);
9583 /* 64-bit capable CPUs. */
9584#if HC_ARCH_BITS == 64
9585 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
9586 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
9587 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
9588 || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
9589 HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID);
9590 HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32),
9591 VMX_IGS_LONGMODE_SS_BASE_INVALID);
9592 HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32),
9593 VMX_IGS_LONGMODE_DS_BASE_INVALID);
9594 HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32),
9595 VMX_IGS_LONGMODE_ES_BASE_INVALID);
9596#endif
9597 }
9598
9599 /*
9600 * TR.
9601 */
9602 HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID);
9603 /* 64-bit capable CPUs. */
9604#if HC_ARCH_BITS == 64
9605 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL);
9606#endif
9607 if (fLongModeGuest)
9608 {
9609 HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */
9610 VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID);
9611 }
9612 else
9613 {
9614 HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */
9615 || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS.*/
9616 VMX_IGS_TR_ATTR_TYPE_INVALID);
9617 }
9618 HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID);
9619 HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID);
9620 HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */
9621 HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff
9622 || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
9623 HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000)
9624 || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
9625 HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE);
9626
9627 /*
9628 * GDTR and IDTR.
9629 */
9630#if HC_ARCH_BITS == 64
9631 rc = VMXReadVmcs64(VMX_VMCS_GUEST_GDTR_BASE, &u64Val);
9632 AssertRCBreak(rc);
9633 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL);
9634
9635 rc = VMXReadVmcs64(VMX_VMCS_GUEST_IDTR_BASE, &u64Val);
9636 AssertRCBreak(rc);
9637 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL);
9638#endif
9639
9640 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val);
9641 AssertRCBreak(rc);
9642 HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
9643
9644 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val);
9645 AssertRCBreak(rc);
9646 HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
9647
9648 /*
9649 * Guest Non-Register State.
9650 */
9651 /* Activity State. */
9652 uint32_t u32ActivityState;
9653 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState);
9654 AssertRCBreak(rc);
9655 HMVMX_CHECK_BREAK( !u32ActivityState
9656 || (u32ActivityState & MSR_IA32_VMX_MISC_ACTIVITY_STATES(pVM->hm.s.vmx.Msrs.u64Misc)),
9657 VMX_IGS_ACTIVITY_STATE_INVALID);
9658 HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl)
9659 || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID);
9660 uint32_t u32IntrState;
9661 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &u32IntrState);
9662 AssertRCBreak(rc);
9663 if ( u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS
9664 || u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
9665 {
9666 HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID);
9667 }
9668
9669 /** @todo Activity state and injecting interrupts. Left as a todo since we
9670         *        currently don't use any activity state other than ACTIVE. */
9671
9672 HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM)
9673 || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID);
9674
9675 /* Guest interruptibility-state. */
9676 HMVMX_CHECK_BREAK(!(u32IntrState & 0xfffffff0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED);
9677 HMVMX_CHECK_BREAK((u32IntrState & ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
9678 | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS))
9679 != ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
9680 | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
9681 VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID);
9682 HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF)
9683 || !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI),
9684 VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID);
9685 if (VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo))
9686 {
9687 if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
9688 {
9689 HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
9690 && !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
9691 VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID);
9692 }
9693 else if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
9694 {
9695 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
9696 VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID);
9697 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI),
9698 VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID);
9699 }
9700 }
9701 /** @todo Assumes the processor is not in SMM. */
9702 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI),
9703 VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID);
9704 HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM)
9705 || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI),
9706 VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID);
9707 if ( (pVCpu->hm.s.vmx.u32PinCtls & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI)
9708 && VMX_ENTRY_INTERRUPTION_INFO_IS_VALID(u32EntryInfo)
9709 && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
9710 {
9711 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI),
9712 VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID);
9713 }
9714
9715 /* Pending debug exceptions. */
9716#if HC_ARCH_BITS == 64
9717 rc = VMXReadVmcs64(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u64Val);
9718 AssertRCBreak(rc);
9719 /* Bits 63:15, Bit 13, Bits 11:4 MBZ. */
9720 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED);
9721 u32Val = u64Val; /* For pending debug exceptions checks below. */
9722#else
9723 rc = VMXReadVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u32Val);
9724 AssertRCBreak(rc);
9725 /* Bits 31:15, Bit 13, Bits 11:4 MBZ. */
9726 HMVMX_CHECK_BREAK(!(u32Val & 0xffffaff0), VMX_IGS_PENDING_DEBUG_RESERVED);
9727#endif
9728
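        /* If interrupts are inhibited by STI or MOV SS, or the activity state is HLT, the pending-debug BS bit must be
           consistent with EFLAGS.TF and IA32_DEBUGCTL.BTF (checked both ways below). */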
9729 if ( (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
9730 || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS)
9731 || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT)
9732 {
9733 if ( (u32Eflags & X86_EFL_TF)
9734 && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
9735 {
9736 /* Bit 14 is PendingDebug.BS. */
9737 HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET);
9738 }
9739 if ( !(u32Eflags & X86_EFL_TF)
9740 || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
9741 {
9742 /* Bit 14 is PendingDebug.BS. */
9743 HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR);
9744 }
9745 }
9746
9747 /* VMCS link pointer. */
9748 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val);
9749 AssertRCBreak(rc);
9750 if (u64Val != UINT64_C(0xffffffffffffffff))
9751 {
9752 HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED);
9753 /** @todo Bits beyond the processor's physical-address width MBZ. */
9754 /** @todo 32-bit located in memory referenced by value of this field (as a
9755 * physical address) must contain the processor's VMCS revision ID. */
9756 /** @todo SMM checks. */
9757 }
9758
9759 /** @todo Checks on Guest Page-Directory-Pointer-Table Entries when guest is
9760 * not using Nested Paging? */
9761 if ( pVM->hm.s.fNestedPaging
9762 && !fLongModeGuest
9763 && CPUMIsGuestInPAEModeEx(pCtx))
9764 {
9765 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &u64Val);
9766 AssertRCBreak(rc);
9767 HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
9768
9769 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &u64Val);
9770 AssertRCBreak(rc);
9771 HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
9772
9773 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &u64Val);
9774 AssertRCBreak(rc);
9775 HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
9776
9777 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &u64Val);
9778 AssertRCBreak(rc);
9779 HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED);
9780 }
9781
9782 /* Shouldn't happen but distinguish it from AssertRCBreak() errors. */
9783 if (uError == VMX_IGS_ERROR)
9784 uError = VMX_IGS_REASON_NOT_FOUND;
9785 } while (0);
9786
9787 pVCpu->hm.s.u32HMError = uError;
9788 return uError;
9789
9790#undef HMVMX_ERROR_BREAK
9791#undef HMVMX_CHECK_BREAK
9792}
9793
9794/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
9795/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
9796/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
9797
9798/** @name VM-exit handlers.
9799 * @{
9800 */
9801
9802/**
9803 * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT).
9804 */
9805HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9806{
9807 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9808 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
9809 /* Windows hosts (32-bit and 64-bit) have DPC latency issues. See @bugref{6853}. */
9810 if (VMMR0ThreadCtxHookIsEnabled(pVCpu))
9811 return VINF_SUCCESS;
9812 return VINF_EM_RAW_INTERRUPT;
9813}
9814
9815
9816/**
9817 * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI).
9818 */
9819HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9820{
9821 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9822 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3);
9823
9824 int rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
9825 AssertRCReturn(rc, rc);
9826
9827 uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVmxTransient->uExitIntInfo);
9828 Assert( !(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT)
9829 && uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT);
9830 Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo));
9831
9832 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
9833 {
9834 /*
9835 * This cannot be a guest NMI as the only way for the guest to receive an NMI is if we injected it ourselves and
9836 * anything we inject is not going to cause a VM-exit directly for the event being injected.
9837 * See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery".
9838 *
9839 * Dispatch the NMI to the host. See Intel spec. 27.5.5 "Updating Non-Register State".
9840 */
9841 VMXDispatchHostNmi();
9842 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
9843 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
9844 return VINF_SUCCESS;
9845 }
9846
9847 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
9848 rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
9849 if (RT_UNLIKELY(rc != VINF_SUCCESS))
9850 {
9851 if (rc == VINF_HM_DOUBLE_FAULT)
9852 rc = VINF_SUCCESS;
9853 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
9854 return rc;
9855 }
9856
9857 uint32_t uExitIntInfo = pVmxTransient->uExitIntInfo;
9858 uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(uExitIntInfo);
9859 switch (uIntType)
9860 {
9861 case VMX_EXIT_INTERRUPTION_INFO_TYPE_PRIV_SW_XCPT: /* Privileged software exception. (#DB from ICEBP) */
9862 Assert(uVector == X86_XCPT_DB);
9863 /* no break */
9864 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /* Software exception. (#BP or #OF) */
9865 Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF || uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_PRIV_SW_XCPT);
9866 /* no break */
9867 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT:
9868 {
9869 switch (uVector)
9870 {
9871 case X86_XCPT_PF: rc = hmR0VmxExitXcptPF(pVCpu, pMixedCtx, pVmxTransient); break;
9872 case X86_XCPT_GP: rc = hmR0VmxExitXcptGP(pVCpu, pMixedCtx, pVmxTransient); break;
9873 case X86_XCPT_NM: rc = hmR0VmxExitXcptNM(pVCpu, pMixedCtx, pVmxTransient); break;
9874 case X86_XCPT_MF: rc = hmR0VmxExitXcptMF(pVCpu, pMixedCtx, pVmxTransient); break;
9875 case X86_XCPT_DB: rc = hmR0VmxExitXcptDB(pVCpu, pMixedCtx, pVmxTransient); break;
9876 case X86_XCPT_BP: rc = hmR0VmxExitXcptBP(pVCpu, pMixedCtx, pVmxTransient); break;
9877#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
9878 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF);
9879 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
9880 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE);
9881 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
9882 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD);
9883 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
9884 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS);
9885 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
9886 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP);
9887 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
9888 case X86_XCPT_TS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestTS);
9889 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
9890#endif
9891 default:
9892 {
9893 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9894 AssertRCReturn(rc, rc);
9895
9896 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
9897 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
9898 {
9899 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
9900 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
9901 Assert(CPUMIsGuestInRealModeEx(pMixedCtx));
9902
9903 rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
9904 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
9905 AssertRCReturn(rc, rc);
9906 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(uExitIntInfo),
9907 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode,
9908 0 /* GCPtrFaultAddress */);
9909 AssertRCReturn(rc, rc);
9910 }
9911 else
9912 {
9913 AssertMsgFailed(("Unexpected VM-exit caused by exception %#x\n", uVector));
9914 pVCpu->hm.s.u32HMError = uVector;
9915 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
9916 }
9917 break;
9918 }
9919 }
9920 break;
9921 }
9922
9923 default:
9924 {
9925 pVCpu->hm.s.u32HMError = uExitIntInfo;
9926 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE;
9927 AssertMsgFailed(("Unexpected interruption info %#x\n", VMX_EXIT_INTERRUPTION_INFO_TYPE(uExitIntInfo)));
9928 break;
9929 }
9930 }
9931 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
9932 return rc;
9933}
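
/*
 * A minimal, self-contained sketch of the VM-exit interruption-information
 * field layout that the uIntType/uVector dispatch above relies on (bit layout
 * per the Intel SDM; the struct and helper below are illustrative placeholders,
 * not the VMX_EXIT_INTERRUPTION_INFO_* macros used in this file).
 */
#include <stdint.h>

typedef struct EXITINTINFOSKETCH
{
    uint8_t uVector;          /* Bits 7:0   - interrupt/exception vector. */
    uint8_t uType;            /* Bits 10:8  - 0=ext int, 2=NMI, 3=HW xcpt, 4=SW int, 5=priv SW xcpt, 6=SW xcpt. */
    int     fErrCodeValid;    /* Bit 11     - error code valid. */
    int     fNmiUnblockIret;  /* Bit 12     - NMI unblocking due to IRET. */
    int     fValid;           /* Bit 31     - whole field valid. */
} EXITINTINFOSKETCH;

static EXITINTINFOSKETCH SketchDecodeExitIntInfo(uint32_t uExitIntInfo)
{
    EXITINTINFOSKETCH Info;
    Info.uVector         = (uint8_t)( uExitIntInfo        & 0xff);
    Info.uType           = (uint8_t)((uExitIntInfo >>  8) & 0x7);
    Info.fErrCodeValid   = (int)    ((uExitIntInfo >> 11) & 1);
    Info.fNmiUnblockIret = (int)    ((uExitIntInfo >> 12) & 1);
    Info.fValid          = (int)    ((uExitIntInfo >> 31) & 1);
    return Info;
}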
9934
9935
9936/**
9937 * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW).
9938 */
9939HMVMX_EXIT_DECL hmR0VmxExitIntWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9940{
9941 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9942
9943    /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts; it is now ready. */
9944 hmR0VmxClearIntWindowExitVmcs(pVCpu);
9945
9946 /* Deliver the pending interrupts via hmR0VmxEvaluatePendingEvent() and resume guest execution. */
9947 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
9948 return VINF_SUCCESS;
9949}
9950
9951
9952/**
9953 * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW).
9954 */
9955HMVMX_EXIT_DECL hmR0VmxExitNmiWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9956{
9957 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9958 if (RT_UNLIKELY(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_NMI_WINDOW_EXIT)))
9959 {
9960 AssertMsgFailed(("Unexpected NMI-window exit.\n"));
9961 HMVMX_RETURN_UNEXPECTED_EXIT();
9962 }
9963
9964 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_BLOCK_NMIS));
9965
9966 /*
9967 * If block-by-STI is set when we get this VM-exit, it means the CPU doesn't block NMIs following STI.
9968 * It is therefore safe to unblock STI and deliver the NMI ourselves. See @bugref{7445}.
9969 */
9970 uint32_t uIntrState = 0;
9971 int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState);
9972 AssertRCReturn(rc, rc);
9973
9974 bool const fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
9975 if ( fBlockSti
9976 && VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
9977 {
9978 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
9979 }
9980
9981    /* Indicate that we no longer need to VM-exit when the guest is ready to receive NMIs; it is now ready. */
9982 hmR0VmxClearNmiWindowExitVmcs(pVCpu);
9983
9984 /* Deliver the pending NMI via hmR0VmxEvaluatePendingEvent() and resume guest execution. */
9985 return VINF_SUCCESS;
9986}
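
/*
 * A small sketch of the guest interruptibility-state bits tested in the
 * NMI-window handler above (bit assignments per the Intel SDM; the constants
 * and helper are illustrative stand-ins for the
 * VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_* macros used in this file).
 */
#include <stdint.h>

#define SKETCH_INTR_STATE_BLOCK_STI    (1u << 0)   /* Blocking by STI.    */
#define SKETCH_INTR_STATE_BLOCK_MOVSS  (1u << 1)   /* Blocking by MOV SS. */
#define SKETCH_INTR_STATE_BLOCK_SMI    (1u << 2)   /* Blocking by SMI.    */
#define SKETCH_INTR_STATE_BLOCK_NMI    (1u << 3)   /* Blocking by NMI.    */

/* If an NMI-window exit arrives while block-by-STI is still set, the CPU is
   evidently not blocking NMIs after STI, so the interrupt-inhibition flag can
   be dropped before delivering the NMI (cf. @bugref{7445} above). */
static int SketchCanClearStiInhibition(uint32_t uIntrState)
{
    return (uIntrState & SKETCH_INTR_STATE_BLOCK_STI) != 0;
}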
9987
9988
9989/**
9990 * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit.
9991 */
9992HMVMX_EXIT_DECL hmR0VmxExitWbinvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9993{
9994 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9995 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWbinvd);
9996 return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9997}
9998
9999
10000/**
10001 * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit.
10002 */
10003HMVMX_EXIT_DECL hmR0VmxExitInvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10004{
10005 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10006 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
10007 return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10008}
10009
10010
10011/**
10012 * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit.
10013 */
10014HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10015{
10016 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10017 PVM pVM = pVCpu->CTX_SUFF(pVM);
10018 int rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
10019 if (RT_LIKELY(rc == VINF_SUCCESS))
10020 {
10021 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10022 Assert(pVmxTransient->cbInstr == 2);
10023 }
10024 else
10025 {
10026 AssertMsgFailed(("hmR0VmxExitCpuid: EMInterpretCpuId failed with %Rrc\n", rc));
10027 rc = VERR_EM_INTERPRETER;
10028 }
10029 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
10030 return rc;
10031}
10032
10033
10034/**
10035 * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit.
10036 */
10037HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10038{
10039 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10040 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx);
10041 AssertRCReturn(rc, rc);
10042
10043 if (pMixedCtx->cr4 & X86_CR4_SMXE)
10044 return VINF_EM_RAW_EMULATE_INSTR;
10045
10046 AssertMsgFailed(("hmR0VmxExitGetsec: unexpected VM-exit when CR4.SMXE is 0.\n"));
10047 HMVMX_RETURN_UNEXPECTED_EXIT();
10048}
10049
10050
10051/**
10052 * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit.
10053 */
10054HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10055{
10056 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10057 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
10058 AssertRCReturn(rc, rc);
10059
10060 PVM pVM = pVCpu->CTX_SUFF(pVM);
10061 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
10062 if (RT_LIKELY(rc == VINF_SUCCESS))
10063 {
10064 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10065 Assert(pVmxTransient->cbInstr == 2);
10066 /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */
10067 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING)
10068 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
10069 }
10070 else
10071 rc = VERR_EM_INTERPRETER;
10072 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
10073 return rc;
10074}
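
/*
 * A minimal sketch of the TSC-offsetting relationship behind
 * fUpdateTscOffsettingAndPreemptTimer: with "use TSC offsetting" enabled and
 * RDTSC exiting clear, a guest RDTSC observes host-TSC + VMCS-TSC-offset.
 * The helpers below are illustrative only; the real offset is computed by
 * TM/HM elsewhere in the VMM.
 */
#include <stdint.h>

/* What the guest reads when RDTSC does not cause a VM-exit. */
static uint64_t SketchGuestVisibleTsc(uint64_t uHostTsc, int64_t iTscOffset)
{
    return uHostTsc + (uint64_t)iTscOffset;
}

/* Offset required so the guest observes uGuestTsc at the current host TSC;
   this is what must be refreshed on VM-reentry after a spurious RDTSC exit. */
static int64_t SketchComputeTscOffset(uint64_t uHostTsc, uint64_t uGuestTsc)
{
    return (int64_t)(uGuestTsc - uHostTsc);
}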
10075
10076
10077/**
10078 * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit.
10079 */
10080HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10081{
10082 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10083 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
10084 rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); /* For MSR_K8_TSC_AUX */
10085 AssertRCReturn(rc, rc);
10086
10087 PVM pVM = pVCpu->CTX_SUFF(pVM);
10088 rc = EMInterpretRdtscp(pVM, pVCpu, pMixedCtx);
10089 if (RT_LIKELY(rc == VINF_SUCCESS))
10090 {
10091 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10092 Assert(pVmxTransient->cbInstr == 3);
10093 /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */
10094 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING)
10095 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
10096 }
10097 else
10098 {
10099 AssertMsgFailed(("hmR0VmxExitRdtscp: EMInterpretRdtscp failed with %Rrc\n", rc));
10100 rc = VERR_EM_INTERPRETER;
10101 }
10102 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
10103 return rc;
10104}
10105
10106
10107/**
10108 * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit.
10109 */
10110HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10111{
10112 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10113 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
10114 rc |= hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); /** @todo review if CR0 is really required by EM. */
10115 AssertRCReturn(rc, rc);
10116
10117 PVM pVM = pVCpu->CTX_SUFF(pVM);
10118 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
10119 if (RT_LIKELY(rc == VINF_SUCCESS))
10120 {
10121 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10122 Assert(pVmxTransient->cbInstr == 2);
10123 }
10124 else
10125 {
10126 AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc));
10127 rc = VERR_EM_INTERPRETER;
10128 }
10129 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
10130 return rc;
10131}
10132
10133
10134/**
10135 * VM-exit handler for VMCALL (VMX_EXIT_VMCALL). Unconditional VM-exit.
10136 */
10137HMVMX_EXIT_DECL hmR0VmxExitVmcall(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10138{
10139 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10140 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitVmcall);
10141
10142 if (pVCpu->hm.s.fHypercallsEnabled)
10143 {
10144#if 0
10145 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10146 AssertRCReturn(rc, rc);
10147#else
10148 /* Aggressive state sync. for now. */
10149 int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
10150 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); /* For long-mode checks in gimKvmHypercall(). */
10151#endif
10152 AssertRCReturn(rc, rc);
10153
10154 rc = GIMHypercall(pVCpu, pMixedCtx);
10155 if ( rc == VINF_SUCCESS
10156 || rc == VINF_GIM_R3_HYPERCALL)
10157 {
10158 /* If the hypercall changes anything other than guest general-purpose registers,
10159 we would need to reload the guest changed bits here before VM-reentry. */
10160 hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10161 return rc;
10162 }
10163 }
10164 else
10165 Log4(("hmR0VmxExitVmcall: Hypercalls not enabled\n"));
10166
10167 hmR0VmxSetPendingXcptUD(pVCpu, pMixedCtx);
10168 return VINF_SUCCESS;
10169}
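
/*
 * For context, the guest-side trigger for the handler above is simply a VMCALL
 * instruction; which registers carry the hypercall number and arguments is
 * GIM-provider specific (Hyper-V, KVM, ...). A hedged guest-side sketch using
 * GCC/Clang inline assembly for 64-bit guest code, with RAX chosen here purely
 * for illustration:
 */
#include <stdint.h>

static inline uint64_t SketchGuestVmcall(uint64_t uArg0)
{
    uint64_t uRet;
    __asm__ __volatile__("vmcall" : "=a" (uRet) : "a" (uArg0) : "memory");
    return uRet;
}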
10170
10171
10172/**
10173 * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit.
10174 */
10175HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10176{
10177 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10178 PVM pVM = pVCpu->CTX_SUFF(pVM);
10179 Assert(!pVM->hm.s.fNestedPaging);
10180
10181 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10182 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
10183 AssertRCReturn(rc, rc);
10184
10185 VBOXSTRICTRC rc2 = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), pVmxTransient->uExitQualification);
10186 rc = VBOXSTRICTRC_VAL(rc2);
10187 if (RT_LIKELY(rc == VINF_SUCCESS))
10188 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10189 else
10190 {
10191 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitInvlpg: EMInterpretInvlpg %#RX64 failed with %Rrc\n",
10192 pVmxTransient->uExitQualification, rc));
10193 }
10194 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
10195 return rc;
10196}
10197
10198
10199/**
10200 * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit.
10201 */
10202HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10203{
10204 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10205 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
10206 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
10207 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10208 AssertRCReturn(rc, rc);
10209
10210 PVM pVM = pVCpu->CTX_SUFF(pVM);
10211 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
10212 if (RT_LIKELY(rc == VINF_SUCCESS))
10213 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10214 else
10215 {
10216 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc));
10217 rc = VERR_EM_INTERPRETER;
10218 }
10219 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
10220 return rc;
10221}
10222
10223
10224/**
10225 * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit.
10226 */
10227HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10228{
10229 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10230 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
10231 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
10232 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10233 AssertRCReturn(rc, rc);
10234
10235 PVM pVM = pVCpu->CTX_SUFF(pVM);
10236 VBOXSTRICTRC rc2 = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
10237 rc = VBOXSTRICTRC_VAL(rc2);
10238 if (RT_LIKELY( rc == VINF_SUCCESS
10239 || rc == VINF_EM_HALT))
10240 {
10241 int rc3 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10242 AssertRCReturn(rc3, rc3);
10243
10244 if ( rc == VINF_EM_HALT
10245 && EMMonitorWaitShouldContinue(pVCpu, pMixedCtx))
10246 {
10247 rc = VINF_SUCCESS;
10248 }
10249 }
10250 else
10251 {
10252 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMwait: EMInterpretMWait failed with %Rrc\n", rc));
10253 rc = VERR_EM_INTERPRETER;
10254 }
10255 AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER,
10256 ("hmR0VmxExitMwait: failed, invalid error code %Rrc\n", rc));
10257 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
10258 return rc;
10259}
10260
10261
10262/**
10263 * VM-exit handler for RSM (VMX_EXIT_RSM). Unconditional VM-exit.
10264 */
10265HMVMX_EXIT_DECL hmR0VmxExitRsm(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10266{
10267 /*
10268 * Execution of RSM outside of SMM mode causes #UD regardless of VMX root or VMX non-root mode. In theory, we should never
10269 * get this VM-exit. This can happen only if dual-monitor treatment of SMI and VMX is enabled, which can (only?) be done by
10270 * executing VMCALL in VMX root operation. If we get here, something funny is going on.
10271 * See Intel spec. "33.15.5 Enabling the Dual-Monitor Treatment".
10272 */
10273 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10274 AssertMsgFailed(("Unexpected RSM VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
10275 HMVMX_RETURN_UNEXPECTED_EXIT();
10276}
10277
10278
10279/**
10280 * VM-exit handler for SMI (VMX_EXIT_SMI). Unconditional VM-exit.
10281 */
10282HMVMX_EXIT_DECL hmR0VmxExitSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10283{
10284 /*
10285 * This can only happen if we support dual-monitor treatment of SMI, which can be activated by executing VMCALL in VMX
10286 * root operation. Only an STM (SMM transfer monitor) would get this VM-exit when we (the executive monitor) execute a VMCALL
10287 * in VMX root mode or receive an SMI. If we get here, something funny is going on.
10288 * See Intel spec. "33.15.6 Activating the Dual-Monitor Treatment" and Intel spec. 25.3 "Other Causes of VM-Exits"
10289 */
10290 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10291 AssertMsgFailed(("Unexpected SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
10292 HMVMX_RETURN_UNEXPECTED_EXIT();
10293}
10294
10295
10296/**
10297 * VM-exit handler for IO SMI (VMX_EXIT_IO_SMI). Unconditional VM-exit.
10298 */
10299HMVMX_EXIT_DECL hmR0VmxExitIoSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10300{
10301 /* Same treatment as VMX_EXIT_SMI. See comment in hmR0VmxExitSmi(). */
10302 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10303 AssertMsgFailed(("Unexpected IO SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
10304 HMVMX_RETURN_UNEXPECTED_EXIT();
10305}
10306
10307
10308/**
10309 * VM-exit handler for SIPI (VMX_EXIT_SIPI). Conditional VM-exit.
10310 */
10311HMVMX_EXIT_DECL hmR0VmxExitSipi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10312{
10313 /*
10314 * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest activity state is used. We currently
10315 * don't make use of it (see hmR0VmxLoadGuestActivityState()) as our guests don't have direct access to the host LAPIC.
10316 * See Intel spec. 25.3 "Other Causes of VM-exits".
10317 */
10318 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10319 AssertMsgFailed(("Unexpected SIPI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
10320 HMVMX_RETURN_UNEXPECTED_EXIT();
10321}
10322
10323
10324/**
10325 * VM-exit handler for INIT signal (VMX_EXIT_INIT_SIGNAL). Unconditional
10326 * VM-exit.
10327 */
10328HMVMX_EXIT_DECL hmR0VmxExitInitSignal(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10329{
10330 /*
10331 * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM.
10332     * See Intel spec. 33.14.1 "Default Treatment of SMI Delivery" and Intel spec. 29.3 "VMX Instructions" for "VMXON".
10333 *
10334 * It is -NOT- blocked in VMX non-root operation so we can, in theory, still get these VM-exits.
10335 * See Intel spec. "23.8 Restrictions on VMX operation".
10336 */
10337 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10338 return VINF_SUCCESS;
10339}
10340
10341
10342/**
10343 * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional
10344 * VM-exit.
10345 */
10346HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10347{
10348 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10349 return VINF_EM_RESET;
10350}
10351
10352
10353/**
10354 * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit.
10355 */
10356HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10357{
10358 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10359 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT);
10360 int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
10361 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
10362 AssertRCReturn(rc, rc);
10363
10364 pMixedCtx->rip++;
10365 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
10366 if (EMShouldContinueAfterHalt(pVCpu, pMixedCtx)) /* Requires eflags. */
10367 rc = VINF_SUCCESS;
10368 else
10369 rc = VINF_EM_HALT;
10370
10371 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
10372 if (rc != VINF_SUCCESS)
10373 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHltToR3);
10374 return rc;
10375}
10376
10377
10378/**
10379 * VM-exit handler for instructions that result in a #UD exception delivered to
10380 * the guest.
10381 */
10382HMVMX_EXIT_DECL hmR0VmxExitSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10383{
10384 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10385 hmR0VmxSetPendingXcptUD(pVCpu, pMixedCtx);
10386 return VINF_SUCCESS;
10387}
10388
10389
10390/**
10391 * VM-exit handler for expiry of the VMX preemption timer.
10392 */
10393HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10394{
10395 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10396
10397 /* If the preemption-timer has expired, reinitialize the preemption timer on next VM-entry. */
10398 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
10399
10400 /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */
10401 PVM pVM = pVCpu->CTX_SUFF(pVM);
10402 bool fTimersPending = TMTimerPollBool(pVM, pVCpu);
10403 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer);
10404 return fTimersPending ? VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS;
10405}
10406
10407
10408/**
10409 * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit.
10410 */
10411HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10412{
10413 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10414
10415 int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
10416 rc |= hmR0VmxSaveGuestRegsForIemExec(pVCpu, pMixedCtx, false /*fMemory*/, false /*fNeedRsp*/);
10417 rc |= hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx);
10418 AssertRCReturn(rc, rc);
10419
10420 VBOXSTRICTRC rcStrict = IEMExecDecodedXsetbv(pVCpu, pVmxTransient->cbInstr);
10421 HMCPU_CF_SET(pVCpu, rcStrict != VINF_IEM_RAISED_XCPT ? HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS : HM_CHANGED_ALL_GUEST);
10422
10423 pVCpu->hm.s.fLoadSaveGuestXcr0 = (pMixedCtx->cr4 & X86_CR4_OSXSAVE) && pMixedCtx->aXcr[0] != ASMGetXcr0();
10424
10425 return VBOXSTRICTRC_TODO(rcStrict);
10426}
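
/*
 * A plain-C restatement of the fLoadSaveGuestXcr0 decision above, for
 * illustration only: guest XCR0 needs to be swapped in/out around VM-entry and
 * VM-exit only when the guest has enabled CR4.OSXSAVE (bit 18) and its XCR0
 * differs from the value the host is currently running with.
 */
#include <stdint.h>

#define SKETCH_CR4_OSXSAVE  (1u << 18)

static int SketchNeedGuestXcr0Swap(uint64_t uGuestCr4, uint64_t uGuestXcr0, uint64_t uHostXcr0)
{
    return (uGuestCr4 & SKETCH_CR4_OSXSAVE) && uGuestXcr0 != uHostXcr0;
}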
10427
10428
10429/**
10430 * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit.
10431 */
10432HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10433{
10434 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10435
10436    /* The guest should not invalidate the host CPU's TLBs; fall back to the interpreter. */
10437 /** @todo implement EMInterpretInvpcid() */
10438 return VERR_EM_INTERPRETER;
10439}
10440
10441
10442/**
10443 * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE).
10444 * Error VM-exit.
10445 */
10446HMVMX_EXIT_DECL hmR0VmxExitErrInvalidGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10447{
10448 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10449 AssertRCReturn(rc, rc);
10450
10451 rc = hmR0VmxCheckVmcsCtls(pVCpu);
10452 AssertRCReturn(rc, rc);
10453
10454 uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx);
10455 NOREF(uInvalidReason);
10456
10457#ifdef VBOX_STRICT
10458 uint32_t uIntrState;
10459 RTHCUINTREG uHCReg;
10460 uint64_t u64Val;
10461 uint32_t u32Val;
10462
10463 rc = hmR0VmxReadEntryIntInfoVmcs(pVmxTransient);
10464 rc |= hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient);
10465 rc |= hmR0VmxReadEntryInstrLenVmcs(pVmxTransient);
10466 rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState);
10467 AssertRCReturn(rc, rc);
10468
10469 Log4(("uInvalidReason %u\n", uInvalidReason));
10470 Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntInfo));
10471 Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode));
10472 Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr));
10473 Log4(("VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE %#RX32\n", uIntrState));
10474
10475 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val); AssertRC(rc);
10476 Log4(("VMX_VMCS_GUEST_CR0 %#RX32\n", u32Val));
10477 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc);
10478 Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg));
10479 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc);
10480    Log4(("VMX_VMCS_CTRL_CR0_READ_SHADOW %#RHr\n", uHCReg));
10481 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc);
10482 Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg));
10483 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc);
10484 Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
10485 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
10486 Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
10487#else
10488 NOREF(pVmxTransient);
10489#endif
10490
10491 HMDumpRegs(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx);
10492 return VERR_VMX_INVALID_GUEST_STATE;
10493}
10494
10495
10496/**
10497 * VM-exit handler for VM-entry failure due to an MSR-load
10498 * (VMX_EXIT_ERR_MSR_LOAD). Error VM-exit.
10499 */
10500HMVMX_EXIT_DECL hmR0VmxExitErrMsrLoad(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10501{
10502 NOREF(pVmxTransient);
10503 AssertMsgFailed(("Unexpected MSR-load exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); NOREF(pMixedCtx);
10504 HMVMX_RETURN_UNEXPECTED_EXIT();
10505}
10506
10507
10508/**
10509 * VM-exit handler for VM-entry failure due to a machine-check event
10510 * (VMX_EXIT_ERR_MACHINE_CHECK). Error VM-exit.
10511 */
10512HMVMX_EXIT_DECL hmR0VmxExitErrMachineCheck(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10513{
10514 NOREF(pVmxTransient);
10515 AssertMsgFailed(("Unexpected machine-check event exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx)); NOREF(pMixedCtx);
10516 HMVMX_RETURN_UNEXPECTED_EXIT();
10517}
10518
10519
10520/**
10521 * VM-exit handler for all undefined reasons. Should never ever happen... in
10522 * theory.
10523 */
10524HMVMX_EXIT_DECL hmR0VmxExitErrUndefined(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10525{
10526 AssertMsgFailed(("Huh!? Undefined VM-exit reason %d. pVCpu=%p pMixedCtx=%p\n", pVmxTransient->uExitReason, pVCpu, pMixedCtx));
10527 NOREF(pVCpu); NOREF(pMixedCtx); NOREF(pVmxTransient);
10528 return VERR_VMX_UNDEFINED_EXIT_CODE;
10529}
10530
10531
10532/**
10533 * VM-exit handler for XDTR (LGDT, SGDT, LIDT, SIDT) accesses
10534 * (VMX_EXIT_XDTR_ACCESS) and LDT and TR access (LLDT, LTR, SLDT, STR).
10535 * Conditional VM-exit.
10536 */
10537HMVMX_EXIT_DECL hmR0VmxExitXdtrAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10538{
10539 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10540
10541 /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT. */
10542 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitXdtrAccess);
10543 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT)
10544 return VERR_EM_INTERPRETER;
10545 AssertMsgFailed(("Unexpected XDTR access. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
10546 HMVMX_RETURN_UNEXPECTED_EXIT();
10547}
10548
10549
10550/**
10551 * VM-exit handler for RDRAND (VMX_EXIT_RDRAND). Conditional VM-exit.
10552 */
10553HMVMX_EXIT_DECL hmR0VmxExitRdrand(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10554{
10555 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10556
10557 /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT. */
10558 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdrand);
10559 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT)
10560 return VERR_EM_INTERPRETER;
10561 AssertMsgFailed(("Unexpected RDRAND exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
10562 HMVMX_RETURN_UNEXPECTED_EXIT();
10563}
10564
10565
10566/**
10567 * VM-exit handler for RDMSR (VMX_EXIT_RDMSR).
10568 */
10569HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10570{
10571 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10572
10573 /* EMInterpretRdmsr() requires CR0, Eflags and SS segment register. */
10574 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
10575 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
10576 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10577 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS))
10578 {
10579 rc |= hmR0VmxSaveGuestLazyMsrs(pVCpu, pMixedCtx);
10580 rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx);
10581 }
10582 AssertRCReturn(rc, rc);
10583 Log4(("ecx=%#RX32\n", pMixedCtx->ecx));
10584
10585#ifdef VBOX_STRICT
10586 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
10587 {
10588 if ( hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, pMixedCtx->ecx)
10589 && pMixedCtx->ecx != MSR_K6_EFER)
10590 {
10591 AssertMsgFailed(("Unexpected RDMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n",
10592 pMixedCtx->ecx));
10593 HMVMX_RETURN_UNEXPECTED_EXIT();
10594 }
10595# if HC_ARCH_BITS == 64
10596 if ( pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests
10597 && hmR0VmxIsLazyGuestMsr(pVCpu, pMixedCtx->ecx))
10598 {
10599 AssertMsgFailed(("Unexpected RDMSR for a passthru lazy-restore MSR. ecx=%#RX32\n", pMixedCtx->ecx));
10600 HMVMX_RETURN_UNEXPECTED_EXIT();
10601 }
10602# endif
10603 }
10604#endif
10605
10606 PVM pVM = pVCpu->CTX_SUFF(pVM);
10607 rc = EMInterpretRdmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
10608 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER,
10609 ("hmR0VmxExitRdmsr: failed, invalid error code %Rrc\n", rc));
10610 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
10611 if (RT_LIKELY(rc == VINF_SUCCESS))
10612 {
10613 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10614 Assert(pVmxTransient->cbInstr == 2);
10615 }
10616 return rc;
10617}
10618
10619
10620/**
10621 * VM-exit handler for WRMSR (VMX_EXIT_WRMSR).
10622 */
10623HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10624{
10625 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10626 PVM pVM = pVCpu->CTX_SUFF(pVM);
10627 int rc = VINF_SUCCESS;
10628
10629 /* EMInterpretWrmsr() requires CR0, EFLAGS and SS segment register. */
10630 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
10631 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
10632 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10633 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS))
10634 {
10635 rc |= hmR0VmxSaveGuestLazyMsrs(pVCpu, pMixedCtx);
10636 rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx);
10637 }
10638 AssertRCReturn(rc, rc);
10639 Log4(("ecx=%#RX32 edx:eax=%#RX32:%#RX32\n", pMixedCtx->ecx, pMixedCtx->edx, pMixedCtx->eax));
10640
10641 rc = EMInterpretWrmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
10642 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, ("hmR0VmxExitWrmsr: failed, invalid error code %Rrc\n", rc));
10643 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
10644
10645 if (RT_LIKELY(rc == VINF_SUCCESS))
10646 {
10647 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10648
10649 /* If this is an X2APIC WRMSR access, update the APIC state as well. */
10650 if ( pMixedCtx->ecx >= MSR_IA32_X2APIC_START
10651 && pMixedCtx->ecx <= MSR_IA32_X2APIC_END)
10652 {
10653 /* We've already saved the APIC related guest-state (TPR) in hmR0VmxPostRunGuest(). When full APIC register
10654 * virtualization is implemented we'll have to make sure APIC state is saved from the VMCS before
10655             * EMInterpretWrmsr() changes it. */
10656 HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
10657 }
10658 else if (pMixedCtx->ecx == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */
10659 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
10660 else if (pMixedCtx->ecx == MSR_K6_EFER)
10661 {
10662 /*
10663 * If the guest touches EFER we need to update the VM-Entry and VM-Exit controls as well,
10664 * even if it is -not- touching bits that cause paging mode changes (LMA/LME). We care about
10665 * the other bits as well, SCE and NXE. See @bugref{7368}.
10666 */
10667 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_EFER_MSR | HM_CHANGED_VMX_ENTRY_CTLS | HM_CHANGED_VMX_EXIT_CTLS);
10668 }
10669
10670 /* Update MSRs that are part of the VMCS and auto-load/store area when MSR-bitmaps are not supported. */
10671 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS))
10672 {
10673 switch (pMixedCtx->ecx)
10674 {
10675 case MSR_IA32_SYSENTER_CS: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break;
10676 case MSR_IA32_SYSENTER_EIP: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break;
10677 case MSR_IA32_SYSENTER_ESP: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break;
10678 case MSR_K8_FS_BASE: /* no break */
10679 case MSR_K8_GS_BASE: HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS); break;
10680 case MSR_K6_EFER: /* already handled above */ break;
10681 default:
10682 {
10683 if (hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, pMixedCtx->ecx))
10684 HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS);
10685#if HC_ARCH_BITS == 64
10686 else if (hmR0VmxIsLazyGuestMsr(pVCpu, pMixedCtx->ecx))
10687 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_LAZY_MSRS);
10688#endif
10689 break;
10690 }
10691 }
10692 }
10693#ifdef VBOX_STRICT
10694 else
10695 {
10696 /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */
10697 switch (pMixedCtx->ecx)
10698 {
10699 case MSR_IA32_SYSENTER_CS:
10700 case MSR_IA32_SYSENTER_EIP:
10701 case MSR_IA32_SYSENTER_ESP:
10702 case MSR_K8_FS_BASE:
10703 case MSR_K8_GS_BASE:
10704 {
10705 AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", pMixedCtx->ecx));
10706 HMVMX_RETURN_UNEXPECTED_EXIT();
10707 }
10708
10709            /* Writes to MSRs in the auto-load/store area or to lazily swapped MSRs shouldn't cause VM-exits when MSR-bitmaps are in use. */
10710 default:
10711 {
10712 if (hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, pMixedCtx->ecx))
10713 {
10714 /* EFER writes are always intercepted, see hmR0VmxLoadGuestMsrs(). */
10715 if (pMixedCtx->ecx != MSR_K6_EFER)
10716 {
10717 AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n",
10718 pMixedCtx->ecx));
10719 HMVMX_RETURN_UNEXPECTED_EXIT();
10720 }
10721 }
10722
10723#if HC_ARCH_BITS == 64
10724 if (hmR0VmxIsLazyGuestMsr(pVCpu, pMixedCtx->ecx))
10725 {
10726 AssertMsgFailed(("Unexpected WRMSR for passthru, lazy-restore MSR. ecx=%#RX32\n", pMixedCtx->ecx));
10727 HMVMX_RETURN_UNEXPECTED_EXIT();
10728 }
10729#endif
10730 break;
10731 }
10732 }
10733 }
10734#endif /* VBOX_STRICT */
10735 }
10736 return rc;
10737}
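
/*
 * A small sketch of the x2APIC range test used in the WRMSR handler above:
 * the x2APIC register space is mapped to MSRs 0x800-0x8FF (per the Intel SDM),
 * so a write anywhere in that range may have changed APIC state that must be
 * re-synced before VM-reentry. The helper name and the bounds written out here
 * are illustrative stand-ins for MSR_IA32_X2APIC_START/END.
 */
#include <stdint.h>

static int SketchIsX2ApicMsr(uint32_t idMsr)
{
    return idMsr >= 0x800 && idMsr <= 0x8ff;
}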
10738
10739
10740/**
10741 * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit.
10742 */
10743HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10744{
10745 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10746
10747 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPause);
10748 return VINF_EM_RAW_INTERRUPT;
10749}
10750
10751
10752/**
10753 * VM-exit handler for when the TPR value is lowered below the specified
10754 * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit.
10755 */
10756HMVMX_EXIT_DECL hmR0VmxExitTprBelowThreshold(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10757{
10758 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10759 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW);
10760
10761 /*
10762     * The TPR has already been updated; see hmR0VmxPostRunGuest(). RIP is also updated as part of the VM-exit by VT-x. Update
10763 * the threshold in the VMCS, deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectPendingEvent() and
10764 * resume guest execution.
10765 */
10766 HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
10767 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold);
10768 return VINF_SUCCESS;
10769}
10770
10771
10772/**
10773 * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional
10774 * VM-exit.
10775 *
10776 * @retval VINF_SUCCESS when guest execution can continue.
10777 * @retval VINF_PGM_CHANGE_MODE when shadow paging mode changed, back to ring-3.
10778 * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3.
10779 * @retval VERR_EM_INTERPRETER when something unexpected happened, fallback to
10780 * interpreter.
10781 */
10782HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10783{
10784 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10785 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2);
10786 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10787 rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
10788 AssertRCReturn(rc, rc);
10789
10790 RTGCUINTPTR const uExitQualification = pVmxTransient->uExitQualification;
10791 uint32_t const uAccessType = VMX_EXIT_QUALIFICATION_CRX_ACCESS(uExitQualification);
10792 PVM pVM = pVCpu->CTX_SUFF(pVM);
10793 VBOXSTRICTRC rcStrict;
10794 rc = hmR0VmxSaveGuestRegsForIemExec(pVCpu, pMixedCtx, false /*fMemory*/, true /*fNeedRsp*/);
10795 switch (uAccessType)
10796 {
10797 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE: /* MOV to CRx */
10798 {
10799 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
10800 AssertRCReturn(rc, rc);
10801
10802 rcStrict = IEMExecDecodedMovCRxWrite(pVCpu, pVmxTransient->cbInstr,
10803 VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification),
10804 VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification));
10805 AssertMsg(rcStrict == VINF_SUCCESS || rcStrict == VINF_IEM_RAISED_XCPT || rcStrict == VINF_PGM_CHANGE_MODE
10806 || rcStrict == VINF_PGM_SYNC_CR3, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
10807 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification))
10808 {
10809 case 0: /* CR0 */
10810 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
10811 Log4(("CRX CR0 write rcStrict=%Rrc CR0=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pMixedCtx->cr0));
10812 break;
10813 case 2: /* CR2 */
10814                /* Nothing to do here; CR2 is not part of the VMCS. */
10815 break;
10816 case 3: /* CR3 */
10817 Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestPagingEnabledEx(pMixedCtx));
10818 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR3);
10819 Log4(("CRX CR3 write rcStrict=%Rrc CR3=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pMixedCtx->cr3));
10820 break;
10821 case 4: /* CR4 */
10822 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR4);
10823 Log4(("CRX CR4 write rc=%Rrc CR4=%#RX64 fLoadSaveGuestXcr0=%u\n",
10824 VBOXSTRICTRC_VAL(rcStrict), pMixedCtx->cr4, pVCpu->hm.s.fLoadSaveGuestXcr0));
10825 break;
10826 case 8: /* CR8 */
10827 Assert(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW));
10828 /* CR8 contains the APIC TPR. Was updated by IEMExecDecodedMovCRxWrite(). */
10829 HMCPU_CF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
10830 break;
10831 default:
10832 AssertMsgFailed(("Invalid CRx register %#x\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)));
10833 break;
10834 }
10835
10836 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]);
10837 break;
10838 }
10839
10840 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ: /* MOV from CRx */
10841 {
10842 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
10843 AssertRCReturn(rc, rc);
10844
10845 Assert( !pVM->hm.s.fNestedPaging
10846 || !CPUMIsGuestPagingEnabledEx(pMixedCtx)
10847 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 3);
10848
10849 /* CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */
10850 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 8
10851 || !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW));
10852
10853 rcStrict = IEMExecDecodedMovCRxRead(pVCpu, pVmxTransient->cbInstr,
10854 VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification),
10855 VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification));
10856 AssertMsg(rcStrict == VINF_SUCCESS || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
10857 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]);
10858 Log4(("CRX CR%d Read access rcStrict=%Rrc\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification),
10859 VBOXSTRICTRC_VAL(rcStrict)));
10860 break;
10861 }
10862
10863 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS: /* CLTS (Clear Task-Switch Flag in CR0) */
10864 {
10865 AssertRCReturn(rc, rc);
10866 rcStrict = IEMExecDecodedClts(pVCpu, pVmxTransient->cbInstr);
10867 AssertMsg(rcStrict == VINF_SUCCESS || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
10868 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
10869 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
10870 Log4(("CRX CLTS rcStrict=%d\n", VBOXSTRICTRC_VAL(rcStrict)));
10871 break;
10872 }
10873
10874 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */
10875 {
10876 AssertRCReturn(rc, rc);
10877 rcStrict = IEMExecDecodedLmsw(pVCpu, pVmxTransient->cbInstr,
10878 VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(uExitQualification));
10879 AssertMsg(rcStrict == VINF_SUCCESS || rcStrict == VINF_IEM_RAISED_XCPT || rcStrict == VINF_PGM_CHANGE_MODE,
10880 ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
10881 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw);
10882 Log4(("CRX LMSW rcStrict=%d\n", VBOXSTRICTRC_VAL(rcStrict)));
10883 break;
10884 }
10885
10886 default:
10887 AssertMsgFailedReturn(("Invalid access-type in Mov CRx VM-exit qualification %#x\n", uAccessType),
10888 VERR_VMX_UNEXPECTED_EXCEPTION);
10889 }
10890
10891 HMCPU_CF_SET(pVCpu, rcStrict != VINF_IEM_RAISED_XCPT ? HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS : HM_CHANGED_ALL_GUEST);
10892 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2);
10893 NOREF(pVM);
10894 return VBOXSTRICTRC_TODO(rcStrict);
10895}
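
/*
 * A self-contained sketch of the control-register-access exit qualification
 * decoded in hmR0VmxExitMovCRx() above (field layout per the Intel SDM; the
 * names are illustrative stand-ins for the VMX_EXIT_QUALIFICATION_CRX_*
 * macros used in this file).
 */
#include <stdint.h>

static void SketchDecodeCrxQual(uint64_t uQual, unsigned *puCrReg, unsigned *puAccessType,
                                unsigned *puGenReg, unsigned *puLmswData)
{
    *puCrReg      = (unsigned)( uQual        & 0xf);     /* Bits 3:0   - CR number (0, 3, 4 or 8).                   */
    *puAccessType = (unsigned)((uQual >>  4) & 0x3);     /* Bits 5:4   - 0=MOV to CR, 1=MOV from CR, 2=CLTS, 3=LMSW. */
    *puGenReg     = (unsigned)((uQual >>  8) & 0xf);     /* Bits 11:8  - general-purpose register for MOV CRx.       */
    *puLmswData   = (unsigned)((uQual >> 16) & 0xffff);  /* Bits 31:16 - LMSW source data.                           */
}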
10896
10897
10898/**
10899 * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional
10900 * VM-exit.
10901 */
10902HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10903{
10904 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10905 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1);
10906
10907 int rc2 = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10908 rc2 |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
10909 rc2 |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
10910 rc2 |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* Eflag checks in EMInterpretDisasCurrent(). */
10911 rc2 |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); /* CR0 checks & PGM* in EMInterpretDisasCurrent(). */
10912 rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); /* SELM checks in EMInterpretDisasCurrent(). */
10913 /* EFER also required for longmode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */
10914 AssertRCReturn(rc2, rc2);
10915
10916    /* Refer to Intel spec. 27-5 "Exit Qualifications for I/O Instructions" for the format. */
10917 uint32_t uIOPort = VMX_EXIT_QUALIFICATION_IO_PORT(pVmxTransient->uExitQualification);
10918 uint8_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(pVmxTransient->uExitQualification);
10919 bool fIOWrite = ( VMX_EXIT_QUALIFICATION_IO_DIRECTION(pVmxTransient->uExitQualification)
10920 == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
10921 bool fIOString = VMX_EXIT_QUALIFICATION_IO_IS_STRING(pVmxTransient->uExitQualification);
10922 bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF);
10923 AssertReturn(uIOWidth <= 3 && uIOWidth != 2, VERR_VMX_IPE_1);
10924
10925 /* I/O operation lookup arrays. */
10926 static uint32_t const s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses. */
10927 static uint32_t const s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving the result (in AL/AX/EAX). */
10928
10929 VBOXSTRICTRC rcStrict;
10930 uint32_t const cbValue = s_aIOSizes[uIOWidth];
10931 uint32_t const cbInstr = pVmxTransient->cbInstr;
10932 bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */
10933 PVM pVM = pVCpu->CTX_SUFF(pVM);
10934 if (fIOString)
10935 {
10936#ifdef VBOX_WITH_2ND_IEM_STEP /* This used to cause Guru Meditations with a Debian 32-bit guest without NP (on ATA reads).
10937 See @bugref{5752#c158}. Should work now. */
10938 /*
10939 * INS/OUTS - I/O String instruction.
10940 *
10941 * Use instruction-information if available, otherwise fall back on
10942 * interpreting the instruction.
10943 */
10944 Log4(("CS:RIP=%04x:%08RX64 %#06x/%u %c str\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue,
10945 fIOWrite ? 'w' : 'r'));
10946 AssertReturn(pMixedCtx->dx == uIOPort, VERR_VMX_IPE_2);
10947 if (MSR_IA32_VMX_BASIC_INFO_VMCS_INS_OUTS(pVM->hm.s.vmx.Msrs.u64BasicInfo))
10948 {
10949 rc2 = hmR0VmxReadExitInstrInfoVmcs(pVmxTransient);
10950 /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */
10951 rc2 |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10952 AssertRCReturn(rc2, rc2);
10953 AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_VMX_IPE_3);
10954 AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2);
10955 IEMMODE enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize;
10956 bool fRep = VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification);
10957 if (fIOWrite)
10958 {
10959 rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr,
10960 pVmxTransient->ExitInstrInfo.StrIo.iSegReg);
10961 }
10962 else
10963 {
10964 /*
10965 * The segment prefix for INS cannot be overridden and is always ES. We can safely assume X86_SREG_ES.
10966 * Hence "iSegReg" field is undefined in the instruction-information field in VT-x for INS.
10967 * See Intel Instruction spec. for "INS".
10968 * See Intel spec. Table 27-8 "Format of the VM-Exit Instruction-Information Field as Used for INS and OUTS".
10969 */
10970 rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr);
10971 }
10972 }
10973 else
10974 {
10975 /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */
10976 rc2 = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10977 AssertRCReturn(rc2, rc2);
10978 rcStrict = IEMExecOne(pVCpu);
10979 }
10980 /** @todo IEM needs to be setting these flags somehow. */
10981 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
10982 fUpdateRipAlready = true;
10983#else
10984 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
10985 rcStrict = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL /* pcbInstr */);
10986 if (RT_SUCCESS(rcStrict))
10987 {
10988 if (fIOWrite)
10989 {
10990 rcStrict = IOMInterpretOUTSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix,
10991 (DISCPUMODE)pDis->uAddrMode, cbValue);
10992 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
10993 }
10994 else
10995 {
10996 rcStrict = IOMInterpretINSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix,
10997 (DISCPUMODE)pDis->uAddrMode, cbValue);
10998 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
10999 }
11000 }
11001 else
11002 {
11003 AssertMsg(rcStrict == VERR_EM_INTERPRETER, ("rcStrict=%Rrc RIP=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict),
11004 pMixedCtx->rip));
11005 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
11006 }
11007#endif
11008 }
11009 else
11010 {
11011 /*
11012 * IN/OUT - I/O instruction.
11013 */
11014 Log4(("CS:RIP=%04x:%08RX64 %#06x/%u %c\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
11015 uint32_t const uAndVal = s_aIOOpAnd[uIOWidth];
11016 Assert(!VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification));
11017 if (fIOWrite)
11018 {
11019 rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pMixedCtx->eax & uAndVal, cbValue);
11020 if (rcStrict == VINF_IOM_R3_IOPORT_WRITE)
11021 HMR0SavePendingIOPortWrite(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue);
11022 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
11023 }
11024 else
11025 {
11026 uint32_t u32Result = 0;
11027 rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue);
11028 if (IOM_SUCCESS(rcStrict))
11029 {
11030 /* Save result of I/O IN instr. in AL/AX/EAX. */
11031 pMixedCtx->eax = (pMixedCtx->eax & ~uAndVal) | (u32Result & uAndVal);
11032 }
11033 else if (rcStrict == VINF_IOM_R3_IOPORT_READ)
11034 HMR0SavePendingIOPortRead(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue);
11035 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
11036 }
11037 }
11038
11039 if (IOM_SUCCESS(rcStrict))
11040 {
11041 if (!fUpdateRipAlready)
11042 {
11043 pMixedCtx->rip += cbInstr;
11044 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
11045 }
11046
11047 /*
11048         * INS/OUTS with a REP prefix updates RFLAGS; this can be observed as a triple-fault Guru Meditation while booting a Fedora 17 64-bit guest.
11049 * See Intel Instruction reference for REP/REPE/REPZ/REPNE/REPNZ.
11050 */
11051 if (fIOString)
11052 {
11053 /** @todo Single-step for INS/OUTS with REP prefix? */
11054 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RFLAGS);
11055 }
11056 else if (fStepping)
11057 hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx);
11058
11059 /*
11060 * If any I/O breakpoints are armed, we need to check if one triggered
11061 * and take appropriate action.
11062 * Note that the I/O breakpoint type is undefined if CR4.DE is 0.
11063 */
11064 rc2 = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
11065 AssertRCReturn(rc2, rc2);
11066
11067 /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the
11068 * execution engines about whether hyper BPs and such are pending. */
11069 uint32_t const uDr7 = pMixedCtx->dr[7];
11070 if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK)
11071 && X86_DR7_ANY_RW_IO(uDr7)
11072 && (pMixedCtx->cr4 & X86_CR4_DE))
11073 || DBGFBpIsHwIoArmed(pVM)))
11074 {
11075 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
11076
11077 /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
11078 VMMRZCallRing3Disable(pVCpu);
11079 HM_DISABLE_PREEMPT();
11080
11081 bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /* fDr6 */);
11082
11083 VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pMixedCtx, uIOPort, cbValue);
11084 if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP)
11085 {
11086 /* Raise #DB. */
11087 if (fIsGuestDbgActive)
11088 ASMSetDR6(pMixedCtx->dr[6]);
11089 if (pMixedCtx->dr[7] != uDr7)
11090 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG);
11091
11092 hmR0VmxSetPendingXcptDB(pVCpu, pMixedCtx);
11093 }
11094 /* rcStrict is VINF_SUCCESS or in [VINF_EM_FIRST..VINF_EM_LAST]. */
11095 else if ( rcStrict2 != VINF_SUCCESS
11096 && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict))
11097 rcStrict = rcStrict2;
11098
11099 HM_RESTORE_PREEMPT();
11100 VMMRZCallRing3Enable(pVCpu);
11101 }
11102 }
11103
11104#ifdef VBOX_STRICT
11105 if (rcStrict == VINF_IOM_R3_IOPORT_READ)
11106 Assert(!fIOWrite);
11107 else if (rcStrict == VINF_IOM_R3_IOPORT_WRITE)
11108 Assert(fIOWrite);
11109 else
11110 {
11111#if 0 /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST
11112 * statuses, that the VMM device and some others may return. See
11113 * IOM_SUCCESS() for guidance. */
11114 AssertMsg( RT_FAILURE(rcStrict)
11115 || rcStrict == VINF_SUCCESS
11116 || rcStrict == VINF_EM_RAW_EMULATE_INSTR
11117 || rcStrict == VINF_EM_DBG_BREAKPOINT
11118 || rcStrict == VINF_EM_RAW_GUEST_TRAP
11119 || rcStrict == VINF_EM_RAW_TO_R3
11120 || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
11121#endif
11122 }
11123#endif
11124
11125 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1);
11126 return VBOXSTRICTRC_TODO(rcStrict);
11127}
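
/*
 * A self-contained sketch of the I/O-instruction exit qualification decoded in
 * hmR0VmxExitIoInstr() above, including the width-to-size/AND-mask mapping used
 * when merging an IN result into AL/AX/EAX (field layout per the Intel SDM;
 * all names below are illustrative only).
 */
#include <stdint.h>

static const uint8_t  s_acbSketchIoSize[4]   = { 1, 2, 0, 4 };                   /* Width encoding 2 is invalid. */
static const uint32_t s_afSketchIoAndMask[4] = { 0xff, 0xffff, 0, 0xffffffff };  /* AL / AX / invalid / EAX.     */

typedef struct IOQUALSKETCH
{
    uint16_t uPort;     /* Bits 31:16 - I/O port number.                   */
    uint8_t  cbAccess;  /* From bits 2:0 - 1, 2 or 4 bytes (0 if invalid). */
    int      fIn;       /* Bit 3 - 1 = IN, 0 = OUT.                        */
    int      fString;   /* Bit 4 - INS/OUTS.                               */
    int      fRep;      /* Bit 5 - REP prefixed.                           */
} IOQUALSKETCH;

static IOQUALSKETCH SketchDecodeIoQual(uint64_t uQual)
{
    IOQUALSKETCH Q;
    unsigned const uWidth = (unsigned)(uQual & 0x7);
    Q.cbAccess = uWidth <= 3 ? s_acbSketchIoSize[uWidth] : 0;
    Q.fIn      = (int)((uQual >> 3) & 1);
    Q.fString  = (int)((uQual >> 4) & 1);
    Q.fRep     = (int)((uQual >> 5) & 1);
    Q.uPort    = (uint16_t)(uQual >> 16);
    return Q;
}

/* Merging an IN result into the low bits of EAX, as the handler above does. */
static uint32_t SketchMergeInResult(uint32_t uEax, uint32_t uResult, unsigned uWidth)
{
    uint32_t const fAnd = uWidth <= 3 ? s_afSketchIoAndMask[uWidth] : 0;
    return (uEax & ~fAnd) | (uResult & fAnd);
}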
11128
11129
11130/**
11131 * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional
11132 * VM-exit.
11133 */
11134HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11135{
11136 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
11137
11138    /* Check if this task-switch occurred while delivering an event through the guest IDT. */
11139 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
11140 AssertRCReturn(rc, rc);
11141 if (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
11142 {
11143 rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
11144 AssertRCReturn(rc, rc);
11145 if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo))
11146 {
11147 uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo);
11148
11149 uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo);
11150 bool fErrorCodeValid = VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo);
11151
11152 /* Save it as a pending event and it'll be converted to a TRPM event on the way out to ring-3. */
11153 Assert(!pVCpu->hm.s.Event.fPending);
11154 pVCpu->hm.s.Event.fPending = true;
11155 pVCpu->hm.s.Event.u64IntInfo = pVmxTransient->uIdtVectoringInfo;
11156 rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
11157 AssertRCReturn(rc, rc);
11158 if (fErrorCodeValid)
11159 pVCpu->hm.s.Event.u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
11160 else
11161 pVCpu->hm.s.Event.u32ErrCode = 0;
11162 if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
11163 && uVector == X86_XCPT_PF)
11164 {
11165 pVCpu->hm.s.Event.GCPtrFaultAddress = pMixedCtx->cr2;
11166 }
11167
11168 Log4(("Pending event on TaskSwitch uIntType=%#x uVector=%#x\n", uIntType, uVector));
11169 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
11170 return VINF_EM_RAW_INJECT_TRPM_EVENT;
11171 }
11172 }
11173
11174 /** @todo Emulate task switch someday, currently just going back to ring-3 for
11175 * emulation. */
11176 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
11177 return VERR_EM_INTERPRETER;
11178}
11179
11180
11181/**
11182 * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit.
11183 */
11184HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11185{
11186 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
11187 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG);
11188 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG;
11189 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
11190 AssertRCReturn(rc, rc);
11191 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf);
11192 return VINF_EM_DBG_STEPPED;
11193}
11194
11195
11196/**
11197 * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit.
11198 */
11199HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11200{
11201 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
11202
11203 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
11204 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
11205 if (RT_UNLIKELY(rc != VINF_SUCCESS))
11206 {
11207 if (rc == VINF_HM_DOUBLE_FAULT)
11208 rc = VINF_SUCCESS;
11209 return rc;
11210 }
11211
11212#if 0
11213 /** @todo Investigate if IOMMMIOPhysHandler() requires a lot of state, for now
11214 * just sync the whole thing. */
11215 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
11216#else
11217 /* Aggressive state sync. for now. */
11218 rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
11219 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
11220 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
11221#endif
11222 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
11223 AssertRCReturn(rc, rc);
11224
11225    /* See Intel spec. 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Physical Addresses" */
11226 uint32_t uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(pVmxTransient->uExitQualification);
11227 switch (uAccessType)
11228 {
11229 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
11230 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
11231 {
11232 AssertMsg( !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
11233 || VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification) != 0x80,
11234 ("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n"));
11235
11236 RTGCPHYS GCPhys = pMixedCtx->msrApicBase; /* Always up-to-date, msrApicBase is not part of the VMCS. */
11237 GCPhys &= PAGE_BASE_GC_MASK;
11238 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification);
11239 PVM pVM = pVCpu->CTX_SUFF(pVM);
11240            Log4(("ApicAccess uAccessType=%#x GCPhys=%#RGp Off=%#x\n", uAccessType, GCPhys,
11241 VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification)));
11242
11243 VBOXSTRICTRC rc2 = IOMMMIOPhysHandler(pVM, pVCpu,
11244 uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ ? 0 : X86_TRAP_PF_RW,
11245 CPUMCTX2CORE(pMixedCtx), GCPhys);
11246 rc = VBOXSTRICTRC_VAL(rc2);
11247 Log4(("ApicAccess rc=%d\n", rc));
11248 if ( rc == VINF_SUCCESS
11249 || rc == VERR_PAGE_TABLE_NOT_PRESENT
11250 || rc == VERR_PAGE_NOT_PRESENT)
11251 {
11252 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP
11253 | HM_CHANGED_GUEST_RSP
11254 | HM_CHANGED_GUEST_RFLAGS
11255 | HM_CHANGED_VMX_GUEST_APIC_STATE);
11256 rc = VINF_SUCCESS;
11257 }
11258 break;
11259 }
11260
11261 default:
11262 Log4(("ApicAccess uAccessType=%#x\n", uAccessType));
11263 rc = VINF_EM_RAW_EMULATE_INSTR;
11264 break;
11265 }
11266
11267 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess);
11268 if (rc != VINF_SUCCESS)
11269 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchApicAccessToR3);
11270 return rc;
11271}
11272
11273
11274/**
11275 * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional
11276 * VM-exit.
11277 */
11278HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11279{
11280 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
11281
11282 /* We should -not- get this VM-exit if the guest's debug registers were active. */
11283 if (pVmxTransient->fWasGuestDebugStateActive)
11284 {
11285 AssertMsgFailed(("Unexpected MOV DRx exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
11286 HMVMX_RETURN_UNEXPECTED_EXIT();
11287 }
11288
11289 int rc = VERR_INTERNAL_ERROR_5;
11290 if ( !DBGFIsStepping(pVCpu)
11291 && !pVCpu->hm.s.fSingleInstruction
11292 && !pVmxTransient->fWasHyperDebugStateActive)
11293 {
11294 /* Don't intercept MOV DRx and #DB any more. */
11295 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
11296 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
11297 AssertRCReturn(rc, rc);
11298
11299 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
11300 {
11301#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
11302 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB);
11303 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_XCPT_INTERCEPTS);
11304#endif
11305 }
11306
11307 /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */
11308 VMMRZCallRing3Disable(pVCpu);
11309 HM_DISABLE_PREEMPT();
11310
11311 /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
11312 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
11313 Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32);
11314
11315 HM_RESTORE_PREEMPT();
11316 VMMRZCallRing3Enable(pVCpu);
11317
11318#ifdef VBOX_WITH_STATISTICS
11319 rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
11320 AssertRCReturn(rc, rc);
11321 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
11322 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
11323 else
11324 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
11325#endif
11326 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
11327 return VINF_SUCCESS;
11328 }
11329
11330 /*
11331 * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode() which requires EFER, CS. EFER is always up-to-date.
11332 * Update the segment registers and DR7 from the CPU.
11333 */
11334 rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
11335 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
11336 rc |= hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
11337 AssertRCReturn(rc, rc);
11338 Log4(("CS:RIP=%04x:%08RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip));
11339
11340 PVM pVM = pVCpu->CTX_SUFF(pVM);
11341 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
11342 {
11343 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
11344 VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification),
11345 VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification));
11346 if (RT_SUCCESS(rc))
11347 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG);
11348 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
11349 }
11350 else
11351 {
11352 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
11353 VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification),
11354 VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification));
11355 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
11356 }
11357
11358 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
11359 if (RT_SUCCESS(rc))
11360 {
11361 int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
11362 AssertRCReturn(rc2, rc2);
11363 }
11364 return rc;
11365}
11366
11367
11368/**
11369 * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG).
11370 * Conditional VM-exit.
11371 */
11372HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11373{
11374 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
11375 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
11376
11377 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
11378 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
11379 if (RT_UNLIKELY(rc != VINF_SUCCESS))
11380 {
11381 if (rc == VINF_HM_DOUBLE_FAULT)
11382 rc = VINF_SUCCESS;
11383 return rc;
11384 }
11385
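      /* EPT misconfigurations are used to flag MMIO pages under nested paging; fetch the guest-physical
         address of the access from the VMCS. */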
11386 RTGCPHYS GCPhys = 0;
11387 rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
11388
11389#if 0
11390 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */
11391#else
11392 /* Aggressive state sync. for now. */
11393 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
11394 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
11395 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
11396#endif
11397 AssertRCReturn(rc, rc);
11398
11399 /*
11400 * If we succeed, resume guest execution.
11401 * If we fail to interpret the instruction because we couldn't get the guest-physical address
11402 * of the page containing the instruction via the guest's page tables (we would invalidate the
11403 * guest page in the host TLB), resume execution anyway; the resulting guest page fault lets the
11404 * guest handle this weird case. See @bugref{6043}.
11405 */
11406 PVM pVM = pVCpu->CTX_SUFF(pVM);
11407 VBOXSTRICTRC rc2 = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pMixedCtx), GCPhys, UINT32_MAX);
11408 rc = VBOXSTRICTRC_VAL(rc2);
11409 Log4(("EPT misconfig at %#RGv RIP=%#RX64 rc=%d\n", GCPhys, pMixedCtx->rip, rc));
11410 if ( rc == VINF_SUCCESS
11411 || rc == VERR_PAGE_TABLE_NOT_PRESENT
11412 || rc == VERR_PAGE_NOT_PRESENT)
11413 {
11414 /* Successfully handled MMIO operation. */
11415 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP
11416 | HM_CHANGED_GUEST_RSP
11417 | HM_CHANGED_GUEST_RFLAGS
11418 | HM_CHANGED_VMX_GUEST_APIC_STATE);
11419 rc = VINF_SUCCESS;
11420 }
11421 return rc;
11422}
11423
11424
11425/**
11426 * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional
11427 * VM-exit.
11428 */
11429HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11430{
11431 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
11432 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
11433
11434 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
11435 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
11436 if (RT_UNLIKELY(rc != VINF_SUCCESS))
11437 {
11438 if (rc == VINF_HM_DOUBLE_FAULT)
11439 rc = VINF_SUCCESS;
11440 return rc;
11441 }
11442
11443 RTGCPHYS GCPhys = 0;
11444 rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
11445 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
11446#if 0
11447 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */
11448#else
11449 /* Aggressive state sync. for now. */
11450 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
11451 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
11452 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
11453#endif
11454 AssertRCReturn(rc, rc);
11455
11456 /* Intel spec. Table 27-7 "Exit Qualifications for EPT violations". */
11457 AssertMsg(((pVmxTransient->uExitQualification >> 7) & 3) != 2, ("%#RX64", pVmxTransient->uExitQualification));
11458
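      /* Build a #PF-style error code from the exit qualification bits so the nested-paging handler below
         can treat the violation like an ordinary page fault. */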
11459 RTGCUINT uErrorCode = 0;
11460 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
11461 uErrorCode |= X86_TRAP_PF_ID;
11462 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
11463 uErrorCode |= X86_TRAP_PF_RW;
11464 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
11465 uErrorCode |= X86_TRAP_PF_P;
11466
11467 TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode);
11468
11469 Log4(("EPT violation %#x at %#RX64 ErrorCode %#x CS:RIP=%04x:%08RX64\n", pVmxTransient->uExitQualification, GCPhys,
11470 uErrorCode, pMixedCtx->cs.Sel, pMixedCtx->rip));
11471
11472 /* Handle the pagefault trap for the nested shadow table. */
11473 PVM pVM = pVCpu->CTX_SUFF(pVM);
11474 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pMixedCtx), GCPhys);
11475 TRPMResetTrap(pVCpu);
11476
11477 /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */
11478 if ( rc == VINF_SUCCESS
11479 || rc == VERR_PAGE_TABLE_NOT_PRESENT
11480 || rc == VERR_PAGE_NOT_PRESENT)
11481 {
11482 /* Successfully synced our nested page tables. */
11483 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
11484 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP
11485 | HM_CHANGED_GUEST_RSP
11486 | HM_CHANGED_GUEST_RFLAGS);
11487 return VINF_SUCCESS;
11488 }
11489
11490 Log4(("EPT return to ring-3 rc=%Rrc\n", rc));
11491 return rc;
11492}
11493
11494/** @} */
11495
11496/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */
11497/* -=-=-=-=-=-=-=-=-=- VM-exit Exception Handlers -=-=-=-=-=-=-=-=-=-=- */
11498/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */
11499
11500/** @name VM-exit exception handlers.
11501 * @{
11502 */
11503
11504/**
11505 * VM-exit exception handler for #MF (Math Fault: floating point exception).
11506 */
11507static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11508{
11509 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
11510 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
11511
11512 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
11513 AssertRCReturn(rc, rc);
11514
11515 if (!(pMixedCtx->cr0 & X86_CR0_NE))
11516 {
11517 /* Convert a #MF into a FERR -> IRQ 13. See @bugref{6117}. */
11518 rc = PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13, 1, 0 /* uTagSrc */);
11519
11520 /** @todo r=ramshankar: The Intel spec. does -not- specify that this VM-exit
11521 * provides VM-exit instruction length. If this causes problems later,
11522 * disassemble the instruction like it's done on AMD-V. */
11523 int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
11524 AssertRCReturn(rc2, rc2);
11525 return rc;
11526 }
11527
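      /* CR0.NE is set: re-inject the #MF into the guest as-is. */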
11528 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
11529 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
11530 return rc;
11531}
11532
11533
11534/**
11535 * VM-exit exception handler for #BP (Breakpoint exception).
11536 */
11537static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11538{
11539 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
11540 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
11541
11542 /** @todo Try to optimize this by not saving the entire guest state unless
11543 * really needed. */
11544 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
11545 AssertRCReturn(rc, rc);
11546
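      /* Give DBGF first shot at the breakpoint; VINF_EM_RAW_GUEST_TRAP means it isn't one of ours and
         the #BP must be reflected back into the guest. */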
11547 PVM pVM = pVCpu->CTX_SUFF(pVM);
11548 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
11549 if (rc == VINF_EM_RAW_GUEST_TRAP)
11550 {
11551 rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
11552 rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
11553 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
11554 AssertRCReturn(rc, rc);
11555
11556 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
11557 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
11558 }
11559
11560 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_EM_DBG_BREAKPOINT);
11561 return rc;
11562}
11563
11564
11565/**
11566 * VM-exit exception handler for #DB (Debug exception).
11567 */
11568static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11569{
11570 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
11571 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
11572 Log6(("XcptDB\n"));
11573
11574 /*
11575 * Get the DR6-like values from the VM-exit qualification and pass them to DBGF
11576 * for processing.
11577 */
11578 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
11579 AssertRCReturn(rc, rc);
11580
11581 /* Refer Intel spec. Table 27-1. "Exit Qualifications for debug exceptions" for the format. */
11582 uint64_t uDR6 = X86_DR6_INIT_VAL;
11583 uDR6 |= ( pVmxTransient->uExitQualification
11584 & (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3 | X86_DR6_BD | X86_DR6_BS));
11585
11586 rc = DBGFRZTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pMixedCtx), uDR6, pVCpu->hm.s.fSingleInstruction);
11587 if (rc == VINF_EM_RAW_GUEST_TRAP)
11588 {
11589 /*
11590 * The exception was for the guest. Update DR6, DR7.GD and
11591 * IA32_DEBUGCTL.LBR before forwarding it.
11592 * (See Intel spec. 27.1 "Architectural State before a VM-Exit".)
11593 */
11594 VMMRZCallRing3Disable(pVCpu);
11595 HM_DISABLE_PREEMPT();
11596
11597 pMixedCtx->dr[6] &= ~X86_DR6_B_MASK;
11598 pMixedCtx->dr[6] |= uDR6;
11599 if (CPUMIsGuestDebugStateActive(pVCpu))
11600 ASMSetDR6(pMixedCtx->dr[6]);
11601
11602 HM_RESTORE_PREEMPT();
11603 VMMRZCallRing3Enable(pVCpu);
11604
11605 rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
11606 AssertRCReturn(rc, rc);
11607
11608 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
11609 pMixedCtx->dr[7] &= ~X86_DR7_GD;
11610
11611 /* Paranoia. */
11612 pMixedCtx->dr[7] &= ~X86_DR7_RAZ_MASK;
11613 pMixedCtx->dr[7] |= X86_DR7_RA1_MASK;
11614
11615 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)pMixedCtx->dr[7]);
11616 AssertRCReturn(rc, rc);
11617
11618 /*
11619 * Raise #DB in the guest.
11620 *
11621 * It is important to reflect what the VM-exit gave us (preserving the interruption-type) rather than use
11622 * hmR0VmxSetPendingXcptDB() as the #DB could've been raised while executing ICEBP and not the 'normal' #DB.
11623 * Thus it -may- trigger different handling in the CPU (like skipped DPL checks). See @bugref{6398}.
11624 *
11625 * Since ICEBP isn't documented on Intel, see AMD spec. 15.20 "Event Injection".
11626 */
11627 rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
11628 rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
11629 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
11630 AssertRCReturn(rc, rc);
11631 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
11632 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
11633 return VINF_SUCCESS;
11634 }
11635
11636 /*
11637 * Not a guest trap, must be a hypervisor related debug event then.
11638 * Update DR6 in case someone is interested in it.
11639 */
11640 AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc));
11641 AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5);
11642 CPUMSetHyperDR6(pVCpu, uDR6);
11643
11644 return rc;
11645}
11646
11647
11648/**
11649 * VM-exit exception handler for #NM (Device-not-available exception), typically
11650 * raised when the guest touches the FPU while its state isn't loaded.
11651 */
11652static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11653{
11654 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
11655
11656 /* We require CR0 and EFER. EFER is always up-to-date. */
11657 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
11658 AssertRCReturn(rc, rc);
11659
11660 /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */
11661 VMMRZCallRing3Disable(pVCpu);
11662 HM_DISABLE_PREEMPT();
11663
11664 /* If the guest FPU was active at the time of the #NM exit, then it's a guest fault. */
11665 if (pVmxTransient->fWasGuestFPUStateActive)
11666 {
11667 rc = VINF_EM_RAW_GUEST_TRAP;
11668 Assert(CPUMIsGuestFPUStateActive(pVCpu) || HMCPU_CF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0));
11669 }
11670 else
11671 {
11672#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
11673 Assert(!pVmxTransient->fWasGuestFPUStateActive);
11674#endif
11675 rc = CPUMR0Trap07Handler(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx);
11676 Assert(rc == VINF_EM_RAW_GUEST_TRAP || (rc == VINF_SUCCESS && CPUMIsGuestFPUStateActive(pVCpu)));
11677 }
11678
11679 HM_RESTORE_PREEMPT();
11680 VMMRZCallRing3Enable(pVCpu);
11681
11682 if (rc == VINF_SUCCESS)
11683 {
11684 /* Guest FPU state was activated, we'll want to change CR0 FPU intercepts before the next VM-reentry. */
11685 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
11686 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
11687 pVCpu->hm.s.fPreloadGuestFpu = true;
11688 }
11689 else
11690 {
11691 /* Forward #NM to the guest. */
11692 Assert(rc == VINF_EM_RAW_GUEST_TRAP);
11693 rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
11694 AssertRCReturn(rc, rc);
11695 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
11696 pVmxTransient->cbInstr, 0 /* error code */, 0 /* GCPtrFaultAddress */);
11697 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
11698 }
11699
11700 return VINF_SUCCESS;
11701}
11702
11703
11704/**
11705 * VM-exit exception handler for #GP (General-protection exception).
11706 *
11707 * @remarks Requires pVmxTransient->uExitIntInfo to be up-to-date.
11708 */
11709static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11710{
11711 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
11712 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
11713
11714 int rc = VERR_INTERNAL_ERROR_5;
11715 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
11716 {
11717#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
11718 /* If the guest is not in real-mode or we have unrestricted execution support, reflect #GP to the guest. */
11719 rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
11720 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
11721 rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
11722 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
11723 AssertRCReturn(rc, rc);
11724 Log4(("#GP Gst: CS:RIP %04x:%08RX64 ErrorCode=%#x CR0=%#RX64 CPL=%u TR=%#04x\n", pMixedCtx->cs.Sel, pMixedCtx->rip,
11725 pVmxTransient->uExitIntErrorCode, pMixedCtx->cr0, CPUMGetGuestCPL(pVCpu), pMixedCtx->tr.Sel));
11726 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
11727 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
11728 return rc;
11729#else
11730 /* We don't intercept #GP. */
11731 AssertMsgFailed(("Unexpected VM-exit caused by #GP exception\n"));
11732 NOREF(pVmxTransient);
11733 return VERR_VMX_UNEXPECTED_EXCEPTION;
11734#endif
11735 }
11736
11737 Assert(CPUMIsGuestInRealModeEx(pMixedCtx));
11738 Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest);
11739
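      /* Real-mode guest without unrestricted execution: the #GP is most likely a privileged instruction
         (CLI, STI, PUSHF, POPF, IRET, INT) trapping under our virtual-8086 setup, so disassemble and
         emulate it here. */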
11740 /* EMInterpretDisasCurrent() requires a lot of the state, save the entire state. */
11741 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
11742 AssertRCReturn(rc, rc);
11743
11744 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
11745 uint32_t cbOp = 0;
11746 PVM pVM = pVCpu->CTX_SUFF(pVM);
11747 rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
11748 if (RT_SUCCESS(rc))
11749 {
11750 rc = VINF_SUCCESS;
11751 Assert(cbOp == pDis->cbInstr);
11752 Log4(("#GP Disas OpCode=%u CS:EIP %04x:%04RX64\n", pDis->pCurInstr->uOpcode, pMixedCtx->cs.Sel, pMixedCtx->rip));
11753 switch (pDis->pCurInstr->uOpcode)
11754 {
11755 case OP_CLI:
11756 {
11757 pMixedCtx->eflags.Bits.u1IF = 0;
11758 pMixedCtx->eflags.Bits.u1RF = 0;
11759 pMixedCtx->rip += pDis->cbInstr;
11760 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
11761 hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx);
11762 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli);
11763 break;
11764 }
11765
11766 case OP_STI:
11767 {
11768 bool fOldIF = pMixedCtx->eflags.Bits.u1IF;
11769 pMixedCtx->eflags.Bits.u1IF = 1;
11770 pMixedCtx->eflags.Bits.u1RF = 0;
11771 pMixedCtx->rip += pDis->cbInstr;
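      /* Only set the interrupt-inhibit (STI shadow) when STI actually switches IF from 0 to 1. */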
11772 if (!fOldIF)
11773 {
11774 EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip);
11775 Assert(VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
11776 }
11777 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
11778 hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx);
11779 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti);
11780 break;
11781 }
11782
11783 case OP_HLT:
11784 {
11785 rc = VINF_EM_HALT;
11786 pMixedCtx->rip += pDis->cbInstr;
11787 pMixedCtx->eflags.Bits.u1RF = 0;
11788 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
11789 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
11790 break;
11791 }
11792
11793 case OP_POPF:
11794 {
11795 Log4(("POPF CS:EIP %04x:%04RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip));
11796 uint32_t cbParm;
11797 uint32_t uMask;
11798 bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF);
11799 if (pDis->fPrefix & DISPREFIX_OPSIZE)
11800 {
11801 cbParm = 4;
11802 uMask = 0xffffffff;
11803 }
11804 else
11805 {
11806 cbParm = 2;
11807 uMask = 0xffff;
11808 }
11809
11810 /* Get the stack pointer & pop the contents of the stack onto Eflags. */
11811 RTGCPTR GCPtrStack = 0;
11812 X86EFLAGS Eflags;
11813 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0,
11814 &GCPtrStack);
11815 if (RT_SUCCESS(rc))
11816 {
11817 Assert(sizeof(Eflags.u32) >= cbParm);
11818 Eflags.u32 = 0;
11819 rc = VBOXSTRICTRC_TODO(PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u32, cbParm, PGMACCESSORIGIN_HM));
11820 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc)); /** @todo allow strict return codes here */
11821 }
11822 if (RT_FAILURE(rc))
11823 {
11824 rc = VERR_EM_INTERPRETER;
11825 break;
11826 }
11827 Log4(("POPF %#x -> %#RX64 mask=%#x RIP=%#RX64\n", Eflags.u, pMixedCtx->rsp, uMask, pMixedCtx->rip));
11828 pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~((X86_EFL_POPF_BITS & uMask) | X86_EFL_RF))
11829 | (Eflags.u32 & X86_EFL_POPF_BITS & uMask);
11830 pMixedCtx->esp += cbParm;
11831 pMixedCtx->esp &= uMask;
11832 pMixedCtx->rip += pDis->cbInstr;
11833 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP
11834 | HM_CHANGED_GUEST_RSP
11835 | HM_CHANGED_GUEST_RFLAGS);
11836 /* Generate a pending-debug exception when stepping over POPF regardless of how POPF modifies EFLAGS.TF. */
11837 if (fStepping)
11838 hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx);
11839
11840 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf);
11841 break;
11842 }
11843
11844 case OP_PUSHF:
11845 {
11846 uint32_t cbParm;
11847 uint32_t uMask;
11848 if (pDis->fPrefix & DISPREFIX_OPSIZE)
11849 {
11850 cbParm = 4;
11851 uMask = 0xffffffff;
11852 }
11853 else
11854 {
11855 cbParm = 2;
11856 uMask = 0xffff;
11857 }
11858
11859 /* Get the stack pointer & push the contents of eflags onto the stack. */
11860 RTGCPTR GCPtrStack = 0;
11861 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), (pMixedCtx->esp - cbParm) & uMask,
11862 SELMTOFLAT_FLAGS_CPL0, &GCPtrStack);
11863 if (RT_FAILURE(rc))
11864 {
11865 rc = VERR_EM_INTERPRETER;
11866 break;
11867 }
11868 X86EFLAGS Eflags = pMixedCtx->eflags;
11869 /* The RF & VM bits are cleared in the EFLAGS image stored on the stack; see the Intel instruction reference for PUSHF. */
11870 Eflags.Bits.u1RF = 0;
11871 Eflags.Bits.u1VM = 0;
11872
11873 rc = VBOXSTRICTRC_TODO(PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u, cbParm, PGMACCESSORIGIN_HM));
11874 if (RT_UNLIKELY(rc != VINF_SUCCESS))
11875 {
11876 AssertMsgFailed(("%Rrc\n", rc)); /** @todo allow strict return codes here */
11877 rc = VERR_EM_INTERPRETER;
11878 break;
11879 }
11880 Log4(("PUSHF %#x -> %#RGv\n", Eflags.u, GCPtrStack));
11881 pMixedCtx->esp -= cbParm;
11882 pMixedCtx->esp &= uMask;
11883 pMixedCtx->rip += pDis->cbInstr;
11884 pMixedCtx->eflags.Bits.u1RF = 0;
11885 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP
11886 | HM_CHANGED_GUEST_RSP
11887 | HM_CHANGED_GUEST_RFLAGS);
11888 hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx);
11889 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf);
11890 break;
11891 }
11892
11893 case OP_IRET:
11894 {
11895 /** @todo Handle 32-bit operand sizes and check stack limits. See Intel
11896 * instruction reference. */
11897 RTGCPTR GCPtrStack = 0;
11898 uint32_t uMask = 0xffff;
11899 bool fStepping = RT_BOOL(pMixedCtx->eflags.Bits.u1TF);
11900 uint16_t aIretFrame[3];
11901 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
11902 {
11903 rc = VERR_EM_INTERPRETER;
11904 break;
11905 }
11906 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0,
11907 &GCPtrStack);
11908 if (RT_SUCCESS(rc))
11909 {
11910 rc = VBOXSTRICTRC_TODO(PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame),
11911 PGMACCESSORIGIN_HM));
11912 AssertMsg(rc == VINF_SUCCESS, ("%Rrc\n", rc)); /** @todo allow strict return codes here */
11913 }
11914 if (RT_FAILURE(rc))
11915 {
11916 rc = VERR_EM_INTERPRETER;
11917 break;
11918 }
11919 pMixedCtx->eip = 0;
11920 pMixedCtx->ip = aIretFrame[0];
11921 pMixedCtx->cs.Sel = aIretFrame[1];
11922 pMixedCtx->cs.ValidSel = aIretFrame[1];
11923 pMixedCtx->cs.u64Base = (uint64_t)pMixedCtx->cs.Sel << 4;
11924 pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ((UINT32_C(0xffff0000) | X86_EFL_1) & ~X86_EFL_RF))
11925 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
11926 pMixedCtx->sp += sizeof(aIretFrame);
11927 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP
11928 | HM_CHANGED_GUEST_SEGMENT_REGS
11929 | HM_CHANGED_GUEST_RSP
11930 | HM_CHANGED_GUEST_RFLAGS);
11931 /* Generate a pending-debug exception when stepping over IRET regardless of how IRET modifies EFLAGS.TF. */
11932 if (fStepping)
11933 hmR0VmxSetPendingDebugXcpt(pVCpu, pMixedCtx);
11934 Log4(("IRET %#RX32 to %04x:%04x\n", GCPtrStack, pMixedCtx->cs.Sel, pMixedCtx->ip));
11935 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret);
11936 break;
11937 }
11938
11939 case OP_INT:
11940 {
11941 uint16_t uVector = pDis->Param1.uValue & 0xff;
11942 hmR0VmxSetPendingIntN(pVCpu, pMixedCtx, uVector, pDis->cbInstr);
11943 /* INT clears EFLAGS.TF, we mustn't set any pending debug exceptions here. */
11944 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
11945 break;
11946 }
11947
11948 case OP_INTO:
11949 {
11950 if (pMixedCtx->eflags.Bits.u1OF)
11951 {
11952 hmR0VmxSetPendingXcptOF(pVCpu, pMixedCtx, pDis->cbInstr);
11953 /* INTO clears EFLAGS.TF, we mustn't set any pending debug exceptions here. */
11954 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
11955 }
11956 else
11957 {
11958 pMixedCtx->eflags.Bits.u1RF = 0;
11959 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RFLAGS);
11960 }
11961 break;
11962 }
11963
11964 default:
11965 {
11966 pMixedCtx->eflags.Bits.u1RF = 0; /* This is correct most of the time... */
11967 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pMixedCtx), 0 /* pvFault */,
11968 EMCODETYPE_SUPERVISOR);
11969 rc = VBOXSTRICTRC_VAL(rc2);
11970 HMCPU_CF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
11971 /** @todo We have to set pending-debug exceptions here when the guest is
11972 * single-stepping depending on the instruction that was interpreted. */
11973 Log4(("#GP rc=%Rrc\n", rc));
11974 break;
11975 }
11976 }
11977 }
11978 else
11979 rc = VERR_EM_INTERPRETER;
11980
11981 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
11982 ("#GP Unexpected rc=%Rrc\n", rc));
11983 return rc;
11984}
11985
11986
11987#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
11988/**
11989 * VM-exit exception handler wrapper for generic exceptions. Simply re-injects
11990 * the exception reported in the VMX transient structure back into the VM.
11991 *
11992 * @remarks Requires uExitIntInfo in the VMX transient structure to be
11993 * up-to-date.
11994 */
11995static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
11996{
11997 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
11998
11999 /* Re-inject the exception into the guest. This cannot be a double-fault condition which would have been handled in
12000 hmR0VmxCheckExitDueToEventDelivery(). */
12001 int rc = hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
12002 rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient);
12003 AssertRCReturn(rc, rc);
12004 Assert(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO);
12005
12006#ifdef DEBUG_ramshankar
12007 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
12008 uint8_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVmxTransient->uExitIntInfo);
12009 Log(("hmR0VmxExitXcptGeneric: Reinjecting Xcpt. uVector=%#x cs:rip=%#04x:%#RX64\n", uVector, pMixedCtx->cs.Sel, pMixedCtx->rip));
12010#endif
12011
12012 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
12013 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
12014 return VINF_SUCCESS;
12015}
12016#endif
12017
12018
12019/**
12020 * VM-exit exception handler for #PF (Page-fault exception).
12021 */
12022static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
12023{
12024 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
12025 PVM pVM = pVCpu->CTX_SUFF(pVM);
12026 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
12027 rc |= hmR0VmxReadExitIntInfoVmcs(pVmxTransient);
12028 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient);
12029 AssertRCReturn(rc, rc);
12030
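      /* With nested paging the guest normally handles its own page faults; the block below is only
         compiled for the debug configurations that force-intercept #PF and simply reflects the fault
         (or a #DF for a vectoring #PF) back to the guest. */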
12031#if defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) || defined(HMVMX_ALWAYS_TRAP_PF)
12032 if (pVM->hm.s.fNestedPaging)
12033 {
12034 pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
12035 if (RT_LIKELY(!pVmxTransient->fVectoringDoublePF))
12036 {
12037 pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */
12038 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
12039 0 /* cbInstr */, pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQualification);
12040 }
12041 else
12042 {
12043 /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
12044 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
12045 Log4(("Pending #DF due to vectoring #PF. NP\n"));
12046 }
12047 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
12048 return rc;
12049 }
12050#else
12051 Assert(!pVM->hm.s.fNestedPaging);
12052 NOREF(pVM);
12053#endif
12054
12055 /* If it's a vectoring #PF, emulate the original event injection as PGMTrap0eHandler() is incapable
12056 of differentiating between instruction emulation and event injection that caused a #PF. See @bugref{6607}. */
12057 if (pVmxTransient->fVectoringPF)
12058 {
12059 Assert(pVCpu->hm.s.Event.fPending);
12060 return VINF_EM_RAW_INJECT_TRPM_EVENT;
12061 }
12062
12063 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
12064 AssertRCReturn(rc, rc);
12065
12066 Log4(("#PF: cr2=%#RX64 cs:rip=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", pVmxTransient->uExitQualification,
12067 pMixedCtx->cs.Sel, pMixedCtx->rip, pVmxTransient->uExitIntErrorCode, pMixedCtx->cr3));
12068
12069 TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQualification, (RTGCUINT)pVmxTransient->uExitIntErrorCode);
12070 rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntErrorCode, CPUMCTX2CORE(pMixedCtx),
12071 (RTGCPTR)pVmxTransient->uExitQualification);
12072
12073 Log4(("#PF: rc=%Rrc\n", rc));
12074 if (rc == VINF_SUCCESS)
12075 {
12076 /* Successfully synced shadow page tables or emulated an MMIO instruction. */
12077 /** @todo this isn't quite right, what if guest does lgdt with some MMIO
12078 * memory? We don't update the whole state here... */
12079 HMCPU_CF_SET(pVCpu, HM_CHANGED_GUEST_RIP
12080 | HM_CHANGED_GUEST_RSP
12081 | HM_CHANGED_GUEST_RFLAGS
12082 | HM_CHANGED_VMX_GUEST_APIC_STATE);
12083 TRPMResetTrap(pVCpu);
12084 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
12085 return rc;
12086 }
12087
12088 if (rc == VINF_EM_RAW_GUEST_TRAP)
12089 {
12090 if (!pVmxTransient->fVectoringDoublePF)
12091 {
12092 /* It's a guest page fault and needs to be reflected to the guest. */
12093 uint32_t uGstErrorCode = TRPMGetErrorCode(pVCpu);
12094 TRPMResetTrap(pVCpu);
12095 pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */
12096 pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */
12097 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
12098 0 /* cbInstr */, uGstErrorCode, pVmxTransient->uExitQualification);
12099 }
12100 else
12101 {
12102 /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
12103 TRPMResetTrap(pVCpu);
12104 pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */
12105 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
12106 Log4(("#PF: Pending #DF due to vectoring #PF\n"));
12107 }
12108
12109 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
12110 return VINF_SUCCESS;
12111 }
12112
12113 TRPMResetTrap(pVCpu);
12114 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
12115 return rc;
12116}
12117
12118/** @} */
12119