VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@48618

Last change on this file since 48618 was 48570, checked in by vboxsync, 12 years ago

VMM/HMVMXR0, HMSVMR0: Restore hyper/guest DR7 in accordance with what we load.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 452.8 KB
1/* $Id: HMVMXR0.cpp 48570 2013-09-19 23:41:35Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_HM
22#include <iprt/asm-amd64-x86.h>
23#include <iprt/thread.h>
24#include <iprt/string.h>
25
26#include "HMInternal.h"
27#include <VBox/vmm/vm.h>
28#include "HMVMXR0.h"
29#include <VBox/vmm/pdmapi.h>
30#include <VBox/vmm/dbgf.h>
31#include <VBox/vmm/iem.h>
32#include <VBox/vmm/iom.h>
33#include <VBox/vmm/selm.h>
34#include <VBox/vmm/tm.h>
35#ifdef VBOX_WITH_REM
36# include <VBox/vmm/rem.h>
37#endif
38#ifdef DEBUG_ramshankar
39#define HMVMX_SAVE_FULL_GUEST_STATE
40#define HMVMX_SYNC_FULL_GUEST_STATE
41#define HMVMX_ALWAYS_CHECK_GUEST_STATE
42#define HMVMX_ALWAYS_TRAP_ALL_XCPTS
43#define HMVMX_ALWAYS_TRAP_PF
44#define HMVMX_ALWAYS_SWAP_FPU_STATE
45#endif
46
47
48/*******************************************************************************
49* Defined Constants And Macros *
50*******************************************************************************/
51#if defined(RT_ARCH_AMD64)
52# define HMVMX_IS_64BIT_HOST_MODE() (true)
53typedef RTHCUINTREG HMVMXHCUINTREG;
54#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
55extern "C" uint32_t g_fVMXIs64bitHost;
56# define HMVMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
57typedef uint64_t HMVMXHCUINTREG;
58#else
59# define HMVMX_IS_64BIT_HOST_MODE() (false)
60typedef RTHCUINTREG HMVMXHCUINTREG;
61#endif
62
63/** Use the function table. */
64#define HMVMX_USE_FUNCTION_TABLE
65
66/** Determine which tagged-TLB flush handler to use. */
67#define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0
68#define HMVMX_FLUSH_TAGGED_TLB_EPT 1
69#define HMVMX_FLUSH_TAGGED_TLB_VPID 2
70#define HMVMX_FLUSH_TAGGED_TLB_NONE 3
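/*
 * Dispatch sketch (illustrative only; the uFlushTaggedTlb member name is an assumption,
 * it is not defined in this excerpt): hmR0VmxFlushTaggedTlb() further down selects one
 * of the tagged-TLB flush workers based on the value chosen at setup time, roughly:
 *
 *     switch (pVM->hm.s.vmx.uFlushTaggedTlb)
 *     {
 *         case HMVMX_FLUSH_TAGGED_TLB_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pVM, pVCpu, pCpu); break;
 *         case HMVMX_FLUSH_TAGGED_TLB_EPT:      hmR0VmxFlushTaggedTlbEpt(pVM, pVCpu, pCpu);  break;
 *         case HMVMX_FLUSH_TAGGED_TLB_VPID:     hmR0VmxFlushTaggedTlbVpid(pVM, pVCpu, pCpu); break;
 *         case HMVMX_FLUSH_TAGGED_TLB_NONE:     hmR0VmxFlushTaggedTlbNone(pVM, pVCpu, pCpu); break;
 *     }
 */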
71
72/** @name Updated-guest-state flags.
73 * @{ */
74#define HMVMX_UPDATED_GUEST_RIP RT_BIT(0)
75#define HMVMX_UPDATED_GUEST_RSP RT_BIT(1)
76#define HMVMX_UPDATED_GUEST_RFLAGS RT_BIT(2)
77#define HMVMX_UPDATED_GUEST_CR0 RT_BIT(3)
78#define HMVMX_UPDATED_GUEST_CR3 RT_BIT(4)
79#define HMVMX_UPDATED_GUEST_CR4 RT_BIT(5)
80#define HMVMX_UPDATED_GUEST_GDTR RT_BIT(6)
81#define HMVMX_UPDATED_GUEST_IDTR RT_BIT(7)
82#define HMVMX_UPDATED_GUEST_LDTR RT_BIT(8)
83#define HMVMX_UPDATED_GUEST_TR RT_BIT(9)
84#define HMVMX_UPDATED_GUEST_SEGMENT_REGS RT_BIT(10)
85#define HMVMX_UPDATED_GUEST_DEBUG RT_BIT(11)
86#define HMVMX_UPDATED_GUEST_FS_BASE_MSR RT_BIT(12)
87#define HMVMX_UPDATED_GUEST_GS_BASE_MSR RT_BIT(13)
88#define HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR RT_BIT(14)
89#define HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR RT_BIT(15)
90#define HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR RT_BIT(16)
91#define HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS RT_BIT(17)
92#define HMVMX_UPDATED_GUEST_ACTIVITY_STATE RT_BIT(18)
93#define HMVMX_UPDATED_GUEST_APIC_STATE RT_BIT(19)
94#define HMVMX_UPDATED_GUEST_ALL ( HMVMX_UPDATED_GUEST_RIP \
95 | HMVMX_UPDATED_GUEST_RSP \
96 | HMVMX_UPDATED_GUEST_RFLAGS \
97 | HMVMX_UPDATED_GUEST_CR0 \
98 | HMVMX_UPDATED_GUEST_CR3 \
99 | HMVMX_UPDATED_GUEST_CR4 \
100 | HMVMX_UPDATED_GUEST_GDTR \
101 | HMVMX_UPDATED_GUEST_IDTR \
102 | HMVMX_UPDATED_GUEST_LDTR \
103 | HMVMX_UPDATED_GUEST_TR \
104 | HMVMX_UPDATED_GUEST_SEGMENT_REGS \
105 | HMVMX_UPDATED_GUEST_DEBUG \
106 | HMVMX_UPDATED_GUEST_FS_BASE_MSR \
107 | HMVMX_UPDATED_GUEST_GS_BASE_MSR \
108 | HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR \
109 | HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR \
110 | HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR \
111 | HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS \
112 | HMVMX_UPDATED_GUEST_ACTIVITY_STATE \
113 | HMVMX_UPDATED_GUEST_APIC_STATE)
114/** @} */
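/*
 * Usage sketch (illustrative only; the fUpdatedGuestState member name is an assumption
 * based on how these flags are consumed by the save-guest-state helpers later in this
 * file): each guest-CPU field is read from the VMCS at most once per VM-exit, e.g. for RIP:
 *
 *     if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RIP))
 *     {
 *         uint64_t u64Val;
 *         int rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val);
 *         AssertRCReturn(rc, rc);
 *         pMixedCtx->rip = u64Val;
 *         pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_RIP;
 *     }
 */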
115
116/** @name VMX transient-structure read flags.
117 * Flags to skip redundant reads of some common VMCS fields that are not part of
118 * the guest-CPU state but are in the transient structure.
119 * @{ */
120#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO RT_BIT(0)
121#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE RT_BIT(1)
122#define HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION RT_BIT(2)
123#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN RT_BIT(3)
124#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO RT_BIT(4)
125#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE RT_BIT(5)
#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO RT_BIT(6)
126/** @} */
127
128/** @name States of the VMCS.
129 *
130 * This does not reflect all possible VMCS states but currently only those
131 * needed for maintaining the VMCS consistently even when thread-context hooks
132 * are used. This may be extended later (e.g. for nested virtualization).
133 * @{
134 */
135#define HMVMX_VMCS_STATE_CLEAR RT_BIT(0)
136#define HMVMX_VMCS_STATE_ACTIVE RT_BIT(1)
137#define HMVMX_VMCS_STATE_LAUNCHED RT_BIT(2)
138/** @} */
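/*
 * For reference, the hardware transitions these flags track (simplified from the Intel
 * SDM):
 *
 *     VMCLEAR  -> HMVMX_VMCS_STATE_CLEAR     (VMCS is clear, not current on any CPU)
 *     VMPTRLD  -> HMVMX_VMCS_STATE_ACTIVE    (VMCS is active and current on this CPU)
 *     VMLAUNCH -> HMVMX_VMCS_STATE_LAUNCHED  (subsequent entries must use VMRESUME
 *                                             until the VMCS is VMCLEAR'd again)
 */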
139
140/**
141 * Exception bitmap mask for real-mode guests (real-on-v86).
142 *
143 * We need to intercept all exceptions manually (except #PF). #NM is also
144 * handled separately, see hmR0VmxLoadSharedCR0(). #PF need not be intercepted
145 * even in real-mode if we have Nested Paging support.
146 */
147#define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) | RT_BIT(X86_XCPT_DB) | RT_BIT(X86_XCPT_NMI) \
148 | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \
149 | RT_BIT(X86_XCPT_UD) /* RT_BIT(X86_XCPT_NM) */ | RT_BIT(X86_XCPT_DF) \
150 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \
151 | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \
152 | RT_BIT(X86_XCPT_MF) | RT_BIT(X86_XCPT_AC) | RT_BIT(X86_XCPT_MC) \
153 | RT_BIT(X86_XCPT_XF))
154
155/**
156 * Exception bitmap mask for all contributory exceptions.
157 *
158 * Page fault is deliberately excluded here as it's conditional as to whether
159 * it's contributory or benign. Page faults are handled separately.
160 */
161#define HMVMX_CONTRIBUTORY_XCPT_MASK ( RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_TS) \
162 | RT_BIT(X86_XCPT_DE))
163
164/** Maximum VM-instruction error number. */
165#define HMVMX_INSTR_ERROR_MAX 28
166
167/** Profiling macro. */
168#ifdef HM_PROFILE_EXIT_DISPATCH
169# define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed)
170# define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed)
171#else
172# define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0)
173# define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0)
174#endif
175
176/** Assert that preemption is disabled or covered by thread-context hooks. */
177#define HMVMX_ASSERT_PREEMPT_SAFE() Assert( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \
178 || !RTThreadPreemptIsEnabled(NIL_RTTHREAD));
179
180/** Assert that we haven't migrated CPUs when thread-context hooks are not
181 * used. */
182#define HMVMX_ASSERT_CPU_SAFE() AssertMsg( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \
183 || pVCpu->hm.s.idEnteredCpu == RTMpCpuId(), \
184 ("Illegal migration! Entered on CPU %u Current %u\n", \
185 pVCpu->hm.s.idEnteredCpu, RTMpCpuId()));
186
187/*******************************************************************************
188* Structures and Typedefs *
189*******************************************************************************/
190/**
191 * VMX transient state.
192 *
193 * A state structure for holding miscellaneous information across
194 * VMX non-root operation and restored after the transition.
195 */
196typedef struct VMXTRANSIENT
197{
198 /** The host's rflags/eflags. */
199 RTCCUINTREG uEflags;
200#if HC_ARCH_BITS == 32
201 uint32_t u32Alignment0;
202#endif
203 /** The guest's LSTAR MSR value used for TPR patching for 32-bit guests. */
204 uint64_t u64LStarMsr;
205 /** The guest's TPR value used for TPR shadowing. */
206 uint8_t u8GuestTpr;
207 /** Alignment. */
208 uint8_t abAlignment0[7];
209
210 /** The basic VM-exit reason. */
211 uint16_t uExitReason;
212 /** Alignment. */
213 uint16_t u16Alignment0;
214 /** The VM-exit interruption error code. */
215 uint32_t uExitIntrErrorCode;
216 /** The VM-exit exit qualification. */
217 uint64_t uExitQualification;
218
219 /** The VM-exit interruption-information field. */
220 uint32_t uExitIntrInfo;
221 /** The VM-exit instruction-length field. */
222 uint32_t cbInstr;
223 /** The VM-exit instruction-information field. */
224 union
225 {
226 /** Plain unsigned int representation. */
227 uint32_t u;
228 /** INS and OUTS information. */
229 struct
230 {
231 uint32_t u6Reserved0 : 6;
232 /** The address size; 0=16-bit, 1=32-bit, 2=64-bit, rest undefined. */
233 uint32_t u3AddrSize : 3;
234 uint32_t u5Reserved1 : 5;
235 /** The segment register (X86_SREG_XXX). */
236 uint32_t iSegReg : 3;
237 uint32_t uReserved2 : 14;
238 } StrIo;
239 } ExitInstrInfo;
240 /** Whether the VM-entry failed or not. */
241 bool fVMEntryFailed;
242 /** Alignment. */
243 uint8_t abAlignment1[3];
244
245 /** The VM-entry interruption-information field. */
246 uint32_t uEntryIntrInfo;
247 /** The VM-entry exception error code field. */
248 uint32_t uEntryXcptErrorCode;
249 /** The VM-entry instruction length field. */
250 uint32_t cbEntryInstr;
251
252 /** IDT-vectoring information field. */
253 uint32_t uIdtVectoringInfo;
254 /** IDT-vectoring error code. */
255 uint32_t uIdtVectoringErrorCode;
256
257 /** Mask of currently read VMCS fields; HMVMX_UPDATED_TRANSIENT_*. */
258 uint32_t fVmcsFieldsRead;
259 /** Whether TSC-offsetting should be set up before VM-entry. */
260 bool fUpdateTscOffsettingAndPreemptTimer;
261 /** Whether the VM-exit was caused by a page-fault during delivery of a
262 * contributory exception or a page-fault. */
263 bool fVectoringPF;
264 /** Whether the guest FPU was active at the time of VM-exit. */
265 bool fWasGuestFPUStateActive;
266 /** Whether the guest debug state was active at the time of VM-exit. */
267 bool fWasGuestDebugStateActive;
268 /** Whether the hyper debug state was active at the time of VM-exit. */
269 bool fWasHyperDebugStateActive;
270} VMXTRANSIENT;
271AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, sizeof(uint64_t));
272AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntrInfo, sizeof(uint64_t));
273AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntrInfo, sizeof(uint64_t));
274AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t));
275/** Pointer to VMX transient state. */
276typedef VMXTRANSIENT *PVMXTRANSIENT;
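/*
 * Decoding sketch (illustrative only, not used elsewhere in this excerpt): how the StrIo
 * view of VMXTRANSIENT::ExitInstrInfo is typically interpreted for an INS/OUTS VM-exit.
 *
 *     uint32_t const cbAddrSize = 2U << pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize;  // 2, 4 or 8 bytes
 *     uint32_t const iSegReg    = pVmxTransient->ExitInstrInfo.StrIo.iSegReg;           // X86_SREG_XXX
 */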
277
278
279/**
280 * MSR-bitmap read permissions.
281 */
282typedef enum VMXMSREXITREAD
283{
284 /** Reading this MSR causes a VM-exit. */
285 VMXMSREXIT_INTERCEPT_READ = 0xb,
286 /** Reading this MSR does not cause a VM-exit. */
287 VMXMSREXIT_PASSTHRU_READ
288} VMXMSREXITREAD;
289
290/**
291 * MSR-bitmap write permissions.
292 */
293typedef enum VMXMSREXITWRITE
294{
295 /** Writing to this MSR causes a VM-exit. */
296 VMXMSREXIT_INTERCEPT_WRITE = 0xd,
297 /** Writing to this MSR does not cause a VM-exit. */
298 VMXMSREXIT_PASSTHRU_WRITE
299} VMXMSREXITWRITE;
300
301/**
302 * VMX VM-exit handler.
303 *
304 * @returns VBox status code.
305 * @param pVCpu Pointer to the VMCPU.
306 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
307 * out-of-sync. Make sure to update the required
308 * fields before using them.
309 * @param pVmxTransient Pointer to the VMX-transient structure.
310 */
311#ifndef HMVMX_USE_FUNCTION_TABLE
312typedef int FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
313#else
314typedef DECLCALLBACK(int) FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
315/** Pointer to VM-exit handler. */
316typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER;
317#endif
318
319
320/*******************************************************************************
321* Internal Functions *
322*******************************************************************************/
323static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
324static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
325static void hmR0VmxClearEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx);
326static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntrInfo, uint32_t cbInstr,
327 uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, uint32_t *puIntrState);
328#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
329static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu);
330#endif
331#ifndef HMVMX_USE_FUNCTION_TABLE
332DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason);
333# define HMVMX_EXIT_DECL static int
334#else
335# define HMVMX_EXIT_DECL static DECLCALLBACK(int)
336#endif
337
338/** @name VM-exit handlers.
339 * @{
340 */
341static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi;
342static FNVMXEXITHANDLER hmR0VmxExitExtInt;
343static FNVMXEXITHANDLER hmR0VmxExitTripleFault;
344static FNVMXEXITHANDLER hmR0VmxExitInitSignal;
345static FNVMXEXITHANDLER hmR0VmxExitSipi;
346static FNVMXEXITHANDLER hmR0VmxExitIoSmi;
347static FNVMXEXITHANDLER hmR0VmxExitSmi;
348static FNVMXEXITHANDLER hmR0VmxExitIntWindow;
349static FNVMXEXITHANDLER hmR0VmxExitNmiWindow;
350static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch;
351static FNVMXEXITHANDLER hmR0VmxExitCpuid;
352static FNVMXEXITHANDLER hmR0VmxExitGetsec;
353static FNVMXEXITHANDLER hmR0VmxExitHlt;
354static FNVMXEXITHANDLER hmR0VmxExitInvd;
355static FNVMXEXITHANDLER hmR0VmxExitInvlpg;
356static FNVMXEXITHANDLER hmR0VmxExitRdpmc;
357static FNVMXEXITHANDLER hmR0VmxExitRdtsc;
358static FNVMXEXITHANDLER hmR0VmxExitRsm;
359static FNVMXEXITHANDLER hmR0VmxExitSetPendingXcptUD;
360static FNVMXEXITHANDLER hmR0VmxExitMovCRx;
361static FNVMXEXITHANDLER hmR0VmxExitMovDRx;
362static FNVMXEXITHANDLER hmR0VmxExitIoInstr;
363static FNVMXEXITHANDLER hmR0VmxExitRdmsr;
364static FNVMXEXITHANDLER hmR0VmxExitWrmsr;
365static FNVMXEXITHANDLER hmR0VmxExitErrInvalidGuestState;
366static FNVMXEXITHANDLER hmR0VmxExitErrMsrLoad;
367static FNVMXEXITHANDLER hmR0VmxExitErrUndefined;
368static FNVMXEXITHANDLER hmR0VmxExitMwait;
369static FNVMXEXITHANDLER hmR0VmxExitMtf;
370static FNVMXEXITHANDLER hmR0VmxExitMonitor;
371static FNVMXEXITHANDLER hmR0VmxExitPause;
372static FNVMXEXITHANDLER hmR0VmxExitErrMachineCheck;
373static FNVMXEXITHANDLER hmR0VmxExitTprBelowThreshold;
374static FNVMXEXITHANDLER hmR0VmxExitApicAccess;
375static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess;
376static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess;
377static FNVMXEXITHANDLER hmR0VmxExitEptViolation;
378static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig;
379static FNVMXEXITHANDLER hmR0VmxExitRdtscp;
380static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer;
381static FNVMXEXITHANDLER hmR0VmxExitWbinvd;
382static FNVMXEXITHANDLER hmR0VmxExitXsetbv;
383static FNVMXEXITHANDLER hmR0VmxExitRdrand;
384static FNVMXEXITHANDLER hmR0VmxExitInvpcid;
385/** @} */
386
387static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
388static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
389static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
390static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
391static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
392static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
393static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
394static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
395
396/*******************************************************************************
397* Global Variables *
398*******************************************************************************/
399#ifdef HMVMX_USE_FUNCTION_TABLE
400
401/**
402 * VMX_EXIT dispatch table.
403 */
404static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] =
405{
406 /* 00 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi,
407 /* 01 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt,
408 /* 02 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault,
409 /* 03 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitInitSignal,
410 /* 04 VMX_EXIT_SIPI */ hmR0VmxExitSipi,
411 /* 05 VMX_EXIT_IO_SMI */ hmR0VmxExitIoSmi,
412 /* 06 VMX_EXIT_SMI */ hmR0VmxExitSmi,
413 /* 07 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow,
414 /* 08 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow,
415 /* 09 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch,
416 /* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid,
417 /* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec,
418 /* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt,
419 /* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd,
420 /* 14 VMX_EXIT_INVLPG */ hmR0VmxExitInvlpg,
421 /* 15 VMX_EXIT_RDPMC */ hmR0VmxExitRdpmc,
422 /* 16 VMX_EXIT_RDTSC */ hmR0VmxExitRdtsc,
423 /* 17 VMX_EXIT_RSM */ hmR0VmxExitRsm,
424 /* 18 VMX_EXIT_VMCALL */ hmR0VmxExitSetPendingXcptUD,
425 /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitSetPendingXcptUD,
426 /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitSetPendingXcptUD,
427 /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitSetPendingXcptUD,
428 /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitSetPendingXcptUD,
429 /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitSetPendingXcptUD,
430 /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitSetPendingXcptUD,
431 /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitSetPendingXcptUD,
432 /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitSetPendingXcptUD,
433 /* 27 VMX_EXIT_VMXON */ hmR0VmxExitSetPendingXcptUD,
434 /* 28 VMX_EXIT_MOV_CRX */ hmR0VmxExitMovCRx,
435 /* 29 VMX_EXIT_MOV_DRX */ hmR0VmxExitMovDRx,
436 /* 30 VMX_EXIT_IO_INSTR */ hmR0VmxExitIoInstr,
437 /* 31 VMX_EXIT_RDMSR */ hmR0VmxExitRdmsr,
438 /* 32 VMX_EXIT_WRMSR */ hmR0VmxExitWrmsr,
439 /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ hmR0VmxExitErrInvalidGuestState,
440 /* 34 VMX_EXIT_ERR_MSR_LOAD */ hmR0VmxExitErrMsrLoad,
441 /* 35 UNDEFINED */ hmR0VmxExitErrUndefined,
442 /* 36 VMX_EXIT_MWAIT */ hmR0VmxExitMwait,
443 /* 37 VMX_EXIT_MTF */ hmR0VmxExitMtf,
444 /* 38 UNDEFINED */ hmR0VmxExitErrUndefined,
445 /* 39 VMX_EXIT_MONITOR */ hmR0VmxExitMonitor,
446 /* 40 VMX_EXIT_PAUSE */ hmR0VmxExitPause,
447 /* 41 VMX_EXIT_ERR_MACHINE_CHECK */ hmR0VmxExitErrMachineCheck,
448 /* 42 UNDEFINED */ hmR0VmxExitErrUndefined,
449 /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ hmR0VmxExitTprBelowThreshold,
450 /* 44 VMX_EXIT_APIC_ACCESS */ hmR0VmxExitApicAccess,
451 /* 45 UNDEFINED */ hmR0VmxExitErrUndefined,
452 /* 46 VMX_EXIT_XDTR_ACCESS */ hmR0VmxExitXdtrAccess,
453 /* 47 VMX_EXIT_TR_ACCESS */ hmR0VmxExitXdtrAccess,
454 /* 48 VMX_EXIT_EPT_VIOLATION */ hmR0VmxExitEptViolation,
455 /* 49 VMX_EXIT_EPT_MISCONFIG */ hmR0VmxExitEptMisconfig,
456 /* 50 VMX_EXIT_INVEPT */ hmR0VmxExitSetPendingXcptUD,
457 /* 51 VMX_EXIT_RDTSCP */ hmR0VmxExitRdtscp,
458 /* 52 VMX_EXIT_PREEMPT_TIMER */ hmR0VmxExitPreemptTimer,
459 /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitSetPendingXcptUD,
460 /* 54 VMX_EXIT_WBINVD */ hmR0VmxExitWbinvd,
461 /* 55 VMX_EXIT_XSETBV */ hmR0VmxExitXsetbv,
462 /* 56 UNDEFINED */ hmR0VmxExitErrUndefined,
463 /* 57 VMX_EXIT_RDRAND */ hmR0VmxExitRdrand,
464 /* 58 VMX_EXIT_INVPCID */ hmR0VmxExitInvpcid,
465 /* 59 VMX_EXIT_VMFUNC */ hmR0VmxExitSetPendingXcptUD
466};
467#endif /* HMVMX_USE_FUNCTION_TABLE */
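/*
 * Usage sketch (illustrative only; variable names are placeholders): the common VM-exit
 * path indexes this table with the basic exit reason after range-checking it, roughly:
 *
 *     AssertMsg(pVmxTransient->uExitReason <= VMX_EXIT_MAX, ("%#x\n", pVmxTransient->uExitReason));
 *     int rc = g_apfnVMExitHandlers[pVmxTransient->uExitReason](pVCpu, pMixedCtx, pVmxTransient);
 */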
468
469#ifdef VBOX_STRICT
470static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] =
471{
472 /* 0 */ "(Not Used)",
473 /* 1 */ "VMCALL executed in VMX root operation.",
474 /* 2 */ "VMCLEAR with invalid physical address.",
475 /* 3 */ "VMCLEAR with VMXON pointer.",
476 /* 4 */ "VMLAUNCH with non-clear VMCS.",
477 /* 5 */ "VMRESUME with non-launched VMCS.",
478 /* 6 */ "VMRESUME after VMXOFF.",
479 /* 7 */ "VM entry with invalid control fields.",
480 /* 8 */ "VM entry with invalid host state fields.",
481 /* 9 */ "VMPTRLD with invalid physical address.",
482 /* 10 */ "VMPTRLD with VMXON pointer.",
483 /* 11 */ "VMPTRLD with incorrect revision identifier.",
484 /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.",
485 /* 13 */ "VMWRITE to read-only VMCS component.",
486 /* 14 */ "(Not Used)",
487 /* 15 */ "VMXON executed in VMX root operation.",
488 /* 16 */ "VM entry with invalid executive-VMCS pointer.",
489 /* 17 */ "VM entry with non-launched executive VMCS.",
490 /* 18 */ "VM entry with executive-VMCS pointer not VMXON pointer.",
491 /* 19 */ "VMCALL with non-clear VMCS.",
492 /* 20 */ "VMCALL with invalid VM-exit control fields.",
493 /* 21 */ "(Not Used)",
494 /* 22 */ "VMCALL with incorrect MSEG revision identifier.",
495 /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.",
496 /* 24 */ "VMCALL with invalid SMM-monitor features.",
497 /* 25 */ "VM entry with invalid VM-execution control fields in executive VMCS.",
498 /* 26 */ "VM entry with events blocked by MOV SS.",
499 /* 27 */ "(Not Used)",
500 /* 28 */ "Invalid operand to INVEPT/INVVPID."
501};
502#endif /* VBOX_STRICT */
503
504
505
506/**
507 * Updates the VM's last error record. If there was a VMX instruction error,
508 * reads the error data from the VMCS and updates VCPU's last error record as
509 * well.
510 *
511 * @param pVM Pointer to the VM.
512 * @param pVCpu Pointer to the VMCPU (can be NULL if @a rc is not
513 * VERR_VMX_UNABLE_TO_START_VM or
514 * VERR_VMX_INVALID_VMCS_FIELD).
515 * @param rc The error code.
516 */
517static void hmR0VmxUpdateErrorRecord(PVM pVM, PVMCPU pVCpu, int rc)
518{
519 AssertPtr(pVM);
520 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
521 || rc == VERR_VMX_UNABLE_TO_START_VM)
522 {
523 AssertPtrReturnVoid(pVCpu);
524 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
525 }
526 pVM->hm.s.lLastError = rc;
527}
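/*
 * Usage sketch (illustrative only): translating the instruction error captured above into
 * the human-readable strings in g_apszVmxInstrErrors (strict builds only), clamping the
 * index so an unexpected value cannot read past the table:
 *
 *     uint32_t const uInstrError = pVCpu->hm.s.vmx.LastError.u32InstrError;
 *     if (uInstrError <= HMVMX_INSTR_ERROR_MAX)
 *         Log(("VM-instruction error %u: %s\n", uInstrError, g_apszVmxInstrErrors[uInstrError]));
 */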
528
529
530/**
531 * Reads the VM-entry interruption-information field from the VMCS into the VMX
532 * transient structure.
533 *
534 * @returns VBox status code.
535 * @param pVmxTransient Pointer to the VMX transient structure.
536 *
537 * @remarks No-long-jump zone!!!
538 */
539DECLINLINE(int) hmR0VmxReadEntryIntrInfoVmcs(PVMXTRANSIENT pVmxTransient)
540{
541 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntrInfo);
542 AssertRCReturn(rc, rc);
543 return VINF_SUCCESS;
544}
545
546
547/**
548 * Reads the VM-entry exception error code field from the VMCS into
549 * the VMX transient structure.
550 *
551 * @returns VBox status code.
552 * @param pVmxTransient Pointer to the VMX transient structure.
553 *
554 * @remarks No-long-jump zone!!!
555 */
556DECLINLINE(int) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
557{
558 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode);
559 AssertRCReturn(rc, rc);
560 return VINF_SUCCESS;
561}
562
563
564/**
565 * Reads the VM-entry instruction length field from the VMCS into the
566 * VMX transient structure.
567 *
568 * @returns VBox status code.
569 * @param pVCpu Pointer to the VMCPU.
570 * @param pVmxTransient Pointer to the VMX transient structure.
571 *
572 * @remarks No-long-jump zone!!!
573 */
574DECLINLINE(int) hmR0VmxReadEntryInstrLenVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
575{
576 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr);
577 AssertRCReturn(rc, rc);
578 return VINF_SUCCESS;
579}
580
581
582/**
583 * Reads the VM-exit interruption-information field from the VMCS into the VMX
584 * transient structure.
585 *
586 * @returns VBox status code.
587 * @param pVCpu Pointer to the VMCPU.
588 * @param pVmxTransient Pointer to the VMX transient structure.
589 */
590DECLINLINE(int) hmR0VmxReadExitIntrInfoVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
591{
592 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO))
593 {
594 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntrInfo);
595 AssertRCReturn(rc, rc);
596 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO;
597 }
598 return VINF_SUCCESS;
599}
600
601
602/**
603 * Reads the VM-exit interruption error code from the VMCS into the VMX
604 * transient structure.
605 *
606 * @returns VBox status code.
607 * @param pVCpu Pointer to the VMCPU.
608 * @param pVmxTransient Pointer to the VMX transient structure.
609 */
610DECLINLINE(int) hmR0VmxReadExitIntrErrorCodeVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
611{
612 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE))
613 {
614 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntrErrorCode);
615 AssertRCReturn(rc, rc);
616 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE;
617 }
618 return VINF_SUCCESS;
619}
620
621
622/**
623 * Reads the VM-exit instruction length field from the VMCS into the VMX
624 * transient structure.
625 *
626 * @returns VBox status code.
627 * @param pVCpu Pointer to the VMCPU.
628 * @param pVmxTransient Pointer to the VMX transient structure.
629 */
630DECLINLINE(int) hmR0VmxReadExitInstrLenVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
631{
632 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN))
633 {
634 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbInstr);
635 AssertRCReturn(rc, rc);
636 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN;
637 }
638 return VINF_SUCCESS;
639}
640
641
642/**
643 * Reads the VM-exit instruction-information field from the VMCS into
644 * the VMX transient structure.
645 *
646 * @returns VBox status code.
647 * @param pVCpu Pointer to the VMCPU.
648 * @param pVmxTransient Pointer to the VMX transient structure.
649 */
650DECLINLINE(int) hmR0VmxReadExitInstrInfoVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
651{
652 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO))
653 {
654 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u);
655 AssertRCReturn(rc, rc);
656 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO;
657 }
658 return VINF_SUCCESS;
659}
660
661
662/**
663 * Reads the exit qualification from the VMCS into the VMX transient structure.
664 *
665 * @returns VBox status code.
666 * @param pVCpu Pointer to the VMCPU.
667 * @param pVmxTransient Pointer to the VMX transient structure.
668 */
669DECLINLINE(int) hmR0VmxReadExitQualificationVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
670{
671 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION))
672 {
673 int rc = VMXReadVmcsGstN(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQualification);
674 AssertRCReturn(rc, rc);
675 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION;
676 }
677 return VINF_SUCCESS;
678}
679
680
681/**
682 * Reads the IDT-vectoring information field from the VMCS into the VMX
683 * transient structure.
684 *
685 * @returns VBox status code.
686 * @param pVmxTransient Pointer to the VMX transient structure.
687 *
688 * @remarks No-long-jump zone!!!
689 */
690DECLINLINE(int) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient)
691{
692 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO))
693 {
694 int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_INFO, &pVmxTransient->uIdtVectoringInfo);
695 AssertRCReturn(rc, rc);
696 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO;
697 }
698 return VINF_SUCCESS;
699}
700
701
702/**
703 * Reads the IDT-vectoring error code from the VMCS into the VMX
704 * transient structure.
705 *
706 * @returns VBox status code.
707 * @param pVmxTransient Pointer to the VMX transient structure.
708 */
709DECLINLINE(int) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
710{
711 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE))
712 {
713 int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode);
714 AssertRCReturn(rc, rc);
715 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE;
716 }
717 return VINF_SUCCESS;
718}
719
720
721/**
722 * Enters VMX root mode operation on the current CPU.
723 *
724 * @returns VBox status code.
725 * @param pVM Pointer to the VM (optional, can be NULL, after
726 * a resume).
727 * @param HCPhysCpuPage Physical address of the VMXON region.
728 * @param pvCpuPage Pointer to the VMXON region.
729 */
730static int hmR0VmxEnterRootMode(PVM pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
731{
732 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
733 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
734 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
735
736 if (pVM)
737 {
738 /* Write the VMCS revision dword to the VMXON region. */
739 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo);
740 }
741
742 /* Enable the VMX bit in CR4 if necessary. */
743 RTCCUINTREG uCr4 = ASMGetCR4();
744 if (!(uCr4 & X86_CR4_VMXE))
745 ASMSetCR4(uCr4 | X86_CR4_VMXE);
746
747 /* Enter VMX root mode. */
748 int rc = VMXEnable(HCPhysCpuPage);
749 if (RT_FAILURE(rc))
750 ASMSetCR4(uCr4);
751
752 return rc;
753}
754
755
756/**
757 * Exits VMX root mode operation on the current CPU.
758 *
759 * @returns VBox status code.
760 */
761static int hmR0VmxLeaveRootMode(void)
762{
763 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
764
765 /* If we're for some reason not in VMX root mode, then don't leave it. */
766 RTCCUINTREG uHostCR4 = ASMGetCR4();
767 if (uHostCR4 & X86_CR4_VMXE)
768 {
769 /* Exit VMX root mode and clear the VMX bit in CR4. */
770 VMXDisable();
771 ASMSetCR4(uHostCR4 & ~X86_CR4_VMXE);
772 return VINF_SUCCESS;
773 }
774
775 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
776}
777
778
779/**
780 * Allocates and maps one physically contiguous page. The allocated page is
781 * zeroed out (used by various VT-x structures).
782 *
783 * @returns IPRT status code.
784 * @param pMemObj Pointer to the ring-0 memory object.
785 * @param ppVirt Where to store the virtual address of the
786 * allocation.
787 * @param pHCPhys Where to store the physical address of the
788 * allocation.
789 */
790DECLINLINE(int) hmR0VmxPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
791{
792 AssertPtrReturn(pMemObj, VERR_INVALID_PARAMETER);
793 AssertPtrReturn(ppVirt, VERR_INVALID_PARAMETER);
794 AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
795
796 int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */);
797 if (RT_FAILURE(rc))
798 return rc;
799 *ppVirt = RTR0MemObjAddress(*pMemObj);
800 *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */);
801 ASMMemZero32(*ppVirt, PAGE_SIZE);
802 return VINF_SUCCESS;
803}
804
805
806/**
807 * Frees and unmaps an allocated physical page.
808 *
809 * @param pMemObj Pointer to the ring-0 memory object.
810 * @param ppVirt Where to re-initialize the virtual address of
811 * allocation as 0.
812 * @param pHCPhys Where to re-initialize the physical address of the
813 * allocation as 0.
814 */
815DECLINLINE(void) hmR0VmxPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
816{
817 AssertPtr(pMemObj);
818 AssertPtr(ppVirt);
819 AssertPtr(pHCPhys);
820 if (*pMemObj != NIL_RTR0MEMOBJ)
821 {
822 int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */);
823 AssertRC(rc);
824 *pMemObj = NIL_RTR0MEMOBJ;
825 *ppVirt = 0;
826 *pHCPhys = 0;
827 }
828}
829
830
831/**
832 * Worker function to free VT-x related structures.
833 *
834 * @returns IPRT status code.
835 * @param pVM Pointer to the VM.
836 */
837static void hmR0VmxStructsFree(PVM pVM)
838{
839 for (VMCPUID i = 0; i < pVM->cCpus; i++)
840 {
841 PVMCPU pVCpu = &pVM->aCpus[i];
842 AssertPtr(pVCpu);
843
844#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
845 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
846 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
847#endif
848
849 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
850 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, &pVCpu->hm.s.vmx.HCPhysMsrBitmap);
851
852 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic, &pVCpu->hm.s.vmx.HCPhysVirtApic);
853 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
854 }
855
856 hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, &pVM->hm.s.vmx.HCPhysApicAccess);
857#ifdef VBOX_WITH_CRASHDUMP_MAGIC
858 hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch);
859#endif
860}
861
862
863/**
864 * Worker function to allocate VT-x related VM structures.
865 *
866 * @returns IPRT status code.
867 * @param pVM Pointer to the VM.
868 */
869static int hmR0VmxStructsAlloc(PVM pVM)
870{
871 /*
872 * Initialize members up-front so we can cleanup properly on allocation failure.
873 */
874#define VMXLOCAL_INIT_VM_MEMOBJ(a_Name, a_VirtPrefix) \
875 pVM->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \
876 pVM->hm.s.vmx.a_VirtPrefix##a_Name = 0; \
877 pVM->hm.s.vmx.HCPhys##a_Name = 0;
878
879#define VMXLOCAL_INIT_VMCPU_MEMOBJ(a_Name, a_VirtPrefix) \
880 pVCpu->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \
881 pVCpu->hm.s.vmx.a_VirtPrefix##a_Name = 0; \
882 pVCpu->hm.s.vmx.HCPhys##a_Name = 0;
883
884#ifdef VBOX_WITH_CRASHDUMP_MAGIC
885 VMXLOCAL_INIT_VM_MEMOBJ(Scratch, pv);
886#endif
887 VMXLOCAL_INIT_VM_MEMOBJ(ApicAccess, pb);
888
889 AssertCompile(sizeof(VMCPUID) == sizeof(pVM->cCpus));
890 for (VMCPUID i = 0; i < pVM->cCpus; i++)
891 {
892 PVMCPU pVCpu = &pVM->aCpus[i];
893 VMXLOCAL_INIT_VMCPU_MEMOBJ(Vmcs, pv);
894 VMXLOCAL_INIT_VMCPU_MEMOBJ(VirtApic, pb);
895 VMXLOCAL_INIT_VMCPU_MEMOBJ(MsrBitmap, pv);
896#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
897 VMXLOCAL_INIT_VMCPU_MEMOBJ(GuestMsr, pv);
898 VMXLOCAL_INIT_VMCPU_MEMOBJ(HostMsr, pv);
899#endif
900 }
901#undef VMXLOCAL_INIT_VMCPU_MEMOBJ
902#undef VMXLOCAL_INIT_VM_MEMOBJ
903
904 /*
905 * Allocate all the VT-x structures.
906 */
907 int rc = VINF_SUCCESS;
908#ifdef VBOX_WITH_CRASHDUMP_MAGIC
909 rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch);
910 if (RT_FAILURE(rc))
911 goto cleanup;
912 strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
913 *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
914#endif
915
916 /* Allocate the APIC-access page for trapping APIC accesses from the guest. */
917 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
918 {
919 rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess,
920 &pVM->hm.s.vmx.HCPhysApicAccess);
921 if (RT_FAILURE(rc))
922 goto cleanup;
923 }
924
925 /*
926 * Initialize per-VCPU VT-x structures.
927 */
928 for (VMCPUID i = 0; i < pVM->cCpus; i++)
929 {
930 PVMCPU pVCpu = &pVM->aCpus[i];
931 AssertPtr(pVCpu);
932
933 /* Allocate the VM control structure (VMCS). */
934 AssertReturn(MSR_IA32_VMX_BASIC_INFO_VMCS_SIZE(pVM->hm.s.vmx.Msrs.u64BasicInfo) <= PAGE_SIZE, VERR_INTERNAL_ERROR);
935 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
936 if (RT_FAILURE(rc))
937 goto cleanup;
938
939 /* Allocate the Virtual-APIC page for transparent TPR accesses. */
940 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
941 {
942 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic,
943 &pVCpu->hm.s.vmx.HCPhysVirtApic);
944 if (RT_FAILURE(rc))
945 goto cleanup;
946 }
947
948 /* Allocate the MSR-bitmap if supported by the CPU. The MSR-bitmap is for transparent accesses of specific MSRs. */
949 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
950 {
951 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap,
952 &pVCpu->hm.s.vmx.HCPhysMsrBitmap);
953 if (RT_FAILURE(rc))
954 goto cleanup;
955 memset(pVCpu->hm.s.vmx.pvMsrBitmap, 0xff, PAGE_SIZE);
956 }
957
958#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
959 /* Allocate the VM-entry MSR-load and VM-exit MSR-store page for the guest MSRs. */
960 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
961 if (RT_FAILURE(rc))
962 goto cleanup;
963
964 /* Allocate the VM-exit MSR-load page for the host MSRs. */
965 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
966 if (RT_FAILURE(rc))
967 goto cleanup;
968#endif
969 }
970
971 return VINF_SUCCESS;
972
973cleanup:
974 hmR0VmxStructsFree(pVM);
975 return rc;
976}
977
978
979/**
980 * Does global VT-x initialization (called during module initialization).
981 *
982 * @returns VBox status code.
983 */
984VMMR0DECL(int) VMXR0GlobalInit(void)
985{
986#ifdef HMVMX_USE_FUNCTION_TABLE
987 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_apfnVMExitHandlers));
988# ifdef VBOX_STRICT
989 for (unsigned i = 0; i < RT_ELEMENTS(g_apfnVMExitHandlers); i++)
990 Assert(g_apfnVMExitHandlers[i]);
991# endif
992#endif
993 return VINF_SUCCESS;
994}
995
996
997/**
998 * Does global VT-x termination (called during module termination).
999 */
1000VMMR0DECL(void) VMXR0GlobalTerm()
1001{
1002 /* Nothing to do currently. */
1003}
1004
1005
1006/**
1007 * Sets up and activates VT-x on the current CPU.
1008 *
1009 * @returns VBox status code.
1010 * @param pCpu Pointer to the global CPU info struct.
1011 * @param pVM Pointer to the VM (can be NULL after a host resume
1012 * operation).
1013 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
1014 * fEnabledByHost is true).
1015 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
1016 * @a fEnabledByHost is true).
1017 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
1018 * enable VT-x on the host.
1019 * @param pvMsrs Opaque pointer to VMXMSRS struct.
1020 */
1021VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
1022 void *pvMsrs)
1023{
1024 AssertReturn(pCpu, VERR_INVALID_PARAMETER);
1025 AssertReturn(pvMsrs, VERR_INVALID_PARAMETER);
1026 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1027
1028 /* Enable VT-x if it's not already enabled by the host. */
1029 if (!fEnabledByHost)
1030 {
1031 int rc = hmR0VmxEnterRootMode(pVM, HCPhysCpuPage, pvCpuPage);
1032 if (RT_FAILURE(rc))
1033 return rc;
1034 }
1035
1036 /*
1037 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been using EPTPs) so
1038 * we don't retain any stale guest-physical mappings which won't get invalidated when flushing by VPID.
1039 */
1040 PVMXMSRS pMsrs = (PVMXMSRS)pvMsrs;
1041 if (pMsrs->u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
1042 {
1043 hmR0VmxFlushEpt(NULL /* pVCpu */, VMX_FLUSH_EPT_ALL_CONTEXTS);
1044 pCpu->fFlushAsidBeforeUse = false;
1045 }
1046 else
1047 pCpu->fFlushAsidBeforeUse = true;
1048
1049 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
1050 ++pCpu->cTlbFlushes;
1051
1052 return VINF_SUCCESS;
1053}
1054
1055
1056/**
1057 * Deactivates VT-x on the current CPU.
1058 *
1059 * @returns VBox status code.
1060 * @param pCpu Pointer to the global CPU info struct.
1061 * @param pvCpuPage Pointer to the VMXON region.
1062 * @param HCPhysCpuPage Physical address of the VMXON region.
1063 *
1064 * @remarks This function should never be called when SUPR0EnableVTx() or
1065 * similar was used to enable VT-x on the host.
1066 */
1067VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBALCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
1068{
1069 NOREF(pCpu);
1070 NOREF(pvCpuPage);
1071 NOREF(HCPhysCpuPage);
1072
1073 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1074 return hmR0VmxLeaveRootMode();
1075}
1076
1077
1078/**
1079 * Sets the permission bits for the specified MSR in the MSR bitmap.
1080 *
1081 * @param pVCpu Pointer to the VMCPU.
1082 * @param uMsr The MSR value.
1083 * @param enmRead Whether reading this MSR causes a VM-exit.
1084 * @param enmWrite Whether writing this MSR causes a VM-exit.
1085 */
1086static void hmR0VmxSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, VMXMSREXITREAD enmRead, VMXMSREXITWRITE enmWrite)
1087{
1088 int32_t iBit;
1089 uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
1090
1091 /*
1092 * Layout:
1093 * 0x000 - 0x3ff - Low MSR read bits
1094 * 0x400 - 0x7ff - High MSR read bits
1095 * 0x800 - 0xbff - Low MSR write bits
1096 * 0xc00 - 0xfff - High MSR write bits
1097 */
1098 if (uMsr <= 0x00001FFF)
1099 iBit = uMsr;
1100 else if ( uMsr >= 0xC0000000
1101 && uMsr <= 0xC0001FFF)
1102 {
1103 iBit = (uMsr - 0xC0000000);
1104 pbMsrBitmap += 0x400;
1105 }
1106 else
1107 {
1108 AssertMsgFailed(("hmR0VmxSetMsrPermission: Invalid MSR %#RX32\n", uMsr));
1109 return;
1110 }
1111
1112 Assert(iBit <= 0x1fff);
1113 if (enmRead == VMXMSREXIT_INTERCEPT_READ)
1114 ASMBitSet(pbMsrBitmap, iBit);
1115 else
1116 ASMBitClear(pbMsrBitmap, iBit);
1117
1118 if (enmWrite == VMXMSREXIT_INTERCEPT_WRITE)
1119 ASMBitSet(pbMsrBitmap + 0x800, iBit);
1120 else
1121 ASMBitClear(pbMsrBitmap + 0x800, iBit);
1122}
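/*
 * Usage sketch (illustrative only; the MSR chosen here is just an example): letting the
 * guest read and write IA32_SYSENTER_CS without VM-exits by clearing both permission
 * bits for it in the bitmap:
 *
 *     hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS,
 *                             VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
 */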
1123
1124
1125/**
1126 * Flushes the TLB using EPT.
1127 *
1128 * @returns VBox status code.
1129 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
1130 * enmFlush).
1131 * @param enmFlush Type of flush.
1132 *
1133 * @remarks Caller is responsible for making sure this function is called only
1134 * when NestedPaging is supported and providing @a enmFlush that is
1135 * supported by the CPU.
1136 */
1137static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
1138{
1139 uint64_t au64Descriptor[2];
1140 if (enmFlush == VMX_FLUSH_EPT_ALL_CONTEXTS)
1141 au64Descriptor[0] = 0;
1142 else
1143 {
1144 Assert(pVCpu);
1145 au64Descriptor[0] = pVCpu->hm.s.vmx.HCPhysEPTP;
1146 }
1147 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1148
1149 int rc = VMXR0InvEPT(enmFlush, &au64Descriptor[0]);
1150 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %RGv failed with %Rrc\n", enmFlush, pVCpu ? pVCpu->hm.s.vmx.HCPhysEPTP : 0,
1151 rc));
1152 if ( RT_SUCCESS(rc)
1153 && pVCpu)
1154 {
1155 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1156 }
1157}
1158
1159
1160/**
1161 * Flushes the TLB using VPID.
1162 *
1163 * @returns VBox status code.
1164 * @param pVM Pointer to the VM.
1165 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
1166 * enmFlush).
1167 * @param enmFlush Type of flush.
1168 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1169 * on @a enmFlush).
1170 */
1171static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
1172{
1173 AssertPtr(pVM);
1174 Assert(pVM->hm.s.vmx.fVpid);
1175
1176 uint64_t au64Descriptor[2];
1177 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
1178 {
1179 au64Descriptor[0] = 0;
1180 au64Descriptor[1] = 0;
1181 }
1182 else
1183 {
1184 AssertPtr(pVCpu);
1185 AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
1186 AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
1187 au64Descriptor[0] = pVCpu->hm.s.uCurrentAsid;
1188 au64Descriptor[1] = GCPtr;
1189 }
1190
1191 int rc = VMXR0InvVPID(enmFlush, &au64Descriptor[0]); NOREF(rc);
1192 AssertMsg(rc == VINF_SUCCESS,
1193 ("VMXR0InvVPID %#x %u %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
1194 if ( RT_SUCCESS(rc)
1195 && pVCpu)
1196 {
1197 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1198 }
1199}
1200
1201
1202/**
1203 * Invalidates a guest page by guest virtual address. Only relevant for
1204 * EPT/VPID, otherwise there is nothing really to invalidate.
1205 *
1206 * @returns VBox status code.
1207 * @param pVM Pointer to the VM.
1208 * @param pVCpu Pointer to the VMCPU.
1209 * @param GCVirt Guest virtual address of the page to invalidate.
1210 */
1211VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
1212{
1213 AssertPtr(pVM);
1214 AssertPtr(pVCpu);
1215 LogFlowFunc(("pVM=%p pVCpu=%p GCVirt=%RGv\n", pVM, pVCpu, GCVirt));
1216
1217 bool fFlushPending = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1218 if (!fFlushPending)
1219 {
1220 /*
1221 * We must invalidate the guest TLB entry in either case; we cannot ignore it even in the EPT case.
1222 * See @bugref{6043} and @bugref{6177}.
1223 *
1224 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*() as this
1225 * function may be called in a loop with individual addresses.
1226 */
1227 if (pVM->hm.s.vmx.fVpid)
1228 {
1229 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1230 {
1231 hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
1232 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1233 }
1234 else
1235 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1236 }
1237 else if (pVM->hm.s.fNestedPaging)
1238 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1239 }
1240
1241 return VINF_SUCCESS;
1242}
1243
1244
1245/**
1246 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
1247 * otherwise there is nothing really to invalidate.
1248 *
1249 * @returns VBox status code.
1250 * @param pVM Pointer to the VM.
1251 * @param pVCpu Pointer to the VMCPU.
1252 * @param GCPhys Guest physical address of the page to invalidate.
1253 */
1254VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
1255{
1256 LogFlowFunc(("%RGp\n", GCPhys));
1257
1258 /*
1259 * We cannot flush a page by guest-physical address: INVVPID takes only a linear address, while INVEPT flushes whole
1260 * EPT contexts rather than individual addresses. We update the force flag here and flush before the next VM-entry in
1261 * hmR0VmxFlushTLB*(), as this function might be called in a loop. This causes a flush-by-EPT if EPT is in use. See @bugref{6568}.
1262 */
1263 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1264 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgPhys);
1265 return VINF_SUCCESS;
1266}
1267
1268
1269/**
1270 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1271 * case where neither EPT nor VPID is supported by the CPU.
1272 *
1273 * @param pVM Pointer to the VM.
1274 * @param pVCpu Pointer to the VMCPU.
1275 * @param pCpu Pointer to the global HM struct.
1276 *
1277 * @remarks Called with interrupts disabled.
1278 */
1279static void hmR0VmxFlushTaggedTlbNone(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1280{
1281 AssertPtr(pVCpu);
1282 AssertPtr(pCpu);
1283 NOREF(pVM);
1284
1285 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1286 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1287
1288 pVCpu->hm.s.TlbShootdown.cPages = 0;
1289 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1290 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1291 pVCpu->hm.s.fForceTLBFlush = false;
1292 return;
1293}
1294
1295
1296/**
1297 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1298 *
1299 * @param pVM Pointer to the VM.
1300 * @param pVCpu Pointer to the VMCPU.
1301 * @param pCpu Pointer to the global HM CPU struct.
1302 * @remarks All references to "ASID" in this function pertain to "VPID" in
1303 * Intel's nomenclature. The reason is to avoid confusion in comparison
1304 * statements, since the host-CPU copies are named "ASID".
1305 *
1306 * @remarks Called with interrupts disabled.
1307 */
1308static void hmR0VmxFlushTaggedTlbBoth(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1309{
1310#ifdef VBOX_WITH_STATISTICS
1311 bool fTlbFlushed = false;
1312# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1313# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1314 if (!fTlbFlushed) \
1315 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1316 } while (0)
1317#else
1318# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1319# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1320#endif
1321
1322 AssertPtr(pVM);
1323 AssertPtr(pCpu);
1324 AssertPtr(pVCpu);
1325 AssertMsg(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid,
1326 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1327 " fNestedPaging=%RTbool fVpid=%RTbool", pVM->hm.s.fNestedPaging, pVM->hm.s.vmx.fVpid));
1328
1329
1330 /*
1331 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1332 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
1333 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
1334 */
1335 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
1336 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1337 {
1338 ++pCpu->uCurrentAsid;
1339 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
1340 {
1341 pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1342 pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1343 pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1344 }
1345
1346 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
1347 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1348 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1349
1350 /*
1351 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1352 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1353 */
1354 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1355 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1356 HMVMX_SET_TAGGED_TLB_FLUSHED();
1357 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); /* Already flushed-by-EPT, skip doing it again below. */
1358 }
1359
1360 /* Check for explicit TLB shootdowns. */
1361 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1362 {
1363 /*
1364 * Changes to the EPT paging structure by the VMM require flushing by EPT, as the CPU creates
1365 * guest-physical (only EPT-tagged) mappings while traversing the EPT tables when EPT is in use.
1366 * Flushing by VPID will only flush linear (only VPID-tagged) and combined (EPT+VPID tagged) mappings
1367 * but not guest-physical mappings.
1368 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information". See @bugref{6568}.
1369 */
1370 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1371 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1372 HMVMX_SET_TAGGED_TLB_FLUSHED();
1373 }
1374
1375 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
1376 * not be executed. See hmQueueInvlPage() where it is commented
1377 * out. Support individual entry flushing someday. */
1378 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
1379 {
1380 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
1381
1382 /*
1383 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
1384 * as supported by the CPU.
1385 */
1386 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1387 {
1388 for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
1389 hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
1390 }
1391 else
1392 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1393
1394 HMVMX_SET_TAGGED_TLB_FLUSHED();
1395 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1396 }
1397
1398 pVCpu->hm.s.TlbShootdown.cPages = 0;
1399 pVCpu->hm.s.fForceTLBFlush = false;
1400
1401 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1402
1403 Assert(pVCpu->hm.s.idLastCpu == pCpu->idCpu);
1404 Assert(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes);
1405 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
1406 ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
1407 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
1408 ("cpu%d uCurrentAsid = %u\n", pCpu->idCpu, pCpu->uCurrentAsid));
1409 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
1410 ("cpu%d VM uCurrentAsid = %u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
1411
1412 /* Update VMCS with the VPID. */
1413 int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
1414 AssertRC(rc);
1415
1416#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1417}
1418
1419
1420/**
1421 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1422 *
1423 * @returns VBox status code.
1424 * @param pVM Pointer to the VM.
1425 * @param pVCpu Pointer to the VMCPU.
1426 * @param pCpu Pointer to the global HM CPU struct.
1427 *
1428 * @remarks Called with interrupts disabled.
1429 */
1430static void hmR0VmxFlushTaggedTlbEpt(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1431{
1432 AssertPtr(pVM);
1433 AssertPtr(pVCpu);
1434 AssertPtr(pCpu);
1435 AssertMsg(pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with NestedPaging disabled."));
1436 AssertMsg(!pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID enabled."));
1437
1438 /*
1439 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1440 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1441 */
1442 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
1443 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1444 {
1445 pVCpu->hm.s.fForceTLBFlush = true;
1446 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1447 }
1448
1449 /* Check for explicit TLB shootdown flushes. */
1450 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1451 {
1452 pVCpu->hm.s.fForceTLBFlush = true;
1453 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1454 }
1455
1456 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1457 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1458
1459 if (pVCpu->hm.s.fForceTLBFlush)
1460 {
1461 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1462 pVCpu->hm.s.fForceTLBFlush = false;
1463 }
1464 else
1465 {
1466 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
1467 * not be executed. See hmQueueInvlPage() where it is commented
1468 * out. Support individual entry flushing someday. */
1469 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
1470 {
1471 /* We cannot flush individual entries without VPID support. Flush using EPT. */
1472 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
1473 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1474 }
1475 else
1476 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
1477 }
1478
1479 pVCpu->hm.s.TlbShootdown.cPages = 0;
1480 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1481}
1482
1483
1484/**
1485 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1486 *
1487 * @returns VBox status code.
1488 * @param pVM Pointer to the VM.
1489 * @param pVCpu Pointer to the VMCPU.
1490 * @param pCpu Pointer to the global HM CPU struct.
1491 *
1492 * @remarks Called with interrupts disabled.
1493 */
1494static void hmR0VmxFlushTaggedTlbVpid(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1495{
1496 AssertPtr(pVM);
1497 AssertPtr(pVCpu);
1498 AssertPtr(pCpu);
1499 AssertMsg(pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked with VPID disabled."));
1500 AssertMsg(!pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging enabled"));
1501
1502 /*
1503 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
1504 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
1505 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
1506 */
1507 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
1508 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1509 {
1510 pVCpu->hm.s.fForceTLBFlush = true;
1511 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1512 }
1513
1514 /* Check for explicit TLB shootdown flushes. */
1515 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1516 {
1517 /*
1518 * If we ever support VPID flush combinations other than ALL or SINGLE-context (see hmR0VmxSetupTaggedTlb())
1519 * we would need to explicitly flush in this case (add an fExplicitFlush = true here and change the
1520 * pCpu->fFlushAsidBeforeUse check below to include fExplicitFlush's too) - an obscure corner case.
1521 */
1522 pVCpu->hm.s.fForceTLBFlush = true;
1523 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1524 }
1525
1526 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1527 if (pVCpu->hm.s.fForceTLBFlush)
1528 {
1529 ++pCpu->uCurrentAsid;
1530 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
1531 {
1532 pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1533 pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1534 pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1535 }
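        /*
         * Worked example (hypothetical numbers): with uMaxAsid = 16, bumping uCurrentAsid from 15 wraps it
         * back to 1. Incrementing cTlbFlushes makes every other VCPU that last ran on this host CPU take the
         * force-flush path above on its next world switch, and fFlushAsidBeforeUse ensures each of them
         * flushes its freshly assigned VPID before first use.
         */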
1536
1537 pVCpu->hm.s.fForceTLBFlush = false;
1538 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1539 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
1540 if (pCpu->fFlushAsidBeforeUse)
1541 hmR0VmxFlushVpid(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
1542 }
1543 else
1544 {
1545 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
1546 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
1547 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
1548 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
1549
1550 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
1551 * not be executed. See hmQueueInvlPage() where it is commented
1552 * out. Support individual entry flushing someday. */
1553 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
1554 {
1555 /* Flush individual guest entries using VPID or as little as possible with EPT as supported by the CPU. */
1556 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1557 {
1558 for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
1559 hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
1560 }
1561 else
1562 hmR0VmxFlushVpid(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
1563 }
1564 else
1565 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
1566 }
1567
1568 pVCpu->hm.s.TlbShootdown.cPages = 0;
1569 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1570
1571 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
1572 ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
1573 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
1574 ("cpu%d uCurrentAsid = %u\n", pCpu->idCpu, pCpu->uCurrentAsid));
1575 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
1576 ("cpu%d VM uCurrentAsid = %u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
1577
1578 int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
1579 AssertRC(rc);
1580}
1581
1582
1583/**
1584 * Flushes the guest TLB entry based on CPU capabilities.
1585 *
1586 * @param pVCpu Pointer to the VMCPU.
1587 * @param pCpu Pointer to the global HM CPU struct.
1588 */
1589DECLINLINE(void) hmR0VmxFlushTaggedTlb(PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1590{
1591 PVM pVM = pVCpu->CTX_SUFF(pVM);
1592 switch (pVM->hm.s.vmx.uFlushTaggedTlb)
1593 {
1594 case HMVMX_FLUSH_TAGGED_TLB_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pVM, pVCpu, pCpu); break;
1595 case HMVMX_FLUSH_TAGGED_TLB_EPT: hmR0VmxFlushTaggedTlbEpt(pVM, pVCpu, pCpu); break;
1596 case HMVMX_FLUSH_TAGGED_TLB_VPID: hmR0VmxFlushTaggedTlbVpid(pVM, pVCpu, pCpu); break;
1597 case HMVMX_FLUSH_TAGGED_TLB_NONE: hmR0VmxFlushTaggedTlbNone(pVM, pVCpu, pCpu); break;
1598 default:
1599 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
1600 break;
1601 }
1602}
1603
1604
1605/**
1606 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
1607 * TLB entries from the host TLB before VM-entry.
1608 *
1609 * @returns VBox status code.
1610 * @param pVM Pointer to the VM.
1611 */
1612static int hmR0VmxSetupTaggedTlb(PVM pVM)
1613{
1614 /*
1615 * Determine optimal flush type for Nested Paging.
1616 * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up unrestricted
1617 * guest execution (see hmR3InitFinalizeR0()).
1618 */
1619 if (pVM->hm.s.fNestedPaging)
1620 {
1621 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
1622 {
1623 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
1624 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_SINGLE_CONTEXT;
1625 else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
1626 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_ALL_CONTEXTS;
1627 else
1628 {
1629 /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */
1630 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
1631 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1632 }
1633
1634 /* Make sure the write-back cacheable memory type for EPT is supported. */
1635 if (!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB))
1636 {
1637 LogRel(("hmR0VmxSetupTaggedTlb: Unsupported EPTP memory type %#x.\n", pVM->hm.s.vmx.Msrs.u64EptVpidCaps));
1638 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
1639 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1640 }
1641 }
1642 else
1643 {
1644 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
1645 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
1646 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1647 }
1648 }
1649
1650 /*
1651 * Determine optimal flush type for VPID.
1652 */
1653 if (pVM->hm.s.vmx.fVpid)
1654 {
1655 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
1656 {
1657 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
1658 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_SINGLE_CONTEXT;
1659 else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
1660 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_ALL_CONTEXTS;
1661 else
1662 {
1663 /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore the VPID capability. */
1664 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1665 LogRel(("hmR0VmxSetupTaggedTlb: Only INDIV_ADDR supported. Ignoring VPID.\n"));
1666 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
1667 LogRel(("hmR0VmxSetupTaggedTlb: Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
1668 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
1669 pVM->hm.s.vmx.fVpid = false;
1670 }
1671 }
1672 else
1673 {
1674 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
1675 Log4(("hmR0VmxSetupTaggedTlb: VPID supported without INVVPID support. Ignoring VPID.\n"));
1676 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
1677 pVM->hm.s.vmx.fVpid = false;
1678 }
1679 }
1680
1681 /*
1682 * Setup the handler for flushing tagged-TLBs.
1683 */
1684 if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
1685 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT_VPID;
1686 else if (pVM->hm.s.fNestedPaging)
1687 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT;
1688 else if (pVM->hm.s.vmx.fVpid)
1689 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_VPID;
1690 else
1691 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_NONE;
1692 return VINF_SUCCESS;
1693}
1694
1695
1696/**
1697 * Sets up pin-based VM-execution controls in the VMCS.
1698 *
1699 * @returns VBox status code.
1700 * @param pVM Pointer to the VM.
1701 * @param pVCpu Pointer to the VMCPU.
1702 */
1703static int hmR0VmxSetupPinCtls(PVM pVM, PVMCPU pVCpu)
1704{
1705 AssertPtr(pVM);
1706 AssertPtr(pVCpu);
1707
1708 uint32_t val = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0; /* Bits set here must always be set. */
1709 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
1710
1711 val |= VMX_VMCS_CTRL_PIN_EXEC_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
1712 | VMX_VMCS_CTRL_PIN_EXEC_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */
1713 Assert(!(val & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI));
1714
1715 /* Enable the VMX preemption timer. */
1716 if (pVM->hm.s.vmx.fUsePreemptTimer)
1717 {
1718 Assert(pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1 & VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER);
1719 val |= VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER;
1720 }
1721
1722 if ((val & zap) != val)
1723 {
1724 LogRel(("hmR0VmxSetupPinCtls: invalid pin-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n",
1725 pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0, val, zap));
1726 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
1727 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1728 }
1729
1730 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, val);
1731 AssertRCReturn(rc, rc);
1732
1733 /* Update VCPU with the currently set pin-based VM-execution controls. */
1734 pVCpu->hm.s.vmx.u32PinCtls = val;
1735 return rc;
1736}
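
/*
 * Illustrative sketch, not compiled in: how the disallowed0/allowed1 pair of a VMX capability MSR constrains
 * a control value (the helper name and the numbers are made up; real values come from pVM->hm.s.vmx.Msrs).
 * E.g. with disallowed0 = 0x16 and allowed1 = 0x7f, val = 0x16 | EXT_INT_EXIT | NMI_EXIT = 0x1f passes,
 * while requesting a bit outside allowed1 makes (val & zap) != val and the setup fails with
 * VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO.
 */
#if 0
static bool hmR0VmxIsValidCtlCombo(uint32_t fDisallowed0, uint32_t fAllowed1, uint32_t fVal)
{
    return (fVal & fDisallowed0) == fDisallowed0    /* Every must-be-one bit is set... */
        && (fVal & fAllowed1)    == fVal;           /* ...and no bit outside the allowed-one mask is set. */
}
#endif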
1737
1738
1739/**
1740 * Sets up processor-based VM-execution controls in the VMCS.
1741 *
1742 * @returns VBox status code.
1743 * @param pVM Pointer to the VM.
1744 * @param pVCpu Pointer to the VMCPU.
1745 */
1746static int hmR0VmxSetupProcCtls(PVM pVM, PVMCPU pVCpu)
1747{
1748 AssertPtr(pVM);
1749 AssertPtr(pVCpu);
1750
1751 int rc = VERR_INTERNAL_ERROR_5;
1752 uint32_t val = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0; /* Bits set here must be set in the VMCS. */
1753 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
1754
1755 val |= VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT /* HLT causes a VM-exit. */
1756 | VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
1757 | VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
1758 | VMX_VMCS_CTRL_PROC_EXEC_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
1759 | VMX_VMCS_CTRL_PROC_EXEC_RDPMC_EXIT /* RDPMC causes a VM-exit. */
1760 | VMX_VMCS_CTRL_PROC_EXEC_MONITOR_EXIT /* MONITOR causes a VM-exit. */
1761 | VMX_VMCS_CTRL_PROC_EXEC_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
1762
1763 /* We toggle VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT later; check that it is not fixed to be always set or always clear. */
1764 if ( !(pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT)
1765 || (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT))
1766 {
1767 LogRel(("hmR0VmxSetupProcCtls: unsupported VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT combo!"));
1768 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
1769 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1770 }
1771
1772 /* Without Nested Paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
1773 if (!pVM->hm.s.fNestedPaging)
1774 {
1775 Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); /* Paranoia. */
1776 val |= VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT
1777 | VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
1778 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
1779 }
1780
1781 /* Use TPR shadowing if supported by the CPU. */
1782 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
1783 {
1784 Assert(pVCpu->hm.s.vmx.HCPhysVirtApic);
1785 Assert(!(pVCpu->hm.s.vmx.HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
1786 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0);
1787 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic);
1788 AssertRCReturn(rc, rc);
1789
1790 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
1791 /* CR8 writes cause a VM-exit based on the TPR threshold. */
1792 Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT));
1793 Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT));
1794 }
1795 else
1796 {
1797 val |= VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
1798 | VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
1799 }
1800
1801 /* Use MSR-bitmaps if supported by the CPU. */
1802 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
1803 {
1804 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS;
1805
1806 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
1807 Assert(!(pVCpu->hm.s.vmx.HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
1808 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap);
1809 AssertRCReturn(rc, rc);
1810
1811 /*
1812 * The guest can access the following MSRs (read, write) without causing VM-exits; they are loaded/stored
1813 * automatically (either as part of the MSR-load/store areas or dedicated fields in the VMCS).
1814 */
1815 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1816 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1817 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1818 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_LSTAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1819 hmR0VmxSetMsrPermission(pVCpu, MSR_K6_STAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1820 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_SF_MASK, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1821 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1822 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1823 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_FS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1824 }
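    /*
     * Note on the bitmap layout (Intel spec. 24.6.9 "MSR-Bitmap Address"): the single 4K page is split into
     * four 1K regions - read bits for MSRs 0x0..0x1fff, read bits for 0xc0000000..0xc0001fff, then the
     * corresponding two write regions. A clear bit means the access does not cause a VM-exit, which is what
     * hmR0VmxSetMsrPermission() arranges for the pass-through MSRs above.
     */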
1825
1826 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
1827 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
1828 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
1829
1830 if ((val & zap) != val)
1831 {
1832 LogRel(("hmR0VmxSetupProcCtls: invalid processor-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n",
1833 pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0, val, zap));
1834 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
1835 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1836 }
1837
1838 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, val);
1839 AssertRCReturn(rc, rc);
1840
1841 /* Update VCPU with the currently set processor-based VM-execution controls. */
1842 pVCpu->hm.s.vmx.u32ProcCtls = val;
1843
1844 /*
1845 * Secondary processor-based VM-execution controls.
1846 */
1847 if (RT_LIKELY(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL))
1848 {
1849 val = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0; /* Bits set here must be set in the VMCS. */
1850 zap = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
1851
1852 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT)
1853 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT; /* WBINVD causes a VM-exit. */
1854
1855 if (pVM->hm.s.fNestedPaging)
1856 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT; /* Enable EPT. */
1857 else
1858 {
1859 /*
1860 * Without Nested Paging, INVPCID should cause a VM-exit. Enabling this bit causes the CPU to refer to
1861 * VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT when INVPCID is executed by the guest.
1862 * See Intel spec. 25.4 "Changes to instruction behaviour in VMX non-root operation".
1863 */
1864 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_INVPCID)
1865 val |= VMX_VMCS_CTRL_PROC_EXEC2_INVPCID;
1866 }
1867
1868 if (pVM->hm.s.vmx.fVpid)
1869 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID; /* Enable VPID. */
1870
1871 if (pVM->hm.s.vmx.fUnrestrictedGuest)
1872 val |= VMX_VMCS_CTRL_PROC_EXEC2_UNRESTRICTED_GUEST; /* Enable Unrestricted Execution. */
1873
1874 /* Enable Virtual-APIC page accesses if supported by the CPU. This is essentially where the TPR shadow resides. */
1875 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
1876 * done dynamically. */
1877 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
1878 {
1879 Assert(pVM->hm.s.vmx.HCPhysApicAccess);
1880 Assert(!(pVM->hm.s.vmx.HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
1881 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC; /* Virtualize APIC accesses. */
1882 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess);
1883 AssertRCReturn(rc, rc);
1884 }
1885
1886 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
1887 {
1888 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP; /* Enable RDTSCP support. */
1889 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
1890 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_TSC_AUX, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1891 }
1892
1893 if ((val & zap) != val)
1894 {
1895 LogRel(("hmR0VmxSetupProcCtls: invalid secondary processor-based VM-execution controls combo! "
1896 "cpu=%#RX64 val=%#RX64 zap=%#RX64\n", pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0, val, zap));
1897 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1898 }
1899
1900 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, val);
1901 AssertRCReturn(rc, rc);
1902
1903 /* Update VCPU with the currently set secondary processor-based VM-execution controls. */
1904 pVCpu->hm.s.vmx.u32ProcCtls2 = val;
1905 }
1906 else if (RT_UNLIKELY(pVM->hm.s.vmx.fUnrestrictedGuest))
1907 {
1908 LogRel(("hmR0VmxSetupProcCtls: Unrestricted Guest enabled but secondary processor-based VM-execution controls are not "
1909 "available\n"));
1910 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1911 }
1912
1913 return VINF_SUCCESS;
1914}
1915
1916
1917/**
1918 * Sets up miscellaneous (everything other than Pin & Processor-based
1919 * VM-execution) control fields in the VMCS.
1920 *
1921 * @returns VBox status code.
1922 * @param pVM Pointer to the VM.
1923 * @param pVCpu Pointer to the VMCPU.
1924 */
1925static int hmR0VmxSetupMiscCtls(PVM pVM, PVMCPU pVCpu)
1926{
1927 AssertPtr(pVM);
1928 AssertPtr(pVCpu);
1929
1930 int rc = VERR_GENERAL_FAILURE;
1931
1932 /* All fields are zero-initialized during allocation; but don't remove the commented block below. */
1933#if 0
1934 /* All CR3 accesses cause VM-exits. Later we optimize CR3 accesses (see hmR0VmxLoadGuestCR3AndCR4())*/
1935 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0); AssertRCReturn(rc, rc);
1936 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0); AssertRCReturn(rc, rc);
1937
1938 /*
1939 * Set MASK & MATCH to 0. VMX checks if GuestPFErrCode & MASK == MATCH. If equal (in our case it always is)
1940 * and if the X86_XCPT_PF bit in the exception bitmap is set it causes a VM-exit, if clear doesn't cause an exit.
1941 * and the X86_XCPT_PF bit in the exception bitmap is set, it causes a VM-exit; if the bit is clear, it doesn't.
1942 */
1943 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0); AssertRCReturn(rc, rc);
1944 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0); AssertRCReturn(rc, rc);
1945
1946 /** @todo Explore possibility of using IO-bitmaps. */
1947 /* All IO & IOIO instructions cause VM-exits. */
1948 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0); AssertRCReturn(rc, rc);
1949 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0); AssertRCReturn(rc, rc);
1950
1951 /* Initialize the MSR-bitmap area. */
1952 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc);
1953 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0); AssertRCReturn(rc, rc);
1954 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc);
1955#endif
1956
1957#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1958 /* Setup MSR autoloading/storing. */
1959 Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr);
1960 Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf)); /* Lower 4 bits MBZ. */
1961 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
1962 AssertRCReturn(rc, rc);
1963 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
1964 AssertRCReturn(rc, rc);
1965
1966 Assert(pVCpu->hm.s.vmx.HCPhysHostMsr);
1967 Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf)); /* Lower 4 bits MBZ. */
1968 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr);
1969 AssertRCReturn(rc, rc);
1970#endif
1971
1972 /* Set VMCS link pointer. Reserved for future use, must be -1. Intel spec. 24.4 "Guest-State Area". */
1973 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, UINT64_C(0xffffffffffffffff));
1974 AssertRCReturn(rc, rc);
1975
1976 /* All fields are zero-initialized during allocation; but don't remove the commented block below. */
1977#if 0
1978 /* Setup debug controls */
1979 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0); /** @todo We don't support IA32_DEBUGCTL MSR. Should we? */
1980 AssertRCReturn(rc, rc);
1981 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0);
1982 AssertRCReturn(rc, rc);
1983#endif
1984
1985 return rc;
1986}
1987
1988
1989/**
1990 * Sets up the initial exception bitmap in the VMCS based on static conditions
1991 * (i.e. conditions that cannot ever change after starting the VM).
1992 *
1993 * @returns VBox status code.
1994 * @param pVM Pointer to the VM.
1995 * @param pVCpu Pointer to the VMCPU.
1996 */
1997static int hmR0VmxInitXcptBitmap(PVM pVM, PVMCPU pVCpu)
1998{
1999 AssertPtr(pVM);
2000 AssertPtr(pVCpu);
2001
2002 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
2003
2004 uint32_t u32XcptBitmap = 0;
2005
2006 /* Without Nested Paging, #PF must cause a VM-exit so we can sync our shadow page tables. */
2007 if (!pVM->hm.s.fNestedPaging)
2008 u32XcptBitmap |= RT_BIT(X86_XCPT_PF);
2009
2010 pVCpu->hm.s.vmx.u32XcptBitmap = u32XcptBitmap;
2011 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
2012 AssertRCReturn(rc, rc);
2013 return rc;
2014}
2015
2016
2017/**
2018 * Sets up the initial guest-state mask. The guest-state mask is consulted
2019 * before reading guest-state fields from the VMCS, as VMREADs can be expensive,
2020 * especially in the nested-virtualization case (where each one would cause a VM-exit).
2021 *
2022 * @param pVCpu Pointer to the VMCPU.
2023 */
2024static int hmR0VmxInitUpdatedGuestStateMask(PVMCPU pVCpu)
2025{
2026 /* Initially the guest-state is up-to-date as there is nothing in the VMCS. */
2027 pVCpu->hm.s.vmx.fUpdatedGuestState = HMVMX_UPDATED_GUEST_ALL;
2028 return VINF_SUCCESS;
2029}
2030
2031
2032/**
2033 * Does per-VM VT-x initialization.
2034 *
2035 * @returns VBox status code.
2036 * @param pVM Pointer to the VM.
2037 */
2038VMMR0DECL(int) VMXR0InitVM(PVM pVM)
2039{
2040 LogFlowFunc(("pVM=%p\n", pVM));
2041
2042 int rc = hmR0VmxStructsAlloc(pVM);
2043 if (RT_FAILURE(rc))
2044 {
2045 LogRel(("VMXR0InitVM: hmR0VmxStructsAlloc failed! rc=%Rrc\n", rc));
2046 return rc;
2047 }
2048
2049 return VINF_SUCCESS;
2050}
2051
2052
2053/**
2054 * Does per-VM VT-x termination.
2055 *
2056 * @returns VBox status code.
2057 * @param pVM Pointer to the VM.
2058 */
2059VMMR0DECL(int) VMXR0TermVM(PVM pVM)
2060{
2061 LogFlowFunc(("pVM=%p\n", pVM));
2062
2063#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2064 if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
2065 ASMMemZero32(pVM->hm.s.vmx.pvScratch, PAGE_SIZE);
2066#endif
2067 hmR0VmxStructsFree(pVM);
2068 return VINF_SUCCESS;
2069}
2070
2071
2072/**
2073 * Sets up the VM for execution under VT-x.
2074 * This function is only called once per-VM during initialization.
2075 *
2076 * @returns VBox status code.
2077 * @param pVM Pointer to the VM.
2078 */
2079VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
2080{
2081 AssertPtrReturn(pVM, VERR_INVALID_PARAMETER);
2082 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2083
2084 LogFlowFunc(("pVM=%p\n", pVM));
2085
2086 /*
2087 * Without UnrestrictedGuest, pRealModeTSS and pNonPagingModeEPTPageTable *must* always be allocated.
2088 * We no longer support the highly unlikely case of UnrestrictedGuest without pRealModeTSS. See hmR3InitFinalizeR0().
2089 */
2090 /* -XXX- change hmR3InitFinalizeR0Intel() to fail if pRealModeTSS alloc fails. */
2091 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
2092 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
2093 || !pVM->hm.s.vmx.pRealModeTSS))
2094 {
2095 LogRel(("VMXR0SetupVM: invalid real-on-v86 state.\n"));
2096 return VERR_INTERNAL_ERROR;
2097 }
2098
2099#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2100 /*
2101 * This is for the darwin 32-bit/PAE kernels trying to execute 64-bit guests. We don't bother with
2102 * the 32<->64 switcher in this case. This is a rare, legacy use-case with barely any test coverage.
2103 */
2104 if ( pVM->hm.s.fAllow64BitGuests
2105 && !HMVMX_IS_64BIT_HOST_MODE())
2106 {
2107 LogRel(("VMXR0SetupVM: Unsupported guest and host paging mode combination.\n"));
2108 return VERR_PGM_UNSUPPORTED_HOST_PAGING_MODE;
2109 }
2110#endif
2111
2112 /* Initialize these always, see hmR3InitFinalizeR0().*/
2113 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NONE;
2114 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NONE;
2115
2116 /* Setup the tagged-TLB flush handlers. */
2117 int rc = hmR0VmxSetupTaggedTlb(pVM);
2118 if (RT_FAILURE(rc))
2119 {
2120 LogRel(("VMXR0SetupVM: hmR0VmxSetupTaggedTlb failed! rc=%Rrc\n", rc));
2121 return rc;
2122 }
2123
2124 for (VMCPUID i = 0; i < pVM->cCpus; i++)
2125 {
2126 PVMCPU pVCpu = &pVM->aCpus[i];
2127 AssertPtr(pVCpu);
2128 AssertPtr(pVCpu->hm.s.vmx.pvVmcs);
2129
2130 /* Log the VCPU pointers, useful for debugging SMP VMs. */
2131 Log4(("VMXR0SetupVM: pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
2132
2133 /* Set revision dword at the beginning of the VMCS structure. */
2134 *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo);
2135
2136 /* Initialize our VMCS region in memory, set the VMCS launch state to "clear". */
2137 rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2138 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2139 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2140
2141 /* Load this VMCS as the current VMCS. */
2142 rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2143 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXActivateVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2144 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2145
2146 rc = hmR0VmxSetupPinCtls(pVM, pVCpu);
2147 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupPinCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2148 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2149
2150 rc = hmR0VmxSetupProcCtls(pVM, pVCpu);
2151 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupProcCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2152 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2153
2154 rc = hmR0VmxSetupMiscCtls(pVM, pVCpu);
2155 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupMiscCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2156 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2157
2158 rc = hmR0VmxInitXcptBitmap(pVM, pVCpu);
2159 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitXcptBitmap failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2160 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2161
2162 rc = hmR0VmxInitUpdatedGuestStateMask(pVCpu);
2163 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitUpdatedGuestStateMask failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2164 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2165
2166#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2167 rc = hmR0VmxInitVmcsReadCache(pVM, pVCpu);
2168 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitVmcsReadCache failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2169 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2170#endif
2171
2172 /* Re-sync the CPU's internal data into our VMCS memory region & reset the launch state to "clear". */
2173 rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2174 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs(2) failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2175 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2176
2177 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
2178
2179 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc);
2180 }
2181
2182 return VINF_SUCCESS;
2183}
2184
2185
2186/**
2187 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
2188 * the VMCS.
2189 *
2190 * @returns VBox status code.
2191 * @param pVM Pointer to the VM.
2192 * @param pVCpu Pointer to the VMCPU.
2193 */
2194DECLINLINE(int) hmR0VmxSaveHostControlRegs(PVM pVM, PVMCPU pVCpu)
2195{
2196 RTCCUINTREG uReg = ASMGetCR0();
2197 int rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR0, uReg);
2198 AssertRCReturn(rc, rc);
2199
2200#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2201 /* For the darwin 32-bit hybrid kernel, we need the 64-bit CR3 as it uses 64-bit paging. */
2202 if (HMVMX_IS_64BIT_HOST_MODE())
2203 {
2204 uint64_t uRegCR3 = HMR0Get64bitCR3();
2205 rc = VMXWriteVmcs64(VMX_VMCS_HOST_CR3, uRegCR3);
2206 }
2207 else
2208#endif
2209 {
2210 uReg = ASMGetCR3();
2211 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR3, uReg);
2212 }
2213 AssertRCReturn(rc, rc);
2214
2215 uReg = ASMGetCR4();
2216 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR4, uReg);
2217 AssertRCReturn(rc, rc);
2218 return rc;
2219}
2220
2221
2222/**
2223 * Saves the host segment registers, GDTR, IDTR and the TR, FS and GS bases into
2224 * the host-state area in the VMCS.
2225 *
2226 * @returns VBox status code.
2227 * @param pVM Pointer to the VM.
2228 * @param pVCpu Pointer to the VMCPU.
2229 */
2230DECLINLINE(int) hmR0VmxSaveHostSegmentRegs(PVM pVM, PVMCPU pVCpu)
2231{
2232 int rc = VERR_INTERNAL_ERROR_5;
2233
2234 /*
2235 * Host DS, ES, FS and GS segment registers.
2236 */
2237#if HC_ARCH_BITS == 64
2238 RTSEL uSelDS = ASMGetDS();
2239 RTSEL uSelES = ASMGetES();
2240 RTSEL uSelFS = ASMGetFS();
2241 RTSEL uSelGS = ASMGetGS();
2242#else
2243 RTSEL uSelDS = 0;
2244 RTSEL uSelES = 0;
2245 RTSEL uSelFS = 0;
2246 RTSEL uSelGS = 0;
2247#endif
2248
2249 /* Recalculate which host-state bits need to be manually restored. */
2250 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
2251
2252 /*
2253 * Host CS and SS segment registers.
2254 */
2255#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2256 RTSEL uSelCS;
2257 RTSEL uSelSS;
2258 if (HMVMX_IS_64BIT_HOST_MODE())
2259 {
2260 uSelCS = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
2261 uSelSS = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
2262 }
2263 else
2264 {
2265 /* Seems darwin uses the LDT (TI flag is set) in the CS & SS selectors which VT-x doesn't like. */
2266 uSelCS = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
2267 uSelSS = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
2268 }
2269#else
2270 RTSEL uSelCS = ASMGetCS();
2271 RTSEL uSelSS = ASMGetSS();
2272#endif
2273
2274 /*
2275 * Host TR segment register.
2276 */
2277 RTSEL uSelTR = ASMGetTR();
2278
2279#if HC_ARCH_BITS == 64
2280 /*
2281 * Determine if the host segment registers are suitable for VT-x; otherwise load zero so the VM-entry host-state checks
2282 * pass and restore the original selectors before we get preempted. See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
2283 */
2284 if (uSelDS & (X86_SEL_RPL | X86_SEL_LDT))
2285 {
2286 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_DS;
2287 pVCpu->hm.s.vmx.RestoreHost.uHostSelDS = uSelDS;
2288 uSelDS = 0;
2289 }
2290 if (uSelES & (X86_SEL_RPL | X86_SEL_LDT))
2291 {
2292 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_ES;
2293 pVCpu->hm.s.vmx.RestoreHost.uHostSelES = uSelES;
2294 uSelES = 0;
2295 }
2296 if (uSelFS & (X86_SEL_RPL | X86_SEL_LDT))
2297 {
2298 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_FS;
2299 pVCpu->hm.s.vmx.RestoreHost.uHostSelFS = uSelFS;
2300 uSelFS = 0;
2301 }
2302 if (uSelGS & (X86_SEL_RPL | X86_SEL_LDT))
2303 {
2304 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_GS;
2305 pVCpu->hm.s.vmx.RestoreHost.uHostSelGS = uSelGS;
2306 uSelGS = 0;
2307 }
2308#endif
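    /*
     * Example with a hypothetical selector value: a host DS of 0x2b has index 5, TI=0 and RPL=3; the RPL bits
     * alone make it fail the VM-entry host-state checks, so the code above stashes 0x2b in RestoreHost.uHostSelDS,
     * loads 0 into the VMCS host DS field and relies on the VMX_RESTORE_HOST_SEL_DS flag to put the original
     * value back later.
     */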
2309
2310 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
2311 Assert(!(uSelCS & X86_SEL_RPL)); Assert(!(uSelCS & X86_SEL_LDT));
2312 Assert(!(uSelSS & X86_SEL_RPL)); Assert(!(uSelSS & X86_SEL_LDT));
2313 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
2314 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
2315 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
2316 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
2317 Assert(!(uSelTR & X86_SEL_RPL)); Assert(!(uSelTR & X86_SEL_LDT));
2318 Assert(uSelCS);
2319 Assert(uSelTR);
2320
2321 /* Assertion is right but we would not have updated u32ExitCtls yet. */
2322#if 0
2323 if (!(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE))
2324 Assert(uSelSS != 0);
2325#endif
2326
2327 /* Write these host selector fields into the host-state area in the VMCS. */
2328 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_CS, uSelCS); AssertRCReturn(rc, rc);
2329 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_SS, uSelSS); AssertRCReturn(rc, rc);
2330#if HC_ARCH_BITS == 64
2331 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_DS, uSelDS); AssertRCReturn(rc, rc);
2332 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_ES, uSelES); AssertRCReturn(rc, rc);
2333 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_FS, uSelFS); AssertRCReturn(rc, rc);
2334 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_GS, uSelGS); AssertRCReturn(rc, rc);
2335#endif
2336 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_TR, uSelTR); AssertRCReturn(rc, rc);
2337
2338 /*
2339 * Host GDTR and IDTR.
2340 */
2341 RTGDTR Gdtr;
2342 RT_ZERO(Gdtr);
2343#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2344 if (HMVMX_IS_64BIT_HOST_MODE())
2345 {
2346 X86XDTR64 Gdtr64;
2347 X86XDTR64 Idtr64;
2348 HMR0Get64bitGdtrAndIdtr(&Gdtr64, &Idtr64);
2349 rc = VMXWriteVmcs64(VMX_VMCS_HOST_GDTR_BASE, Gdtr64.uAddr); AssertRCReturn(rc, rc);
2350 rc = VMXWriteVmcs64(VMX_VMCS_HOST_IDTR_BASE, Idtr64.uAddr); AssertRCReturn(rc, rc);
2351
2352 Gdtr.cbGdt = Gdtr64.cb;
2353 Gdtr.pGdt = (uintptr_t)Gdtr64.uAddr;
2354 }
2355 else
2356#endif
2357 {
2358 RTIDTR Idtr;
2359 ASMGetGDTR(&Gdtr);
2360 ASMGetIDTR(&Idtr);
2361 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, Gdtr.pGdt); AssertRCReturn(rc, rc);
2362 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, Idtr.pIdt); AssertRCReturn(rc, rc);
2363
2364#if HC_ARCH_BITS == 64
2365 /*
2366 * Determine if we need to manually restore the GDTR and IDTR limits, as VT-x zaps them to the
2367 * maximum limit (0xffff) on every VM-exit.
2368 */
2369 if (Gdtr.cbGdt != 0xffff)
2370 {
2371 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
2372 AssertCompile(sizeof(Gdtr) == sizeof(X86XDTR64));
2373 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64));
2374 }
2375
2376 /*
2377 * The IDT limit is effectively 0xfff (256 vectors of 16 bytes each). Therefore, if the host already has the limit at 0xfff,
2378 * VT-x bloating it to 0xffff is not a problem as the extra range cannot be reached anyway. See Intel spec. 6.14.1 "64-Bit Mode IDT" and
2379 * Intel spec. 6.2 "Exception and Interrupt Vectors".
2380 */
2381 if (Idtr.cbIdt < 0x0fff)
2382 {
2383 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
2384 AssertCompile(sizeof(Idtr) == sizeof(X86XDTR64));
2385 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostIdtr, &Idtr, sizeof(X86XDTR64));
2386 }
2387#endif
2388 }
2389
2390 /*
2391 * Host TR base. Verify that the TR selector doesn't point past the GDT. Masking off the TI and RPL bits
2392 * leaves the index scaled by 8 (the descriptor size), which is exactly the byte offset the CPU uses. TI is always 0 and RPL should be too in most cases.
2393 */
2394 if ((uSelTR & X86_SEL_MASK) > Gdtr.cbGdt)
2395 {
2396 AssertMsgFailed(("hmR0VmxSaveHostSegmentRegs: TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, Gdtr.cbGdt));
2397 return VERR_VMX_INVALID_HOST_STATE;
2398 }
2399
2400 PCX86DESCHC pDesc = (PCX86DESCHC)(Gdtr.pGdt + (uSelTR & X86_SEL_MASK));
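    /* E.g. a hypothetical uSelTR of 0x43 has index 8, TI=0, RPL=3; masking off the low three bits (X86_SEL_MASK)
       yields byte offset 0x40 = index * 8, i.e. the descriptor at GDT index 8. */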
2401#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2402 if (HMVMX_IS_64BIT_HOST_MODE())
2403 {
2404 /* We need the 64-bit TR base for hybrid darwin. */
2405 uint64_t u64TRBase = X86DESC64_BASE((PX86DESC64)pDesc);
2406 rc = VMXWriteVmcs64(VMX_VMCS_HOST_TR_BASE, u64TRBase);
2407 }
2408 else
2409#endif
2410 {
2411 uintptr_t uTRBase;
2412#if HC_ARCH_BITS == 64
2413 uTRBase = X86DESC64_BASE(pDesc);
2414
2415 /*
2416 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on all VM-exits.
2417 * The type is the same for 64-bit busy TSS[1]. The limit needs manual restoration if the host has something else.
2418 * Task switching is not supported in 64-bit mode[2], but the limit still matters as IOPM is supported in 64-bit mode.
2419 * Restoring the limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
2420 *
2421 * [1] See Intel spec. 3.5 "System Descriptor Types".
2422 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
2423 */
2424 Assert(pDesc->System.u4Type == 11);
2425 if ( pDesc->System.u16LimitLow != 0x67
2426 || pDesc->System.u4LimitHigh)
2427 {
2428 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
2429 pVCpu->hm.s.vmx.RestoreHost.uHostSelTR = uSelTR;
2430
2431 /* Store the GDTR here as we need it while restoring TR. */
2432 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64));
2433 }
2434#else
2435 uTRBase = X86DESC_BASE(pDesc);
2436#endif
2437 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_TR_BASE, uTRBase);
2438 }
2439 AssertRCReturn(rc, rc);
2440
2441 /*
2442 * Host FS base and GS base.
2443 */
2444#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2445 if (HMVMX_IS_64BIT_HOST_MODE())
2446 {
2447 uint64_t u64FSBase = ASMRdMsr(MSR_K8_FS_BASE);
2448 uint64_t u64GSBase = ASMRdMsr(MSR_K8_GS_BASE);
2449 rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, u64FSBase); AssertRCReturn(rc, rc);
2450 rc = VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, u64GSBase); AssertRCReturn(rc, rc);
2451
2452# if HC_ARCH_BITS == 64
2453 /* Store the base if we have to restore FS or GS manually as we need to restore the base as well. */
2454 if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_FS)
2455 pVCpu->hm.s.vmx.RestoreHost.uHostFSBase = u64FSBase;
2456 if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_GS)
2457 pVCpu->hm.s.vmx.RestoreHost.uHostGSBase = u64GSBase;
2458# endif
2459 }
2460#endif
2461 return rc;
2462}
2463
2464
2465/**
2466 * Saves certain host MSRs in the VM-Exit MSR-load area and some in the
2467 * host-state area of the VMCS. These MSRs will be automatically restored on
2468 * the host after every successful VM exit.
2469 *
2470 * @returns VBox status code.
2471 * @param pVM Pointer to the VM.
2472 * @param pVCpu Pointer to the VMCPU.
2473 */
2474DECLINLINE(int) hmR0VmxSaveHostMsrs(PVM pVM, PVMCPU pVCpu)
2475{
2476 AssertPtr(pVCpu);
2477 AssertPtr(pVCpu->hm.s.vmx.pvHostMsr);
2478
2479 int rc = VINF_SUCCESS;
2480#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2481 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
2482 uint32_t cHostMsrs = 0;
2483 uint32_t u32HostExtFeatures = pVM->hm.s.cpuid.u32AMDFeatureEDX;
2484
2485 if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2486 {
2487 uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER);
2488
2489# if HC_ARCH_BITS == 64
2490 /* Paranoia. 64-bit code requires these bits to be set always. */
2491 Assert((u64HostEfer & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
2492
2493 /*
2494 * We currently do not save/restore the host EFER; we just make sure it doesn't get modified by VT-x operation.
2495 * All guest accesses (read, write) on EFER cause VM-exits. If we are to conditionally load the guest EFER for
2496 * some reason (e.g. allow transparent reads) we would activate the code below.
2497 */
2498# if 0
2499 /* All our supported 64-bit host platforms must have NXE bit set. Otherwise we can change the below code to save EFER. */
2500 Assert(u64HostEfer & (MSR_K6_EFER_NXE));
2501 /* The SCE bit is only applicable in 64-bit mode. Save EFER if it doesn't match what the guest has.
2502 See Intel spec. 30.10.4.3 "Handling the SYSCALL and SYSRET Instructions". */
2503 if (CPUMIsGuestInLongMode(pVCpu))
2504 {
2505 uint64_t u64GuestEfer;
2506 rc = CPUMQueryGuestMsr(pVCpu, MSR_K6_EFER, &u64GuestEfer);
2507 AssertRC(rc);
2508
2509 if ((u64HostEfer & MSR_K6_EFER_SCE) != (u64GuestEfer & MSR_K6_EFER_SCE))
2510 {
2511 pHostMsr->u32Msr = MSR_K6_EFER;
2512 pHostMsr->u32Reserved = 0;
2513 pHostMsr->u64Value = u64HostEfer;
2514 pHostMsr++; cHostMsrs++;
2515 }
2516 }
2517# endif
2518# else /* HC_ARCH_BITS != 64 */
2519 pHostMsr->u32Msr = MSR_K6_EFER;
2520 pHostMsr->u32Reserved = 0;
2521# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2522 if (CPUMIsGuestInLongMode(pVCpu))
2523 {
2524 /* Must match the EFER value in our 64-bit switcher. */
2525 pHostMsr->u64Value = u64HostEfer | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
2526 }
2527 else
2528# endif
2529 pHostMsr->u64Value = u64HostEfer;
2530 pHostMsr++; cHostMsrs++;
2531# endif /* HC_ARCH_BITS == 64 */
2532 }
2533
2534# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2535 if (HMVMX_IS_64BIT_HOST_MODE())
2536 {
2537 pHostMsr->u32Msr = MSR_K6_STAR;
2538 pHostMsr->u32Reserved = 0;
2539 pHostMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
2540 pHostMsr++; cHostMsrs++;
2541 pHostMsr->u32Msr = MSR_K8_LSTAR;
2542 pHostMsr->u32Reserved = 0;
2543 pHostMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64-bit mode syscall rip */
2544 pHostMsr++; cHostMsrs++;
2545 pHostMsr->u32Msr = MSR_K8_SF_MASK;
2546 pHostMsr->u32Reserved = 0;
2547 pHostMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
2548 pHostMsr++; cHostMsrs++;
2549 pHostMsr->u32Msr = MSR_K8_KERNEL_GS_BASE;
2550 pHostMsr->u32Reserved = 0;
2551 pHostMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
2552 pHostMsr++; cHostMsrs++;
2553 }
2554# endif
2555
2556 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
2557 if (RT_UNLIKELY(cHostMsrs > MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc)))
2558 {
2559 LogRel(("cHostMsrs=%u Cpu=%u\n", cHostMsrs, (unsigned)MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc)));
2560 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_HOST_MSR_STORAGE;
2561 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2562 }
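    /* For reference: MSR_IA32_VMX_MISC_MAX_MSR() decodes bits 27:25 of IA32_VMX_MISC into the recommended
       maximum of 512 * (N + 1) entries per MSR list (Intel spec. A.6 "Miscellaneous Data"), so the handful of
       MSRs set up above is nowhere near the limit. */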
2563
2564 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cHostMsrs);
2565#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2566
2567 /*
2568 * Host Sysenter MSRs.
2569 */
2570 rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
2571 AssertRCReturn(rc, rc);
2572#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2573 if (HMVMX_IS_64BIT_HOST_MODE())
2574 {
2575 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
2576 AssertRCReturn(rc, rc);
2577 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
2578 }
2579 else
2580 {
2581 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
2582 AssertRCReturn(rc, rc);
2583 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
2584 }
2585#elif HC_ARCH_BITS == 32
2586 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
2587 AssertRCReturn(rc, rc);
2588 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
2589#else
2590 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
2591 AssertRCReturn(rc, rc);
2592 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
2593#endif
2594 AssertRCReturn(rc, rc);
2595
2596 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT, IA32_EFER, also see
2597 * hmR0VmxSetupExitCtls() !! */
2598 return rc;
2599}
2600
2601
2602/**
2603 * Sets up VM-entry controls in the VMCS. These controls can affect things done
2604 * on VM-exit; e.g. "load debug controls", see Intel spec. 24.8.1 "VM-entry
2605 * controls".
2606 *
2607 * @returns VBox status code.
2608 * @param pVCpu Pointer to the VMCPU.
2609 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2610 * out-of-sync. Make sure to update the required fields
2611 * before using them.
2612 *
2613 * @remarks No-long-jump zone!!!
2614 */
2615DECLINLINE(int) hmR0VmxLoadGuestEntryCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2616{
2617 int rc = VINF_SUCCESS;
2618 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_VMX_ENTRY_CTLS)
2619 {
2620 PVM pVM = pVCpu->CTX_SUFF(pVM);
2621 uint32_t val = pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0; /* Bits set here must be set in the VMCS. */
2622 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxEntry.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2623
2624 /* Load debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x capable CPUs only supported the 1-setting of this bit. */
2625 val |= VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG;
2626
2627 /* Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry. */
2628 if (CPUMIsGuestInLongModeEx(pMixedCtx))
2629 val |= VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST;
2630 else
2631 Assert(!(val & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST));
2632
2633 /*
2634 * The following should -not- be set (since we're not in SMM mode):
2635 * - VMX_VMCS_CTRL_ENTRY_ENTRY_SMM
2636 * - VMX_VMCS_CTRL_ENTRY_DEACTIVATE_DUALMON
2637 */
2638
2639 /** @todo VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR,
2640 * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR,
2641 * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR */
2642
2643 if ((val & zap) != val)
2644 {
2645 LogRel(("hmR0VmxLoadGuestEntryCtls: invalid VM-entry controls combo! cpu=%RX64 val=%RX64 zap=%RX64\n",
2646 pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0, val, zap));
2647 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY;
2648 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2649 }
2650
2651 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, val);
2652 AssertRCReturn(rc, rc);
2653
2654 /* Update the VCPU with the currently set VM-entry controls. */
2655 pVCpu->hm.s.vmx.u32EntryCtls = val;
2656 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_VMX_ENTRY_CTLS;
2657 }
2658 return rc;
2659}
2660
2661
2662/**
2663 * Sets up the VM-exit controls in the VMCS.
2664 *
2665 * @returns VBox status code.
2666 * @param pVM Pointer to the VM.
2667 * @param pVCpu Pointer to the VMCPU.
2668 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2669 * out-of-sync. Make sure to update the required fields
2670 * before using them.
2671 *
2672 * @remarks requires EFER.
2673 */
2674DECLINLINE(int) hmR0VmxLoadGuestExitCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2675{
2676 int rc = VINF_SUCCESS;
2677 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_VMX_EXIT_CTLS)
2678 {
2679 PVM pVM = pVCpu->CTX_SUFF(pVM);
2680 uint32_t val = pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0; /* Bits set here must be set in the VMCS. */
2681 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2682
2683 /* Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only supported the 1-setting of this bit. */
2684 val |= VMX_VMCS_CTRL_EXIT_SAVE_DEBUG;
2685
2686 /*
2687 * Set the host long mode active (EFER.LMA) bit (which Intel calls "Host address-space size") if necessary.
2688 * On VM-exit, VT-x sets both the host EFER.LMA and EFER.LME bit to this value. See assertion in hmR0VmxSaveHostMsrs().
2689 */
2690#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2691 if (HMVMX_IS_64BIT_HOST_MODE())
2692 val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE;
2693 else
2694 Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE));
2695#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
2696 if (CPUMIsGuestInLongModeEx(pMixedCtx))
2697 val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE; /* The switcher goes to long mode. */
2698 else
2699 Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE));
2700#endif
2701
2702 /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */
2703 Assert(!(val & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT));
2704
2705 /** @todo VMX_VMCS_CTRL_EXIT_LOAD_PERF_MSR,
2706 * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_PAT_MSR,
2707 * VMX_VMCS_CTRL_EXIT_LOAD_HOST_PAT_MSR,
2708 * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_EFER_MSR,
2709 * VMX_VMCS_CTRL_EXIT_LOAD_HOST_EFER_MSR. */
2710
2711 if (pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1 & VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER)
2712 val |= VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER;
2713
2714 if ((val & zap) != val)
2715 {
2716 LogRel(("hmR0VmxLoadGuestExitCtls: invalid VM-exit controls combo! cpu=%RX64 val=%RX64 zap=%RX64\n",
2717 pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0, val, zap));
2718 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT;
2719 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2720 }
2721
2722 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, val);
2723 AssertRCReturn(rc, rc);
2724
2725 /* Update VCPU with the currently set VM-exit controls. */
2726 pVCpu->hm.s.vmx.u32ExitCtls = val;
2727 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_VMX_EXIT_CTLS;
2728 }
2729 return rc;
2730}
2731
2732
2733/**
2734 * Loads the guest APIC and related state.
2735 *
2736 * @returns VBox status code.
2737 * @param pVM Pointer to the VM.
2738 * @param pVCpu Pointer to the VMCPU.
2739 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2740 * out-of-sync. Make sure to update the required fields
2741 * before using them.
2742 */
2743DECLINLINE(int) hmR0VmxLoadGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2744{
2745 int rc = VINF_SUCCESS;
2746 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_VMX_GUEST_APIC_STATE)
2747 {
2748 /* Setup TPR shadowing. Also setup TPR patching for 32-bit guests. */
2749 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
2750 {
2751 Assert(pVCpu->hm.s.vmx.HCPhysVirtApic);
2752
2753 bool fPendingIntr = false;
2754 uint8_t u8Tpr = 0;
2755 uint8_t u8PendingIntr = 0;
2756 rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr);
2757 AssertRCReturn(rc, rc);
2758
2759 /*
2760 * If there are external interrupts pending but masked by the TPR value, instruct VT-x to cause a VM-exit when
2761 * the guest lowers its TPR below the highest-priority pending interrupt and we can deliver the interrupt.
2762 * If there are no external interrupts pending, set threshold to 0 to not cause a VM-exit. We will eventually deliver
2763 * the interrupt when we VM-exit for other reasons.
2764 */
2765 pVCpu->hm.s.vmx.pbVirtApic[0x80] = u8Tpr; /* Offset 0x80 is TPR in the APIC MMIO range. */
2766 uint32_t u32TprThreshold = 0;
2767 if (fPendingIntr)
2768 {
2769 /* Bits 3-0 of the TPR threshold field correspond to bits 7-4 of the TPR (which is the Task-Priority Class). */
2770 const uint8_t u8PendingPriority = (u8PendingIntr >> 4);
2771 const uint8_t u8TprPriority = (u8Tpr >> 4) & 7;
2772 if (u8PendingPriority <= u8TprPriority)
2773 u32TprThreshold = u8PendingPriority;
2774 else
2775 u32TprThreshold = u8TprPriority; /* Required for Vista 64-bit guest, see @bugref{6398}. */
2776 }
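            /*
             * Worked example (hypothetical values): u8Tpr = 0x50 and u8PendingIntr = 0x41 give a TPR class of 5
             * and a pending class of 4, so u32TprThreshold becomes 4. The interrupt stays masked for now, but as
             * soon as the guest lowers its TPR class below 4, VT-x raises a TPR-below-threshold VM-exit and the
             * pending interrupt can be delivered.
             */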
2777 Assert(!(u32TprThreshold & 0xfffffff0)); /* Bits 31:4 MBZ. */
2778
2779 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
2780 AssertRCReturn(rc, rc);
2781 }
2782
2783 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_VMX_GUEST_APIC_STATE;
2784 }
2785 return rc;
2786}
2787
2788
2789/**
2790 * Gets the guest's interruptibility-state ("interrupt shadow" as AMD calls it).
2791 *
2792 * @returns Guest's interruptibility-state.
2793 * @param pVCpu Pointer to the VMCPU.
2794 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2795 * out-of-sync. Make sure to update the required fields
2796 * before using them.
2797 *
2798 * @remarks No-long-jump zone!!!
2799 * @remarks Has side-effects with VMCPU_FF_INHIBIT_INTERRUPTS force-flag.
2800 */
2801DECLINLINE(uint32_t) hmR0VmxGetGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2802{
2803 /*
2804 * Instructions like STI and MOV SS inhibit interrupts till the next instruction completes. Check if we should
2805 * inhibit interrupts or clear any existing interrupt-inhibition.
2806 */
2807 uint32_t uIntrState = 0;
2808 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2809 {
2810 /* If inhibition is active, RIP & RFLAGS should've been accessed (i.e. read previously from the VMCS or from ring-3). */
2811 AssertMsg((pVCpu->hm.s.vmx.fUpdatedGuestState & (HMVMX_UPDATED_GUEST_RIP | HMVMX_UPDATED_GUEST_RFLAGS))
2812 == (HMVMX_UPDATED_GUEST_RIP | HMVMX_UPDATED_GUEST_RFLAGS), ("%#x\n", pVCpu->hm.s.vmx.fUpdatedGuestState));
2813 if (pMixedCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2814 {
2815 /*
2816 * We can clear the inhibit force flag as even if we go back to the recompiler without executing guest code in
2817 * VT-x, the flag's condition to be cleared is met and thus the cleared state is correct.
2818 */
2819 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2820 }
2821 else if (pMixedCtx->eflags.Bits.u1IF)
2822 uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI;
2823 else
2824 uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS;
2825 }
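    /* Note: when the inhibition is still in place, EFLAGS.IF picks which blocking bit to report: STI sets IF,
       so IF=1 is treated as an STI shadow, while IF=0 is reported as blocking by MOV SS/POP SS. */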
2826 return uIntrState;
2827}
2828
2829
2830/**
2831 * Loads the guest's interruptibility-state into the guest-state area in the
2832 * VMCS.
2833 *
2834 * @returns VBox status code.
2835 * @param pVCpu Pointer to the VMCPU.
2836 * @param uIntrState The interruptibility-state to set.
2837 */
2838static int hmR0VmxLoadGuestIntrState(PVMCPU pVCpu, uint32_t uIntrState)
2839{
2840 AssertMsg(!(uIntrState & 0xfffffff0), ("%#x\n", uIntrState)); /* Bits 31:4 MBZ. */
2841 Assert((uIntrState & 0x3) != 0x3); /* Block-by-STI and MOV SS cannot be simultaneously set. */
2842 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, uIntrState);
2843 AssertRCReturn(rc, rc);
2844 return rc;
2845}
2846
2847
2848/**
2849 * Loads the guest's RIP into the guest-state area in the VMCS.
2850 *
2851 * @returns VBox status code.
2852 * @param pVCpu Pointer to the VMCPU.
2853 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2854 * out-of-sync. Make sure to update the required fields
2855 * before using them.
2856 *
2857 * @remarks No-long-jump zone!!!
2858 */
2859static int hmR0VmxLoadGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2860{
2861 int rc = VINF_SUCCESS;
2862 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_RIP)
2863 {
2864 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RIP, pMixedCtx->rip);
2865 AssertRCReturn(rc, rc);
2866 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_RIP;
2867 Log4(("Load: VMX_VMCS_GUEST_RIP=%#RX64 fContextUseFlags=%#x\n", pMixedCtx->rip, pVCpu->hm.s.fContextUseFlags));
2868 }
2869 return rc;
2870}
2871
2872
2873/**
2874 * Loads the guest's RSP into the guest-state area in the VMCS.
2875 *
2876 * @returns VBox status code.
2877 * @param pVCpu Pointer to the VMCPU.
2878 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2879 * out-of-sync. Make sure to update the required fields
2880 * before using them.
2881 *
2882 * @remarks No-long-jump zone!!!
2883 */
2884static int hmR0VmxLoadGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2885{
2886 int rc = VINF_SUCCESS;
2887 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_RSP)
2888 {
2889 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RSP, pMixedCtx->rsp);
2890 AssertRCReturn(rc, rc);
2891 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_RSP;
2892 Log4(("Load: VMX_VMCS_GUEST_RSP=%#RX64\n", pMixedCtx->rsp));
2893 }
2894 return rc;
2895}
2896
2897
2898/**
2899 * Loads the guest's RFLAGS into the guest-state area in the VMCS.
2900 *
2901 * @returns VBox status code.
2902 * @param pVCpu Pointer to the VMCPU.
2903 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2904 * out-of-sync. Make sure to update the required fields
2905 * before using them.
2906 *
2907 * @remarks No-long-jump zone!!!
2908 */
2909static int hmR0VmxLoadGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2910{
2911 int rc = VINF_SUCCESS;
2912 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_RFLAGS)
2913 {
2914 /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ).
2915 Let us assert it as such and use 32-bit VMWRITE. */
2916 Assert(!(pMixedCtx->rflags.u64 >> 32));
2917 X86EFLAGS Eflags = pMixedCtx->eflags;
2918 Eflags.u32 &= VMX_EFLAGS_RESERVED_0; /* Bits 22-31, 15, 5 & 3 MBZ. */
2919 Eflags.u32 |= VMX_EFLAGS_RESERVED_1; /* Bit 1 MB1. */
2920
2921 /*
2922 * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so we can restore them on VM exit.
2923 * Modify the real-mode guest's eflags so that VT-x can run the real-mode guest code under Virtual 8086 mode.
2924 */
2925 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
2926 {
2927 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
2928 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
2929 pVCpu->hm.s.vmx.RealMode.Eflags.u32 = Eflags.u32; /* Save the original eflags of the real-mode guest. */
2930 Eflags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */
2931 Eflags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. */
2932 }
2933
2934 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_RFLAGS, Eflags.u32);
2935 AssertRCReturn(rc, rc);
2936
2937 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_RFLAGS;
2938 Log4(("Load: VMX_VMCS_GUEST_RFLAGS=%#RX32\n", Eflags.u32));
2939 }
2940 return rc;
2941}
2942
2943
2944/**
2945 * Loads the guest RIP, RSP and RFLAGS into the guest-state area in the VMCS.
2946 *
2947 * @returns VBox status code.
2948 * @param pVCpu Pointer to the VMCPU.
2949 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2950 * out-of-sync. Make sure to update the required fields
2951 * before using them.
2952 *
2953 * @remarks No-long-jump zone!!!
2954 */
2955DECLINLINE(int) hmR0VmxLoadGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2956{
2957 int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx);
2958 AssertRCReturn(rc, rc);
2959 rc = hmR0VmxLoadGuestRsp(pVCpu, pMixedCtx);
2960 AssertRCReturn(rc, rc);
2961 rc = hmR0VmxLoadGuestRflags(pVCpu, pMixedCtx);
2962 AssertRCReturn(rc, rc);
2963 return rc;
2964}
2965
2966
2967/**
2968 * Loads the guest CR0 control register into the guest-state area in the VMCS.
2969 * CR0 is partially shared with the host and we have to consider the FPU bits.
2970 *
2971 * @returns VBox status code.
2973 * @param pVCpu Pointer to the VMCPU.
2974 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2975 * out-of-sync. Make sure to update the required fields
2976 * before using them.
2977 *
2978 * @remarks No-long-jump zone!!!
2979 */
2980static int hmR0VmxLoadSharedCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2981{
2982 /*
2983 * Guest CR0.
2984 * Guest FPU.
2985 */
2986 int rc = VINF_SUCCESS;
2987 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0)
2988 {
2989 Assert(!(pMixedCtx->cr0 >> 32));
2990 uint32_t u32GuestCR0 = pMixedCtx->cr0;
2991 PVM pVM = pVCpu->CTX_SUFF(pVM);
2992
2993 /* The guest's view (read access) of its CR0 is unblemished. */
2994 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, u32GuestCR0);
2995 AssertRCReturn(rc, rc);
2996 Log4(("Load: VMX_VMCS_CTRL_CR0_READ_SHADOW=%#RX32\n", u32GuestCR0));
2997
2998 /* Setup VT-x's view of the guest CR0. */
2999 /* Minimize VM-exits due to CR3 changes when we have NestedPaging. */
3000 if (pVM->hm.s.fNestedPaging)
3001 {
3002 if (CPUMIsGuestPagingEnabledEx(pMixedCtx))
3003 {
3004 /* The guest has paging enabled, let it access CR3 without causing a VM exit if supported. */
3005 pVCpu->hm.s.vmx.u32ProcCtls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
3006 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT);
3007 }
3008 else
3009 {
3010 /* The guest doesn't have paging enabled, make CR3 access to cause VM exits to update our shadow. */
3011 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
3012 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
3013 }
3014
3015 /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */
3016 if (pVM->hm.s.vmx.fUnrestrictedGuest)
3017 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
3018
3019 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
3020 AssertRCReturn(rc, rc);
3021 }
3022 else
3023 u32GuestCR0 |= X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */
3024
3025 /*
3026 * Guest FPU bits.
3027         * Intel spec. 23.8 "Restrictions on VMX operation" mentions that the CR0.NE bit must always be set on the first
3028         * CPUs to support VT-x; the VM-entry checks make no mention of it with regard to UX (unrestricted execution).
3029 */
3030 u32GuestCR0 |= X86_CR0_NE;
3031 bool fInterceptNM = false;
3032 if (CPUMIsGuestFPUStateActive(pVCpu))
3033 {
3034 fInterceptNM = false; /* Guest FPU active, no need to VM-exit on #NM. */
3035            /* The guest should still get #NM exceptions when it expects them, so we should not clear the TS & MP bits here.
3036               We're only concerned with -us- not intercepting #NMs while the guest FPU state is active, not with the guest itself. */
3037 }
3038 else
3039 {
3040 fInterceptNM = true; /* Guest FPU inactive, VM-exit on #NM for lazy FPU loading. */
3041 u32GuestCR0 |= X86_CR0_TS /* Guest can task switch quickly and do lazy FPU syncing. */
3042 | X86_CR0_MP; /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */
3043 }
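        /* This is the lazy-FPU scheme: with CR0.TS/MP forced on and #NM intercepted, the first FPU instruction the
           guest executes traps to us; the #NM VM-exit handler is expected to load the guest FPU state, after which
           this code stops intercepting #NM on the next pass. */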
3044
3045 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
3046 bool fInterceptMF = false;
3047 if (!(pMixedCtx->cr0 & X86_CR0_NE))
3048 fInterceptMF = true;
3049
3050 /* Finally, intercept all exceptions as we cannot directly inject them in real-mode, see hmR0VmxInjectEventVmcs(). */
3051 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3052 {
3053 Assert(PDMVmmDevHeapIsEnabled(pVM));
3054 Assert(pVM->hm.s.vmx.pRealModeTSS);
3055 pVCpu->hm.s.vmx.u32XcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK;
3056 fInterceptNM = true;
3057 fInterceptMF = true;
3058 }
3059 else
3060 pVCpu->hm.s.vmx.u32XcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK;
3061
3062 if (fInterceptNM)
3063 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_NM);
3064 else
3065 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_NM);
3066
3067 if (fInterceptMF)
3068 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_MF);
3069 else
3070 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_MF);
3071
3072 /* Additional intercepts for debugging, define these yourself explicitly. */
3073#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
3074 pVCpu->hm.s.vmx.u32XcptBitmap |= 0
3075 | RT_BIT(X86_XCPT_BP)
3076 | RT_BIT(X86_XCPT_DB)
3077 | RT_BIT(X86_XCPT_DE)
3078 | RT_BIT(X86_XCPT_NM)
3079 | RT_BIT(X86_XCPT_UD)
3080 | RT_BIT(X86_XCPT_NP)
3081 | RT_BIT(X86_XCPT_SS)
3082 | RT_BIT(X86_XCPT_GP)
3083 | RT_BIT(X86_XCPT_PF)
3084 | RT_BIT(X86_XCPT_MF)
3085 ;
3086#elif defined(HMVMX_ALWAYS_TRAP_PF)
3087 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_PF);
3088#endif
3089
3090 Assert(pVM->hm.s.fNestedPaging || (pVCpu->hm.s.vmx.u32XcptBitmap & RT_BIT(X86_XCPT_PF)));
3091
3092 /* Set/clear the CR0 specific bits along with their exceptions (PE, PG, CD, NW). */
3093 uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
3094 uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
3095 if (pVM->hm.s.vmx.fUnrestrictedGuest) /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). */
3096 uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG);
3097 else
3098 Assert((uSetCR0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG));
3099
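        /* Illustration with hypothetical fixed-bit MSR values: if CR0_FIXED0 = 0x80000021 (PG, NE, PE) and
           CR0_FIXED1 = 0xffffffff, then uSetCR0 = 0x80000021 (bits that must be 1) and uZapCR0 = 0xffffffff
           (no bit is forced to 0). ORing uSetCR0 and ANDing uZapCR0 below keeps the guest CR0 within what the
           CPU accepts for VMX operation. */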
3100 u32GuestCR0 |= uSetCR0;
3101 u32GuestCR0 &= uZapCR0;
3102 u32GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW); /* Always enable caching. */
3103
3104 /* Write VT-x's view of the guest CR0 into the VMCS and update the exception bitmap. */
3105 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR0, u32GuestCR0);
3106 AssertRCReturn(rc, rc);
3107 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
3108 AssertRCReturn(rc, rc);
3109 Log4(("Load: VMX_VMCS_GUEST_CR0=%#RX32 (uSetCR0=%#RX32 uZapCR0=%#RX32)\n", u32GuestCR0, uSetCR0, uZapCR0));
3110
3111 /*
3112 * CR0 is shared between host and guest along with a CR0 read shadow. Therefore, certain bits must not be changed
3113 * by the guest because VT-x ignores saving/restoring them (namely CD, ET, NW) and for certain other bits
3114 * we want to be notified immediately of guest CR0 changes (e.g. PG to update our shadow page tables).
3115 */
3116 uint32_t u32CR0Mask = 0;
3117 u32CR0Mask = X86_CR0_PE
3118 | X86_CR0_NE
3119 | X86_CR0_WP
3120 | X86_CR0_PG
3121 | X86_CR0_ET /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.ET */
3122 | X86_CR0_CD /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.CD */
3123 | X86_CR0_NW; /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.NW */
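        /* Bits set in this guest/host mask are owned by the host: a guest write that would change such a bit from
           its read-shadow value causes a CR-access VM-exit, while guest reads of CR0 return the read shadow written
           above. Bits not in the mask (e.g. TS/MP while the FPU is loaded) can be changed by the guest freely. */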
3124
3125 /** @todo Avoid intercepting CR0.PE with unrestricted guests. Fix PGM
3126 * enmGuestMode to be in-sync with the current mode. See @bugref{6398}
3127 * and @bugref{6944}. */
3128#if 0
3129 if (pVM->hm.s.vmx.fUnrestrictedGuest)
3130 u32CR0Mask &= ~X86_CR0_PE;
3131#endif
3132 if (pVM->hm.s.fNestedPaging)
3133 u32CR0Mask &= ~X86_CR0_WP;
3134
3135 /* If the guest FPU state is active, don't need to VM-exit on writes to FPU related bits in CR0. */
3136 if (fInterceptNM)
3137 {
3138 u32CR0Mask |= X86_CR0_TS
3139 | X86_CR0_MP;
3140 }
3141
3142 /* Write the CR0 mask into the VMCS and update the VCPU's copy of the current CR0 mask. */
3143 pVCpu->hm.s.vmx.u32CR0Mask = u32CR0Mask;
3144 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, u32CR0Mask);
3145 AssertRCReturn(rc, rc);
3146 Log4(("Load: VMX_VMCS_CTRL_CR0_MASK=%#RX32\n", u32CR0Mask));
3147
3148 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_CR0;
3149 }
3150 return rc;
3151}
3152
3153
3154/**
3155 * Loads the guest control registers (CR3, CR4) into the guest-state area
3156 * in the VMCS.
3157 *
3158 * @returns VBox status code.
3160 * @param pVCpu Pointer to the VMCPU.
3161 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3162 * out-of-sync. Make sure to update the required fields
3163 * before using them.
3164 *
3165 * @remarks No-long-jump zone!!!
3166 */
3167static int hmR0VmxLoadGuestCR3AndCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3168{
3169 int rc = VINF_SUCCESS;
3170 PVM pVM = pVCpu->CTX_SUFF(pVM);
3171
3172 /*
3173 * Guest CR2.
3174 * It's always loaded in the assembler code. Nothing to do here.
3175 */
3176
3177 /*
3178 * Guest CR3.
3179 */
3180 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR3)
3181 {
3182 RTGCPHYS GCPhysGuestCR3 = NIL_RTGCPHYS;
3183 if (pVM->hm.s.fNestedPaging)
3184 {
3185 pVCpu->hm.s.vmx.HCPhysEPTP = PGMGetHyperCR3(pVCpu);
3186
3187 /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */
3188 Assert(pVCpu->hm.s.vmx.HCPhysEPTP);
3189 Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & UINT64_C(0xfff0000000000000)));
3190 Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & 0xfff));
3191
3192 /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */
3193 pVCpu->hm.s.vmx.HCPhysEPTP |= VMX_EPT_MEMTYPE_WB
3194 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
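            /* With the write-back memory type (architectural encoding 6) and the default 4-level walk (field value 3,
               i.e. length - 1), the low 12 bits of the EPTP work out to 0x1e; the upper bits hold the physical address
               of the EPT PML4 table. */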
3195
3196 /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */
3197 AssertMsg( ((pVCpu->hm.s.vmx.HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */
3198 && ((pVCpu->hm.s.vmx.HCPhysEPTP >> 6) & 0x3f) == 0, /* Bits 6:11 MBZ. */
3199 ("EPTP %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP));
3200
3201 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.HCPhysEPTP);
3202 AssertRCReturn(rc, rc);
3203 Log4(("Load: VMX_VMCS64_CTRL_EPTP_FULL=%#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP));
3204
3205 if ( pVM->hm.s.vmx.fUnrestrictedGuest
3206 || CPUMIsGuestPagingEnabledEx(pMixedCtx))
3207 {
3208 /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. */
3209 if (CPUMIsGuestInPAEModeEx(pMixedCtx))
3210 {
3211 rc = PGMGstGetPaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); AssertRCReturn(rc, rc);
3212 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc);
3213 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc);
3214 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc);
3215 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc);
3216 }
3217
3218 /* The guest's view of its CR3 is unblemished with Nested Paging when the guest is using paging or we
3219 have Unrestricted Execution to handle the guest when it's not using paging. */
3220 GCPhysGuestCR3 = pMixedCtx->cr3;
3221 }
3222 else
3223 {
3224 /*
3225 * The guest is not using paging, but the CPU (VT-x) has to. While the guest thinks it accesses physical memory
3226 * directly, we use our identity-mapped page table to map guest-linear to guest-physical addresses.
3227 * EPT takes care of translating it to host-physical addresses.
3228 */
3229 RTGCPHYS GCPhys;
3230 Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable);
3231 Assert(PDMVmmDevHeapIsEnabled(pVM));
3232
3233 /* We obtain it here every time as the guest could have relocated this PCI region. */
3234 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
3235 AssertRCReturn(rc, rc);
3236
3237 GCPhysGuestCR3 = GCPhys;
3238 }
3239
3240 Log4(("Load: VMX_VMCS_GUEST_CR3=%#RGv (GstN)\n", GCPhysGuestCR3));
3241 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_CR3, GCPhysGuestCR3);
3242 }
3243 else
3244 {
3245 /* Non-nested paging case, just use the hypervisor's CR3. */
3246 RTHCPHYS HCPhysGuestCR3 = PGMGetHyperCR3(pVCpu);
3247
3248 Log4(("Load: VMX_VMCS_GUEST_CR3=%#RHv (HstN)\n", HCPhysGuestCR3));
3249 rc = VMXWriteVmcsHstN(VMX_VMCS_GUEST_CR3, HCPhysGuestCR3);
3250 }
3251 AssertRCReturn(rc, rc);
3252
3253 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_CR3;
3254 }
3255
3256 /*
3257 * Guest CR4.
3258 */
3259 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR4)
3260 {
3261 Assert(!(pMixedCtx->cr4 >> 32));
3262 uint32_t u32GuestCR4 = pMixedCtx->cr4;
3263
3264 /* The guest's view of its CR4 is unblemished. */
3265 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, u32GuestCR4);
3266 AssertRCReturn(rc, rc);
3267 Log4(("Load: VMX_VMCS_CTRL_CR4_READ_SHADOW=%#RX32\n", u32GuestCR4));
3268
3269 /* Setup VT-x's view of the guest CR4. */
3270 /*
3271 * If we're emulating real-mode using virtual-8086 mode, we want to redirect software interrupts to the 8086 program
3272 * interrupt handler. Clear the VME bit (the interrupt redirection bitmap is already all 0, see hmR3InitFinalizeR0())
3273 * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode".
3274 */
3275 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3276 {
3277 Assert(pVM->hm.s.vmx.pRealModeTSS);
3278 Assert(PDMVmmDevHeapIsEnabled(pVM));
3279 u32GuestCR4 &= ~X86_CR4_VME;
3280 }
3281
3282 if (pVM->hm.s.fNestedPaging)
3283 {
3284 if ( !CPUMIsGuestPagingEnabledEx(pMixedCtx)
3285 && !pVM->hm.s.vmx.fUnrestrictedGuest)
3286 {
3287 /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */
3288 u32GuestCR4 |= X86_CR4_PSE;
3289                /* Our identity mapping is a 32-bit page directory. */
3290 u32GuestCR4 &= ~X86_CR4_PAE;
3291 }
3292 /* else use guest CR4.*/
3293 }
3294 else
3295 {
3296 /*
3297 * The shadow paging modes and guest paging modes are different, the shadow is in accordance with the host
3298 * paging mode and thus we need to adjust VT-x's view of CR4 depending on our shadow page tables.
3299 */
3300 switch (pVCpu->hm.s.enmShadowMode)
3301 {
3302 case PGMMODE_REAL: /* Real-mode. */
3303 case PGMMODE_PROTECTED: /* Protected mode without paging. */
3304 case PGMMODE_32_BIT: /* 32-bit paging. */
3305 {
3306 u32GuestCR4 &= ~X86_CR4_PAE;
3307 break;
3308 }
3309
3310 case PGMMODE_PAE: /* PAE paging. */
3311 case PGMMODE_PAE_NX: /* PAE paging with NX. */
3312 {
3313 u32GuestCR4 |= X86_CR4_PAE;
3314 break;
3315 }
3316
3317 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
3318 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
3319#ifdef VBOX_ENABLE_64_BITS_GUESTS
3320 break;
3321#endif
3322 default:
3323 AssertFailed();
3324 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
3325 }
3326 }
3327
3328 /* We need to set and clear the CR4 specific bits here (mainly the X86_CR4_VMXE bit). */
3329 uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
3330 uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
3331 u32GuestCR4 |= uSetCR4;
3332 u32GuestCR4 &= uZapCR4;
3333
3334 /* Write VT-x's view of the guest CR4 into the VMCS. */
3335 Log4(("Load: VMX_VMCS_GUEST_CR4=%#RX32 (Set=%#RX32 Zap=%#RX32)\n", u32GuestCR4, uSetCR4, uZapCR4));
3336 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR4, u32GuestCR4);
3337 AssertRCReturn(rc, rc);
3338
3339 /* Setup CR4 mask. CR4 flags owned by the host, if the guest attempts to change them, that would cause a VM exit. */
3340 uint32_t u32CR4Mask = 0;
3341 u32CR4Mask = X86_CR4_VME
3342 | X86_CR4_PAE
3343 | X86_CR4_PGE
3344 | X86_CR4_PSE
3345 | X86_CR4_VMXE;
3346 pVCpu->hm.s.vmx.u32CR4Mask = u32CR4Mask;
3347 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, u32CR4Mask);
3348 AssertRCReturn(rc, rc);
3349
3350 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_CR4;
3351 }
3352 return rc;
3353}
3354
3355
3356/**
3357 * Loads the guest debug registers into the guest-state area in the VMCS.
3358 * This also sets up whether #DB and MOV DRx accesses cause VM exits.
3359 *
3360 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3361 *
3362 * @returns VBox status code.
3363 * @param pVCpu Pointer to the VMCPU.
3364 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3365 * out-of-sync. Make sure to update the required fields
3366 * before using them.
3367 *
3368 * @remarks No-long-jump zone!!!
3369 */
3370static int hmR0VmxLoadSharedDebugState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3371{
3372 if (!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_DEBUG))
3373 return VINF_SUCCESS;
3374
3375#ifdef VBOX_STRICT
3376 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3377 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
3378 {
3379 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3380 Assert((pMixedCtx->dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0); /* Bits 63:32, 15, 14, 12, 11 are reserved. */
3381 Assert((pMixedCtx->dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); /* Bit 10 is reserved (RA1). */
3382 }
3383#endif
3384
3385 int rc;
3386 PVM pVM = pVCpu->CTX_SUFF(pVM);
3387 bool fInterceptDB = false;
3388 bool fInterceptMovDRx = false;
3389 if (pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu))
3390 {
3391 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3392 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG)
3393 {
3394 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG;
3395 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
3396 AssertRCReturn(rc, rc);
3397 Assert(fInterceptDB == false);
3398 }
3399 else
3400 {
3401 pMixedCtx->eflags.u32 |= X86_EFL_TF;
3402 pVCpu->hm.s.fClearTrapFlag = true;
3403 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RFLAGS;
3404 fInterceptDB = true;
3405 }
3406 }
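    /* Of the two single-stepping strategies above, the monitor trap flag causes a VM-exit after each guest instruction
       without touching guest-visible state, whereas the RFLAGS.TF fallback must be undone later (fClearTrapFlag) so the
       guest never observes the borrowed trap flag. */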
3407
3408 if (fInterceptDB || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3409 {
3410 /*
3411 * Use the combined guest and host DRx values found in the hypervisor
3412 * register set because the debugger has breakpoints active or someone
3413 * is single stepping on the host side without a monitor trap flag.
3414 *
3415 * Note! DBGF expects a clean DR6 state before executing guest code.
3416 */
3417#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3418 if ( CPUMIsGuestInLongModeEx(pMixedCtx)
3419 && !CPUMIsHyperDebugStateActivePending(pVCpu))
3420 {
3421 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3422 Assert(CPUMIsHyperDebugStateActivePending(pVCpu));
3423 Assert(!CPUMIsGuestDebugStateActivePending(pVCpu));
3424 }
3425 else
3426#endif
3427 if (!CPUMIsHyperDebugStateActive(pVCpu))
3428 {
3429 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3430 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3431 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3432 }
3433
3434 /* Update DR7. (The other DRx values are handled by CPUM one way or the other.) */
3435 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)CPUMGetHyperDR7(pVCpu));
3436 AssertRCReturn(rc, rc);
3437
3438 pVCpu->hm.s.fUsingHyperDR7 = true;
3439 fInterceptDB = true;
3440 fInterceptMovDRx = true;
3441 }
3442 else
3443 {
3444 /*
3445 * If the guest has enabled debug registers, we need to load them prior to
3446 * executing guest code so they'll trigger at the right time.
3447 */
3448 if (pMixedCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) /** @todo Why GD? */
3449 {
3450#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3451 if ( CPUMIsGuestInLongModeEx(pMixedCtx)
3452 && !CPUMIsGuestDebugStateActivePending(pVCpu))
3453 {
3454 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3455 Assert(CPUMIsGuestDebugStateActivePending(pVCpu));
3456 Assert(!CPUMIsHyperDebugStateActivePending(pVCpu));
3457 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3458 }
3459 else
3460#endif
3461            if (!CPUMIsGuestDebugStateActive(pVCpu))
3462 {
3463 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3464 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3465 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3466 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3467 }
3468 }
3469 /*
3470 * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we
3471 * must intercept #DB in order to maintain a correct DR6 guest value.
3472 */
3473#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3474 else if ( ( CPUMIsGuestInLongModeEx(pMixedCtx)
3475 && !CPUMIsGuestDebugStateActivePending(pVCpu))
3476 || !CPUMIsGuestDebugStateActive(pVCpu))
3477#else
3478 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3479#endif
3480 {
3481 fInterceptMovDRx = true;
3482 fInterceptDB = true;
3483 }
3484
3485 /* Update guest DR7. */
3486 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, pMixedCtx->dr[7]);
3487 AssertRCReturn(rc, rc);
3488
3489 pVCpu->hm.s.fUsingHyperDR7 = false;
3490 }
3491
3492 /*
3493 * Update the exception bitmap regarding intercepting #DB generated by the guest.
3494 */
3495 if (fInterceptDB)
3496 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_DB);
3497 else if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3498 {
3499#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
3500 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB);
3501#endif
3502 }
3503 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
3504 AssertRCReturn(rc, rc);
3505
3506 /*
3507 * Update the processor-based VM-execution controls regarding intercepting MOV DRx instructions.
3508 */
3509 if (fInterceptMovDRx)
3510 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
3511 else
3512 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
3513 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
3514 AssertRCReturn(rc, rc);
3515
3516 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_DEBUG;
3517 return VINF_SUCCESS;
3518}
3519
3520
3521#ifdef VBOX_STRICT
3522/**
3523 * Strict function to validate segment registers.
3524 *
3525 * @remarks ASSUMES CR0 is up to date.
3526 */
3527static void hmR0VmxValidateSegmentRegs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
3528{
3529 /* Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". */
3530 /* NOTE: The reason we check for attribute value 0 and not just the unusable bit here is because hmR0VmxWriteSegmentReg()
3531 * only updates the VMCS' copy of the value with the unusable bit and doesn't change the guest-context value. */
3532 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
3533 && ( !CPUMIsGuestInRealModeEx(pCtx)
3534 && !CPUMIsGuestInV86ModeEx(pCtx)))
3535 {
3536 /* Protected mode checks */
3537 /* CS */
3538 Assert(pCtx->cs.Attr.n.u1Present);
3539 Assert(!(pCtx->cs.Attr.u & 0xf00));
3540 Assert(!(pCtx->cs.Attr.u & 0xfffe0000));
3541 Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff
3542 || !(pCtx->cs.Attr.n.u1Granularity));
3543 Assert( !(pCtx->cs.u32Limit & 0xfff00000)
3544 || (pCtx->cs.Attr.n.u1Granularity));
3545 /* CS cannot be loaded with NULL in protected mode. */
3546 Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS?!? */
3547 if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
3548 Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl);
3549 else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
3550 Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl);
3551 else
3552            AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u4Type));
3553 /* SS */
3554 Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
3555 Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL));
3556 if ( !(pCtx->cr0 & X86_CR0_PE)
3557 || pCtx->cs.Attr.n.u4Type == 3)
3558 {
3559 Assert(!pCtx->ss.Attr.n.u2Dpl);
3560 }
3561 if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
3562 {
3563 Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
3564 Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7);
3565 Assert(pCtx->ss.Attr.n.u1Present);
3566 Assert(!(pCtx->ss.Attr.u & 0xf00));
3567 Assert(!(pCtx->ss.Attr.u & 0xfffe0000));
3568 Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff
3569 || !(pCtx->ss.Attr.n.u1Granularity));
3570 Assert( !(pCtx->ss.u32Limit & 0xfff00000)
3571 || (pCtx->ss.Attr.n.u1Granularity));
3572 }
3573 /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). */
3574 if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
3575 {
3576 Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
3577 Assert(pCtx->ds.Attr.n.u1Present);
3578 Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL));
3579 Assert(!(pCtx->ds.Attr.u & 0xf00));
3580 Assert(!(pCtx->ds.Attr.u & 0xfffe0000));
3581 Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff
3582 || !(pCtx->ds.Attr.n.u1Granularity));
3583 Assert( !(pCtx->ds.u32Limit & 0xfff00000)
3584 || (pCtx->ds.Attr.n.u1Granularity));
3585 Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
3586 || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ));
3587 }
3588 if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
3589 {
3590 Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
3591 Assert(pCtx->es.Attr.n.u1Present);
3592 Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL));
3593 Assert(!(pCtx->es.Attr.u & 0xf00));
3594 Assert(!(pCtx->es.Attr.u & 0xfffe0000));
3595 Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff
3596 || !(pCtx->es.Attr.n.u1Granularity));
3597 Assert( !(pCtx->es.u32Limit & 0xfff00000)
3598 || (pCtx->es.Attr.n.u1Granularity));
3599 Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
3600 || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ));
3601 }
3602 if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
3603 {
3604 Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
3605 Assert(pCtx->fs.Attr.n.u1Present);
3606 Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL));
3607 Assert(!(pCtx->fs.Attr.u & 0xf00));
3608 Assert(!(pCtx->fs.Attr.u & 0xfffe0000));
3609 Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff
3610 || !(pCtx->fs.Attr.n.u1Granularity));
3611 Assert( !(pCtx->fs.u32Limit & 0xfff00000)
3612 || (pCtx->fs.Attr.n.u1Granularity));
3613 Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
3614 || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ));
3615 }
3616 if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
3617 {
3618 Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
3619 Assert(pCtx->gs.Attr.n.u1Present);
3620 Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL));
3621 Assert(!(pCtx->gs.Attr.u & 0xf00));
3622 Assert(!(pCtx->gs.Attr.u & 0xfffe0000));
3623 Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff
3624 || !(pCtx->gs.Attr.n.u1Granularity));
3625 Assert( !(pCtx->gs.u32Limit & 0xfff00000)
3626 || (pCtx->gs.Attr.n.u1Granularity));
3627 Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
3628 || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ));
3629 }
3630 /* 64-bit capable CPUs. */
3631# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3632 Assert(!(pCtx->cs.u64Base >> 32));
3633 Assert(!pCtx->ss.Attr.u || !(pCtx->ss.u64Base >> 32));
3634 Assert(!pCtx->ds.Attr.u || !(pCtx->ds.u64Base >> 32));
3635 Assert(!pCtx->es.Attr.u || !(pCtx->es.u64Base >> 32));
3636# endif
3637 }
3638 else if ( CPUMIsGuestInV86ModeEx(pCtx)
3639 || ( CPUMIsGuestInRealModeEx(pCtx)
3640 && !pVM->hm.s.vmx.fUnrestrictedGuest))
3641 {
3642 /* Real and v86 mode checks. */
3643        /* hmR0VmxWriteSegmentReg() writes the modified value into the VMCS. Here we want to check what we're actually feeding to VT-x. */
3644 uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
3645 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3646 {
3647 u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3; u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3;
3648 }
3649 else
3650 {
3651 u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u;
3652 u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
3653 }
3654
3655 /* CS */
3656 AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel));
3657 Assert(pCtx->cs.u32Limit == 0xffff);
3658 Assert(u32CSAttr == 0xf3);
3659 /* SS */
3660 Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4);
3661 Assert(pCtx->ss.u32Limit == 0xffff);
3662 Assert(u32SSAttr == 0xf3);
3663 /* DS */
3664 Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4);
3665 Assert(pCtx->ds.u32Limit == 0xffff);
3666 Assert(u32DSAttr == 0xf3);
3667 /* ES */
3668 Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4);
3669 Assert(pCtx->es.u32Limit == 0xffff);
3670 Assert(u32ESAttr == 0xf3);
3671 /* FS */
3672 Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4);
3673 Assert(pCtx->fs.u32Limit == 0xffff);
3674 Assert(u32FSAttr == 0xf3);
3675 /* GS */
3676 Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4);
3677 Assert(pCtx->gs.u32Limit == 0xffff);
3678 Assert(u32GSAttr == 0xf3);
3679 /* 64-bit capable CPUs. */
3680# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3681 Assert(!(pCtx->cs.u64Base >> 32));
3682 Assert(!u32SSAttr || !(pCtx->ss.u64Base >> 32));
3683 Assert(!u32DSAttr || !(pCtx->ds.u64Base >> 32));
3684 Assert(!u32ESAttr || !(pCtx->es.u64Base >> 32));
3685# endif
3686 }
3687}
3688#endif /* VBOX_STRICT */
3689
3690
3691/**
3692 * Writes a guest segment register into the guest-state area in the VMCS.
3693 *
3694 * @returns VBox status code.
3695 * @param pVCpu Pointer to the VMCPU.
3696 * @param idxSel Index of the selector in the VMCS.
3697 * @param idxLimit Index of the segment limit in the VMCS.
3698 * @param idxBase Index of the segment base in the VMCS.
3699 * @param idxAccess Index of the access rights of the segment in the VMCS.
3700 * @param pSelReg Pointer to the segment selector.
3701 * @param pCtx Pointer to the guest-CPU context.
3702 *
3703 * @remarks No-long-jump zone!!!
3704 */
3705static int hmR0VmxWriteSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase,
3706 uint32_t idxAccess, PCPUMSELREG pSelReg, PCPUMCTX pCtx)
3707{
3708 int rc = VMXWriteVmcs32(idxSel, pSelReg->Sel); /* 16-bit guest selector field. */
3709 AssertRCReturn(rc, rc);
3710 rc = VMXWriteVmcs32(idxLimit, pSelReg->u32Limit); /* 32-bit guest segment limit field. */
3711 AssertRCReturn(rc, rc);
3712 rc = VMXWriteVmcsGstN(idxBase, pSelReg->u64Base); /* Natural width guest segment base field.*/
3713 AssertRCReturn(rc, rc);
3714
3715 uint32_t u32Access = pSelReg->Attr.u;
3716 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3717 {
3718        /* VT-x requires our real-on-v86 mode hack to override the segment access-right bits. */
3719 u32Access = 0xf3;
3720 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
3721 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
3722 }
3723 else
3724 {
3725 /*
3726 * The way to differentiate between whether this is really a null selector or was just a selector loaded with 0 in
3727 * real-mode is using the segment attributes. A selector loaded in real-mode with the value 0 is valid and usable in
3728 * protected-mode and we should -not- mark it as an unusable segment. Both the recompiler & VT-x ensures NULL selectors
3729 * loaded in protected-mode have their attribute as 0.
3730 */
3731 if (!u32Access)
3732 u32Access = X86DESCATTR_UNUSABLE;
3733 }
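    /* So, for example, a selector that held 0 while the guest ran real-mode code keeps the 0xf3 attributes from the
       hack above, whereas a genuinely null selector loaded in protected mode arrives here with attributes 0 and is
       flagged unusable for VT-x. */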
3734
3735 /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */
3736 AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED),
3737              ("Access bit not set for usable segment. idx=%#x sel=%#x attr %#x\n", idxBase, pSelReg->Sel, pSelReg->Attr.u));
3738
3739 rc = VMXWriteVmcs32(idxAccess, u32Access); /* 32-bit guest segment access-rights field. */
3740 AssertRCReturn(rc, rc);
3741 return rc;
3742}
3743
3744
3745/**
3746 * Loads the guest segment registers, GDTR, IDTR, LDTR, (TR, FS and GS bases)
3747 * into the guest-state area in the VMCS.
3748 *
3749 * @returns VBox status code.
3751 * @param   pVCpu       Pointer to the VMCPU.
3752 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3753 * out-of-sync. Make sure to update the required fields
3754 * before using them.
3755 *
3756 * @remarks ASSUMES pMixedCtx->cr0 is up to date (strict builds validation).
3757 * @remarks No-long-jump zone!!!
3758 */
3759static int hmR0VmxLoadGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3760{
3761 int rc = VERR_INTERNAL_ERROR_5;
3762 PVM pVM = pVCpu->CTX_SUFF(pVM);
3763
3764 /*
3765 * Guest Segment registers: CS, SS, DS, ES, FS, GS.
3766 */
3767 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SEGMENT_REGS)
3768 {
3769 /* Save the segment attributes for real-on-v86 mode hack, so we can restore them on VM-exit. */
3770 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3771 {
3772 pVCpu->hm.s.vmx.RealMode.AttrCS.u = pMixedCtx->cs.Attr.u;
3773 pVCpu->hm.s.vmx.RealMode.AttrSS.u = pMixedCtx->ss.Attr.u;
3774 pVCpu->hm.s.vmx.RealMode.AttrDS.u = pMixedCtx->ds.Attr.u;
3775 pVCpu->hm.s.vmx.RealMode.AttrES.u = pMixedCtx->es.Attr.u;
3776 pVCpu->hm.s.vmx.RealMode.AttrFS.u = pMixedCtx->fs.Attr.u;
3777 pVCpu->hm.s.vmx.RealMode.AttrGS.u = pMixedCtx->gs.Attr.u;
3778 }
3779
3780#ifdef VBOX_WITH_REM
3781 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
3782 {
3783 Assert(pVM->hm.s.vmx.pRealModeTSS);
3784 AssertCompile(PGMMODE_REAL < PGMMODE_PROTECTED);
3785 if ( pVCpu->hm.s.vmx.fWasInRealMode
3786 && PGMGetGuestMode(pVCpu) >= PGMMODE_PROTECTED)
3787 {
3788 /* Signal that the recompiler must flush its code-cache as the guest -may- rewrite code it will later execute
3789 in real-mode (e.g. OpenBSD 4.0) */
3790 REMFlushTBs(pVM);
3791 Log4(("Load: Switch to protected mode detected!\n"));
3792 pVCpu->hm.s.vmx.fWasInRealMode = false;
3793 }
3794 }
3795#endif
3796 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_CS, VMX_VMCS32_GUEST_CS_LIMIT, VMX_VMCS_GUEST_CS_BASE,
3797 VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS, &pMixedCtx->cs, pMixedCtx);
3798 AssertRCReturn(rc, rc);
3799 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_SS, VMX_VMCS32_GUEST_SS_LIMIT, VMX_VMCS_GUEST_SS_BASE,
3800 VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS, &pMixedCtx->ss, pMixedCtx);
3801 AssertRCReturn(rc, rc);
3802 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_DS, VMX_VMCS32_GUEST_DS_LIMIT, VMX_VMCS_GUEST_DS_BASE,
3803 VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS, &pMixedCtx->ds, pMixedCtx);
3804 AssertRCReturn(rc, rc);
3805 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_ES, VMX_VMCS32_GUEST_ES_LIMIT, VMX_VMCS_GUEST_ES_BASE,
3806 VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS, &pMixedCtx->es, pMixedCtx);
3807 AssertRCReturn(rc, rc);
3808 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_FS, VMX_VMCS32_GUEST_FS_LIMIT, VMX_VMCS_GUEST_FS_BASE,
3809 VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS, &pMixedCtx->fs, pMixedCtx);
3810 AssertRCReturn(rc, rc);
3811 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_GS, VMX_VMCS32_GUEST_GS_LIMIT, VMX_VMCS_GUEST_GS_BASE,
3812 VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS, &pMixedCtx->gs, pMixedCtx);
3813 AssertRCReturn(rc, rc);
3814
3815 Log4(("Load: CS=%#RX16 Base=%#RX64 Limit=%#RX32 Attr=%#RX32\n", pMixedCtx->cs.Sel, pMixedCtx->cs.u64Base,
3816 pMixedCtx->cs.u32Limit, pMixedCtx->cs.Attr.u));
3817#ifdef VBOX_STRICT
3818 hmR0VmxValidateSegmentRegs(pVM, pVCpu, pMixedCtx);
3819#endif
3820 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_SEGMENT_REGS;
3821 }
3822
3823 /*
3824 * Guest TR.
3825 */
3826 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_TR)
3827 {
3828 /*
3829 * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is achieved
3830 * using the interrupt redirection bitmap (all bits cleared to let the guest handle INT-n's) in the TSS.
3831 * See hmR3InitFinalizeR0() to see how pRealModeTSS is setup.
3832 */
3833 uint16_t u16Sel = 0;
3834 uint32_t u32Limit = 0;
3835 uint64_t u64Base = 0;
3836 uint32_t u32AccessRights = 0;
3837
3838 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3839 {
3840 u16Sel = pMixedCtx->tr.Sel;
3841 u32Limit = pMixedCtx->tr.u32Limit;
3842 u64Base = pMixedCtx->tr.u64Base;
3843 u32AccessRights = pMixedCtx->tr.Attr.u;
3844 }
3845 else
3846 {
3847 Assert(pVM->hm.s.vmx.pRealModeTSS);
3848 Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMR3CanExecuteGuest() -XXX- what about inner loop changes? */
3849
3850 /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */
3851 RTGCPHYS GCPhys;
3852 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
3853 AssertRCReturn(rc, rc);
3854
3855 X86DESCATTR DescAttr;
3856 DescAttr.u = 0;
3857 DescAttr.n.u1Present = 1;
3858 DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
3859
3860 u16Sel = 0;
3861 u32Limit = HM_VTX_TSS_SIZE;
3862 u64Base = GCPhys; /* in real-mode phys = virt. */
3863 u32AccessRights = DescAttr.u;
3864 }
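        /* For the real-on-v86 case the access rights work out to 0x8b: type 11 (busy 32-bit TSS) in bits 3:0 plus
           the present bit (bit 7), which is exactly what the validation below insists on. */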
3865
3866 /* Validate. */
3867 Assert(!(u16Sel & RT_BIT(2)));
3868 AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY
3869 || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights));
3870 AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights));
3871 Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/
3872 Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/
3873 Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. */
3874 Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */
3875 Assert( (u32Limit & 0xfff) == 0xfff
3876 || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */
3877 Assert( !(pMixedCtx->tr.u32Limit & 0xfff00000)
3878 || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */
3879
3880 rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_TR, u16Sel); AssertRCReturn(rc, rc);
3881 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit); AssertRCReturn(rc, rc);
3882 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_TR_BASE, u64Base); AssertRCReturn(rc, rc);
3883 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights); AssertRCReturn(rc, rc);
3884
3885 Log4(("Load: VMX_VMCS_GUEST_TR_BASE=%#RX64\n", u64Base));
3886 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_TR;
3887 }
3888
3889 /*
3890 * Guest GDTR.
3891 */
3892 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_GDTR)
3893 {
3894 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pMixedCtx->gdtr.cbGdt); AssertRCReturn(rc, rc);
3895 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, pMixedCtx->gdtr.pGdt); AssertRCReturn(rc, rc);
3896
3897 Assert(!(pMixedCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */
3898 Log4(("Load: VMX_VMCS_GUEST_GDTR_BASE=%#RX64\n", pMixedCtx->gdtr.pGdt));
3899 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_GDTR;
3900 }
3901
3902 /*
3903 * Guest LDTR.
3904 */
3905 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_LDTR)
3906 {
3907 /* The unusable bit is specific to VT-x, if it's a null selector mark it as an unusable segment. */
3908 uint32_t u32Access = 0;
3909 if (!pMixedCtx->ldtr.Attr.u)
3910 u32Access = X86DESCATTR_UNUSABLE;
3911 else
3912 u32Access = pMixedCtx->ldtr.Attr.u;
3913
3914 rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_LDTR, pMixedCtx->ldtr.Sel); AssertRCReturn(rc, rc);
3915 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pMixedCtx->ldtr.u32Limit); AssertRCReturn(rc, rc);
3916 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_LDTR_BASE, pMixedCtx->ldtr.u64Base); AssertRCReturn(rc, rc);
3917 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access); AssertRCReturn(rc, rc);
3918
3919 /* Validate. */
3920 if (!(u32Access & X86DESCATTR_UNUSABLE))
3921 {
3922 Assert(!(pMixedCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */
3923 Assert(pMixedCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */
3924 Assert(!pMixedCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */
3925 Assert(pMixedCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */
3926 Assert(!pMixedCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */
3927 Assert(!(pMixedCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */
3928 Assert( (pMixedCtx->ldtr.u32Limit & 0xfff) == 0xfff
3929 || !pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */
3930 Assert( !(pMixedCtx->ldtr.u32Limit & 0xfff00000)
3931 || pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */
3932 }
3933
3934 Log4(("Load: VMX_VMCS_GUEST_LDTR_BASE=%#RX64\n", pMixedCtx->ldtr.u64Base));
3935 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_LDTR;
3936 }
3937
3938 /*
3939 * Guest IDTR.
3940 */
3941 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_IDTR)
3942 {
3943 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pMixedCtx->idtr.cbIdt); AssertRCReturn(rc, rc);
3944 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, pMixedCtx->idtr.pIdt); AssertRCReturn(rc, rc);
3945
3946 Assert(!(pMixedCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */
3947 Log4(("Load: VMX_VMCS_GUEST_IDTR_BASE=%#RX64\n", pMixedCtx->idtr.pIdt));
3948 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_IDTR;
3949 }
3950
3951 return VINF_SUCCESS;
3952}
3953
3954
3955/**
3956 * Loads certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
3957 * areas. These MSRs will automatically be loaded to the host CPU on every
3958 * successful VM entry and stored from the host CPU on every successful VM exit.
3959 * Also loads the sysenter MSRs into the guest-state area in the VMCS.
3960 *
3961 * @returns VBox status code.
3962 * @param pVCpu Pointer to the VMCPU.
3963 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3964 * out-of-sync. Make sure to update the required fields
3965 * before using them.
3966 *
3967 * @remarks No-long-jump zone!!!
3968 */
3969static int hmR0VmxLoadGuestMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3970{
3971 AssertPtr(pVCpu);
3972 AssertPtr(pVCpu->hm.s.vmx.pvGuestMsr);
3973
3974 /*
3975 * MSRs covered by Auto-load/store: EFER, LSTAR, STAR, SF_MASK, TSC_AUX (RDTSCP).
3976 */
3977 int rc = VINF_SUCCESS;
3978 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_VMX_GUEST_AUTO_MSRS)
3979 {
3980#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3981 PVM pVM = pVCpu->CTX_SUFF(pVM);
3982 PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
3983 uint32_t cGuestMsrs = 0;
3984
3985 /* See Intel spec. 4.1.4 "Enumeration of Paging Features by CPUID". */
3986 /** @todo r=ramshankar: Optimize this further to do lazy restoration and only
3987 * when the guest really is in 64-bit mode. */
3988 bool fSupportsLongMode = CPUMGetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LONG_MODE);
3989 if (fSupportsLongMode)
3990 {
3991 pGuestMsr->u32Msr = MSR_K8_LSTAR;
3992 pGuestMsr->u32Reserved = 0;
3993 pGuestMsr->u64Value = pMixedCtx->msrLSTAR; /* 64 bits mode syscall rip */
3994 pGuestMsr++; cGuestMsrs++;
3995 pGuestMsr->u32Msr = MSR_K6_STAR;
3996 pGuestMsr->u32Reserved = 0;
3997 pGuestMsr->u64Value = pMixedCtx->msrSTAR; /* legacy syscall eip, cs & ss */
3998 pGuestMsr++; cGuestMsrs++;
3999 pGuestMsr->u32Msr = MSR_K8_SF_MASK;
4000 pGuestMsr->u32Reserved = 0;
4001 pGuestMsr->u64Value = pMixedCtx->msrSFMASK; /* syscall flag mask */
4002 pGuestMsr++; cGuestMsrs++;
4003 pGuestMsr->u32Msr = MSR_K8_KERNEL_GS_BASE;
4004 pGuestMsr->u32Reserved = 0;
4005 pGuestMsr->u64Value = pMixedCtx->msrKERNELGSBASE; /* swapgs exchange value */
4006 pGuestMsr++; cGuestMsrs++;
4007 }
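        /* Each VMXAUTOMSR entry above follows the layout the CPU expects for the MSR-load/store areas: a 32-bit MSR
           index, 32 reserved bits and the 64-bit MSR value, i.e. 16 bytes per MSR. */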
4008
4009 /*
4010 * RDTSCP requires the TSC_AUX MSR. Host and guest share the physical MSR. So we have to
4011 * load the guest's copy if the guest can execute RDTSCP without causing VM-exits.
4012 */
4013 if ( CPUMGetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_RDTSCP)
4014 && (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP))
4015 {
4016 pGuestMsr->u32Msr = MSR_K8_TSC_AUX;
4017 pGuestMsr->u32Reserved = 0;
4018 rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pGuestMsr->u64Value);
4019 AssertRCReturn(rc, rc);
4020 pGuestMsr++; cGuestMsrs++;
4021 }
4022
4023 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
4024 if (cGuestMsrs > MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc))
4025 {
4026 LogRel(("CPU autoload/store MSR count in VMCS exceeded cGuestMsrs=%u.\n", cGuestMsrs));
4027 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
4028 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
4029 }
4030
4031 /* Update the VCPU's copy of the guest MSR count. */
4032 pVCpu->hm.s.vmx.cGuestMsrs = cGuestMsrs;
4033 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cGuestMsrs); AssertRCReturn(rc, rc);
4034 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cGuestMsrs); AssertRCReturn(rc, rc);
4035#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
4036
4037 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_VMX_GUEST_AUTO_MSRS;
4038 }
4039
4040 /*
4041 * Guest Sysenter MSRs.
4042 * These flags are only set when MSR-bitmaps are not supported by the CPU and we cause
4043 * VM-exits on WRMSRs for these MSRs.
4044 */
4045 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SYSENTER_CS_MSR)
4046 {
4047 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pMixedCtx->SysEnter.cs); AssertRCReturn(rc, rc);
4048 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_SYSENTER_CS_MSR;
4049 }
4050 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SYSENTER_EIP_MSR)
4051 {
4052 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, pMixedCtx->SysEnter.eip); AssertRCReturn(rc, rc);
4053 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR;
4054 }
4055 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SYSENTER_ESP_MSR)
4056 {
4057 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, pMixedCtx->SysEnter.esp); AssertRCReturn(rc, rc);
4058 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR;
4059 }
4060
4061 return rc;
4062}
4063
4064
4065/**
4066 * Loads the guest activity state into the guest-state area in the VMCS.
4067 *
4068 * @returns VBox status code.
4069 * @param pVCpu Pointer to the VMCPU.
4070 * @param   pCtx        Pointer to the guest-CPU context. The data may be
4071 * out-of-sync. Make sure to update the required fields
4072 * before using them.
4073 *
4074 * @remarks No-long-jump zone!!!
4075 */
4076static int hmR0VmxLoadGuestActivityState(PVMCPU pVCpu, PCPUMCTX pCtx)
4077{
4078 /** @todo See if we can make use of other states, e.g.
4079 * VMX_VMCS_GUEST_ACTIVITY_SHUTDOWN or HLT. */
4080 int rc = VINF_SUCCESS;
4081 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_VMX_GUEST_ACTIVITY_STATE)
4082 {
4083 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_VMCS_GUEST_ACTIVITY_ACTIVE);
4084 AssertRCReturn(rc, rc);
4085 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_VMX_GUEST_ACTIVITY_STATE;
4086 }
4087 return rc;
4088}
4089
4090
4091/**
4092 * Sets up the appropriate function to run guest code.
4093 *
4094 * @returns VBox status code.
4095 * @param pVCpu Pointer to the VMCPU.
4096 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4097 * out-of-sync. Make sure to update the required fields
4098 * before using them.
4099 *
4100 * @remarks No-long-jump zone!!!
4101 */
4102static int hmR0VmxSetupVMRunHandler(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4103{
4104 if (CPUMIsGuestInLongModeEx(pMixedCtx))
4105 {
4106#ifndef VBOX_ENABLE_64_BITS_GUESTS
4107 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
4108#endif
4109 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */
4110#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4111 /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */
4112 if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64)
4113 {
4114 pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
4115 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS;
4116 }
4117#else
4118 /* 64-bit host or hybrid host. */
4119 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64;
4120#endif
4121 }
4122 else
4123 {
4124 /* Guest is not in long mode, use the 32-bit handler. */
4125#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4126 if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32)
4127 {
4128 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
4129 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS;
4130 }
4131#else
4132 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
4133#endif
4134 }
4135 Assert(pVCpu->hm.s.vmx.pfnStartVM);
4136 return VINF_SUCCESS;
4137}
4138
4139
4140/**
4141 * Wrapper for running the guest code in VT-x.
4142 *
4143 * @returns VBox strict status code.
4144 * @param pVM Pointer to the VM.
4145 * @param pVCpu Pointer to the VMCPU.
4146 * @param pCtx Pointer to the guest-CPU context.
4147 *
4148 * @remarks No-long-jump zone!!!
4149 */
4150DECLINLINE(int) hmR0VmxRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4151{
4152 /*
4153 * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses floating-point operations
4154 * using SSE instructions. Some XMM registers (XMM6-XMM15) are callee-saved and thus the need for this XMM wrapper.
4155 * Refer MSDN docs. "Configuring Programs for 64-bit / x64 Software Conventions / Register Usage" for details.
4156 */
4157 const bool fResumeVM = RT_BOOL(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED);
4158 /** @todo Add stats for resume vs launch. */
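    /* fResumeVM selects VMRESUME over VMLAUNCH: VMRESUME is only valid for a VMCS that has already been launched on
       this CPU and is still current, while VMLAUNCH is required (and only valid) for the first entry on a clear VMCS. */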
4159#ifdef VBOX_WITH_KERNEL_USING_XMM
4160 return HMR0VMXStartVMWrapXMM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM);
4161#else
4162 return pVCpu->hm.s.vmx.pfnStartVM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu);
4163#endif
4164}
4165
4166
4167/**
4168 * Reports world-switch error and dumps some useful debug info.
4169 *
4170 * @param pVM Pointer to the VM.
4171 * @param pVCpu Pointer to the VMCPU.
4172 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4173 * @param pCtx Pointer to the guest-CPU context.
4174 * @param pVmxTransient Pointer to the VMX transient structure (only
4175 * exitReason updated).
4176 */
4177static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rcVMRun, PCPUMCTX pCtx, PVMXTRANSIENT pVmxTransient)
4178{
4179 Assert(pVM);
4180 Assert(pVCpu);
4181 Assert(pCtx);
4182 Assert(pVmxTransient);
4183 HMVMX_ASSERT_PREEMPT_SAFE();
4184
4185 Log4(("VM-entry failure: %Rrc\n", rcVMRun));
4186 switch (rcVMRun)
4187 {
4188 case VERR_VMX_INVALID_VMXON_PTR:
4189 AssertFailed();
4190 break;
4191 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4192 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4193 {
4194 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4195 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4196 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
4197 AssertRC(rc);
4198
4199 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
4200 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4201 Cannot do it here as we may have been long preempted. */
4202
4203#ifdef VBOX_STRICT
4204 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4205 pVmxTransient->uExitReason));
4206 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQualification));
4207 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4208 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4209 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4210 else
4211 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4212 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4213 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4214
4215 /* VMX control bits. */
4216 uint32_t u32Val;
4217 uint64_t u64Val;
4218 HMVMXHCUINTREG uHCReg;
4219 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val); AssertRC(rc);
4220 Log4(("VMX_VMCS32_CTRL_PIN_EXEC %#RX32\n", u32Val));
4221 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val); AssertRC(rc);
4222 Log4(("VMX_VMCS32_CTRL_PROC_EXEC %#RX32\n", u32Val));
4223 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val); AssertRC(rc);
4224 Log4(("VMX_VMCS32_CTRL_PROC_EXEC2 %#RX32\n", u32Val));
4225 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRC(rc);
4226 Log4(("VMX_VMCS32_CTRL_ENTRY %#RX32\n", u32Val));
4227 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val); AssertRC(rc);
4228 Log4(("VMX_VMCS32_CTRL_EXIT %#RX32\n", u32Val));
4229 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, &u32Val); AssertRC(rc);
4230 Log4(("VMX_VMCS32_CTRL_CR3_TARGET_COUNT %#RX32\n", u32Val));
4231 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32Val); AssertRC(rc);
4232 Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", u32Val));
4233 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &u32Val); AssertRC(rc);
4234 Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", u32Val));
4235 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &u32Val); AssertRC(rc);
4236 Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %u\n", u32Val));
4237 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, &u32Val); AssertRC(rc);
4238 Log4(("VMX_VMCS32_CTRL_TPR_THRESHOLD %u\n", u32Val));
4239 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &u32Val); AssertRC(rc);
4240 Log4(("VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT %u (guest MSRs)\n", u32Val));
4241 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &u32Val); AssertRC(rc);
4242 Log4(("VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT %u (host MSRs)\n", u32Val));
4243 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &u32Val); AssertRC(rc);
4244 Log4(("VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT %u (guest MSRs)\n", u32Val));
4245 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val); AssertRC(rc);
4246 Log4(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP %#RX32\n", u32Val));
4247 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, &u32Val); AssertRC(rc);
4248 Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK %#RX32\n", u32Val));
4249 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, &u32Val); AssertRC(rc);
4250 Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH %#RX32\n", u32Val));
4251 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc);
4252 Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg));
4253 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc);
4254            Log4(("VMX_VMCS_CTRL_CR0_READ_SHADOW         %#RHr\n", uHCReg));
4255 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc);
4256 Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg));
4257 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc);
4258 Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
4259 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
4260 Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
4261
4262 /* Guest bits. */
4263 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); AssertRC(rc);
4264 Log4(("Old Guest Rip %#RX64 New %#RX64\n", pCtx->rip, u64Val));
4265 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); AssertRC(rc);
4266 Log4(("Old Guest Rsp %#RX64 New %#RX64\n", pCtx->rsp, u64Val));
4267 rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val); AssertRC(rc);
4268 Log4(("Old Guest Rflags %#RX32 New %#RX32\n", pCtx->eflags.u32, u32Val));
4269 rc = VMXReadVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, &u32Val); AssertRC(rc);
4270 Log4(("VMX_VMCS16_GUEST_FIELD_VPID %u\n", u32Val));
4271
4272 /* Host bits. */
4273 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR0, &uHCReg); AssertRC(rc);
4274 Log4(("Host CR0 %#RHr\n", uHCReg));
4275 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR3, &uHCReg); AssertRC(rc);
4276 Log4(("Host CR3 %#RHr\n", uHCReg));
4277 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR4, &uHCReg); AssertRC(rc);
4278 Log4(("Host CR4 %#RHr\n", uHCReg));
4279
4280 RTGDTR HostGdtr;
4281 PCX86DESCHC pDesc;
4282 ASMGetGDTR(&HostGdtr);
4283 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_CS, &u32Val); AssertRC(rc);
4284 Log4(("Host CS %#08x\n", u32Val));
4285 if (u32Val < HostGdtr.cbGdt)
4286 {
4287 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4288 HMR0DumpDescriptor(pDesc, u32Val, "CS: ");
4289 }
4290
4291 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_DS, &u32Val); AssertRC(rc);
4292 Log4(("Host DS %#08x\n", u32Val));
4293 if (u32Val < HostGdtr.cbGdt)
4294 {
4295 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4296 HMR0DumpDescriptor(pDesc, u32Val, "DS: ");
4297 }
4298
4299 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_ES, &u32Val); AssertRC(rc);
4300 Log4(("Host ES %#08x\n", u32Val));
4301 if (u32Val < HostGdtr.cbGdt)
4302 {
4303 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4304 HMR0DumpDescriptor(pDesc, u32Val, "ES: ");
4305 }
4306
4307 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_FS, &u32Val); AssertRC(rc);
4308 Log4(("Host FS %#08x\n", u32Val));
4309 if (u32Val < HostGdtr.cbGdt)
4310 {
4311 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4312 HMR0DumpDescriptor(pDesc, u32Val, "FS: ");
4313 }
4314
4315 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_GS, &u32Val); AssertRC(rc);
4316 Log4(("Host GS %#08x\n", u32Val));
4317 if (u32Val < HostGdtr.cbGdt)
4318 {
4319 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4320 HMR0DumpDescriptor(pDesc, u32Val, "GS: ");
4321 }
4322
4323 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_SS, &u32Val); AssertRC(rc);
4324 Log4(("Host SS %#08x\n", u32Val));
4325 if (u32Val < HostGdtr.cbGdt)
4326 {
4327 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4328 HMR0DumpDescriptor(pDesc, u32Val, "SS: ");
4329 }
4330
4331 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_TR, &u32Val); AssertRC(rc);
4332 Log4(("Host TR %#08x\n", u32Val));
4333 if (u32Val < HostGdtr.cbGdt)
4334 {
4335 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4336 HMR0DumpDescriptor(pDesc, u32Val, "TR: ");
4337 }
4338
4339 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_TR_BASE, &uHCReg); AssertRC(rc);
4340 Log4(("Host TR Base %#RHv\n", uHCReg));
4341 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, &uHCReg); AssertRC(rc);
4342 Log4(("Host GDTR Base %#RHv\n", uHCReg));
4343 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, &uHCReg); AssertRC(rc);
4344 Log4(("Host IDTR Base %#RHv\n", uHCReg));
4345 rc = VMXReadVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, &u32Val); AssertRC(rc);
4346 Log4(("Host SYSENTER CS %#08x\n", u32Val));
4347 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_EIP, &uHCReg); AssertRC(rc);
4348 Log4(("Host SYSENTER EIP %#RHv\n", uHCReg));
4349 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_ESP, &uHCReg); AssertRC(rc);
4350 Log4(("Host SYSENTER ESP %#RHv\n", uHCReg));
4351 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RSP, &uHCReg); AssertRC(rc);
4352 Log4(("Host RSP %#RHv\n", uHCReg));
4353 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RIP, &uHCReg); AssertRC(rc);
4354 Log4(("Host RIP %#RHv\n", uHCReg));
4355# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4356 if (HMVMX_IS_64BIT_HOST_MODE())
4357 {
4358 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4359 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4360 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4361 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4362 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4363 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4364 }
4365# endif
4366#endif /* VBOX_STRICT */
4367 break;
4368 }
4369
4370 default:
4371 /* Impossible */
4372 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4373 break;
4374 }
4375 NOREF(pVM);
4376}
4377
4378
4379#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4380#ifndef VMX_USE_CACHED_VMCS_ACCESSES
4381# error "VMX_USE_CACHED_VMCS_ACCESSES not defined when it should be!"
4382#endif
4383#ifdef VBOX_STRICT
4384static bool hmR0VmxIsValidWriteField(uint32_t idxField)
4385{
4386 switch (idxField)
4387 {
4388 case VMX_VMCS_GUEST_RIP:
4389 case VMX_VMCS_GUEST_RSP:
4390 case VMX_VMCS_GUEST_SYSENTER_EIP:
4391 case VMX_VMCS_GUEST_SYSENTER_ESP:
4392 case VMX_VMCS_GUEST_GDTR_BASE:
4393 case VMX_VMCS_GUEST_IDTR_BASE:
4394 case VMX_VMCS_GUEST_CS_BASE:
4395 case VMX_VMCS_GUEST_DS_BASE:
4396 case VMX_VMCS_GUEST_ES_BASE:
4397 case VMX_VMCS_GUEST_FS_BASE:
4398 case VMX_VMCS_GUEST_GS_BASE:
4399 case VMX_VMCS_GUEST_SS_BASE:
4400 case VMX_VMCS_GUEST_LDTR_BASE:
4401 case VMX_VMCS_GUEST_TR_BASE:
4402 case VMX_VMCS_GUEST_CR3:
4403 return true;
4404 }
4405 return false;
4406}
4407
4408static bool hmR0VmxIsValidReadField(uint32_t idxField)
4409{
4410 switch (idxField)
4411 {
4412 /* Read-only fields. */
4413 case VMX_VMCS_RO_EXIT_QUALIFICATION:
4414 return true;
4415 }
4416 /* Remaining readable fields should also be writable. */
4417 return hmR0VmxIsValidWriteField(idxField);
4418}
4419#endif /* VBOX_STRICT */
4420
4421
4422/**
4423 * Executes the specified handler in 64-bit mode.
4424 *
4425 * @returns VBox status code.
4426 * @param pVM Pointer to the VM.
4427 * @param pVCpu Pointer to the VMCPU.
4428 * @param pCtx Pointer to the guest CPU context.
4429 * @param enmOp The operation to perform.
4430 * @param   cbParam     Number of 32-bit parameters in paParam.
4431 * @param paParam Array of 32-bit parameters.
4432 */
4433VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp, uint32_t cbParam,
4434 uint32_t *paParam)
4435{
4436 int rc, rc2;
4437 PHMGLOBALCPUINFO pCpu;
4438 RTHCPHYS HCPhysCpuPage;
4439 RTCCUINTREG uOldEflags;
4440
4441 AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
4442 Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END);
4443 Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField));
4444 Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField));
4445
4446#ifdef VBOX_STRICT
4447 for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries; i++)
4448 Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i]));
4449
4450    for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries; i++)
4451 Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i]));
4452#endif
4453
4454 /* Disable interrupts. */
4455 uOldEflags = ASMIntDisableFlags();
4456
4457#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
4458 RTCPUID idHostCpu = RTMpCpuId();
4459 CPUMR0SetLApic(pVCpu, idHostCpu);
4460#endif
4461
4462 pCpu = HMR0GetCurrentCpu();
4463 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
4464
4465    /* Clear the VMCS. This marks it inactive, clears implementation-specific data and writes VMCS data back to memory. */
4466 VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
4467
4468 /* Leave VMX Root Mode. */
4469 VMXDisable();
4470
4471 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
4472
4473 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
4474 CPUMSetHyperEIP(pVCpu, enmOp);
4475 for (int i = (int)cbParam - 1; i >= 0; i--)
4476 CPUMPushHyper(pVCpu, paParam[i]);
4477
4478 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
4479
4480 /* Call the switcher. */
4481 rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
4482 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
4483
4484 /** @todo replace with hmR0VmxEnterRootMode() and hmR0VmxLeaveRootMode(). */
4485 /* Make sure the VMX instructions don't cause #UD faults. */
4486 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
4487
4488 /* Re-enter VMX Root Mode */
4489 rc2 = VMXEnable(HCPhysCpuPage);
4490 if (RT_FAILURE(rc2))
4491 {
4492 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
4493 ASMSetFlags(uOldEflags);
4494 return rc2;
4495 }
4496
4497 rc2 = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
4498 AssertRC(rc2);
4499 Assert(!(ASMGetFlags() & X86_EFL_IF));
4500 ASMSetFlags(uOldEflags);
4501 return rc;
4502}
4503
4504
4505/**
4506 * Prepares for and executes VMLAUNCH/VMRESUME (64-bit guests) on 32-bit hosts
4507 * supporting 64-bit guests.
4508 *
4509 * @returns VBox status code.
4510 * @param fResume Whether to VMLAUNCH or VMRESUME.
4511 * @param pCtx Pointer to the guest-CPU context.
4512 * @param pCache Pointer to the VMCS cache.
4513 * @param pVM Pointer to the VM.
4514 * @param pVCpu Pointer to the VMCPU.
4515 */
4516DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
4517{
4518 uint32_t aParam[6];
4519 PHMGLOBALCPUINFO pCpu = NULL;
4520 RTHCPHYS HCPhysCpuPage = 0;
4521 int rc = VERR_INTERNAL_ERROR_5;
4522
4523 pCpu = HMR0GetCurrentCpu();
4524 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
4525
4526#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4527 pCache->uPos = 1;
4528 pCache->interPD = PGMGetInterPaeCR3(pVM);
4529 pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0;
4530#endif
4531
4532#ifdef VBOX_STRICT
4533 pCache->TestIn.HCPhysCpuPage = 0;
4534 pCache->TestIn.HCPhysVmcs = 0;
4535 pCache->TestIn.pCache = 0;
4536 pCache->TestOut.HCPhysVmcs = 0;
4537 pCache->TestOut.pCache = 0;
4538 pCache->TestOut.pCtx = 0;
4539 pCache->TestOut.eflags = 0;
4540#endif
4541
4542 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
4543 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
4544 aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */
4545 aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs >> 32); /* Param 2: VMCS physical address - Hi. */
4546 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache);
4547 aParam[5] = 0;
4548
4549#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4550 pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8;
4551 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1;
4552#endif
4553 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_VMXRCStartVM64, 6, &aParam[0]);
4554
4555#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4556 Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5);
4557 Assert(pCtx->dr[4] == 10);
4558 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff;
4559#endif
4560
4561#ifdef VBOX_STRICT
4562 AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
4563 AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
4564 pVCpu->hm.s.vmx.HCPhysVmcs));
4565 AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
4566 pCache->TestOut.HCPhysVmcs));
4567 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
4568 pCache->TestOut.pCache));
4569 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache),
4570 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache)));
4571 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
4572 pCache->TestOut.pCtx));
4573 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
4574#endif
4575 return rc;
4576}
4577
4578
4579/**
4580 * Initializes the VMCS read cache. The VMCS cache is used for 32-bit hosts
4581 * running 64-bit guests (except 32-bit Darwin which runs with 64-bit paging in
4582 * 32-bit mode) for 64-bit fields that cannot be accessed in 32-bit mode. Some
4583 * 64-bit fields -can- be accessed (those that have a 32-bit FULL & HIGH part).
4584 *
4585 * @returns VBox status code.
4586 * @param pVM Pointer to the VM.
4587 * @param pVCpu Pointer to the VMCPU.
4588 */
4589static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu)
4590{
4591#define VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, idxField) \
4592{ \
4593 Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \
4594 pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \
4595 pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \
4596 ++cReadFields; \
4597}
4598
4599 AssertPtr(pVM);
4600 AssertPtr(pVCpu);
4601 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
4602 uint32_t cReadFields = 0;
4603
4604 /*
4605 * Don't remove the #if 0'd fields in this code. They're listed here for consistency
4606 * and serve to indicate exceptions to the rules.
4607 */
4608
4609 /* Guest-natural selector base fields. */
4610#if 0
4611 /* These are 32-bit in practice. See Intel spec. 2.5 "Control Registers". */
4612 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR0);
4613 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR4);
4614#endif
4615 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_ES_BASE);
4616 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CS_BASE);
4617 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SS_BASE);
4618 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_DS_BASE);
4619 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_FS_BASE);
4620 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GS_BASE);
4621 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_LDTR_BASE);
4622 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_TR_BASE);
4623 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GDTR_BASE);
4624 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_IDTR_BASE);
4625 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RSP);
4626 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RIP);
4627#if 0
4628 /* Unused natural width guest-state fields. */
4629 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS);
4630 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); /* Handled in Nested Paging case */
4631#endif
4632 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_ESP);
4633 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_EIP);
4634
4635 /* 64-bit guest-state fields; unused as we use two 32-bit VMREADs for these 64-bit fields (using "FULL" and "HIGH" fields). */
4636#if 0
4637 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL);
4638 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_DEBUGCTL_FULL);
4639 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PAT_FULL);
4640 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_EFER_FULL);
4641 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL);
4642 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE0_FULL);
4643 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE1_FULL);
4644 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE2_FULL);
4645 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE3_FULL);
4646#endif
4647
4648 /* Natural width guest-state fields. */
4649 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
4650#if 0
4651 /* Currently unused field. */
4652 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_GUEST_LINEAR_ADDR);
4653#endif
4654
4655 if (pVM->hm.s.fNestedPaging)
4656 {
4657 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3);
4658 AssertMsg(cReadFields == VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields,
4659 VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX));
4660 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
4661 }
4662 else
4663 {
4664 AssertMsg(cReadFields == VMX_VMCS_MAX_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, VMX_VMCS_MAX_CACHE_IDX));
4665 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
4666 }
4667
4668#undef VMXLOCAL_INIT_READ_CACHE_FIELD
4669 return VINF_SUCCESS;
4670}
4671
4672
4673/**
4674 * Writes a field into the VMCS. This can either directly invoke a VMWRITE or
4675 * queue up the VMWRITE by using the VMCS write cache (on 32-bit hosts, except
4676 * Darwin, running 64-bit guests).
4677 *
4678 * @returns VBox status code.
4679 * @param pVCpu Pointer to the VMCPU.
4680 * @param idxField The VMCS field encoding.
4681 * @param   u64Val      16-, 32- or 64-bit value.
4682 */
4683VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4684{
4685 int rc;
4686 switch (idxField)
4687 {
4688 /*
4689         * These fields consist of a "FULL" and a "HIGH" part which can be written to individually.
4690 */
4691 /* 64-bit Control fields. */
4692 case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL:
4693 case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL:
4694 case VMX_VMCS64_CTRL_MSR_BITMAP_FULL:
4695 case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL:
4696 case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL:
4697 case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL:
4698 case VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL:
4699 case VMX_VMCS64_CTRL_TSC_OFFSET_FULL:
4700 case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL:
4701 case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL:
4702 case VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL:
4703 case VMX_VMCS64_CTRL_EPTP_FULL:
4704 case VMX_VMCS64_CTRL_EPTP_LIST_FULL:
4705 /* 64-bit Guest-state fields. */
4706 case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL:
4707 case VMX_VMCS64_GUEST_DEBUGCTL_FULL:
4708 case VMX_VMCS64_GUEST_PAT_FULL:
4709 case VMX_VMCS64_GUEST_EFER_FULL:
4710 case VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL:
4711 case VMX_VMCS64_GUEST_PDPTE0_FULL:
4712 case VMX_VMCS64_GUEST_PDPTE1_FULL:
4713 case VMX_VMCS64_GUEST_PDPTE2_FULL:
4714 case VMX_VMCS64_GUEST_PDPTE3_FULL:
4715 /* 64-bit Host-state fields. */
4716 case VMX_VMCS64_HOST_FIELD_PAT_FULL:
4717 case VMX_VMCS64_HOST_FIELD_EFER_FULL:
4718 case VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL:
4719 {
4720 rc = VMXWriteVmcs32(idxField, u64Val);
4721 rc |= VMXWriteVmcs32(idxField + 1, (uint32_t)(u64Val >> 32));
4722 break;
4723 }
4724
4725 /*
4726 * These fields do not have high and low parts. Queue up the VMWRITE by using the VMCS write-cache (for 64-bit
4727 * values). When we switch the host to 64-bit mode for running 64-bit guests, these VMWRITEs get executed then.
4728 */
4729 /* Natural-width Guest-state fields. */
4730 case VMX_VMCS_GUEST_CR3:
4731 case VMX_VMCS_GUEST_ES_BASE:
4732 case VMX_VMCS_GUEST_CS_BASE:
4733 case VMX_VMCS_GUEST_SS_BASE:
4734 case VMX_VMCS_GUEST_DS_BASE:
4735 case VMX_VMCS_GUEST_FS_BASE:
4736 case VMX_VMCS_GUEST_GS_BASE:
4737 case VMX_VMCS_GUEST_LDTR_BASE:
4738 case VMX_VMCS_GUEST_TR_BASE:
4739 case VMX_VMCS_GUEST_GDTR_BASE:
4740 case VMX_VMCS_GUEST_IDTR_BASE:
4741 case VMX_VMCS_GUEST_RSP:
4742 case VMX_VMCS_GUEST_RIP:
4743 case VMX_VMCS_GUEST_SYSENTER_ESP:
4744 case VMX_VMCS_GUEST_SYSENTER_EIP:
4745 {
4746 if (!(u64Val >> 32))
4747 {
4748 /* If this field is 64-bit, VT-x will zero out the top bits. */
4749 rc = VMXWriteVmcs32(idxField, (uint32_t)u64Val);
4750 }
4751 else
4752 {
4753 /* Assert that only the 32->64 switcher case should ever come here. */
4754 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests);
4755 rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val);
4756 }
4757 break;
4758 }
4759
4760 default:
4761 {
4762 AssertMsgFailed(("VMXWriteVmcs64Ex: Invalid field %#RX32 (pVCpu=%p u64Val=%#RX64)\n", idxField, pVCpu, u64Val));
4763 rc = VERR_INVALID_PARAMETER;
4764 break;
4765 }
4766 }
4767 AssertRCReturn(rc, rc);
4768 return rc;
4769}
4770
4771
4772/**
4773 * Queues up a VMWRITE by using the VMCS write cache. This is only used on 32-bit
4774 * hosts (except Darwin) for 64-bit guests.
4775 *
4776 * @param pVCpu Pointer to the VMCPU.
4777 * @param idxField The VMCS field encoding.
4778 * @param   u64Val      16-, 32- or 64-bit value.
4779 */
4780VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4781{
4782 AssertPtr(pVCpu);
4783 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
4784
4785 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
4786 ("entries=%u\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
4787
4788 /* Make sure there are no duplicates. */
4789 for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++)
4790 {
4791 if (pCache->Write.aField[i] == idxField)
4792 {
4793 pCache->Write.aFieldVal[i] = u64Val;
4794 return VINF_SUCCESS;
4795 }
4796 }
4797
4798 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
4799 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
4800 pCache->Write.cValidEntries++;
4801 return VINF_SUCCESS;
4802}
4803
4804/* Enable later when the assembly code uses these as callbacks. */
4805#if 0
4806/*
4807 * Loads the VMCS write-cache into the CPU (by executing VMWRITEs).
4808 *
4809 * @param pVCpu Pointer to the VMCPU.
4810 * @param pCache Pointer to the VMCS cache.
4811 *
4812 * @remarks No-long-jump zone!!!
4813 */
4814VMMR0DECL(void) VMXWriteCachedVmcsLoad(PVMCPU pVCpu, PVMCSCACHE pCache)
4815{
4816 AssertPtr(pCache);
4817 for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++)
4818 {
4819 int rc = VMXWriteVmcs64(pCache->Write.aField[i], pCache->Write.aFieldVal[i]);
4820 AssertRC(rc);
4821 }
4822 pCache->Write.cValidEntries = 0;
4823}
4824
4825
4826/**
4827 * Stores the VMCS read-cache from the CPU (by executing VMREADs).
4828 *
4829 * @param pVCpu Pointer to the VMCPU.
4830 * @param pCache Pointer to the VMCS cache.
4831 *
4832 * @remarks No-long-jump zone!!!
4833 */
4834VMMR0DECL(void) VMXReadCachedVmcsStore(PVMCPU pVCpu, PVMCSCACHE pCache)
4835{
4836 AssertPtr(pCache);
4837 for (uint32_t i = 0; i < pCache->Read.cValidEntries; i++)
4838 {
4839 int rc = VMXReadVmcs64(pCache->Read.aField[i], &pCache->Read.aFieldVal[i]);
4840 AssertRC(rc);
4841 }
4842}
4843#endif
4844#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
4845
4846
4847/**
4848 * Sets up the usage of TSC-offsetting and updates the VMCS. If offsetting is
4849 * not possible, causes VM-exits on RDTSC(P)s. Also sets up the VMX preemption
4850 * timer.
4851 *
4852 * @returns VBox status code.
4853 * @param pVCpu Pointer to the VMCPU.
4854 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4855 * out-of-sync. Make sure to update the required fields
4856 * before using them.
4857 * @remarks No-long-jump zone!!!
4858 */
4859static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4860{
4861 int rc = VERR_INTERNAL_ERROR_5;
4862 bool fOffsettedTsc = false;
4863 PVM pVM = pVCpu->CTX_SUFF(pVM);
4864 if (pVM->hm.s.vmx.fUsePreemptTimer)
4865 {
4866 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset);
4867
4868 /* Make sure the returned values have sane upper and lower boundaries. */
4869 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
4870 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second */
4871 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
4872 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
4873
4874 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4875 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_PREEMPT_TIMER_VALUE, cPreemptionTickCount); AssertRC(rc);
4876 }
4877 else
4878 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset);
4879
4880 if (fOffsettedTsc)
4881 {
4882 uint64_t u64CurTSC = ASMReadTSC();
4883 if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
4884 {
4885 /* Note: VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
4886 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset); AssertRC(rc);
4887
4888 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT;
4889 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc);
4890 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
4891 }
4892 else
4893 {
4894 /* VM-exit on RDTSC(P) as we would otherwise pass decreasing TSC values to the guest. */
4895 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT;
4896 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc);
4897 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
4898 }
4899 }
4900 else
4901 {
4902 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4903 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT;
4904 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc);
4905 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
4906 }
4907}
4908
4909
4910/**
4911 * Determines if an exception is a contributory exception. Contributory
4912 * exceptions are ones which can cause double-faults. Page-fault is
4913 * intentionally not included here as it's a conditional contributory exception.
4914 *
4915 * @returns true if the exception is contributory, false otherwise.
4916 * @param uVector The exception vector.
4917 */
4918DECLINLINE(bool) hmR0VmxIsContributoryXcpt(const uint32_t uVector)
4919{
4920 switch (uVector)
4921 {
4922 case X86_XCPT_GP:
4923 case X86_XCPT_SS:
4924 case X86_XCPT_NP:
4925 case X86_XCPT_TS:
4926 case X86_XCPT_DE:
4927 return true;
4928 default:
4929 break;
4930 }
4931 return false;
4932}
4933
4934
4935/**
4936 * Sets an event as a pending event to be injected into the guest.
4937 *
4938 * @param pVCpu Pointer to the VMCPU.
4939 * @param u32IntrInfo The VM-entry interruption-information field.
4940 * @param cbInstr The VM-entry instruction length in bytes (for software
4941 * interrupts, exceptions and privileged software
4942 * exceptions).
4943 * @param u32ErrCode The VM-entry exception error code.
4944 * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
4945 * page-fault.
4946 *
4947 * @remarks Statistics counter assumes this is a guest event being injected or
4948 * re-injected into the guest, i.e. 'StatInjectPendingReflect' is
4949 * always incremented.
4950 */
4951DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPU pVCpu, uint32_t u32IntrInfo, uint32_t cbInstr, uint32_t u32ErrCode,
4952 RTGCUINTPTR GCPtrFaultAddress)
4953{
4954 Assert(!pVCpu->hm.s.Event.fPending);
4955 pVCpu->hm.s.Event.fPending = true;
4956 pVCpu->hm.s.Event.u64IntrInfo = u32IntrInfo;
4957 pVCpu->hm.s.Event.u32ErrCode = u32ErrCode;
4958 pVCpu->hm.s.Event.cbInstr = cbInstr;
4959 pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
4960
4961 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect);
4962}
4963
4964
4965/**
4966 * Sets a double-fault (#DF) exception as pending-for-injection into the VM.
4967 *
4968 * @param pVCpu Pointer to the VMCPU.
4969 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4970 * out-of-sync. Make sure to update the required fields
4971 * before using them.
4972 */
4973DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4974{
4975 uint32_t u32IntrInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID;
4976 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4977 u32IntrInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
4978 hmR0VmxSetPendingEvent(pVCpu, u32IntrInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
4979}
4980
4981
4982/**
4983 * Handles a condition that occurred while delivering an event through the guest
4984 * IDT.
4985 *
4986 * @returns VBox status code (informational error codes included).
4987 * @retval VINF_SUCCESS if we should continue handling the VM-exit.
4988 * @retval VINF_HM_DOUBLE_FAULT if a #DF condition was detected and we ought to
4989 *         continue execution of the guest which will deliver the #DF.
4990 * @retval VINF_EM_RESET if we detected a triple-fault condition.
4991 *
4992 * @param pVCpu Pointer to the VMCPU.
4993 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4994 * out-of-sync. Make sure to update the required fields
4995 * before using them.
4996 * @param pVmxTransient Pointer to the VMX transient structure.
4997 *
4998 * @remarks No-long-jump zone!!!
4999 */
5000static int hmR0VmxCheckExitDueToEventDelivery(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
5001{
5002 int rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
5003 AssertRCReturn(rc, rc);
5004 if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo))
5005 {
5006 rc = hmR0VmxReadExitIntrInfoVmcs(pVCpu, pVmxTransient);
5007 AssertRCReturn(rc, rc);
5008
5009 uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo);
5010 uint32_t uExitVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVmxTransient->uExitIntrInfo);
5011 uint32_t uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo);
5012
5013 typedef enum
5014 {
5015 VMXREFLECTXCPT_XCPT, /* Reflect the exception to the guest or for further evaluation by VMM. */
5016 VMXREFLECTXCPT_DF, /* Reflect the exception as a double-fault to the guest. */
5017 VMXREFLECTXCPT_TF, /* Indicate a triple faulted state to the VMM. */
5018 VMXREFLECTXCPT_NONE /* Nothing to reflect. */
5019 } VMXREFLECTXCPT;
5020
5021 /* See Intel spec. 30.7.1.1 "Reflecting Exceptions to Guest Software". */
5022 VMXREFLECTXCPT enmReflect = VMXREFLECTXCPT_NONE;
5023 if (VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntrInfo))
5024 {
5025 if (uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT)
5026 {
5027 enmReflect = VMXREFLECTXCPT_XCPT;
5028#ifdef VBOX_STRICT
5029 if ( hmR0VmxIsContributoryXcpt(uIdtVector)
5030 && uExitVector == X86_XCPT_PF)
5031 {
5032 Log4(("IDT: vcpu[%RU32] Contributory #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2));
5033 }
5034#endif
5035 if ( uExitVector == X86_XCPT_PF
5036 && uIdtVector == X86_XCPT_PF)
5037 {
5038 pVmxTransient->fVectoringPF = true;
5039 Log4(("IDT: vcpu[%RU32] Vectoring #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2));
5040 }
5041 else if ( (pVCpu->hm.s.vmx.u32XcptBitmap & HMVMX_CONTRIBUTORY_XCPT_MASK)
5042 && hmR0VmxIsContributoryXcpt(uExitVector)
5043 && ( hmR0VmxIsContributoryXcpt(uIdtVector)
5044 || uIdtVector == X86_XCPT_PF))
5045 {
5046 enmReflect = VMXREFLECTXCPT_DF;
5047 }
5048 else if (uIdtVector == X86_XCPT_DF)
5049 enmReflect = VMXREFLECTXCPT_TF;
5050 }
5051 else if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
5052 || uIntType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT
5053 || uIntType == VMX_IDT_VECTORING_INFO_TYPE_NMI)
5054 {
5055 /*
5056                  * Ignore software interrupts (INT n), software exceptions (#BP, #OF) and privileged software
5057                  * exceptions (whatever they are) as they reoccur when restarting the instruction.
5058 */
5059 enmReflect = VMXREFLECTXCPT_XCPT;
5060 }
5061 }
5062 else
5063 {
5064 /*
5065 * If event delivery caused an EPT violation/misconfig or APIC access VM-exit, then the VM-exit
5066 * interruption-information will not be valid and we end up here. In such cases, it is sufficient to reflect the
5067 * original exception to the guest after handling the VM-exit.
5068 */
5069 if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
5070 || uIntType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT
5071 || uIntType == VMX_IDT_VECTORING_INFO_TYPE_NMI)
5072 {
5073 enmReflect = VMXREFLECTXCPT_XCPT;
5074 }
5075 }
5076
5077 switch (enmReflect)
5078 {
5079 case VMXREFLECTXCPT_XCPT:
5080 {
5081 Assert( uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT
5082 && uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
5083 && uIntType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT);
5084
5085 uint32_t u32ErrCode = 0;
5086 if (VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo))
5087 {
5088 rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
5089 AssertRCReturn(rc, rc);
5090 u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
5091 }
5092
5093 /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF. See hmR0VmxExitXcptPF(). */
5094 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INTR_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo),
5095 0 /* cbInstr */, u32ErrCode, pMixedCtx->cr2);
5096 rc = VINF_SUCCESS;
5097 Log4(("IDT: vcpu[%RU32] Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->idCpu,
5098 pVCpu->hm.s.Event.u64IntrInfo, pVCpu->hm.s.Event.u32ErrCode));
5099
5100 break;
5101 }
5102
5103 case VMXREFLECTXCPT_DF:
5104 {
5105 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
5106 rc = VINF_HM_DOUBLE_FAULT;
5107 Log4(("IDT: vcpu[%RU32] Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->idCpu,
5108 pVCpu->hm.s.Event.u64IntrInfo, uIdtVector, uExitVector));
5109
5110 break;
5111 }
5112
5113 case VMXREFLECTXCPT_TF:
5114 {
5115 rc = VINF_EM_RESET;
5116 Log4(("IDT: vcpu[%RU32] Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", pVCpu->idCpu, uIdtVector,
5117 uExitVector));
5118 break;
5119 }
5120
5121 default:
5122 Assert(rc == VINF_SUCCESS);
5123 break;
5124 }
5125 }
5126 Assert(rc == VINF_SUCCESS || rc == VINF_HM_DOUBLE_FAULT || rc == VINF_EM_RESET);
5127 return rc;
5128}
5129
5130
5131/**
5132 * Saves the guest's CR0 register from the VMCS into the guest-CPU context.
5133 *
5134 * @returns VBox status code.
5135 * @param pVCpu Pointer to the VMCPU.
5136 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5137 * out-of-sync. Make sure to update the required fields
5138 * before using them.
5139 *
5140 * @remarks No-long-jump zone!!!
5141 */
5142static int hmR0VmxSaveGuestCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5143{
5144 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR0))
5145 {
5146 uint32_t uVal = 0;
5147 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &uVal);
5148 AssertRCReturn(rc, rc);
5149
5150 uint32_t uShadow = 0;
5151 rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uShadow);
5152 AssertRCReturn(rc, rc);
5153
5154 uVal = (uShadow & pVCpu->hm.s.vmx.u32CR0Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR0Mask);
5155 CPUMSetGuestCR0(pVCpu, uVal);
5156 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_CR0;
5157 }
5158 return VINF_SUCCESS;
5159}
5160
5161
5162/**
5163 * Saves the guest's CR4 register from the VMCS into the guest-CPU context.
5164 *
5165 * @returns VBox status code.
5166 * @param pVCpu Pointer to the VMCPU.
5167 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5168 * out-of-sync. Make sure to update the required fields
5169 * before using them.
5170 *
5171 * @remarks No-long-jump zone!!!
5172 */
5173static int hmR0VmxSaveGuestCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5174{
5175 int rc = VINF_SUCCESS;
5176 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR4))
5177 {
5178 uint32_t uVal = 0;
5179 uint32_t uShadow = 0;
5180 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &uVal);
5181 AssertRCReturn(rc, rc);
5182 rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uShadow);
5183 AssertRCReturn(rc, rc);
5184
5185 uVal = (uShadow & pVCpu->hm.s.vmx.u32CR4Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR4Mask);
5186 CPUMSetGuestCR4(pVCpu, uVal);
5187 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_CR4;
5188 }
5189 return rc;
5190}
5191
5192
5193/**
5194 * Saves the guest's RIP register from the VMCS into the guest-CPU context.
5195 *
5196 * @returns VBox status code.
5197 * @param pVCpu Pointer to the VMCPU.
5198 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5199 * out-of-sync. Make sure to update the required fields
5200 * before using them.
5201 *
5202 * @remarks No-long-jump zone!!!
5203 */
5204static int hmR0VmxSaveGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5205{
5206 int rc = VINF_SUCCESS;
5207 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RIP))
5208 {
5209 uint64_t u64Val = 0;
5210 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val);
5211 AssertRCReturn(rc, rc);
5212
5213 pMixedCtx->rip = u64Val;
5214 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_RIP;
5215 }
5216 return rc;
5217}
5218
5219
5220/**
5221 * Saves the guest's RSP register from the VMCS into the guest-CPU context.
5222 *
5223 * @returns VBox status code.
5224 * @param pVCpu Pointer to the VMCPU.
5225 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5226 * out-of-sync. Make sure to update the required fields
5227 * before using them.
5228 *
5229 * @remarks No-long-jump zone!!!
5230 */
5231static int hmR0VmxSaveGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5232{
5233 int rc = VINF_SUCCESS;
5234 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RSP))
5235 {
5236 uint64_t u64Val = 0;
5237 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val);
5238 AssertRCReturn(rc, rc);
5239
5240 pMixedCtx->rsp = u64Val;
5241 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_RSP;
5242 }
5243 return rc;
5244}
5245
5246
5247/**
5248 * Saves the guest's RFLAGS from the VMCS into the guest-CPU context.
5249 *
5250 * @returns VBox status code.
5251 * @param pVCpu Pointer to the VMCPU.
5252 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5253 * out-of-sync. Make sure to update the required fields
5254 * before using them.
5255 *
5256 * @remarks No-long-jump zone!!!
5257 */
5258static int hmR0VmxSaveGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5259{
5260 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS))
5261 {
5262 uint32_t uVal = 0;
5263 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &uVal);
5264 AssertRCReturn(rc, rc);
5265
5266 pMixedCtx->eflags.u32 = uVal;
5267 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) /* Undo our real-on-v86-mode changes to eflags if necessary. */
5268 {
5269 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
5270 Log4(("Saving real-mode EFLAGS VT-x view=%#RX32\n", pMixedCtx->eflags.u32));
5271
5272 pMixedCtx->eflags.Bits.u1VM = 0;
5273 pMixedCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.Eflags.Bits.u2IOPL;
5274 }
5275
5276 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_RFLAGS;
5277 }
5278 return VINF_SUCCESS;
5279}
5280
5281
5282/**
5283 * Wrapper for saving the guest's RIP, RSP and RFLAGS from the VMCS into the
5284 * guest-CPU context.
5285 */
5286DECLINLINE(int) hmR0VmxSaveGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5287{
5288 int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
5289 rc |= hmR0VmxSaveGuestRsp(pVCpu, pMixedCtx);
5290 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
5291 return rc;
5292}
5293
5294
5295/**
5296 * Saves the guest's interruptibility-state ("interrupt shadow" as AMD calls it)
5297 * from the guest-state area in the VMCS.
5298 *
5299 * @param pVCpu Pointer to the VMCPU.
5300 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5301 * out-of-sync. Make sure to update the required fields
5302 * before using them.
5303 *
5304 * @remarks No-long-jump zone!!!
5305 */
5306static void hmR0VmxSaveGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5307{
5308 uint32_t uIntrState = 0;
5309 int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState);
5310 AssertRC(rc);
5311
5312 if (!uIntrState)
5313 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
5314 else
5315 {
5316 Assert( uIntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
5317 || uIntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
5318 rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
5319 AssertRC(rc);
5320 rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* for hmR0VmxGetGuestIntrState(). */
5321 AssertRC(rc);
5322
5323 EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip);
5324 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
5325 }
5326}
5327
5328
5329/**
5330 * Saves the guest's activity state.
5331 *
5332 * @returns VBox status code.
5333 * @param pVCpu Pointer to the VMCPU.
5334 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5335 * out-of-sync. Make sure to update the required fields
5336 * before using them.
5337 *
5338 * @remarks No-long-jump zone!!!
5339 */
5340static int hmR0VmxSaveGuestActivityState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5341{
5342 /* Nothing to do for now until we make use of different guest-CPU activity state. Just update the flag. */
5343 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_ACTIVITY_STATE;
5344 return VINF_SUCCESS;
5345}
5346
5347
5348/**
5349 * Saves the guest SYSENTER MSRs (SYSENTER_CS, SYSENTER_EIP, SYSENTER_ESP) from
5350 * the current VMCS into the guest-CPU context.
5351 *
5352 * @returns VBox status code.
5353 * @param pVCpu Pointer to the VMCPU.
5354 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5355 * out-of-sync. Make sure to update the required fields
5356 * before using them.
5357 *
5358 * @remarks No-long-jump zone!!!
5359 */
5360static int hmR0VmxSaveGuestSysenterMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5361{
5362 int rc = VINF_SUCCESS;
5363 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR))
5364 {
5365 uint32_t u32Val = 0;
5366 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRCReturn(rc, rc);
5367 pMixedCtx->SysEnter.cs = u32Val;
5368 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR;
5369 }
5370
5371 uint64_t u64Val = 0;
5372 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR))
5373 {
5374 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, &u64Val); AssertRCReturn(rc, rc);
5375 pMixedCtx->SysEnter.eip = u64Val;
5376 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR;
5377 }
5378 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR))
5379 {
5380 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, &u64Val); AssertRCReturn(rc, rc);
5381 pMixedCtx->SysEnter.esp = u64Val;
5382 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR;
5383 }
5384 return rc;
5385}
5386
5387
5388/**
5389 * Saves the guest FS_BASE MSR from the current VMCS into the guest-CPU
5390 * context.
5391 *
5392 * @returns VBox status code.
5393 * @param pVCpu Pointer to the VMCPU.
5394 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5395 * out-of-sync. Make sure to update the required fields
5396 * before using them.
5397 *
5398 * @remarks No-long-jump zone!!!
5399 */
5400static int hmR0VmxSaveGuestFSBaseMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5401{
5402 int rc = VINF_SUCCESS;
5403 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_FS_BASE_MSR))
5404 {
5405 uint64_t u64Val = 0;
5406 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_FS_BASE, &u64Val); AssertRCReturn(rc, rc);
5407 pMixedCtx->fs.u64Base = u64Val;
5408 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_FS_BASE_MSR;
5409 }
5410 return rc;
5411}
5412
5413
5414/**
5415 * Saves the guest GS_BASE MSR from the current VMCS into the guest-CPU
5416 * context.
5417 *
5418 * @returns VBox status code.
5419 * @param pVCpu Pointer to the VMCPU.
5420 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5421 * out-of-sync. Make sure to update the required fields
5422 * before using them.
5423 *
5424 * @remarks No-long-jump zone!!!
5425 */
5426static int hmR0VmxSaveGuestGSBaseMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5427{
5428 int rc = VINF_SUCCESS;
5429 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_GS_BASE_MSR))
5430 {
5431 uint64_t u64Val = 0;
5432 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GS_BASE, &u64Val); AssertRCReturn(rc, rc);
5433 pMixedCtx->gs.u64Base = u64Val;
5434 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_GS_BASE_MSR;
5435 }
5436 return rc;
5437}
5438
5439
5440/**
5441 * Saves the auto load/store'd guest MSRs from the current VMCS into the
5442 * guest-CPU context. Currently these are LSTAR, STAR, SFMASK, KERNEL-GS BASE
5443 * and TSC_AUX.
5444 *
5445 * @returns VBox status code.
5446 * @param pVCpu Pointer to the VMCPU.
5447 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5448 * out-of-sync. Make sure to update the required fields
5449 * before using them.
5450 *
5451 * @remarks No-long-jump zone!!!
5452 */
5453static int hmR0VmxSaveGuestAutoLoadStoreMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5454{
5455 if (pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS)
5456 return VINF_SUCCESS;
5457
5458#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
5459 for (uint32_t i = 0; i < pVCpu->hm.s.vmx.cGuestMsrs; i++)
5460 {
5461 PVMXAUTOMSR pMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
5462 pMsr += i;
5463 switch (pMsr->u32Msr)
5464 {
5465 case MSR_K8_LSTAR: pMixedCtx->msrLSTAR = pMsr->u64Value; break;
5466 case MSR_K6_STAR: pMixedCtx->msrSTAR = pMsr->u64Value; break;
5467 case MSR_K8_SF_MASK: pMixedCtx->msrSFMASK = pMsr->u64Value; break;
5468 case MSR_K8_TSC_AUX: CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value); break;
5469 case MSR_K8_KERNEL_GS_BASE: pMixedCtx->msrKERNELGSBASE = pMsr->u64Value; break;
5470 case MSR_K6_EFER: /* EFER can't be changed without causing a VM-exit. */ break;
5471 default:
5472 {
5473 AssertFailed();
5474 return VERR_HM_UNEXPECTED_LD_ST_MSR;
5475 }
5476 }
5477 }
5478#endif
5479
5480 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS;
5481 return VINF_SUCCESS;
5482}
5483
5484
5485/**
5486 * Saves the guest control registers from the current VMCS into the guest-CPU
5487 * context.
5488 *
5489 * @returns VBox status code.
5490 * @param pVCpu Pointer to the VMCPU.
5491 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5492 * out-of-sync. Make sure to update the required fields
5493 * before using them.
5494 *
5495 * @remarks No-long-jump zone!!!
5496 */
5497static int hmR0VmxSaveGuestControlRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5498{
5499 /* Guest CR0. Guest FPU. */
5500 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
5501 AssertRCReturn(rc, rc);
5502
5503 /* Guest CR4. */
5504 rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx);
5505 AssertRCReturn(rc, rc);
5506
5507    /* Guest CR2 - always updated during the world-switch or in #PF. */
5508 /* Guest CR3. Only changes with Nested Paging. This must be done -after- saving CR0 and CR4 from the guest! */
5509 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR3))
5510 {
5511 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR0);
5512 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR4);
5513
5514 PVM pVM = pVCpu->CTX_SUFF(pVM);
5515 if ( pVM->hm.s.vmx.fUnrestrictedGuest
5516 || ( pVM->hm.s.fNestedPaging
5517 && CPUMIsGuestPagingEnabledEx(pMixedCtx)))
5518 {
5519 uint64_t u64Val = 0;
5520            rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_CR3, &u64Val);  AssertRCReturn(rc, rc);
5521 if (pMixedCtx->cr3 != u64Val)
5522 {
5523 CPUMSetGuestCR3(pVCpu, u64Val);
5524 if (VMMRZCallRing3IsEnabled(pVCpu))
5525 {
5526 PGMUpdateCR3(pVCpu, u64Val);
5527 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5528 }
5529 else
5530 {
5531 /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMUpdateCR3().*/
5532 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
5533 }
5534 }
5535
5536 /* If the guest is in PAE mode, sync back the PDPE's into the guest state. */
5537 if (CPUMIsGuestInPAEModeEx(pMixedCtx)) /* Reads CR0, CR4 and EFER MSR (EFER is always up-to-date). */
5538 {
5539 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc);
5540 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc);
5541 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc);
5542 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc);
5543
5544 if (VMMRZCallRing3IsEnabled(pVCpu))
5545 {
5546 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
5547 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
5548 }
5549 else
5550 {
5551 /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMGstUpdatePaePdpes(). */
5552 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES);
5553 }
5554 }
5555 }
5556
5557 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_CR3;
5558 }
5559
5560 /*
5561 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> hmR0VmxCallRing3Callback()
5562 * -> VMMRZCallRing3Disable() -> hmR0VmxSaveGuestState() -> Set VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
5563 * -> continue with VM-exit handling -> hmR0VmxSaveGuestControlRegs() and here we are.
5564 *
5565 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
5566 */
5567 if (VMMRZCallRing3IsEnabled(pVCpu))
5568 {
5569 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
5570 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
5571
5572 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
5573 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
5574
5575 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5576 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
5577 }
5578
5579 return rc;
5580}
5581
5582
5583/**
5584 * Reads a guest segment register from the current VMCS into the guest-CPU
5585 * context.
5586 *
5587 * @returns VBox status code.
5588 * @param pVCpu Pointer to the VMCPU.
5589 * @param idxSel Index of the selector in the VMCS.
5590 * @param idxLimit Index of the segment limit in the VMCS.
5591 * @param idxBase Index of the segment base in the VMCS.
5592 * @param idxAccess Index of the access rights of the segment in the VMCS.
5593 * @param pSelReg Pointer to the segment selector.
5594 *
5595 * @remarks No-long-jump zone!!!
5596 * @remarks Never call this function directly!!! Use the VMXLOCAL_READ_SEG()
5597 * macro as that takes care of whether to read from the VMCS cache or
5598 * not.
5599 */
5600DECLINLINE(int) hmR0VmxReadSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess,
5601 PCPUMSELREG pSelReg)
5602{
5603 uint32_t u32Val = 0;
5604 int rc = VMXReadVmcs32(idxSel, &u32Val);
5605 AssertRCReturn(rc, rc);
5606 pSelReg->Sel = (uint16_t)u32Val;
5607 pSelReg->ValidSel = (uint16_t)u32Val;
5608 pSelReg->fFlags = CPUMSELREG_FLAGS_VALID;
5609
5610 rc = VMXReadVmcs32(idxLimit, &u32Val);
5611 AssertRCReturn(rc, rc);
5612 pSelReg->u32Limit = u32Val;
5613
5614 uint64_t u64Val = 0;
5615 rc = VMXReadVmcsGstNByIdxVal(idxBase, &u64Val);
5616 AssertRCReturn(rc, rc);
5617 pSelReg->u64Base = u64Val;
5618
5619 rc = VMXReadVmcs32(idxAccess, &u32Val);
5620 AssertRCReturn(rc, rc);
5621 pSelReg->Attr.u = u32Val;
5622
5623 /*
5624 * If VT-x marks the segment as unusable, most other bits remain undefined:
5625 * - For CS the L, D and G bits have meaning.
5626 * - For SS the DPL has meaning (it -is- the CPL for Intel and VBox).
5627 * - For the remaining data segments no bits are defined.
5628 *
5629     * The present bit and the unusable bit have been observed to be set at the
5630     * same time (the selector was supposed to be invalid as we started executing
5631     * a V8086 interrupt in ring-0).
5632 *
5633     * What is important for the rest of the VBox code is that the P bit is
5634     * cleared. Some of the other VBox code recognizes the unusable bit, but
5635     * AMD-V certainly doesn't, and REM doesn't really either. So, to be on the
5636 * safe side here, we'll strip off P and other bits we don't care about. If
5637 * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed.
5638 *
5639 * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers".
5640 */
5641 if (pSelReg->Attr.u & X86DESCATTR_UNUSABLE)
5642 {
5643 Assert(idxSel != VMX_VMCS16_GUEST_FIELD_TR); /* TR is the only selector that can never be unusable. */
5644
5645 /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */
5646 pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G
5647 | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT;
5648
5649 Log4(("hmR0VmxReadSegmentReg: Unusable idxSel=%#x attr=%#x -> %#x\n", idxSel, u32Val, pSelReg->Attr.u));
5650#ifdef DEBUG_bird
5651 AssertMsg((u32Val & ~X86DESCATTR_P) == pSelReg->Attr.u,
5652 ("%#x: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n",
5653 idxSel, u32Val, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit));
5654#endif
5655 }
5656 return VINF_SUCCESS;
5657}
5658
5659
5660#ifdef VMX_USE_CACHED_VMCS_ACCESSES
5661# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
5662 hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
5663 VMX_VMCS_GUEST_##Sel##_BASE_CACHE_IDX, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
5664#else
5665# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
5666 hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
5667 VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
5668#endif
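/*
 * Illustrative sketch (not part of the build): without the VMCS cache,
 * VMXLOCAL_READ_SEG(CS, cs) above expands to roughly
 *
 *     hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_CS,
 *                           VMX_VMCS32_GUEST_CS_LIMIT, VMX_VMCS_GUEST_CS_BASE,
 *                           VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS, &pMixedCtx->cs);
 *
 * i.e. one selector, limit, base and access-rights read per segment register;
 * the cached variant merely swaps in the *_BASE_CACHE_IDX index for the base.
 */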
5669
5670
5671/**
5672 * Saves the guest segment registers from the current VMCS into the guest-CPU
5673 * context.
5674 *
5675 * @returns VBox status code.
5676 * @param pVCpu Pointer to the VMCPU.
5677 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5678 * out-of-sync. Make sure to update the required fields
5679 * before using them.
5680 *
5681 * @remarks No-long-jump zone!!!
5682 */
5683static int hmR0VmxSaveGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5684{
5685 /* Guest segment registers. */
5686 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_SEGMENT_REGS))
5687 {
5688 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); AssertRCReturn(rc, rc);
5689 rc = VMXLOCAL_READ_SEG(CS, cs); AssertRCReturn(rc, rc);
5690 rc = VMXLOCAL_READ_SEG(SS, ss); AssertRCReturn(rc, rc);
5691 rc = VMXLOCAL_READ_SEG(DS, ds); AssertRCReturn(rc, rc);
5692 rc = VMXLOCAL_READ_SEG(ES, es); AssertRCReturn(rc, rc);
5693 rc = VMXLOCAL_READ_SEG(FS, fs); AssertRCReturn(rc, rc);
5694 rc = VMXLOCAL_READ_SEG(GS, gs); AssertRCReturn(rc, rc);
5695
5696 /* Restore segment attributes for real-on-v86 mode hack. */
5697 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
5698 {
5699 pMixedCtx->cs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrCS.u;
5700 pMixedCtx->ss.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrSS.u;
5701 pMixedCtx->ds.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrDS.u;
5702 pMixedCtx->es.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrES.u;
5703 pMixedCtx->fs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrFS.u;
5704 pMixedCtx->gs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrGS.u;
5705 }
5706 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_SEGMENT_REGS;
5707 }
5708
5709 return VINF_SUCCESS;
5710}
5711
5712
5713/**
5714 * Saves the guest descriptor table registers and task register from the current
5715 * VMCS into the guest-CPU context.
5716 *
5717 * @returns VBox status code.
5718 * @param pVCpu Pointer to the VMCPU.
5719 * @param   pMixedCtx   Pointer to the guest-CPU context. The data may be
5720 * out-of-sync. Make sure to update the required fields
5721 * before using them.
5722 *
5723 * @remarks No-long-jump zone!!!
5724 */
5725static int hmR0VmxSaveGuestTableRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5726{
5727 int rc = VINF_SUCCESS;
5728
5729 /* Guest LDTR. */
5730 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_LDTR))
5731 {
5732 rc = VMXLOCAL_READ_SEG(LDTR, ldtr);
5733 AssertRCReturn(rc, rc);
5734 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_LDTR;
5735 }
5736
5737 /* Guest GDTR. */
5738 uint64_t u64Val = 0;
5739 uint32_t u32Val = 0;
5740 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_GDTR))
5741 {
5742 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); AssertRCReturn(rc, rc);
5743 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc);
5744 pMixedCtx->gdtr.pGdt = u64Val;
5745 pMixedCtx->gdtr.cbGdt = u32Val;
5746 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_GDTR;
5747 }
5748
5749 /* Guest IDTR. */
5750 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_IDTR))
5751 {
5752 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); AssertRCReturn(rc, rc);
5753 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc);
5754 pMixedCtx->idtr.pIdt = u64Val;
5755 pMixedCtx->idtr.cbIdt = u32Val;
5756 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_IDTR;
5757 }
5758
5759 /* Guest TR. */
5760 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_TR))
5761 {
5762 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
5763 AssertRCReturn(rc, rc);
5764
5765 /* For real-mode emulation using virtual-8086 mode we have the fake TSS (pRealModeTSS) in TR, don't save the fake one. */
5766 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
5767 {
5768 rc = VMXLOCAL_READ_SEG(TR, tr);
5769 AssertRCReturn(rc, rc);
5770 }
5771 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_TR;
5772 }
5773 return rc;
5774}
5775
5776#undef VMXLOCAL_READ_SEG
5777
5778
5779/**
5780 * Saves the guest debug-register DR7 from the current VMCS into the guest-CPU
5781 * context.
5782 *
5783 * @returns VBox status code.
5784 * @param pVCpu Pointer to the VMCPU.
5785 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5786 * out-of-sync. Make sure to update the required fields
5787 * before using them.
5788 *
5789 * @remarks No-long-jump zone!!!
5790 */
5791static int hmR0VmxSaveGuestDR7(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5792{
5793 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_DEBUG))
5794 {
5795 if (!pVCpu->hm.s.fUsingHyperDR7)
5796 {
5797 /* Upper 32-bits are always zero. See Intel spec. 2.7.3 "Loading and Storing Debug Registers". */
5798 uint32_t u32Val;
5799 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_DR7, &u32Val); AssertRCReturn(rc, rc);
5800 pMixedCtx->dr[7] = u32Val;
5801 }
5802
5803 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_DEBUG;
5804 }
5805 return VINF_SUCCESS;
5806}
5807
5808
5809/**
5810 * Saves the guest APIC state from the current VMCS into the guest-CPU context.
5811 *
5812 * @returns VBox status code.
5813 * @param pVCpu Pointer to the VMCPU.
5814 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5815 * out-of-sync. Make sure to update the required fields
5816 * before using them.
5817 *
5818 * @remarks No-long-jump zone!!!
5819 */
5820static int hmR0VmxSaveGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5821{
5822 /* Updating TPR is already done in hmR0VmxPostRunGuest(). Just update the flag. */
5823 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_APIC_STATE;
5824 return VINF_SUCCESS;
5825}
5826
5827
5828/**
5829 * Saves the entire guest state from the currently active VMCS into the
5830 * guest-CPU context. This essentially VMREADs all guest-data.
5831 *
5832 * @returns VBox status code.
5833 * @param pVCpu Pointer to the VMCPU.
5834 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5835 * out-of-sync. Make sure to update the required fields
5836 * before using them.
5837 */
5838static int hmR0VmxSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5839{
5840 Assert(pVCpu);
5841 Assert(pMixedCtx);
5842
5843 if (pVCpu->hm.s.vmx.fUpdatedGuestState == HMVMX_UPDATED_GUEST_ALL)
5844 return VINF_SUCCESS;
5845
5846 /* Though we can longjmp to ring-3 due to log-flushes here and get recalled
5847 again on the ring-3 callback path, there is no real need to do so. */
5848 if (VMMRZCallRing3IsEnabled(pVCpu))
5849 VMMR0LogFlushDisable(pVCpu);
5850 else
5851 Assert(VMMR0IsLogFlushDisabled(pVCpu));
5852 Log4Func(("vcpu[%RU32]\n", pVCpu->idCpu));
5853
5854 int rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
5855 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestRipRspRflags failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5856
5857 rc = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
5858 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestControlRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5859
5860 rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
5861 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSegmentRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5862
5863 rc = hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx);
5864 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestTableRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5865
5866 rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
5867 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestDR7 failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5868
5869 rc = hmR0VmxSaveGuestSysenterMsrs(pVCpu, pMixedCtx);
5870 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSysenterMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5871
5872 rc = hmR0VmxSaveGuestFSBaseMsr(pVCpu, pMixedCtx);
5873 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestFSBaseMsr failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5874
5875 rc = hmR0VmxSaveGuestGSBaseMsr(pVCpu, pMixedCtx);
5876 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestGSBaseMsr failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5877
5878 rc = hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx);
5879 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestAutoLoadStoreMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5880
5881 rc = hmR0VmxSaveGuestActivityState(pVCpu, pMixedCtx);
5882 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestActivityState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5883
5884 rc = hmR0VmxSaveGuestApicState(pVCpu, pMixedCtx);
5885 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestApicState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5886
5887 AssertMsg(pVCpu->hm.s.vmx.fUpdatedGuestState == HMVMX_UPDATED_GUEST_ALL,
5888 ("Missed guest state bits while saving state; residue %RX32\n", pVCpu->hm.s.vmx.fUpdatedGuestState));
5889
5890 if (VMMRZCallRing3IsEnabled(pVCpu))
5891 VMMR0LogFlushEnable(pVCpu);
5892
5893 return rc;
5894}
5895
5896
5897/**
5898 * Check per-VM and per-VCPU force flag actions that require us to go back to
5899 * ring-3 for one reason or another.
5900 *
5901 * @returns VBox status code (informational status codes included).
5902 * @retval VINF_SUCCESS if we don't have any actions that require going back to
5903 * ring-3.
5904 * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
5905 * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
5906 * interrupts)
5907 * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
5908 * all EMTs to be in ring-3.
5909 * @retval VINF_EM_RAW_TO_R3 if there are pending DMA requests.
5910 * @retval VINF_EM_NO_MEMORY if PGM is out of memory; we need to return
5911 * to the EM loop.
5912 *
5913 * @param pVM Pointer to the VM.
5914 * @param pVCpu Pointer to the VMCPU.
5915 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5916 * out-of-sync. Make sure to update the required fields
5917 * before using them.
5918 */
5919static int hmR0VmxCheckForceFlags(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5920{
5921 Assert(VMMRZCallRing3IsEnabled(pVCpu));
5922
5923 if ( VM_FF_IS_PENDING(pVM, !pVCpu->hm.s.fSingleInstruction
5924 ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK)
5925 || VMCPU_FF_IS_PENDING(pVCpu, !pVCpu->hm.s.fSingleInstruction
5926 ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
5927 {
5928 /* We need the control registers now, make sure the guest-CPU context is updated. */
5929 int rc3 = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
5930 AssertRCReturn(rc3, rc3);
5931
5932 /* Pending HM CR3 sync. */
5933 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
5934 {
5935 int rc2 = PGMUpdateCR3(pVCpu, pMixedCtx->cr3);
5936 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
5937 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
5938 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5939 }
5940
5941 /* Pending HM PAE PDPEs. */
5942 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
5943 {
5944 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
5945 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
5946 }
5947
5948 /* Pending PGM CR3 sync. */
5949 if (VMCPU_FF_IS_PENDING(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
5950 {
5951 int rc2 = PGMSyncCR3(pVCpu, pMixedCtx->cr0, pMixedCtx->cr3, pMixedCtx->cr4,
5952 VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
5953 if (rc2 != VINF_SUCCESS)
5954 {
5955 AssertRC(rc2);
5956 Log4(("hmR0VmxCheckForceFlags: PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", rc2));
5957 return rc2;
5958 }
5959 }
5960
5961 /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
5962 if ( VM_FF_IS_PENDING(pVM, VM_FF_HM_TO_R3_MASK)
5963 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
5964 {
5965 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
5966 int rc2 = RT_UNLIKELY(VM_FF_IS_PENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
5967 Log4(("hmR0VmxCheckForceFlags: HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc2));
5968 return rc2;
5969 }
5970
5971 /* Pending VM request packets, such as hardware interrupts. */
5972 if ( VM_FF_IS_PENDING(pVM, VM_FF_REQUEST)
5973 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_REQUEST))
5974 {
5975 Log4(("hmR0VmxCheckForceFlags: Pending VM request forcing us back to ring-3\n"));
5976 return VINF_EM_PENDING_REQUEST;
5977 }
5978
5979 /* Pending PGM pool flushes. */
5980 if (VM_FF_IS_PENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
5981 {
5982 Log4(("hmR0VmxCheckForceFlags: PGM pool flush pending forcing us back to ring-3\n"));
5983 return VINF_PGM_POOL_FLUSH_PENDING;
5984 }
5985
5986 /* Pending DMA requests. */
5987 if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_DMA))
5988 {
5989 Log4(("hmR0VmxCheckForceFlags: Pending DMA request forcing us back to ring-3\n"));
5990 return VINF_EM_RAW_TO_R3;
5991 }
5992 }
5993
5994 return VINF_SUCCESS;
5995}
5996
5997
5998/**
5999 * Converts any TRPM trap into a pending HM event. This is typically used when
6000 * entering from ring-3 (not longjmp returns).
6001 *
6002 * @param pVCpu Pointer to the VMCPU.
6003 */
6004static void hmR0VmxTrpmTrapToPendingEvent(PVMCPU pVCpu)
6005{
6006 Assert(TRPMHasTrap(pVCpu));
6007 Assert(!pVCpu->hm.s.Event.fPending);
6008
6009 uint8_t uVector;
6010 TRPMEVENT enmTrpmEvent;
6011 RTGCUINT uErrCode;
6012 RTGCUINTPTR GCPtrFaultAddress;
6013 uint8_t cbInstr;
6014
6015 int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr);
6016 AssertRC(rc);
6017
6018 /* Refer Intel spec. 24.8.3 "VM-entry Controls for Event Injection" for the format of u32IntrInfo. */
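 /* Bits 7:0 hold the vector, bits 10:8 the event type, bit 11 the error-code-valid flag and bit 31 the valid flag. */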
6019 uint32_t u32IntrInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID;
6020 if (enmTrpmEvent == TRPM_TRAP)
6021 {
6022 switch (uVector)
6023 {
6024 case X86_XCPT_BP:
6025 case X86_XCPT_OF:
6026 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6027 break;
6028
6029 case X86_XCPT_PF:
6030 case X86_XCPT_DF:
6031 case X86_XCPT_TS:
6032 case X86_XCPT_NP:
6033 case X86_XCPT_SS:
6034 case X86_XCPT_GP:
6035 case X86_XCPT_AC:
6036 u32IntrInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
6037 /* no break! */
6038 default:
6039 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6040 break;
6041 }
6042 }
6043 else if (enmTrpmEvent == TRPM_HARDWARE_INT)
6044 {
6045 if (uVector == X86_XCPT_NMI)
6046 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6047 else
6048 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6049 }
6050 else if (enmTrpmEvent == TRPM_SOFTWARE_INT)
6051 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6052 else
6053 AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent));
6054
6055 rc = TRPMResetTrap(pVCpu);
6056 AssertRC(rc);
6057 Log4(("TRPM->HM event: u32IntrInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n",
6058 u32IntrInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress));
6059
6060 hmR0VmxSetPendingEvent(pVCpu, u32IntrInfo, cbInstr, uErrCode, GCPtrFaultAddress);
6061 STAM_COUNTER_DEC(&pVCpu->hm.s.StatInjectPendingReflect);
6062}
6063
6064
6065/**
6066 * Converts any pending HM event into a TRPM trap. Typically used when leaving
6067 * VT-x to execute any instruction.
6068 *
6069 * @param pVCpu Pointer to the VMCPU.
6070 */
6071static void hmR0VmxPendingEventToTrpmTrap(PVMCPU pVCpu)
6072{
6073 Assert(pVCpu->hm.s.Event.fPending);
6074
6075 uint32_t uVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo);
6076 uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVCpu->hm.s.Event.u64IntrInfo);
6077 bool fErrorCodeValid = !!VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntrInfo);
6078 uint32_t uErrorCode = pVCpu->hm.s.Event.u32ErrCode;
6079
6080 /* If a trap was already pending, we did something wrong! */
6081 Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
6082
6083 TRPMEVENT enmTrapType;
6084 switch (uVectorType)
6085 {
6086 case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT:
6087 case VMX_IDT_VECTORING_INFO_TYPE_NMI:
6088 enmTrapType = TRPM_HARDWARE_INT;
6089 break;
6090
6091 case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
6092 enmTrapType = TRPM_SOFTWARE_INT;
6093 break;
6094
6095 case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
6096 case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT: /* #BP and #OF */
6097 case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT:
6098 enmTrapType = TRPM_TRAP;
6099 break;
6100
6101 default:
6102 AssertMsgFailed(("Invalid trap type %#x\n", uVectorType));
6103 enmTrapType = TRPM_32BIT_HACK;
6104 break;
6105 }
6106
6107 Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType));
6108
6109 int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
6110 AssertRC(rc);
6111
6112 if (fErrorCodeValid)
6113 TRPMSetErrorCode(pVCpu, uErrorCode);
6114
6115 if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
6116 && uVector == X86_XCPT_PF)
6117 {
6118 TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
6119 }
6120 else if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
6121 || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
6122 || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
6123 {
6124 AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
6125 || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF),
6126 ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType));
6127 TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
6128 }
6129 pVCpu->hm.s.Event.fPending = false;
6130}
6131
6132
6133/**
6134 * Does the necessary state syncing before returning to ring-3 for any reason
6135 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
6136 *
6137 * @returns VBox status code.
6138 * @param pVM Pointer to the VM.
6139 * @param pVCpu Pointer to the VMCPU.
6140 * @param pMixedCtx Pointer to the guest-CPU context. The data may
6141 * be out-of-sync. Make sure to update the required
6142 * fields before using them.
6143 * @param fSaveGuestState Whether to save the guest state or not.
6144 *
6145 * @remarks No-long-jmp zone!!!
6146 */
6147static int hmR0VmxLeave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fSaveGuestState)
6148{
6149 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6150 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6151
6152 RTCPUID idCpu = RTMpCpuId();
6153 Log4Func(("HostCpuId=%u\n", idCpu));
6154
6155 /* Save the guest state if necessary. */
6156 if ( fSaveGuestState
6157 && pVCpu->hm.s.vmx.fUpdatedGuestState != HMVMX_UPDATED_GUEST_ALL)
6158 {
6159 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
6160 AssertRCReturn(rc, rc);
6161 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState == HMVMX_UPDATED_GUEST_ALL);
6162 }
6163
6164 /* Restore host FPU state if necessary and resync on next R0 reentry. */
6165 if (CPUMIsGuestFPUStateActive(pVCpu))
6166 {
6167 if (!fSaveGuestState)
6168 {
6169 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
6170 AssertRCReturn(rc, rc);
6171 }
6172 CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx);
6173 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
6174 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
6175 }
6176
6177 /* Restore host debug registers if necessary and resync on next R0 reentry. */
6178#ifdef VBOX_STRICT
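 /* When the hypervisor debug state is active we expect MOV DRx to be intercepted, otherwise the guest could clobber the hyper DRx values. */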
6179 if (CPUMIsHyperDebugStateActive(pVCpu))
6180 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT);
6181#endif
6182 if (CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */))
6183 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
6184 Assert(!CPUMIsGuestDebugStateActive(pVCpu) && !CPUMIsGuestDebugStateActivePending(pVCpu));
6185 Assert(!CPUMIsHyperDebugStateActive(pVCpu) && !CPUMIsHyperDebugStateActivePending(pVCpu));
6186
6187#if HC_ARCH_BITS == 64
6188 /* Restore host-state bits that VT-x only restores partially. */
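 /* Both the 'restore required' marker and at least one specific register flag must be set for this to do anything. */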
6189 if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
6190 && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
6191 {
6192 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, idCpu));
6193 VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
6194 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
6195 }
6196#endif
6197
6198 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
6199 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatLoadGuestState);
6200 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1);
6201 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2);
6202 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
6203 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
6204 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
6205 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
6206
6207 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
6208
6209 /** @todo This kinda defeats the purpose of having preemption hooks.
6210 * The problem is that deregistering the hooks should be moved to a place that
6211 * lasts until the EMT is about to be destroyed, not every time we leave HM
6212 * context.
6213 */
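 /* VMCLEAR flushes the VMCS data to memory and puts the VMCS in the 'clear' state so it can safely be activated on another CPU later. */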
6214 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE)
6215 {
6216 int rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
6217 AssertRCReturn(rc, rc);
6218
6219 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
6220 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
6221 }
6222 Assert(!(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED));
6223 NOREF(idCpu);
6224
6225 return VINF_SUCCESS;
6226}
6227
6228
6229/**
6230 * Leaves the VT-x session.
6231 *
6232 * @returns VBox status code.
6233 * @param pVM Pointer to the VM.
6234 * @param pVCpu Pointer to the VMCPU.
6235 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6236 * out-of-sync. Make sure to update the required fields
6237 * before using them.
6238 *
6239 * @remarks No-long-jmp zone!!!
6240 */
6241DECLINLINE(int) hmR0VmxLeaveSession(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6242{
6243 HM_DISABLE_PREEMPT_IF_NEEDED();
6244 HMVMX_ASSERT_CPU_SAFE();
6245 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6246 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6247
6248 /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before
6249 and had already done this from VMXR0ThreadCtxCallback(). */
6250 if (!pVCpu->hm.s.fLeaveDone)
6251 {
6252 int rc2 = hmR0VmxLeave(pVM, pVCpu, pMixedCtx, true /* fSaveGuestState */);
6253 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT_IF_NEEDED(), rc2);
6254 pVCpu->hm.s.fLeaveDone = true;
6255 }
6256
6257 /* Deregister hook now that we've left HM context before re-enabling preemption. */
6258 /** @todo This is bad. Deregistering here means we always need to VMCLEAR
6259 * (on longjmp/exit-to-ring-3) in VT-x, which is not efficient. */
6260 if (VMMR0ThreadCtxHooksAreRegistered(pVCpu))
6261 VMMR0ThreadCtxHooksDeregister(pVCpu);
6262
6263 /* Leave HM context. This takes care of local init (term). */
6264 int rc = HMR0LeaveCpu(pVCpu);
6265
6266 HM_RESTORE_PREEMPT_IF_NEEDED();
6267
6268 return rc;
6269}
6270
6271
6272/**
6273 * Does the necessary state syncing before doing a longjmp to ring-3.
6274 *
6275 * @returns VBox status code.
6276 * @param pVM Pointer to the VM.
6277 * @param pVCpu Pointer to the VMCPU.
6278 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6279 * out-of-sync. Make sure to update the required fields
6280 * before using them.
6281 *
6282 * @remarks No-long-jmp zone!!!
6283 */
6284DECLINLINE(int) hmR0VmxLongJmpToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6285{
6286 return hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx);
6287}
6288
6289
6290/**
6291 * Take necessary actions before going back to ring-3.
6292 *
6293 * An action requires us to go back to ring-3. This function does the necessary
6294 * steps before we can safely return to ring-3. This is not the same as a longjmp
6295 * to ring-3; it is voluntary and prepares the guest so it may continue
6296 * executing outside HM (recompiler/IEM).
6297 *
6298 * @returns VBox status code.
6299 * @param pVM Pointer to the VM.
6300 * @param pVCpu Pointer to the VMCPU.
6301 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6302 * out-of-sync. Make sure to update the required fields
6303 * before using them.
6304 * @param rcExit The reason for exiting to ring-3. Can be
6305 * VINF_VMM_UNKNOWN_RING3_CALL.
6306 */
6307static int hmR0VmxExitToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, int rcExit)
6308{
6309 Assert(pVM);
6310 Assert(pVCpu);
6311 Assert(pMixedCtx);
6312 HMVMX_ASSERT_PREEMPT_SAFE();
6313
6314 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
6315 {
6316 VMXGetActivatedVmcs(&pVCpu->hm.s.vmx.LastError.u64VMCSPhys);
6317 pVCpu->hm.s.vmx.LastError.u32VMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs;
6318 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
6319 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
6320 }
6321
6322 /* Please, no longjmps here (any log flush could jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
6323 VMMRZCallRing3Disable(pVCpu);
6324 Log4(("hmR0VmxExitToRing3: pVCpu=%p idCpu=%RU32 rcExit=%d\n", pVCpu, pVCpu->idCpu, rcExit));
6325
6326 /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring-3. */
6327 if (pVCpu->hm.s.Event.fPending)
6328 {
6329 hmR0VmxPendingEventToTrpmTrap(pVCpu);
6330 Assert(!pVCpu->hm.s.Event.fPending);
6331 }
6332
6333 /* Save guest state and restore host state bits. */
6334 int rc = hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx);
6335 AssertRCReturn(rc, rc);
6336 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
6337
6338 /* Sync recompiler state. */
6339 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
6340 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
6341 | CPUM_CHANGED_LDTR
6342 | CPUM_CHANGED_GDTR
6343 | CPUM_CHANGED_IDTR
6344 | CPUM_CHANGED_TR
6345 | CPUM_CHANGED_HIDDEN_SEL_REGS);
6346 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR0);
6347 if ( pVM->hm.s.fNestedPaging
6348 && CPUMIsGuestPagingEnabledEx(pMixedCtx))
6349 {
6350 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
6351 }
6352
6353 /*
6354 * Clear the X86_EFL_TF if necessary.
6355 */
6356 if (pVCpu->hm.s.fClearTrapFlag)
6357 {
6358 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS);
6359 pMixedCtx->eflags.Bits.u1TF = 0;
6360 pVCpu->hm.s.fClearTrapFlag = false;
6361 }
6362 /** @todo There seem to be issues with the resume flag when the monitor trap
6363 * flag is pending without being used. Seen early in BIOS init when
6364 * accessing the APIC page in protected mode. */
6365
6366 /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */
6367 if (rcExit != VINF_EM_RAW_INTERRUPT)
6368 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL_GUEST;
6369
6370 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
6371
6372 /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */
6373 VMMRZCallRing3RemoveNotification(pVCpu);
6374 VMMRZCallRing3Enable(pVCpu);
6375
6376 return rc;
6377}
6378
6379
6380/**
6381 * VMMRZCallRing3() callback wrapper which saves the guest state before we
6382 * longjump to ring-3 and possibly get preempted.
6383 *
6384 * @returns VBox status code.
6385 * @param pVCpu Pointer to the VMCPU.
6386 * @param enmOperation The operation causing the ring-3 longjump.
6387 * @param pvUser Opaque pointer to the guest-CPU context. The data
6388 * may be out-of-sync. Make sure to update the required
6389 * fields before using them.
6390 */
6391DECLCALLBACK(int) hmR0VmxCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser)
6392{
6393 if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION)
6394 {
6395 VMMRZCallRing3RemoveNotification(pVCpu);
6396 HM_DISABLE_PREEMPT_IF_NEEDED();
6397
6398 /* If anything here asserts or fails, good luck. */
6399 if (CPUMIsGuestFPUStateActive(pVCpu))
6400 CPUMR0SaveGuestFPU(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser);
6401
6402 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
6403
6404#if HC_ARCH_BITS == 64
6405 /* Restore host-state bits that VT-x only restores partially. */
6406 if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
6407 && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
6408 {
6409 VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
6410 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
6411 }
6412#endif
6413 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
6414 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE)
6415 {
6416 VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
6417 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
6418 }
6419
6420 if (VMMR0ThreadCtxHooksAreRegistered(pVCpu))
6421 VMMR0ThreadCtxHooksDeregister(pVCpu);
6422
6423 HMR0LeaveCpu(pVCpu);
6424 HM_RESTORE_PREEMPT_IF_NEEDED();
6425 return VINF_SUCCESS;
6426 }
6427
6428 Assert(pVCpu);
6429 Assert(pvUser);
6430 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6431 HMVMX_ASSERT_PREEMPT_SAFE();
6432
6433 VMMRZCallRing3Disable(pVCpu);
6434 Assert(VMMR0IsLogFlushDisabled(pVCpu));
6435
6436 Log4(("hmR0VmxCallRing3Callback->hmR0VmxLongJmpToRing3 pVCpu=%p idCpu=%RU32 enmOperation=%d\n", pVCpu, pVCpu->idCpu,
6437 enmOperation));
6438
6439 int rc = hmR0VmxLongJmpToRing3(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser);
6440 AssertRCReturn(rc, rc);
6441
6442 VMMRZCallRing3Enable(pVCpu);
6443 return VINF_SUCCESS;
6444}
6445
6446
6447/**
6448 * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to
6449 * cause a VM-exit as soon as the guest is in a state to receive interrupts.
6450 *
6451 * @param pVCpu Pointer to the VMCPU.
6452 */
6453DECLINLINE(void) hmR0VmxSetIntWindowExitVmcs(PVMCPU pVCpu)
6454{
6455 if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT))
6456 {
6457 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT))
6458 {
6459 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
6460 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
6461 AssertRC(rc);
6462 }
6463 } /* else we will deliver interrupts whenever the guest exits next and is in a state to receive events. */
6464}
6465
6466
6467/**
6468 * Evaluates the event to be delivered to the guest and sets it as the pending
6469 * event.
6470 *
6471 * @param pVCpu Pointer to the VMCPU.
6472 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6473 * out-of-sync. Make sure to update the required fields
6474 * before using them.
6475 */
6476static void hmR0VmxEvaluatePendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6477{
6478 Assert(!pVCpu->hm.s.Event.fPending);
6479
6480 /* Get the current interruptibility-state of the guest and then figure out what can be injected. */
6481 uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx);
6482 bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
6483 bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
6484
6485 Assert(!fBlockSti || (pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS));
6486 Assert( !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI) /* We don't support block-by-NMI and SMI yet.*/
6487 && !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI));
6488 Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
6489 Assert(!TRPMHasTrap(pVCpu));
6490
6491 /** @todo SMI. SMIs take priority over NMIs. */
6492 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts. */
6493 {
6494 /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */
6495 if ( !fBlockMovSS
6496 && !fBlockSti)
6497 {
6498 /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */
6499 Log4(("Pending NMI vcpu[%RU32]\n", pVCpu->idCpu));
6500 uint32_t u32IntrInfo = X86_XCPT_NMI | VMX_EXIT_INTERRUPTION_INFO_VALID;
6501 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6502
6503 hmR0VmxSetPendingEvent(pVCpu, u32IntrInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddres */);
6504 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
6505 }
6506 else
6507 hmR0VmxSetIntWindowExitVmcs(pVCpu);
6508 }
6509 else if ( VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
6510 && !pVCpu->hm.s.fSingleInstruction)
6511 {
6512 /*
6513 * Check if the guest can receive external interrupts (PIC/APIC). Once we do PDMGetInterrupt() we -must- deliver
6514 * the interrupt ASAP. We must not execute any guest code until we inject the interrupt which is why it is
6515 * evaluated here and not set as pending, solely based on the force-flags.
6516 */
6517 int rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
6518 AssertRC(rc);
6519 const bool fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF);
6520 if ( !fBlockInt
6521 && !fBlockSti
6522 && !fBlockMovSS)
6523 {
6524 uint8_t u8Interrupt;
6525 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
6526 if (RT_SUCCESS(rc))
6527 {
6528 Log4(("Pending interrupt vcpu[%RU32] u8Interrupt=%#x \n", pVCpu->idCpu, u8Interrupt));
6529 uint32_t u32IntrInfo = u8Interrupt | VMX_EXIT_INTERRUPTION_INFO_VALID;
6530 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6531
6532 hmR0VmxSetPendingEvent(pVCpu, u32IntrInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrfaultAddress */);
6533 }
6534 else
6535 {
6536 /** @todo Does this actually happen? If not turn it into an assertion. */
6537 Assert(!VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)));
6538 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
6539 }
6540 }
6541 else
6542 hmR0VmxSetIntWindowExitVmcs(pVCpu);
6543 }
6544}
6545
6546
6547/**
6548 * Injects any pending events into the guest if the guest is in a state to
6549 * receive them.
6550 *
6551 * @returns VBox status code (informational status codes included).
6552 * @param pVCpu Pointer to the VMCPU.
6553 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6554 * out-of-sync. Make sure to update the required fields
6555 * before using them.
6556 */
6557static int hmR0VmxInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6558{
6559 HMVMX_ASSERT_PREEMPT_SAFE();
6560 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6561
6562 /* Get the current interruptibility-state of the guest and then figure out what can be injected. */
6563 uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx);
6564 bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
6565 bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
6566
6567 Assert(!fBlockSti || (pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS));
6568 Assert( !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI) /* We don't support block-by-NMI and SMI yet.*/
6569 && !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI));
6570 Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
6571 Assert(!TRPMHasTrap(pVCpu));
6572
6573 int rc = VINF_SUCCESS;
6574 if (pVCpu->hm.s.Event.fPending)
6575 {
6576#if defined(VBOX_STRICT) || defined(VBOX_WITH_STATISTICS)
6577 uint32_t uIntrType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo);
6578 if (uIntrType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
6579 {
6580 rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
6581 AssertRCReturn(rc, rc);
6582 const bool fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF);
6583 Assert(!fBlockInt);
6584 Assert(!fBlockSti);
6585 Assert(!fBlockMovSS);
6586 }
6587 else if (uIntrType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
6588 {
6589 Assert(!fBlockSti);
6590 Assert(!fBlockMovSS);
6591 }
6592#endif
6593 Log4(("Injecting pending event vcpu[%RU32] u64IntrInfo=%#RX64\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntrInfo));
6594 rc = hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, pVCpu->hm.s.Event.u64IntrInfo, pVCpu->hm.s.Event.cbInstr,
6595 pVCpu->hm.s.Event.u32ErrCode, pVCpu->hm.s.Event.GCPtrFaultAddress, &uIntrState);
6596 AssertRCReturn(rc, rc);
6597
6598 /* Update the interruptibility-state as it could have been changed by
6599 hmR0VmxInjectEventVmcs() (e.g. real-on-v86 guest injecting software interrupts) */
6600 fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
6601 fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
6602
6603#ifdef VBOX_WITH_STATISTICS
6604 if (uIntrType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
6605 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt);
6606 else
6607 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt);
6608#endif
6609 }
6610
6611 /* Deliver a pending debug exception if the guest is single-stepping. Evaluate and set the BS bit. */
6612 int rc2 = VINF_SUCCESS;
6613 if ( fBlockSti
6614 || fBlockMovSS)
6615 {
6616 if (!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu))
6617 {
6618 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS);
6619 if (pMixedCtx->eflags.Bits.u1TF) /* We don't have any IA32_DEBUGCTL MSR for guests. Treat as all bits 0. */
6620 {
6621 /*
6622 * The pending-debug exceptions field is cleared on all VM-exits except VMX_EXIT_TPR_BELOW_THRESHOLD,
6623 * VMX_EXIT_MTF, VMX_EXIT_APIC_WRITE and VMX_EXIT_VIRTUALIZED_EOI.
6624 * See Intel spec. 27.3.4 "Saving Non-Register State".
6625 */
6626 rc2 = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, VMX_VMCS_GUEST_DEBUG_EXCEPTIONS_BS);
6627 AssertRCReturn(rc2, rc2);
6628 }
6629 }
6630 else
6631 {
6632 /* We are single-stepping in the hypervisor debugger; clear interrupt inhibition, as setting the BS bit would mean
6633 delivering a #DB to the guest upon VM-entry when it shouldn't be. */
6634 uIntrState = 0;
6635 }
6636 }
6637
6638 /*
6639 * There's no need to clear the VM entry-interruption information field here if we're not injecting anything.
6640 * VT-x clears the valid bit on every VM-exit. See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
6641 */
6642 rc2 = hmR0VmxLoadGuestIntrState(pVCpu, uIntrState);
6643 AssertRC(rc2);
6644
6645 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET);
6646 return rc;
6647}
6648
6649
6650/**
6651 * Sets an invalid-opcode (#UD) exception as pending-for-injection into the VM.
6652 *
6653 * @param pVCpu Pointer to the VMCPU.
6654 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6655 * out-of-sync. Make sure to update the required fields
6656 * before using them.
6657 */
6658DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6659{
6660 uint32_t u32IntrInfo = X86_XCPT_UD | VMX_EXIT_INTERRUPTION_INFO_VALID;
 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6661 hmR0VmxSetPendingEvent(pVCpu, u32IntrInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
6662}
6663
6664
6665/**
6666 * Injects a double-fault (#DF) exception into the VM.
6667 *
6668 * @returns VBox status code (informational status code included).
6669 * @param pVCpu Pointer to the VMCPU.
6670 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6671 * out-of-sync. Make sure to update the required fields
6672 * before using them.
6673 */
6674DECLINLINE(int) hmR0VmxInjectXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t *puIntrState)
6675{
6676 uint32_t u32IntrInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID;
6677 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6678 u32IntrInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
6679 return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntrInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */,
6680 puIntrState);
6681}
6682
6683
6684/**
6685 * Sets a debug (#DB) exception as pending-for-injection into the VM.
6686 *
6687 * @param pVCpu Pointer to the VMCPU.
6688 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6689 * out-of-sync. Make sure to update the required fields
6690 * before using them.
6691 */
6692DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6693{
6694 uint32_t u32IntrInfo = X86_XCPT_DB | VMX_EXIT_INTERRUPTION_INFO_VALID;
6695 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6696 hmR0VmxSetPendingEvent(pVCpu, u32IntrInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
6697}
6698
6699
6700/**
6701 * Sets an overflow (#OF) exception as pending-for-injection into the VM.
6702 *
6703 * @param pVCpu Pointer to the VMCPU.
6704 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6705 * out-of-sync. Make sure to update the required fields
6706 * before using them.
6707 * @param cbInstr The value of RIP that is to be pushed on the guest
6708 * stack.
6709 */
6710DECLINLINE(void) hmR0VmxSetPendingXcptOF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t cbInstr)
6711{
6712 uint32_t u32IntrInfo = X86_XCPT_OF | VMX_EXIT_INTERRUPTION_INFO_VALID;
6713 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6714 hmR0VmxSetPendingEvent(pVCpu, u32IntrInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
6715}
6716
6717
6718/**
6719 * Injects a general-protection (#GP) fault into the VM.
6720 *
6721 * @returns VBox status code (informational status code included).
6722 * @param pVCpu Pointer to the VMCPU.
6723 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6724 * out-of-sync. Make sure to update the required fields
6725 * before using them.
6726 * @param u32ErrorCode The error code associated with the #GP.
6727 */
6728DECLINLINE(int) hmR0VmxInjectXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fErrorCodeValid, uint32_t u32ErrorCode,
6729 uint32_t *puIntrState)
6730{
6731 uint32_t u32IntrInfo = X86_XCPT_GP | VMX_EXIT_INTERRUPTION_INFO_VALID;
6732 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6733 if (fErrorCodeValid)
6734 u32IntrInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
6735 return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntrInfo, 0 /* cbInstr */, u32ErrorCode, 0 /* GCPtrFaultAddress */,
6736 puIntrState);
6737}
6738
6739
6740/**
6741 * Sets a software interrupt (INTn) as pending-for-injection into the VM.
6742 *
6743 * @param pVCpu Pointer to the VMCPU.
6744 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6745 * out-of-sync. Make sure to update the required fields
6746 * before using them.
6747 * @param uVector The software interrupt vector number.
6748 * @param cbInstr The value of RIP that is to be pushed on the guest
6749 * stack.
6750 */
6751DECLINLINE(void) hmR0VmxSetPendingIntN(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint16_t uVector, uint32_t cbInstr)
6752{
6753 uint32_t u32IntrInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID;
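 /* INT3 (#BP) and INTO (#OF) are injected as software exceptions rather than software interrupts. */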
6754 if ( uVector == X86_XCPT_BP
6755 || uVector == X86_XCPT_OF)
6756 {
6757 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6758 }
6759 else
6760 u32IntrInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6761 hmR0VmxSetPendingEvent(pVCpu, u32IntrInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
6762}
6763
6764
6765/**
6766 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
6767 * stack.
6768 *
6769 * @returns VBox status code (informational status code included).
6770 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
6771 * @param pVM Pointer to the VM.
6772 * @param pMixedCtx Pointer to the guest-CPU context.
6773 * @param uValue The value to push to the guest stack.
6774 */
6775DECLINLINE(int) hmR0VmxRealModeGuestStackPush(PVM pVM, PCPUMCTX pMixedCtx, uint16_t uValue)
6776{
6777 /*
6778 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
6779 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
6780 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
6781 */
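 /* With SP=1 a 16-bit push would straddle the segment wrap-around described above; signal a triple fault (reset) instead of attempting it. */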
6782 if (pMixedCtx->sp == 1)
6783 return VINF_EM_RESET;
6784 pMixedCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
6785 int rc = PGMPhysSimpleWriteGCPhys(pVM, pMixedCtx->ss.u64Base + pMixedCtx->sp, &uValue, sizeof(uint16_t));
6786 AssertRCReturn(rc, rc);
6787 return rc;
6788}
6789
6790
6791/**
6792 * Injects an event into the guest upon VM-entry by updating the relevant fields
6793 * in the VM-entry area in the VMCS.
6794 *
6795 * @returns VBox status code (informational error codes included).
6796 * @retval VINF_SUCCESS if the event is successfully injected into the VMCS.
6797 * @retval VINF_EM_RESET if event injection resulted in a triple-fault.
6798 *
6799 * @param pVCpu Pointer to the VMCPU.
6800 * @param pMixedCtx Pointer to the guest-CPU context. The data may
6801 * be out-of-sync. Make sure to update the required
6802 * fields before using them.
6803 * @param u64IntrInfo The VM-entry interruption-information field.
6804 * @param cbInstr The VM-entry instruction length in bytes (for
6805 * software interrupts, exceptions and privileged
6806 * software exceptions).
6807 * @param u32ErrCode The VM-entry exception error code.
6808 * @param GCPtrFaultAddress The page-fault address for #PF exceptions.
6809 * @param puIntrState Pointer to the current guest interruptibility-state.
6810 * This interruptibility-state will be updated if
6811 * necessary. This must not be NULL.
6812 *
6813 * @remarks Requires CR0!
6814 * @remarks No-long-jump zone!!!
6815 */
6816static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntrInfo, uint32_t cbInstr,
6817 uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, uint32_t *puIntrState)
6818{
6819 /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */
6820 AssertMsg(u64IntrInfo >> 32 == 0, ("%#RX64\n", u64IntrInfo));
6821 Assert(puIntrState);
6822 uint32_t u32IntrInfo = (uint32_t)u64IntrInfo;
6823
6824 const uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(u32IntrInfo);
6825 const uint32_t uIntrType = VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntrInfo);
6826
6827#ifdef VBOX_STRICT
6828 /* Validate the error-code-valid bit for hardware exceptions. */
6829 if (uIntrType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT)
6830 {
6831 switch (uVector)
6832 {
6833 case X86_XCPT_PF:
6834 case X86_XCPT_DF:
6835 case X86_XCPT_TS:
6836 case X86_XCPT_NP:
6837 case X86_XCPT_SS:
6838 case X86_XCPT_GP:
6839 case X86_XCPT_AC:
6840 AssertMsg(VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntrInfo),
6841 ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector));
6842 /* fallthru */
6843 default:
6844 break;
6845 }
6846 }
6847#endif
6848
6849 /* Cannot inject an NMI when block-by-MOV SS is in effect. */
6850 Assert( uIntrType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI
6851 || !(*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS));
6852
6853 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[uVector & MASK_INJECT_IRQ_STAT]);
6854
6855 /* We require CR0 to check if the guest is in real-mode. */
6856 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
6857 AssertRCReturn(rc, rc);
6858
6859 /*
6860 * Hardware interrupts & exceptions cannot be delivered through the software interrupt redirection bitmap to the real
6861 * mode task in virtual-8086 mode. We must jump to the interrupt handler in the (real-mode) guest.
6862 * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode" for interrupt & exception classes.
6863 * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling.
6864 */
6865 if (CPUMIsGuestInRealModeEx(pMixedCtx))
6866 {
6867 PVM pVM = pVCpu->CTX_SUFF(pVM);
6868 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
6869 {
6870 Assert(PDMVmmDevHeapIsEnabled(pVM));
6871 Assert(pVM->hm.s.vmx.pRealModeTSS);
6872
6873 /* We require RIP, RSP, RFLAGS, CS, IDTR. Save the required ones from the VMCS. */
6874 rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
6875 rc |= hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx);
6876 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
6877 AssertRCReturn(rc, rc);
6878 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RIP);
6879
6880 /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */
6881 const size_t cbIdtEntry = sizeof(X86IDTR16);
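 /* Each real-mode IVT entry is 4 bytes: a 16-bit handler offset followed by a 16-bit segment selector. */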
6882 if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pMixedCtx->idtr.cbIdt)
6883 {
6884 /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */
6885 if (uVector == X86_XCPT_DF)
6886 return VINF_EM_RESET;
6887 else if (uVector == X86_XCPT_GP)
6888 {
6889 /* If we're injecting a #GP with no valid IDT entry, inject a double-fault. */
6890 return hmR0VmxInjectXcptDF(pVCpu, pMixedCtx, puIntrState);
6891 }
6892
6893 /* If we're injecting an interrupt/exception with no valid IDT entry, inject a general-protection fault. */
6894 /* No error codes for exceptions in real-mode. See Intel spec. 20.1.4 "Interrupt and Exception Handling" */
6895 return hmR0VmxInjectXcptGP(pVCpu, pMixedCtx, false /* fErrCodeValid */, 0 /* u32ErrCode */, puIntrState);
6896 }
6897
6898 /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */
6899 uint16_t uGuestIp = pMixedCtx->ip;
6900 if (uIntrType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT)
6901 {
6902 Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF);
6903 /* #BP and #OF are both benign traps; we need to resume at the next instruction. */
6904 uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr;
6905 }
6906 else if (uIntrType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT)
6907 uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr;
6908
6909 /* Get the code segment selector and offset from the IDT entry for the interrupt handler. */
6910 X86IDTR16 IdtEntry;
6911 RTGCPHYS GCPhysIdtEntry = (RTGCPHYS)pMixedCtx->idtr.pIdt + uVector * cbIdtEntry;
6912 rc = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry);
6913 AssertRCReturn(rc, rc);
6914
6915 /* Construct the stack frame for the interrupt/exception handler. */
6916 rc = hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->eflags.u32);
6917 rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->cs.Sel);
6918 rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, uGuestIp);
6919 AssertRCReturn(rc, rc);
6920
6921 /* Clear the required eflag bits and jump to the interrupt/exception handler. */
6922 if (rc == VINF_SUCCESS)
6923 {
6924 pMixedCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
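 /* This mirrors real-mode event delivery: the CPU clears IF, TF, RF and AC when vectoring through the IVT. */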
6925 pMixedCtx->rip = IdtEntry.offSel;
6926 pMixedCtx->cs.Sel = IdtEntry.uSel;
6927 pMixedCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry;
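 /* Note: cbIdtEntry is 4, so the shift above is equivalent to the usual real-mode base calculation 'selector << 4'. */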
6928 if ( uIntrType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT
6929 && uVector == X86_XCPT_PF)
6930 {
6931 pMixedCtx->cr2 = GCPtrFaultAddress;
6932 }
6933
6934 /* If any other guest-state bits are changed here, make sure to update
6935 hmR0VmxPreRunGuestCommitted() when thread-context hooks are used. */
6936 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SEGMENT_REGS
6937 | HM_CHANGED_GUEST_RIP
6938 | HM_CHANGED_GUEST_RFLAGS
6939 | HM_CHANGED_GUEST_RSP;
6940
6941 /* We're clearing interrupts, which means no block-by-STI interrupt-inhibition. */
6942 if (*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
6943 {
6944 Assert( uIntrType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI
6945 && uIntrType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT);
6946 Log4(("Clearing inhibition due to STI.\n"));
6947 *puIntrState &= ~VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI;
6948 }
6949 Log4(("Injecting real-mode: u32IntrInfo=%#x u32ErrCode=%#x instrlen=%#x\n", u32IntrInfo, u32ErrCode, cbInstr));
6950
6951 /* The event has been truly dispatched. Mark it as no longer pending so we don't attempt to 'undo'
6952 it if we are returning to ring-3 before executing guest code. */
6953 pVCpu->hm.s.Event.fPending = false;
6954 }
6955 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET);
6956 return rc;
6957 }
6958 else
6959 {
6960 /*
6961 * When unrestricted guest execution is enabled and the guest is in real mode, we must not set the deliver-error-code bit.
6962 * See Intel spec. 26.2.1.3 "VM-Entry Control Fields".
6963 */
6964 u32IntrInfo &= ~VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
6965 }
6966 }
6967
6968 /* Validate. */
6969 Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(u32IntrInfo)); /* Bit 31 (Valid bit) must be set by caller. */
6970 Assert(!VMX_EXIT_INTERRUPTION_INFO_NMI_UNBLOCK(u32IntrInfo)); /* Bit 12 MBZ. */
6971 Assert(!(u32IntrInfo & 0x7ffff000)); /* Bits 30:12 MBZ. */
6972
6973 /* Inject. */
6974 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntrInfo);
6975 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntrInfo))
6976 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode);
6977 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
6978
6979 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntrInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT
6980 && uVector == X86_XCPT_PF)
6981 {
6982 pMixedCtx->cr2 = GCPtrFaultAddress;
6983 }
6984
6985 Log4(("Injecting vcpu[%RU32] u32IntrInfo=%#x u32ErrCode=%#x cbInstr=%#x pMixedCtx->uCR2=%#RX64\n", pVCpu->idCpu,
6986 u32IntrInfo, u32ErrCode, cbInstr, pMixedCtx->cr2));
6987
6988 AssertRCReturn(rc, rc);
6989 return rc;
6990}
6991
6992
6993/**
6994 * Clears the interrupt-window exiting control in the VMCS and if necessary
6995 * clears the current event in the VMCS as well.
6996 *
6997 * @returns VBox status code.
6998 * @param pVCpu Pointer to the VMCPU.
6999 *
7000 * @remarks Use this function only to clear events that have not yet been
7001 * delivered to the guest but are injected in the VMCS!
7002 * @remarks No-long-jump zone!!!
7003 */
7004static void hmR0VmxClearEventVmcs(PVMCPU pVCpu)
7005{
7006 int rc;
7007 Log4Func(("vcpu[%d]\n", pVCpu->idCpu));
7008
7009 /* Clear interrupt-window exiting control. */
7010 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT)
7011 {
7012 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
7013 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
7014 AssertRC(rc);
7015 }
7016
7017 if (!pVCpu->hm.s.Event.fPending)
7018 return;
7019
7020#ifdef VBOX_STRICT
7021 uint32_t u32EntryInfo;
7022 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo);
7023 AssertRC(rc);
7024 Assert(VMX_ENTRY_INTERRUPTION_INFO_VALID(u32EntryInfo));
7025#endif
7026
7027 /* Clear the entry-interruption field (including the valid bit). */
7028 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0);
7029 AssertRC(rc);
7030
7031 /* Clear the pending debug exception field. */
7032 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0);
7033 AssertRC(rc);
7034}
7035
7036
7037/**
7038 * Enters the VT-x session.
7039 *
7040 * @returns VBox status code.
7041 * @param pVM Pointer to the VM.
7042 * @param pVCpu Pointer to the VMCPU.
7043 * @param pCpu Pointer to the CPU info struct.
7044 */
7045VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
7046{
7047 AssertPtr(pVM);
7048 AssertPtr(pVCpu);
7049 Assert(pVM->hm.s.vmx.fSupported);
7050 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7051 NOREF(pCpu);
7052
7053 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
7054 Assert((pVCpu->hm.s.fContextUseFlags & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE))
7055 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE));
7056
7057#ifdef VBOX_STRICT
7058 /* Make sure we're in VMX root mode. */
7059 RTCCUINTREG u32HostCR4 = ASMGetCR4();
7060 if (!(u32HostCR4 & X86_CR4_VMXE))
7061 {
7062 LogRel(("VMXR0Enter: X86_CR4_VMXE bit in CR4 is not set!\n"));
7063 return VERR_VMX_X86_CR4_VMXE_CLEARED;
7064 }
7065#endif
7066
7067 /*
7068 * Load the VCPU's VMCS as the current (and active) one.
7069 */
7070 Assert(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR);
7071 int rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
7072 if (RT_FAILURE(rc))
7073 return rc;
7074
7075 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE;
7076 pVCpu->hm.s.fLeaveDone = false;
7077 Log4Func(("Activated Vmcs. HostCpuId=%u\n", RTMpCpuId()));
7078
7079 return VINF_SUCCESS;
7080}
7081
7082
7083/**
7084 * The thread-context callback (only on platforms which support it).
7085 *
7086 * @param enmEvent The thread-context event.
7087 * @param pVCpu Pointer to the VMCPU.
7088 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
7089 * @thread EMT.
7090 */
7091VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit)
7092{
7093 switch (enmEvent)
7094 {
7095 case RTTHREADCTXEVENT_PREEMPTING:
7096 {
7097 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7098 Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu));
7099 VMCPU_ASSERT_EMT(pVCpu);
7100
7101 PVM pVM = pVCpu->CTX_SUFF(pVM);
7102 PCPUMCTX pMixedCtx = CPUMQueryGuestCtxPtr(pVCpu);
7103
7104 /* No longjmps (logger flushes, locks) in this fragile context. */
7105 VMMRZCallRing3Disable(pVCpu);
7106 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
7107
7108 /*
7109 * Restore host-state (FPU, debug etc.)
7110 */
7111 if (!pVCpu->hm.s.fLeaveDone)
7112 {
7113 /* Do -not- save guest-state here as we might already be in the middle of saving it (esp. bad if we are
7114 holding the PGM lock while saving the guest state; see hmR0VmxSaveGuestControlRegs()). */
7115 hmR0VmxLeave(pVM, pVCpu, pMixedCtx, false /* fSaveGuestState */);
7116 pVCpu->hm.s.fLeaveDone = true;
7117 }
7118
7119 /* Leave HM context, takes care of local init (term). */
7120 int rc = HMR0LeaveCpu(pVCpu);
7121 AssertRC(rc); NOREF(rc);
7122
7123 /* Restore longjmp state. */
7124 VMMRZCallRing3Enable(pVCpu);
7125 STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptPreempting);
7126 break;
7127 }
7128
7129 case RTTHREADCTXEVENT_RESUMED:
7130 {
7131 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7132 Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu));
7133 VMCPU_ASSERT_EMT(pVCpu);
7134
7135 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
7136 VMMRZCallRing3Disable(pVCpu);
7137 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
7138
7139 /* Initialize the bare minimum state required for HM. This takes care of
7140 initializing VT-x if necessary (onlined CPUs, local init etc.) */
7141 int rc = HMR0EnterCpu(pVCpu);
7142 AssertRC(rc);
7143 Assert((pVCpu->hm.s.fContextUseFlags & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE))
7144 == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE));
7145
7146 /* Load the active VMCS as the current one. */
7147 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR)
7148 {
7149 rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
7150 AssertRC(rc); NOREF(rc);
7151 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE;
7152 Log4Func(("Resumed: Activated Vmcs. HostCpuId=%u\n", RTMpCpuId()));
7153 }
7154 pVCpu->hm.s.fLeaveDone = false;
7155
7156 /* Restore longjmp state. */
7157 VMMRZCallRing3Enable(pVCpu);
7158 break;
7159 }
7160
7161 default:
7162 break;
7163 }
7164}
7165
7166
7167/**
7168 * Saves the host state in the VMCS host-state.
7169 * Sets up the VM-exit MSR-load area.
7170 *
7171 * The CPU state will be loaded from these fields on every successful VM-exit.
7172 *
7173 * @returns VBox status code.
7174 * @param pVM Pointer to the VM.
7175 * @param pVCpu Pointer to the VMCPU.
7176 *
7177 * @remarks No-long-jump zone!!!
7178 */
7179static int hmR0VmxSaveHostState(PVM pVM, PVMCPU pVCpu)
7180{
7181 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7182
7183 if (!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT))
7184 return VINF_SUCCESS;
7185
7186 int rc = hmR0VmxSaveHostControlRegs(pVM, pVCpu);
7187 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostControlRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7188
7189 rc = hmR0VmxSaveHostSegmentRegs(pVM, pVCpu);
7190 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostSegmentRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7191
7192 rc = hmR0VmxSaveHostMsrs(pVM, pVCpu);
7193 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostMsrs failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7194
7195 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_HOST_CONTEXT;
7196 return rc;
7197}
7198
7199
7200/**
7201 * Saves the host state in the VMCS host-state.
7202 *
7203 * @returns VBox status code.
7204 * @param pVM Pointer to the VM.
7205 * @param pVCpu Pointer to the VMCPU.
7206 *
7207 * @remarks No-long-jump zone!!!
7208 */
7209VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
7210{
7211 AssertPtr(pVM);
7212 AssertPtr(pVCpu);
7213
7214 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
7215
7216    /* Save the host state here while entering HM context. When thread-context hooks are used, we might get preempted
7217       and have to resave the host state, but most of the time we won't be preempted, so do it here before we disable interrupts. */
7218 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7219 return hmR0VmxSaveHostState(pVM, pVCpu);
7220}
7221
7222
7223/**
7224 * Loads the guest state into the VMCS guest-state area. The CPU state will be
7225 * loaded from these fields on every successful VM-entry.
7226 *
7227 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas.
7228 * Sets up the VM-entry controls.
7229 * Sets up the appropriate VMX non-root function to execute guest code based on
7230 * the guest CPU mode.
7231 *
7232 * @returns VBox status code.
7233 * @param pVM Pointer to the VM.
7234 * @param pVCpu Pointer to the VMCPU.
7235 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7236 * out-of-sync. Make sure to update the required fields
7237 * before using them.
7238 *
7239 * @remarks No-long-jump zone!!!
7240 */
7241static int hmR0VmxLoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7242{
7243 AssertPtr(pVM);
7244 AssertPtr(pVCpu);
7245 AssertPtr(pMixedCtx);
7246 HMVMX_ASSERT_PREEMPT_SAFE();
7247
7248#ifdef LOG_ENABLED
7249 /** @todo r=ramshankar: I'm not able to use VMMRZCallRing3Disable() here,
7250 * probably not initialized yet? Anyway this will do for now.
7251 *
7252 * Update: Should be possible once VMXR0LoadGuestState() is removed as an
7253 * interface and disable ring-3 calls when thread-context hooks are not
7254 * available. */
7255 bool fCallerDisabledLogFlush = VMMR0IsLogFlushDisabled(pVCpu);
7256 VMMR0LogFlushDisable(pVCpu);
7257#endif
7258
7259 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
7260
7261 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestState, x);
7262
7263 /* Determine real-on-v86 mode. */
7264 pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false;
7265 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
7266 && CPUMIsGuestInRealModeEx(pMixedCtx))
7267 {
7268 pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = true;
7269 }
7270
7271 /*
7272 * Load the guest-state into the VMCS.
7273 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
7274 * Ideally, assert that the cross-dependent bits are up to date at the point of using it.
7275 */
7276 int rc = hmR0VmxSetupVMRunHandler(pVCpu, pMixedCtx);
7277 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupVMRunHandler! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7278
7279 /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-entry control updates. */
7280 rc = hmR0VmxLoadGuestEntryCtls(pVCpu, pMixedCtx);
7281 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestEntryCtls! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7282
7283 /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-exit control updates. */
7284 rc = hmR0VmxLoadGuestExitCtls(pVCpu, pMixedCtx);
7285 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupExitCtls failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7286
7287 rc = hmR0VmxLoadGuestActivityState(pVCpu, pMixedCtx);
7288 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestActivityState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7289
7290 rc = hmR0VmxLoadGuestCR3AndCR4(pVCpu, pMixedCtx);
7291 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestCR3AndCR4: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7292
7293 /* Assumes pMixedCtx->cr0 is up-to-date (strict builds require CR0 for segment register validation checks). */
7294 rc = hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx);
7295 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestSegmentRegs: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7296
7297 rc = hmR0VmxLoadGuestMsrs(pVCpu, pMixedCtx);
7298 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestMsrs! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7299
7300 rc = hmR0VmxLoadGuestApicState(pVCpu, pMixedCtx);
7301 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestApicState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7302
7303 /*
7304 * Loading Rflags here is fine, even though Rflags.TF might depend on guest debug state (which is not loaded here).
7305 * It is re-evaluated and updated if necessary in hmR0VmxLoadSharedState().
7306 */
7307 rc = hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx);
7308 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestRipRspRflags! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7309
7310 /* Clear any unused and reserved bits. */
7311 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_CR2;
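    /* CR2 is not part of the VMCS guest-state area (it is dealt with when actually switching to the guest),
       hence the flag is simply cleared here without loading anything. */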
7312
7313#ifdef LOG_ENABLED
7314 /* Only reenable log-flushing if the caller has it enabled. */
7315 if (!fCallerDisabledLogFlush)
7316 VMMR0LogFlushEnable(pVCpu);
7317#endif
7318
7319 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestState, x);
7320 return rc;
7321}
7322
7323
7324/**
7325 * Loads the state shared between the host and guest into the VMCS.
7326 *
7327 * @param pVM Pointer to the VM.
7328 * @param pVCpu Pointer to the VMCPU.
7329 * @param pCtx Pointer to the guest-CPU context.
7330 *
7331 * @remarks No-long-jump zone!!!
7332 */
7333static void hmR0VmxLoadSharedState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
7334{
7335 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7336 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
7337
7338 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0)
7339 {
7340 int rc = hmR0VmxLoadSharedCR0(pVCpu, pCtx);
7341 AssertRC(rc);
7342 }
7343
7344 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_DEBUG)
7345 {
7346 int rc = hmR0VmxLoadSharedDebugState(pVCpu, pCtx);
7347 AssertRC(rc);
7348
7349 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
7350 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_RFLAGS)
7351 {
7352 rc = hmR0VmxLoadGuestRflags(pVCpu, pCtx);
7353 AssertRC(rc);
7354 }
7355 }
7356
7357 AssertMsg(!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_GUEST_SHARED_STATE), ("fContextUseFlags=%#x\n",
7358 pVCpu->hm.s.fContextUseFlags));
7359}
7360
7361
7362/**
7363 * Worker for loading the guest-state bits in the inner VT-x execution loop.
7364 *
7365 * @param pVM Pointer to the VM.
7366 * @param pVCpu Pointer to the VMCPU.
7367 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7368 * out-of-sync. Make sure to update the required fields
7369 * before using them.
7370 */
7371DECLINLINE(void) hmR0VmxLoadGuestStateOptimal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7372{
7373 HMVMX_ASSERT_PREEMPT_SAFE();
7374
7375 Log5(("LoadFlags=%#RX32\n", pVCpu->hm.s.fContextUseFlags));
7376#ifdef HMVMX_SYNC_FULL_GUEST_STATE
7377 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL_GUEST;
7378#endif
7379
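    /* Fast path: if only the RIP changed, reload just RIP; any other dirty bit forces the full
       hmR0VmxLoadGuestState(). */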
7380 if (pVCpu->hm.s.fContextUseFlags == HM_CHANGED_GUEST_RIP)
7381 {
7382 int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx);
7383 AssertRC(rc);
7384 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal);
7385 }
7386 else if (pVCpu->hm.s.fContextUseFlags)
7387 {
7388 int rc = hmR0VmxLoadGuestState(pVM, pVCpu, pMixedCtx);
7389 AssertRC(rc);
7390 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
7391 }
7392
7393 /* All the guest state bits should be loaded except maybe the host context and shared host/guest bits. */
7394 AssertMsg( !(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_ALL_GUEST)
7395 || !(pVCpu->hm.s.fContextUseFlags & ~(HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE)),
7396 ("fContextUseFlags=%#x\n", pVCpu->hm.s.fContextUseFlags));
7397
7398#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
7399 uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVM, pVCpu, pMixedCtx);
7400 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
7401 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
7402#endif
7403}
7404
7405
7406/**
7407 * Does the preparations before executing guest code in VT-x.
7408 *
7409 * This may cause longjmps to ring-3 and may even result in rescheduling to the
7410 * recompiler. We must be cautious about committing guest-state information into the
7411 * VMCS here on the assumption that we will definitely execute the guest in VT-x mode.
7412 * If we fall back to the recompiler after updating the VMCS
7413 * and clearing the common-state (TRPM/forceflags), we must undo those changes
7414 * so that the recompiler can (and should) use them when it resumes guest
7415 * execution. Otherwise such operations must be done when we can no longer
7416 * exit to ring-3.
7417 *
7418 * @returns Strict VBox status code.
7419 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
7420 * have been disabled.
7421 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
7422 * double-fault into the guest.
7423 * @retval VINF_* scheduling changes, we have to go back to ring-3.
7424 *
7425 * @param pVM Pointer to the VM.
7426 * @param pVCpu Pointer to the VMCPU.
7427 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7428 * out-of-sync. Make sure to update the required fields
7429 * before using them.
7430 * @param pVmxTransient Pointer to the VMX transient structure.
7431 *
7432 * @remarks Called with preemption disabled. In the VINF_SUCCESS return case
7433 * interrupts will be disabled.
7434 */
7435static int hmR0VmxPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
7436{
7437 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7438
7439#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
7440 PGMRZDynMapFlushAutoSet(pVCpu);
7441#endif
7442
7443 /* Check force flag actions that might require us to go back to ring-3. */
7444 int rc = hmR0VmxCheckForceFlags(pVM, pVCpu, pMixedCtx);
7445 if (rc != VINF_SUCCESS)
7446 return rc;
7447
7448#ifndef IEM_VERIFICATION_MODE_FULL
7449 /* Setup the Virtualized APIC accesses. pMixedCtx->msrApicBase is always up-to-date. It's not part of the VMCS. */
7450 if ( pVCpu->hm.s.vmx.u64MsrApicBase != pMixedCtx->msrApicBase
7451 && (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
7452 {
7453 Assert(pVM->hm.s.vmx.HCPhysApicAccess);
7454 RTGCPHYS GCPhysApicBase;
7455 GCPhysApicBase = pMixedCtx->msrApicBase;
7456 GCPhysApicBase &= PAGE_BASE_GC_MASK;
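        /* IA32_APIC_BASE carries enable/status bits in its low bits; masking with PAGE_BASE_GC_MASK
           leaves only the 4K page-aligned physical base of the APIC page. */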
7457
7458 /* Unalias any existing mapping. */
7459 rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
7460 AssertRCReturn(rc, rc);
7461
7462 /* Map the HC APIC-access page into the GC space, this also updates the shadow page tables if necessary. */
7463        Log4(("Mapped HC APIC-access page into GC: GCPhysApicBase=%#RGp\n", GCPhysApicBase));
7464 rc = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
7465 AssertRCReturn(rc, rc);
7466
7467 pVCpu->hm.s.vmx.u64MsrApicBase = pMixedCtx->msrApicBase;
7468 }
7469#endif /* !IEM_VERIFICATION_MODE_FULL */
7470
7471 /* Load the guest state bits, we can handle longjmps/getting preempted here. */
7472 hmR0VmxLoadGuestStateOptimal(pVM, pVCpu, pMixedCtx);
7473
7474 /*
7475 * Evaluate events as pending-for-injection into the guest. Toggling of force-flags here is safe as long as
7476 * we update TRPM on premature exits to ring-3 before executing guest code. We must NOT restore the force-flags.
7477 */
7478 if (TRPMHasTrap(pVCpu))
7479 hmR0VmxTrpmTrapToPendingEvent(pVCpu);
7480 else if (!pVCpu->hm.s.Event.fPending)
7481 hmR0VmxEvaluatePendingEvent(pVCpu, pMixedCtx);
7482
7483 /*
7484 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus needs to be done with
7485 * longjmps or interrupts + preemption enabled. Event injection might also result in triple-faulting the VM.
7486 */
7487 rc = hmR0VmxInjectPendingEvent(pVCpu, pMixedCtx);
7488 if (RT_UNLIKELY(rc != VINF_SUCCESS))
7489 {
7490 Assert(rc == VINF_EM_RESET);
7491 return rc;
7492 }
7493
7494 /*
7495 * No longjmps to ring-3 from this point on!!!
7496 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
7497 * This also disables flushing of the R0-logger instance (if any).
7498 */
7499 VMMRZCallRing3Disable(pVCpu);
7500
7501 /*
7502 * We disable interrupts so that we don't miss any interrupts that would flag preemption (IPI/timers etc.)
7503 * when thread-context hooks aren't used and we've been running with preemption disabled for a while.
7504 *
7505     * We need to check for force-flags that could've possibly been altered since we last checked them (e.g.
7506 * by PDMGetInterrupt() leaving the PDM critical section, see @bugref{6398}).
7507 *
7508 * We also check a couple of other force-flags as a last opportunity to get the EMT back to ring-3 before
7509 * executing guest code.
7510 */
7511 pVmxTransient->uEflags = ASMIntDisableFlags();
7512 if ( VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
7513 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
7514 {
7515 hmR0VmxClearEventVmcs(pVCpu);
7516 ASMSetFlags(pVmxTransient->uEflags);
7517 VMMRZCallRing3Enable(pVCpu);
7518 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
7519 return VINF_EM_RAW_TO_R3;
7520 }
7521 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
7522 {
7523 hmR0VmxClearEventVmcs(pVCpu);
7524 ASMSetFlags(pVmxTransient->uEflags);
7525 VMMRZCallRing3Enable(pVCpu);
7526 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
7527 return VINF_EM_RAW_INTERRUPT;
7528 }
7529
7530 /* We've injected any pending events. This is really the point of no return (to ring-3). */
7531 pVCpu->hm.s.Event.fPending = false;
7532
7533 return VINF_SUCCESS;
7534}
7535
7536
7537/**
7538 * Prepares to run guest code in VT-x and we've committed to doing so. This
7539 * means there is no backing out to ring-3 or anywhere else at this
7540 * point.
7541 *
7542 * @param pVM Pointer to the VM.
7543 * @param pVCpu Pointer to the VMCPU.
7544 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7545 * out-of-sync. Make sure to update the required fields
7546 * before using them.
7547 * @param pVmxTransient Pointer to the VMX transient structure.
7548 *
7549 * @remarks Called with preemption disabled.
7550 * @remarks No-long-jump zone!!!
7551 */
7552static void hmR0VmxPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
7553{
7554 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
7555 Assert(VMMR0IsLogFlushDisabled(pVCpu));
7556 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7557
7558 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
7559 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); /* Indicate the start of guest execution. */
7560
7561 /*
7562 * If we are injecting events to a real-on-v86 mode guest, we may have to update
7563 * RIP and some other registers, i.e. hmR0VmxInjectPendingEvent()->hmR0VmxInjectEventVmcs().
7564 * Reload only the necessary state, the assertion will catch if other parts of the code
7565 * change.
7566 */
7567 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
7568 {
7569 hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx);
7570 hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx);
7571 }
7572
7573#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE
7574 if (!CPUMIsGuestFPUStateActive(pVCpu))
7575 CPUMR0LoadGuestFPU(pVM, pVCpu, pMixedCtx);
7576 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
7577#endif
7578
7579 /*
7580 * Load the host state bits as we may've been preempted (only happens when
7581 * thread-context hooks are used or when hmR0VmxSetupVMRunHandler() changes pfnStartVM).
7582 */
7583 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
7584 {
7585 /* This ASSUMES that pfnStartVM has been set up already. */
7586 int rc = hmR0VmxSaveHostState(pVM, pVCpu);
7587 AssertRC(rc);
7588 STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptSaveHostState);
7589 }
7590 Assert(!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT));
7591
7592 /*
7593 * Load the state shared between host and guest (FPU, debug).
7594 */
7595 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_GUEST_SHARED_STATE)
7596 hmR0VmxLoadSharedState(pVM, pVCpu, pMixedCtx);
7597 AssertMsg(!pVCpu->hm.s.fContextUseFlags, ("fContextUseFlags=%#x\n", pVCpu->hm.s.fContextUseFlags));
7598
7599 /* Store status of the shared guest-host state at the time of VM-entry. */
7600#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
7601 if (CPUMIsGuestInLongModeEx(pMixedCtx))
7602 {
7603 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu);
7604 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu);
7605 }
7606 else
7607#endif
7608 {
7609 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
7610 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
7611 }
7612 pVmxTransient->fWasGuestFPUStateActive = CPUMIsGuestFPUStateActive(pVCpu);
7613
7614 /*
7615 * Cache the TPR-shadow for checking on every VM-exit if it might have changed.
7616 */
7617 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
7618 pVmxTransient->u8GuestTpr = pVCpu->hm.s.vmx.pbVirtApic[0x80];
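        /* Offset 0x80 into the virtual-APIC page is the TPR (VTPR) field defined by the VT-x
           virtual-APIC page layout, i.e. the value the guest sees via the TPR shadow. */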
7619
7620 PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu();
7621 RTCPUID idCurrentCpu = pCpu->idCpu;
7622 if ( pVmxTransient->fUpdateTscOffsettingAndPreemptTimer
7623 || idCurrentCpu != pVCpu->hm.s.idLastCpu)
7624 {
7625 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pMixedCtx);
7626 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = false;
7627 }
7628
7629 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB-shootdowns, set this across the world switch. */
7630 hmR0VmxFlushTaggedTlb(pVCpu, pCpu); /* Invalidate the appropriate guest entries from the TLB. */
7631 Assert(idCurrentCpu == pVCpu->hm.s.idLastCpu);
7632 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Update the error reporting info. with the current host CPU. */
7633
7634 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
7635
7636 TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about
7637 to start executing. */
7638
7639#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
7640 /*
7641 * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that
7642 * RDTSCPs (that don't cause exits) reads the guest MSR. See @bugref{3324}.
7643 */
7644 if ( (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
7645 && !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT))
7646 {
7647 pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX);
7648        uint64_t u64GuestTscAux = 0;
7649        int rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTscAux);
7650        AssertRC(rc2);
7651        ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTscAux);
7652 }
7653#endif
7654}
7655
7656
7657/**
7658 * Performs some essential restoration of state after running guest code in
7659 * VT-x.
7660 *
7661 * @param pVM Pointer to the VM.
7662 * @param pVCpu Pointer to the VMCPU.
7663 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7664 * out-of-sync. Make sure to update the required fields
7665 * before using them.
7666 * @param pVmxTransient Pointer to the VMX transient structure.
7667 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
7668 *
7669 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
7670 *
7671 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
7672 * unconditionally when it is safe to do so.
7673 */
7674static void hmR0VmxPostRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, int rcVMRun)
7675{
7676 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
7677
7678 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB-shootdowns. */
7679 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for TLB-shootdowns. */
7680 pVCpu->hm.s.vmx.fUpdatedGuestState = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
7681 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
7682 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
7683
7684 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT))
7685 {
7686#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
7687 /* Restore host's TSC_AUX. */
7688 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
7689 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux);
7690#endif
7691 /** @todo Find a way to fix hardcoding a guestimate. */
7692 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC()
7693 + pVCpu->hm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
7694 }
7695
7696 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
7697 TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */
7698 Assert(!(ASMGetFlags() & X86_EFL_IF));
7699 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
7700
7701#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE
7702 if (CPUMIsGuestFPUStateActive(pVCpu))
7703 {
7704 hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
7705 CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx);
7706 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
7707 }
7708#endif
7709
7710 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Host state messed up by VT-x, we must restore. */
7711 pVCpu->hm.s.vmx.uVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
7712 ASMSetFlags(pVmxTransient->uEflags); /* Enable interrupts. */
7713 VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */
7714
7715 /* Save the basic VM-exit reason. Refer Intel spec. 24.9.1 "Basic VM-exit Information". */
7716 uint32_t uExitReason;
7717 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
7718 rc |= hmR0VmxReadEntryIntrInfoVmcs(pVmxTransient);
7719 AssertRC(rc);
7720 pVmxTransient->uExitReason = (uint16_t)VMX_EXIT_REASON_BASIC(uExitReason);
7721 pVmxTransient->fVMEntryFailed = !!VMX_ENTRY_INTERRUPTION_INFO_VALID(pVmxTransient->uEntryIntrInfo);
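    /* Note: a successful VM-entry clears the valid bit of the VM-entry interruption-information field, so
       finding it still set here means the hardware rejected the entry itself. Entry failures that did not
       involve event injection are caught via rcVMRun and the VMX_EXIT_ERR_* exit reasons instead. */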
7722
7723 /* If the VMLAUNCH/VMRESUME failed, we can bail out early. This does -not- cover VMX_EXIT_ERR_*. */
7724 if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS))
7725 {
7726 Log4(("VM-entry failure: pVCpu=%p idCpu=%RU32 rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", pVCpu, pVCpu->idCpu, rcVMRun,
7727 pVmxTransient->fVMEntryFailed));
7728 return;
7729 }
7730
7731 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
7732 {
7733 /* Update the guest interruptibility-state from the VMCS. */
7734 hmR0VmxSaveGuestIntrState(pVCpu, pMixedCtx);
7735#if defined(HMVMX_SYNC_FULL_GUEST_STATE) || defined(HMVMX_SAVE_FULL_GUEST_STATE)
7736 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
7737 AssertRC(rc);
7738#endif
7739 /*
7740 * If the TPR was raised by the guest, it wouldn't cause a VM-exit immediately. Instead we sync the TPR lazily whenever
7741         * we eventually get a VM-exit for any reason. This may be expensive as PDMApicSetTPR() can longjmp to ring-3, which is
7742         * why it's done here: it's easier and no less efficient to deal with it here than to make hmR0VmxSaveGuestState()
7743 * cope with longjmps safely (see VMCPU_FF_HM_UPDATE_CR3 handling).
7744 */
7745 if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
7746 && pVmxTransient->u8GuestTpr != pVCpu->hm.s.vmx.pbVirtApic[0x80])
7747 {
7748 rc = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[0x80]);
7749 AssertRC(rc);
7750 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_VMX_GUEST_APIC_STATE;
7751 }
7752 }
7753}
7754
7755
7756
7757/**
7758 * Runs the guest code using VT-x the normal way.
7759 *
7760 * @returns VBox status code.
7761 * @param pVM Pointer to the VM.
7762 * @param pVCpu Pointer to the VMCPU.
7763 * @param pCtx Pointer to the guest-CPU context.
7764 *
7765 * @note Mostly the same as hmR0VmxRunGuestCodeStep.
7766 * @remarks Called with preemption disabled.
7767 */
7768static int hmR0VmxRunGuestCodeNormal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
7769{
7770 VMXTRANSIENT VmxTransient;
7771 VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true;
7772 int rc = VERR_INTERNAL_ERROR_5;
7773 uint32_t cLoops = 0;
7774
7775 for (;; cLoops++)
7776 {
7777 Assert(!HMR0SuspendPending());
7778 HMVMX_ASSERT_CPU_SAFE();
7779
7780 /* Preparatory work for running guest code, this may force us to return
7781 to ring-3. This bugger disables interrupts on VINF_SUCCESS! */
7782 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7783 rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient);
7784 if (rc != VINF_SUCCESS)
7785 break;
7786
7787 hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient);
7788 rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx);
7789 /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */
7790
7791 /* Restore any residual host-state and save any bits shared between host
7792 and guest into the guest-CPU state. Re-enables interrupts! */
7793 hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc);
7794
7795 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7796 if (RT_UNLIKELY(rc != VINF_SUCCESS))
7797 {
7798 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
7799 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient);
7800 return rc;
7801 }
7802
7803 /* Handle the VM-exit. */
7804 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7805 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
7806 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7807 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
7808 HMVMX_START_EXIT_DISPATCH_PROF();
7809#ifdef HMVMX_USE_FUNCTION_TABLE
7810 rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient);
7811#else
7812 rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason);
7813#endif
7814 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
7815 if (rc != VINF_SUCCESS)
7816 break;
7817 else if (cLoops > pVM->hm.s.cMaxResumeLoops)
7818 {
7819 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
7820 rc = VINF_EM_RAW_INTERRUPT;
7821 break;
7822 }
7823 }
7824
7825 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7826 return rc;
7827}
7828
7829
7830/**
7831 * Single steps guest code using VT-x.
7832 *
7833 * @returns VBox status code.
7834 * @param pVM Pointer to the VM.
7835 * @param pVCpu Pointer to the VMCPU.
7836 * @param pCtx Pointer to the guest-CPU context.
7837 *
7838 * @note Mostly the same as hmR0VmxRunGuestCodeNormal.
7839 * @remarks Called with preemption disabled.
7840 */
7841static int hmR0VmxRunGuestCodeStep(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
7842{
7843 VMXTRANSIENT VmxTransient;
7844 VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true;
7845 int rc = VERR_INTERNAL_ERROR_5;
7846 uint32_t cLoops = 0;
7847 uint16_t uCsStart = pCtx->cs.Sel;
7848 uint64_t uRipStart = pCtx->rip;
7849
7850 for (;; cLoops++)
7851 {
7852 Assert(!HMR0SuspendPending());
7853 HMVMX_ASSERT_CPU_SAFE();
7854
7855 /* Preparatory work for running guest code, this may force us to return
7856 to ring-3. This bugger disables interrupts on VINF_SUCCESS! */
7857 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7858 rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient);
7859 if (rc != VINF_SUCCESS)
7860 break;
7861
7862 hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient);
7863 rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx);
7864 /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */
7865
7866 /* Restore any residual host-state and save any bits shared between host
7867 and guest into the guest-CPU state. Re-enables interrupts! */
7868 hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc);
7869
7870 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7871 if (RT_UNLIKELY(rc != VINF_SUCCESS))
7872 {
7873 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
7874 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient);
7875 return rc;
7876 }
7877
7878 /* Handle the VM-exit. */
7879 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7880 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
7881 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7882 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
7883 HMVMX_START_EXIT_DISPATCH_PROF();
7884#ifdef HMVMX_USE_FUNCTION_TABLE
7885 rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient);
7886#else
7887 rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason);
7888#endif
7889 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
7890 if (rc != VINF_SUCCESS)
7891 break;
7892 else if (cLoops > pVM->hm.s.cMaxResumeLoops)
7893 {
7894 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
7895 rc = VINF_EM_RAW_INTERRUPT;
7896 break;
7897 }
7898
7899 /*
7900         * Did the RIP change? If so, consider it a single step.
7901 * Otherwise, make sure one of the TFs gets set.
7902 */
7903 int rc2 = hmR0VmxSaveGuestRip(pVCpu, pCtx);
7904 rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pCtx);
7905 AssertRCReturn(rc2, rc2);
7906 if ( pCtx->rip != uRipStart
7907 || pCtx->cs.Sel != uCsStart)
7908 {
7909 rc = VINF_EM_DBG_STEPPED;
7910 break;
7911 }
7912 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
7913 }
7914
7915 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7916 return rc;
7917}
7918
7919
7920/**
7921 * Runs the guest code using VT-x.
7922 *
7923 * @returns VBox status code.
7924 * @param pVM Pointer to the VM.
7925 * @param pVCpu Pointer to the VMCPU.
7926 * @param pCtx Pointer to the guest-CPU context.
7927 *
7928 * @remarks Called with preemption disabled.
7929 */
7930VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
7931{
7932 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7933 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState == HMVMX_UPDATED_GUEST_ALL);
7934 HMVMX_ASSERT_PREEMPT_SAFE();
7935
7936 VMMRZCallRing3SetNotification(pVCpu, hmR0VmxCallRing3Callback, pCtx);
7937
7938 int rc;
7939 if (!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu))
7940 rc = hmR0VmxRunGuestCodeNormal(pVM, pVCpu, pCtx);
7941 else
7942 rc = hmR0VmxRunGuestCodeStep(pVM, pVCpu, pCtx);
7943
7944 if (rc == VERR_EM_INTERPRETER)
7945 rc = VINF_EM_RAW_EMULATE_INSTR;
7946 else if (rc == VINF_EM_RESET)
7947 rc = VINF_EM_TRIPLE_FAULT;
7948
7949 int rc2 = hmR0VmxExitToRing3(pVM, pVCpu, pCtx, rc);
7950 if (RT_FAILURE(rc2))
7951 {
7952 pVCpu->hm.s.u32HMError = rc;
7953 rc = rc2;
7954 }
7955 Assert(!VMMRZCallRing3IsNotificationSet(pVCpu));
7956 return rc;
7957}
7958
7959
7960#ifndef HMVMX_USE_FUNCTION_TABLE
7961DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason)
7962{
7963 int rc;
7964 switch (rcReason)
7965 {
7966 case VMX_EXIT_EPT_MISCONFIG: rc = hmR0VmxExitEptMisconfig(pVCpu, pMixedCtx, pVmxTransient); break;
7967 case VMX_EXIT_EPT_VIOLATION: rc = hmR0VmxExitEptViolation(pVCpu, pMixedCtx, pVmxTransient); break;
7968 case VMX_EXIT_IO_INSTR: rc = hmR0VmxExitIoInstr(pVCpu, pMixedCtx, pVmxTransient); break;
7969 case VMX_EXIT_CPUID: rc = hmR0VmxExitCpuid(pVCpu, pMixedCtx, pVmxTransient); break;
7970 case VMX_EXIT_RDTSC: rc = hmR0VmxExitRdtsc(pVCpu, pMixedCtx, pVmxTransient); break;
7971 case VMX_EXIT_RDTSCP: rc = hmR0VmxExitRdtscp(pVCpu, pMixedCtx, pVmxTransient); break;
7972 case VMX_EXIT_APIC_ACCESS: rc = hmR0VmxExitApicAccess(pVCpu, pMixedCtx, pVmxTransient); break;
7973 case VMX_EXIT_XCPT_OR_NMI: rc = hmR0VmxExitXcptOrNmi(pVCpu, pMixedCtx, pVmxTransient); break;
7974 case VMX_EXIT_MOV_CRX: rc = hmR0VmxExitMovCRx(pVCpu, pMixedCtx, pVmxTransient); break;
7975 case VMX_EXIT_EXT_INT: rc = hmR0VmxExitExtInt(pVCpu, pMixedCtx, pVmxTransient); break;
7976 case VMX_EXIT_INT_WINDOW: rc = hmR0VmxExitIntWindow(pVCpu, pMixedCtx, pVmxTransient); break;
7977 case VMX_EXIT_MWAIT: rc = hmR0VmxExitMwait(pVCpu, pMixedCtx, pVmxTransient); break;
7978 case VMX_EXIT_MONITOR: rc = hmR0VmxExitMonitor(pVCpu, pMixedCtx, pVmxTransient); break;
7979 case VMX_EXIT_TASK_SWITCH: rc = hmR0VmxExitTaskSwitch(pVCpu, pMixedCtx, pVmxTransient); break;
7980 case VMX_EXIT_PREEMPT_TIMER: rc = hmR0VmxExitPreemptTimer(pVCpu, pMixedCtx, pVmxTransient); break;
7981 case VMX_EXIT_RDMSR: rc = hmR0VmxExitRdmsr(pVCpu, pMixedCtx, pVmxTransient); break;
7982 case VMX_EXIT_WRMSR: rc = hmR0VmxExitWrmsr(pVCpu, pMixedCtx, pVmxTransient); break;
7983 case VMX_EXIT_MOV_DRX: rc = hmR0VmxExitMovDRx(pVCpu, pMixedCtx, pVmxTransient); break;
7984 case VMX_EXIT_TPR_BELOW_THRESHOLD: rc = hmR0VmxExitTprBelowThreshold(pVCpu, pMixedCtx, pVmxTransient); break;
7985 case VMX_EXIT_HLT: rc = hmR0VmxExitHlt(pVCpu, pMixedCtx, pVmxTransient); break;
7986 case VMX_EXIT_INVD: rc = hmR0VmxExitInvd(pVCpu, pMixedCtx, pVmxTransient); break;
7987 case VMX_EXIT_INVLPG: rc = hmR0VmxExitInvlpg(pVCpu, pMixedCtx, pVmxTransient); break;
7988 case VMX_EXIT_RSM: rc = hmR0VmxExitRsm(pVCpu, pMixedCtx, pVmxTransient); break;
7989 case VMX_EXIT_MTF: rc = hmR0VmxExitMtf(pVCpu, pMixedCtx, pVmxTransient); break;
7990 case VMX_EXIT_PAUSE: rc = hmR0VmxExitPause(pVCpu, pMixedCtx, pVmxTransient); break;
7991 case VMX_EXIT_XDTR_ACCESS: rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); break;
7992 case VMX_EXIT_TR_ACCESS: rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); break;
7993 case VMX_EXIT_WBINVD: rc = hmR0VmxExitWbinvd(pVCpu, pMixedCtx, pVmxTransient); break;
7994 case VMX_EXIT_XSETBV: rc = hmR0VmxExitXsetbv(pVCpu, pMixedCtx, pVmxTransient); break;
7995 case VMX_EXIT_RDRAND: rc = hmR0VmxExitRdrand(pVCpu, pMixedCtx, pVmxTransient); break;
7996 case VMX_EXIT_INVPCID: rc = hmR0VmxExitInvpcid(pVCpu, pMixedCtx, pVmxTransient); break;
7997 case VMX_EXIT_GETSEC: rc = hmR0VmxExitGetsec(pVCpu, pMixedCtx, pVmxTransient); break;
7998 case VMX_EXIT_RDPMC: rc = hmR0VmxExitRdpmc(pVCpu, pMixedCtx, pVmxTransient); break;
7999
8000 case VMX_EXIT_TRIPLE_FAULT: rc = hmR0VmxExitTripleFault(pVCpu, pMixedCtx, pVmxTransient); break;
8001 case VMX_EXIT_NMI_WINDOW: rc = hmR0VmxExitNmiWindow(pVCpu, pMixedCtx, pVmxTransient); break;
8002 case VMX_EXIT_INIT_SIGNAL: rc = hmR0VmxExitInitSignal(pVCpu, pMixedCtx, pVmxTransient); break;
8003 case VMX_EXIT_SIPI: rc = hmR0VmxExitSipi(pVCpu, pMixedCtx, pVmxTransient); break;
8004 case VMX_EXIT_IO_SMI: rc = hmR0VmxExitIoSmi(pVCpu, pMixedCtx, pVmxTransient); break;
8005 case VMX_EXIT_SMI: rc = hmR0VmxExitSmi(pVCpu, pMixedCtx, pVmxTransient); break;
8006 case VMX_EXIT_ERR_MSR_LOAD: rc = hmR0VmxExitErrMsrLoad(pVCpu, pMixedCtx, pVmxTransient); break;
8007 case VMX_EXIT_ERR_INVALID_GUEST_STATE: rc = hmR0VmxExitErrInvalidGuestState(pVCpu, pMixedCtx, pVmxTransient); break;
8008 case VMX_EXIT_ERR_MACHINE_CHECK: rc = hmR0VmxExitErrMachineCheck(pVCpu, pMixedCtx, pVmxTransient); break;
8009
8010 case VMX_EXIT_VMCALL:
8011 case VMX_EXIT_VMCLEAR:
8012 case VMX_EXIT_VMLAUNCH:
8013 case VMX_EXIT_VMPTRLD:
8014 case VMX_EXIT_VMPTRST:
8015 case VMX_EXIT_VMREAD:
8016 case VMX_EXIT_VMRESUME:
8017 case VMX_EXIT_VMWRITE:
8018 case VMX_EXIT_VMXOFF:
8019 case VMX_EXIT_VMXON:
8020 case VMX_EXIT_INVEPT:
8021 case VMX_EXIT_INVVPID:
8022 case VMX_EXIT_VMFUNC:
8023 rc = hmR0VmxExitSetPendingXcptUD(pVCpu, pMixedCtx, pVmxTransient);
8024 break;
8025 default:
8026 rc = hmR0VmxExitErrUndefined(pVCpu, pMixedCtx, pVmxTransient);
8027 break;
8028 }
8029 return rc;
8030}
8031#endif
8032
8033#ifdef DEBUG
8034/* Is there some generic IPRT define for this that is not in Runtime/internal/\* ?? */
8035# define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \
8036 RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
8037
8038# define HMVMX_ASSERT_PREEMPT_CPUID() \
8039 do \
8040 { \
8041 RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
8042 AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
8043 } while (0)
8044
8045# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() \
8046 do { \
8047 AssertPtr(pVCpu); \
8048 AssertPtr(pMixedCtx); \
8049 AssertPtr(pVmxTransient); \
8050 Assert(pVmxTransient->fVMEntryFailed == false); \
8051 Assert(ASMIntAreEnabled()); \
8052 HMVMX_ASSERT_PREEMPT_SAFE(); \
8053 HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \
8054 Log4Func(("vcpu[%RU32] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v\n", pVCpu->idCpu)); \
8055 HMVMX_ASSERT_PREEMPT_SAFE(); \
8056 if (VMMR0IsLogFlushDisabled(pVCpu)) \
8057 HMVMX_ASSERT_PREEMPT_CPUID(); \
8058 HMVMX_STOP_EXIT_DISPATCH_PROF(); \
8059 } while (0)
8060
8061# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() \
8062 do { \
8063 Log4Func(("\n")); \
8064 } while(0)
8065#else /* Release builds */
8066# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() do { HMVMX_STOP_EXIT_DISPATCH_PROF(); } while(0)
8067# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() do { } while(0)
8068#endif
8069
8070
8071/**
8072 * Advances the guest RIP after reading it from the VMCS.
8073 *
8074 * @returns VBox status code.
8075 * @param pVCpu Pointer to the VMCPU.
8076 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
8077 * out-of-sync. Make sure to update the required fields
8078 * before using them.
8079 * @param pVmxTransient Pointer to the VMX transient structure.
8080 *
8081 * @remarks No-long-jump zone!!!
8082 */
8083DECLINLINE(int) hmR0VmxAdvanceGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8084{
8085 int rc = hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
8086 rc |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
8087 AssertRCReturn(rc, rc);
8088
8089 pMixedCtx->rip += pVmxTransient->cbInstr;
8090 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP;
8091 return rc;
8092}
8093
8094
8095/**
8096 * Tries to determine what part of the guest-state VT-x has deemed as invalid
8097 * and update error record fields accordingly.
8098 *
8099 * @return VMX_IGS_* return codes.
8100 * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything
8101 * wrong with the guest state.
8102 *
8103 * @param pVM Pointer to the VM.
8104 * @param pVCpu Pointer to the VMCPU.
8105 * @param pCtx Pointer to the guest-CPU state.
8106 */
8107static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
8108{
8109#define HMVMX_ERROR_BREAK(err) { uError = (err); break; }
8110#define HMVMX_CHECK_BREAK(expr, err) if (!(expr)) { \
8111 uError = (err); \
8112 break; \
8113 } else do {} while (0)
8114/* Duplicate of IEM_IS_CANONICAL(). */
8115#define HMVMX_IS_CANONICAL(a_u64Addr) ((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000) < UINT64_C(0x1000000000000))
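/* The canonical-address trick: adding 2^47 maps every canonical (bit-47 sign-extended) address into the
   contiguous range [0, 2^48), so a single unsigned compare suffices. E.g. 0xFFFF800000000000 + 0x800000000000
   wraps to 0 and 0x00007FFFFFFFFFFF + 0x800000000000 = 0xFFFFFFFFFFFF, both below 0x1000000000000, while a
   non-canonical value such as 0x0000800000000000 lands at exactly 2^48 and fails the check. */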
8116
8117 int rc;
8118 uint32_t uError = VMX_IGS_ERROR;
8119 uint32_t u32Val;
8120 bool fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuest;
8121
8122 do
8123 {
8124 /*
8125 * CR0.
8126 */
8127 uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
8128 uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
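        /* Per the VMX fixed-bit MSRs, a CR0 bit that reads as 1 in both FIXED0 and FIXED1 must be 1, and a
           bit that reads as 0 in both must be 0. Thus uSetCR0 (FIXED0 & FIXED1) is the must-be-one mask and
           uZapCR0 (FIXED0 | FIXED1) the may-be-one mask; a guest CR0 is valid when (cr0 & uSetCR0) == uSetCR0
           and (cr0 & ~uZapCR0) == 0, which is exactly what the two checks below verify. */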
8129 /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG).
8130           See Intel spec. 26.3.1 "Checks on Guest Control Registers, Debug Registers and MSRs." */
8131 if (fUnrestrictedGuest)
8132 uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG);
8133
8134 uint32_t u32GuestCR0;
8135 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32GuestCR0);
8136 AssertRCBreak(rc);
8137 HMVMX_CHECK_BREAK((u32GuestCR0 & uSetCR0) == uSetCR0, VMX_IGS_CR0_FIXED1);
8138 HMVMX_CHECK_BREAK(!(u32GuestCR0 & ~uZapCR0), VMX_IGS_CR0_FIXED0);
8139 if ( !fUnrestrictedGuest
8140 && (u32GuestCR0 & X86_CR0_PG)
8141 && !(u32GuestCR0 & X86_CR0_PE))
8142 {
8143 HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO);
8144 }
8145
8146 /*
8147 * CR4.
8148 */
8149 uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
8150 uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
8151
8152 uint32_t u32GuestCR4;
8153 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32GuestCR4);
8154 AssertRCBreak(rc);
8155 HMVMX_CHECK_BREAK((u32GuestCR4 & uSetCR4) == uSetCR4, VMX_IGS_CR4_FIXED1);
8156 HMVMX_CHECK_BREAK(!(u32GuestCR4 & ~uZapCR4), VMX_IGS_CR4_FIXED0);
8157
8158 /*
8159 * IA32_DEBUGCTL MSR.
8160 */
8161 uint64_t u64Val;
8162 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val);
8163 AssertRCBreak(rc);
8164 if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
8165 && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. */
8166 {
8167 HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED);
8168 }
8169 uint64_t u64DebugCtlMsr = u64Val;
8170
8171#ifdef VBOX_STRICT
8172 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
8173 AssertRCBreak(rc);
8174        Assert(u32Val == pVCpu->hm.s.vmx.u32EntryCtls);
8175#endif
8176 bool const fLongModeGuest = RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST);
8177
8178 /*
8179 * RIP and RFLAGS.
8180 */
8181 uint32_t u32Eflags;
8182#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8183 if (HMVMX_IS_64BIT_HOST_MODE())
8184 {
8185 rc = VMXReadVmcs64(VMX_VMCS_GUEST_RIP, &u64Val);
8186 AssertRCBreak(rc);
8187 /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */
8188 if ( !fLongModeGuest
8189 || !pCtx->cs.Attr.n.u1Long)
8190 {
8191 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID);
8192 }
8193 /** @todo If the processor supports N < 64 linear-address bits, bits 63:N
8194 * must be identical if the "IA32e mode guest" VM-entry control is 1
8195 * and CS.L is 1. No check applies if the CPU supports 64
8196 * linear-address bits. */
8197
8198 /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */
8199 rc = VMXReadVmcs64(VMX_VMCS_GUEST_RFLAGS, &u64Val);
8200 AssertRCBreak(rc);
8201 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */
8202 VMX_IGS_RFLAGS_RESERVED);
8203 HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
8204 u32Eflags = u64Val;
8205 }
8206 else
8207#endif
8208 {
8209 rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Eflags);
8210 AssertRCBreak(rc);
8211 HMVMX_CHECK_BREAK(!(u32Eflags & 0xffc08028), VMX_IGS_RFLAGS_RESERVED); /* Bit 31:22, Bit 15, 5, 3 MBZ. */
8212 HMVMX_CHECK_BREAK((u32Eflags & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
8213 }
8214
8215 if ( fLongModeGuest
8216 || ( fUnrestrictedGuest
8217 && !(u32GuestCR0 & X86_CR0_PE)))
8218 {
8219 HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID);
8220 }
8221
8222 uint32_t u32EntryInfo;
8223 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo);
8224 AssertRCBreak(rc);
8225 if ( VMX_ENTRY_INTERRUPTION_INFO_VALID(u32EntryInfo)
8226 && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
8227 {
8228 HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID);
8229 }
8230
8231 /*
8232 * 64-bit checks.
8233 */
8234#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8235 if (HMVMX_IS_64BIT_HOST_MODE())
8236 {
8237 if ( fLongModeGuest
8238 && !fUnrestrictedGuest)
8239 {
8240 HMVMX_CHECK_BREAK(u32GuestCR0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE);
8241 HMVMX_CHECK_BREAK(u32GuestCR4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE);
8242 }
8243
8244 if ( !fLongModeGuest
8245 && (u32GuestCR4 & X86_CR4_PCIDE))
8246 {
8247 HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE);
8248 }
8249
8250 /** @todo CR3 field must be such that bits 63:52 and bits in the range
8251 * 51:32 beyond the processor's physical-address width are 0. */
8252
8253 if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
8254 && (pCtx->dr[7] & X86_DR7_MBZ_MASK))
8255 {
8256 HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED);
8257 }
8258
8259 rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, &u64Val);
8260 AssertRCBreak(rc);
8261 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL);
8262
8263 rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, &u64Val);
8264 AssertRCBreak(rc);
8265 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL);
8266 }
8267#endif
8268
8269 /*
8270 * PERF_GLOBAL MSR.
8271 */
8272 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR)
8273 {
8274 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val);
8275 AssertRCBreak(rc);
8276 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)),
8277 VMX_IGS_PERF_GLOBAL_MSR_RESERVED); /* Bits 63:35, bits 31:2 MBZ. */
8278 }
8279
8280 /*
8281 * PAT MSR.
8282 */
8283 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR)
8284 {
8285 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val);
8286 AssertRCBreak(rc);
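            /* Each byte of IA32_PAT selects a memory type in its low 3 bits; only 0 (UC), 1 (WC), 4 (WT),
               5 (WP), 6 (WB) and 7 (UC-) are valid encodings (2 and 3 are reserved), and bits 7:3 of every
               byte must be zero. */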
8287            HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xf8f8f8f8f8f8f8f8)), VMX_IGS_PAT_MSR_RESERVED); /* Bits 7:3 of each entry MBZ. */
8288 for (unsigned i = 0; i < 8; i++)
8289 {
8290 uint8_t u8Val = (u64Val & 0x7);
8291                if (   u8Val != 0 /* UC */
8292                    && u8Val != 1 /* WC */
8293                    && u8Val != 4 /* WT */
8294                    && u8Val != 5 /* WP */
8295                    && u8Val != 6 /* WB */
8296                    && u8Val != 7 /* UC- */)
8297 {
8298 HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID);
8299 }
8300                u64Val >>= 8;
8301 }
8302 }
8303
8304 /*
8305 * EFER MSR.
8306 */
8307 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR)
8308 {
8309 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val);
8310 AssertRCBreak(rc);
8311 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)),
8312 VMX_IGS_EFER_MSR_RESERVED); /* Bits 63:12, bit 9, bits 7:1 MBZ. */
8313 HMVMX_CHECK_BREAK((u64Val & MSR_K6_EFER_LMA) == (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST),
8314 VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH);
8315 HMVMX_CHECK_BREAK( fUnrestrictedGuest
8316 || (u64Val & MSR_K6_EFER_LMA) == (u32GuestCR0 & X86_CR0_PG), VMX_IGS_EFER_LMA_PG_MISMATCH);
8317 }
8318
8319 /*
8320 * Segment registers.
8321 */
8322 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
8323 || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID);
8324 if (!(u32Eflags & X86_EFL_VM))
8325 {
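            /* The cached Attr.u values follow the VMCS access-rights layout: bits 11:8 and 31:17 are
               reserved (hence the 0xf00 and 0xfffe0000 masks below) and bit 16 is the "unusable" flag
               (X86DESCATTR_UNUSABLE). */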
8326 /* CS */
8327 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID);
8328 HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED);
8329 HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED);
8330 HMVMX_CHECK_BREAK( (pCtx->cs.u32Limit & 0xfff) == 0xfff
8331 || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
8332 HMVMX_CHECK_BREAK( !(pCtx->cs.u32Limit & 0xfff00000)
8333 || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
8334 /* CS cannot be loaded with NULL in protected mode. */
8335 HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE);
8336 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID);
8337 if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
8338 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL);
8339 else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
8340 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH);
8341 else if (pVM->hm.s.vmx.fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3)
8342 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID);
8343 else
8344 HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID);
8345
8346 /* SS */
8347 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8348 || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL);
8349 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL);
8350 if ( !(pCtx->cr0 & X86_CR0_PE)
8351 || pCtx->cs.Attr.n.u4Type == 3)
8352 {
8353 HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID);
8354 }
8355 if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
8356 {
8357 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID);
8358 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID);
8359 HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED);
8360 HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED);
8361 HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff
8362 || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
8363 HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000)
8364 || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
8365 }
8366
8367 /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). */
8368 if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
8369 {
8370 HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID);
8371 HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID);
8372 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8373 || pCtx->ds.Attr.n.u4Type > 11
8374 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
8375 HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED);
8376 HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED);
8377 HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff
8378 || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
8379 HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000)
8380 || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
8381 HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
8382 || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID);
8383 }
8384 if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
8385 {
8386 HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID);
8387 HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID);
8388 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8389 || pCtx->es.Attr.n.u4Type > 11
8390 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
8391 HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED);
8392 HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED);
8393 HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff
8394 || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
8395 HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000)
8396 || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
8397 HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
8398 || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID);
8399 }
8400 if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
8401 {
8402 HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID);
8403 HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID);
8404 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8405 || pCtx->fs.Attr.n.u4Type > 11
8406 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL);
8407 HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED);
8408 HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED);
8409 HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff
8410 || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
8411 HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000)
8412 || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
8413 HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
8414 || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID);
8415 }
8416 if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
8417 {
8418 HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID);
8419 HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID);
8420 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8421 || pCtx->gs.Attr.n.u4Type > 11
8422 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL);
8423 HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED);
8424 HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED);
8425 HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff
8426 || !(pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
8427 HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000)
8428 || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
8429 HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
8430 || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID);
8431 }
8432 /* 64-bit capable CPUs. */
8433#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8434 if (HMVMX_IS_64BIT_HOST_MODE())
8435 {
8436 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
8437 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
8438 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
8439 || HMVMX_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
8440 HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID);
8441 HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32),
8442 VMX_IGS_LONGMODE_SS_BASE_INVALID);
8443 HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32),
8444 VMX_IGS_LONGMODE_DS_BASE_INVALID);
8445 HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32),
8446 VMX_IGS_LONGMODE_ES_BASE_INVALID);
8447 }
8448#endif
8449 }
8450 else
8451 {
8452 /* V86 mode checks. */
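            /* 0xf3 decodes to type 3 (accessed read/write data), S=1, DPL=3, present - the attributes
               forced on all segments when executing the guest in real-on-v86 mode. */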
8453 uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
8454 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
8455 {
8456 u32CSAttr = 0xf3; u32SSAttr = 0xf3;
8457 u32DSAttr = 0xf3; u32ESAttr = 0xf3;
8458 u32FSAttr = 0xf3; u32GSAttr = 0xf3;
8459 }
8460 else
8461 {
8462 u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u;
8463 u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u;
8464 u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
8465 }
8466
8467 /* CS */
8468 HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID);
8469 HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID);
8470 HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID);
8471 /* SS */
8472 HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID);
8473 HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID);
8474 HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID);
8475 /* DS */
8476 HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID);
8477 HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID);
8478 HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID);
8479 /* ES */
8480 HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID);
8481 HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID);
8482 HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID);
8483 /* FS */
8484 HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID);
8485 HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID);
8486 HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID);
8487 /* GS */
8488 HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID);
8489 HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID);
8490 HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID);
8491 /* 64-bit capable CPUs. */
8492#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8493 if (HMVMX_IS_64BIT_HOST_MODE())
8494 {
8495 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
8496 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
8497 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
8498 || HMVMX_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
8499 HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID);
8500 HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32),
8501 VMX_IGS_LONGMODE_SS_BASE_INVALID);
8502 HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32),
8503 VMX_IGS_LONGMODE_DS_BASE_INVALID);
8504 HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32),
8505 VMX_IGS_LONGMODE_ES_BASE_INVALID);
8506 }
8507#endif
8508 }
8509
8510 /*
8511 * TR.
8512 */
8513 HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID);
8514 /* 64-bit capable CPUs. */
8515#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8516 if (HMVMX_IS_64BIT_HOST_MODE())
8517 {
8518 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL);
8519 }
8520#endif
8521 if (fLongModeGuest)
8522 {
8523 HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */
8524 VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID);
8525 }
8526 else
8527 {
8528 HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */
 8529                       || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS. */
8530 VMX_IGS_TR_ATTR_TYPE_INVALID);
8531 }
8532 HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID);
8533 HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID);
8534 HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */
8535 HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff
8536 || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
8537 HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000)
8538 || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
8539 HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE);
8540
8541 /*
8542 * GDTR and IDTR.
8543 */
8544#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8545 if (HMVMX_IS_64BIT_HOST_MODE())
8546 {
8547 rc = VMXReadVmcs64(VMX_VMCS_GUEST_GDTR_BASE, &u64Val);
8548 AssertRCBreak(rc);
8549 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL);
8550
8551 rc = VMXReadVmcs64(VMX_VMCS_GUEST_IDTR_BASE, &u64Val);
8552 AssertRCBreak(rc);
8553 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL);
8554 }
8555#endif
8556
8557 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val);
8558 AssertRCBreak(rc);
8559 HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
8560
8561 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val);
8562 AssertRCBreak(rc);
8563 HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
8564
8565 /*
8566 * Guest Non-Register State.
8567 */
8568 /* Activity State. */
8569 uint32_t u32ActivityState;
8570 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState);
8571 AssertRCBreak(rc);
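        /* An activity state other than "active" (0) is only valid if the CPU advertises support for that
           non-active state (HLT, shutdown, wait-for-SIPI) in the IA32_VMX_MISC MSR. */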
8572 HMVMX_CHECK_BREAK( !u32ActivityState
8573 || (u32ActivityState & MSR_IA32_VMX_MISC_ACTIVITY_STATES(pVM->hm.s.vmx.Msrs.u64Misc)),
8574 VMX_IGS_ACTIVITY_STATE_INVALID);
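        /* The HLT activity state is only permitted when SS.DPL (i.e. the CPL) is 0. */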
8575 HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl)
8576 || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID);
8577 uint32_t u32IntrState;
8578 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &u32IntrState);
8579 AssertRCBreak(rc);
8580 if ( u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS
8581 || u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
8582 {
8583 HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID);
8584 }
8585
 8586    /** @todo Activity state and injecting interrupts. Left as a todo since we
 8587     *        currently don't use any activity state other than ACTIVE. */
8588
8589 HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM)
8590 || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID);
8591
8592 /* Guest interruptibility-state. */
8593 HMVMX_CHECK_BREAK(!(u32IntrState & 0xfffffff0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED);
8594 HMVMX_CHECK_BREAK((u32IntrState & ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
8595 | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS))
8596 != ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
8597 | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
8598 VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID);
8599 HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF)
8600 || !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI),
8601 VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID);
8602 if (VMX_ENTRY_INTERRUPTION_INFO_VALID(u32EntryInfo))
8603 {
8604 if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
8605 {
8606 HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
8607 && !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
8608 VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID);
8609 }
8610 else if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
8611 {
8612 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
8613 VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID);
8614 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI),
8615 VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID);
8616 }
8617 }
8618 /** @todo Assumes the processor is not in SMM. */
8619 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI),
8620 VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID);
8621 HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM)
8622 || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI),
8623 VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID);
8624 if ( (pVCpu->hm.s.vmx.u32PinCtls & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI)
8625 && VMX_ENTRY_INTERRUPTION_INFO_VALID(u32EntryInfo)
8626 && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
8627 {
8628 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI),
8629 VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID);
8630 }
8631
8632 /* Pending debug exceptions. */
8633 if (HMVMX_IS_64BIT_HOST_MODE())
8634 {
8635 rc = VMXReadVmcs64(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u64Val);
8636 AssertRCBreak(rc);
8637 /* Bits 63:15, Bit 13, Bits 11:4 MBZ. */
8638 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED);
8639 u32Val = u64Val; /* For pending debug exceptions checks below. */
8640 }
8641 else
8642 {
8643 rc = VMXReadVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u32Val);
8644 AssertRCBreak(rc);
8645 /* Bits 31:15, Bit 13, Bits 11:4 MBZ. */
 8646        HMVMX_CHECK_BREAK(!(u32Val & 0xffffaff0), VMX_IGS_PENDING_DEBUG_RESERVED);
8647 }
8648
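        /* Consistency of the pending-debug BS bit (bit 14): when interrupts are blocked by STI or MOV SS, or the
           activity state is HLT, BS must be set if single-stepping is in effect (EFLAGS.TF=1, IA32_DEBUGCTL.BTF=0)
           and clear otherwise; that is what the two checks below enforce. */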
8649 if ( (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
8650 || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS)
8651 || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT)
8652 {
8653 if ( (u32Eflags & X86_EFL_TF)
8654 && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
8655 {
8656 /* Bit 14 is PendingDebug.BS. */
8657 HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET);
8658 }
8659 if ( !(u32Eflags & X86_EFL_TF)
8660 || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
8661 {
8662 /* Bit 14 is PendingDebug.BS. */
8663 HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR);
8664 }
8665 }
8666
8667 /* VMCS link pointer. */
8668 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val);
8669 AssertRCBreak(rc);
8670 if (u64Val != UINT64_C(0xffffffffffffffff))
8671 {
8672 HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED);
8673 /** @todo Bits beyond the processor's physical-address width MBZ. */
8674 /** @todo 32-bit located in memory referenced by value of this field (as a
8675 * physical address) must contain the processor's VMCS revision ID. */
8676 /** @todo SMM checks. */
8677 }
8678
8679 /** @todo Checks on Guest Page-Directory-Pointer-Table Entries. */
8680
8681 /* Shouldn't happen but distinguish it from AssertRCBreak() errors. */
8682 if (uError == VMX_IGS_ERROR)
8683 uError = VMX_IGS_REASON_NOT_FOUND;
8684 } while (0);
8685
8686 pVCpu->hm.s.u32HMError = uError;
8687 return uError;
8688
8689#undef HMVMX_ERROR_BREAK
8690#undef HMVMX_CHECK_BREAK
8691#undef HMVMX_IS_CANONICAL
8692}
8693
8694/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
8695/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
8696/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
8697
8698/** @name VM-exit handlers.
8699 * @{
8700 */
8701
8702/**
8703 * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT).
8704 */
8705HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8706{
8707 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8708 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
 8709    /* 32-bit Windows hosts (4 cores) have trouble with this; it causes higher interrupt latency. */
8710#if HC_ARCH_BITS == 64
8711 Assert(ASMIntAreEnabled());
8712 if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUsePreemptTimer)
8713 return VINF_SUCCESS;
8714#endif
8715 return VINF_EM_RAW_INTERRUPT;
8716}
8717
8718
8719/**
8720 * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI).
8721 */
8722HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8723{
8724 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8725 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3);
8726
8727 int rc = hmR0VmxReadExitIntrInfoVmcs(pVCpu, pVmxTransient);
8728 AssertRCReturn(rc, rc);
8729
8730 uint32_t uIntrType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVmxTransient->uExitIntrInfo);
8731 Assert( !(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT)
8732 && uIntrType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT);
8733 Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntrInfo));
8734
8735 if (uIntrType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
8736 {
8737 /*
 8738         * This cannot be a guest NMI: the only way for the guest to receive an NMI is if we injected it ourselves, and
 8739         * anything we inject is not going to cause a VM-exit directly for the event being injected.
8740 * See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery".
8741 *
8742 * Dispatch the NMI to the host. See Intel spec. 27.5.5 "Updating Non-Register State".
8743 */
8744 VMXDispatchHostNmi();
8745 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
8746 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
8747 return VINF_SUCCESS;
8748 }
8749
8750 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
8751 rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
8752 if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT))
8753 {
8754 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
8755 return VINF_SUCCESS;
8756 }
8757 else if (RT_UNLIKELY(rc == VINF_EM_RESET))
8758 {
8759 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
8760 return rc;
8761 }
8762
8763 uint32_t uExitIntrInfo = pVmxTransient->uExitIntrInfo;
8764 uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(uExitIntrInfo);
8765 switch (uIntrType)
8766 {
8767 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /* Software exception. (#BP or #OF) */
8768 Assert(uVector == X86_XCPT_DB || uVector == X86_XCPT_BP || uVector == X86_XCPT_OF);
8769 /* no break */
8770 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT:
8771 {
8772 switch (uVector)
8773 {
8774 case X86_XCPT_PF: rc = hmR0VmxExitXcptPF(pVCpu, pMixedCtx, pVmxTransient); break;
8775 case X86_XCPT_GP: rc = hmR0VmxExitXcptGP(pVCpu, pMixedCtx, pVmxTransient); break;
8776 case X86_XCPT_NM: rc = hmR0VmxExitXcptNM(pVCpu, pMixedCtx, pVmxTransient); break;
8777 case X86_XCPT_MF: rc = hmR0VmxExitXcptMF(pVCpu, pMixedCtx, pVmxTransient); break;
8778 case X86_XCPT_DB: rc = hmR0VmxExitXcptDB(pVCpu, pMixedCtx, pVmxTransient); break;
8779 case X86_XCPT_BP: rc = hmR0VmxExitXcptBP(pVCpu, pMixedCtx, pVmxTransient); break;
8780#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
8781 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF);
8782 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8783 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE);
8784 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8785 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD);
8786 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8787 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS);
8788 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8789 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP);
8790 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8791#endif
8792 default:
8793 {
8794 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
8795 AssertRCReturn(rc, rc);
8796
8797 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
8798 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
8799 {
8800 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
8801 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
8802 Assert(CPUMIsGuestInRealModeEx(pMixedCtx));
8803
8804 rc = hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
8805 rc |= hmR0VmxReadExitIntrErrorCodeVmcs(pVCpu, pVmxTransient);
8806 AssertRCReturn(rc, rc);
8807 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(uExitIntrInfo),
8808 pVmxTransient->cbInstr, pVmxTransient->uExitIntrErrorCode,
8809 0 /* GCPtrFaultAddress */);
8810 AssertRCReturn(rc, rc);
8811 }
8812 else
8813 {
8814 AssertMsgFailed(("Unexpected VM-exit caused by exception %#x\n", uVector));
8815 pVCpu->hm.s.u32HMError = uVector;
8816 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
8817 }
8818 break;
8819 }
8820 }
8821 break;
8822 }
8823
8824 default:
8825 {
8826 pVCpu->hm.s.u32HMError = uExitIntrInfo;
8827 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
8828 AssertMsgFailed(("Unexpected interruption code %#x\n", VMX_EXIT_INTERRUPTION_INFO_TYPE(uExitIntrInfo)));
8829 break;
8830 }
8831 }
8832 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
8833 return rc;
8834}
8835
8836
8837/**
8838 * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW).
8839 */
8840HMVMX_EXIT_DECL hmR0VmxExitIntWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8841{
8842 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8843
 8844    /* The guest is now ready to receive interrupts; indicate that we no longer need the interrupt-window VM-exit. */
8845 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT);
8846 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
8847 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
8848 AssertRCReturn(rc, rc);
8849
8850 /* Deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectEvent() and resume guest execution. */
8851 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
8852 return VINF_SUCCESS;
8853}
8854
8855
8856/**
8857 * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW).
8858 */
8859HMVMX_EXIT_DECL hmR0VmxExitNmiWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8860{
8861 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8862 AssertMsgFailed(("Unexpected NMI-window exit.\n"));
8863 pVCpu->hm.s.u32HMError = VMX_EXIT_NMI_WINDOW;
8864 return VERR_VMX_UNEXPECTED_EXIT_CODE;
8865}
8866
8867
8868/**
8869 * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit.
8870 */
8871HMVMX_EXIT_DECL hmR0VmxExitWbinvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8872{
8873 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8874 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWbinvd);
8875 return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
8876}
8877
8878
8879/**
8880 * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit.
8881 */
8882HMVMX_EXIT_DECL hmR0VmxExitInvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8883{
8884 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8885 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
8886 return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
8887}
8888
8889
8890/**
8891 * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit.
8892 */
8893HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8894{
8895 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8896 PVM pVM = pVCpu->CTX_SUFF(pVM);
8897 int rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
8898 if (RT_LIKELY(rc == VINF_SUCCESS))
8899 {
8900 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
8901 Assert(pVmxTransient->cbInstr == 2);
8902 }
8903 else
8904 {
8905 AssertMsgFailed(("hmR0VmxExitCpuid: EMInterpretCpuId failed with %Rrc\n", rc));
8906 rc = VERR_EM_INTERPRETER;
8907 }
8908 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
8909 return rc;
8910}
8911
8912
8913/**
8914 * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit.
8915 */
8916HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8917{
8918 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8919 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx);
8920 AssertRCReturn(rc, rc);
8921
8922 if (pMixedCtx->cr4 & X86_CR4_SMXE)
8923 return VINF_EM_RAW_EMULATE_INSTR;
8924
8925 AssertMsgFailed(("hmR0VmxExitGetsec: unexpected VM-exit when CR4.SMXE is 0.\n"));
8926 pVCpu->hm.s.u32HMError = VMX_EXIT_GETSEC;
8927 return VERR_VMX_UNEXPECTED_EXIT_CODE;
8928}
8929
8930
8931/**
8932 * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit.
8933 */
8934HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8935{
8936 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8937 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
8938 AssertRCReturn(rc, rc);
8939
8940 PVM pVM = pVCpu->CTX_SUFF(pVM);
8941 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
8942 if (RT_LIKELY(rc == VINF_SUCCESS))
8943 {
8944 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
8945 Assert(pVmxTransient->cbInstr == 2);
8946 /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */
8947 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING)
8948 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
8949 }
8950 else
8951 {
8952 AssertMsgFailed(("hmR0VmxExitRdtsc: EMInterpretRdtsc failed with %Rrc\n", rc));
8953 rc = VERR_EM_INTERPRETER;
8954 }
8955 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
8956 return rc;
8957}
8958
8959
8960/**
8961 * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit.
8962 */
8963HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8964{
8965 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8966 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
8967 rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); /* For MSR_K8_TSC_AUX */
8968 AssertRCReturn(rc, rc);
8969
8970 PVM pVM = pVCpu->CTX_SUFF(pVM);
8971 rc = EMInterpretRdtscp(pVM, pVCpu, pMixedCtx);
8972 if (RT_LIKELY(rc == VINF_SUCCESS))
8973 {
8974 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
8975 Assert(pVmxTransient->cbInstr == 3);
8976 /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */
8977 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING)
8978 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
8979 }
8980 else
8981 {
8982 AssertMsgFailed(("hmR0VmxExitRdtscp: EMInterpretRdtscp failed with %Rrc\n", rc));
8983 rc = VERR_EM_INTERPRETER;
8984 }
8985 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
8986 return rc;
8987}
8988
8989
8990/**
8991 * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit.
8992 */
8993HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8994{
8995 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8996 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
8997 rc |= hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); /** @todo review if CR0 is really required by EM. */
8998 AssertRCReturn(rc, rc);
8999
9000 PVM pVM = pVCpu->CTX_SUFF(pVM);
9001 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9002 if (RT_LIKELY(rc == VINF_SUCCESS))
9003 {
9004 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9005 Assert(pVmxTransient->cbInstr == 2);
9006 }
9007 else
9008 {
9009 AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc));
9010 rc = VERR_EM_INTERPRETER;
9011 }
9012 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
9013 return rc;
9014}
9015
9016
9017/**
9018 * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit.
9019 */
9020HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9021{
9022 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9023 PVM pVM = pVCpu->CTX_SUFF(pVM);
9024 Assert(!pVM->hm.s.fNestedPaging);
9025
9026 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9027 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
9028 AssertRCReturn(rc, rc);
9029
9030 VBOXSTRICTRC rc2 = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), pVmxTransient->uExitQualification);
9031 rc = VBOXSTRICTRC_VAL(rc2);
9032 if (RT_LIKELY(rc == VINF_SUCCESS))
9033 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9034 else
9035 {
9036 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitInvlpg: EMInterpretInvlpg %#RX64 failed with %Rrc\n",
9037 pVmxTransient->uExitQualification, rc));
9038 }
9039 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
9040 return rc;
9041}
9042
9043
9044/**
9045 * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit.
9046 */
9047HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9048{
9049 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9050 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9051 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9052 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9053 AssertRCReturn(rc, rc);
9054
9055 PVM pVM = pVCpu->CTX_SUFF(pVM);
9056 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9057 if (RT_LIKELY(rc == VINF_SUCCESS))
9058 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9059 else
9060 {
9061 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc));
9062 rc = VERR_EM_INTERPRETER;
9063 }
9064 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
9065 return rc;
9066}
9067
9068
9069/**
9070 * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit.
9071 */
9072HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9073{
9074 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9075 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9076 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9077 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9078 AssertRCReturn(rc, rc);
9079
9080 PVM pVM = pVCpu->CTX_SUFF(pVM);
9081 VBOXSTRICTRC rc2 = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9082 rc = VBOXSTRICTRC_VAL(rc2);
9083 if (RT_LIKELY( rc == VINF_SUCCESS
9084 || rc == VINF_EM_HALT))
9085 {
9086 int rc3 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9087 AssertRCReturn(rc3, rc3);
9088
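        /* MWAIT reported a halt; if EM decides the wait need not block (EMMonitorWaitShouldContinue), resume
           guest execution instead of returning VINF_EM_HALT to ring-3. */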
9089 if ( rc == VINF_EM_HALT
9090 && EMMonitorWaitShouldContinue(pVCpu, pMixedCtx))
9091 {
9092 rc = VINF_SUCCESS;
9093 }
9094 }
9095 else
9096 {
9097 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMwait: EMInterpretMWait failed with %Rrc\n", rc));
9098 rc = VERR_EM_INTERPRETER;
9099 }
9100 AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER,
9101 ("hmR0VmxExitMwait: failed, invalid error code %Rrc\n", rc));
9102 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
9103 return rc;
9104}
9105
9106
9107/**
9108 * VM-exit handler for RSM (VMX_EXIT_RSM). Unconditional VM-exit.
9109 */
9110HMVMX_EXIT_DECL hmR0VmxExitRsm(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9111{
9112 /*
9113 * Execution of RSM outside of SMM mode causes #UD regardless of VMX root or VMX non-root mode. In theory, we should never
9114 * get this VM-exit. This can happen only if dual-monitor treatment of SMI and VMX is enabled, which can (only?) be done by
9115 * executing VMCALL in VMX root operation. If we get here, something funny is going on.
9116 * See Intel spec. "33.15.5 Enabling the Dual-Monitor Treatment".
9117 */
9118 AssertMsgFailed(("Unexpected RSM VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9119 pVCpu->hm.s.u32HMError = VMX_EXIT_RSM;
9120 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9121}
9122
9123
9124/**
9125 * VM-exit handler for SMI (VMX_EXIT_SMI). Unconditional VM-exit.
9126 */
9127HMVMX_EXIT_DECL hmR0VmxExitSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9128{
9129 /*
9130 * This can only happen if we support dual-monitor treatment of SMI, which can be activated by executing VMCALL in VMX
9131 * root operation. Only an STM (SMM transfer monitor) would get this exit when we (the executive monitor) execute a VMCALL
9132 * in VMX root mode or receive an SMI. If we get here, something funny is going on.
9133 * See Intel spec. "33.15.6 Activating the Dual-Monitor Treatment" and Intel spec. 25.3 "Other Causes of VM-Exits"
9134 */
9135 AssertMsgFailed(("Unexpected SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9136 pVCpu->hm.s.u32HMError = VMX_EXIT_SMI;
9137 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9138}
9139
9140
9141/**
9142 * VM-exit handler for IO SMI (VMX_EXIT_IO_SMI). Unconditional VM-exit.
9143 */
9144HMVMX_EXIT_DECL hmR0VmxExitIoSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9145{
9146 /* Same treatment as VMX_EXIT_SMI. See comment in hmR0VmxExitSmi(). */
9147 AssertMsgFailed(("Unexpected IO SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9148 pVCpu->hm.s.u32HMError = VMX_EXIT_IO_SMI;
9149 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9150}
9151
9152
9153/**
9154 * VM-exit handler for SIPI (VMX_EXIT_SIPI). Conditional VM-exit.
9155 */
9156HMVMX_EXIT_DECL hmR0VmxExitSipi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9157{
9158 /*
9159 * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest activity state is used. We currently
9160 * don't make use of it (see hmR0VmxLoadGuestActivityState()) as our guests don't have direct access to the host LAPIC.
9161 * See Intel spec. 25.3 "Other Causes of VM-exits".
9162 */
9163 AssertMsgFailed(("Unexpected SIPI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9164 pVCpu->hm.s.u32HMError = VMX_EXIT_SIPI;
9165 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9166}
9167
9168
9169/**
9170 * VM-exit handler for INIT signal (VMX_EXIT_INIT_SIGNAL). Unconditional
9171 * VM-exit.
9172 */
9173HMVMX_EXIT_DECL hmR0VmxExitInitSignal(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9174{
9175 /*
9176 * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM.
 9177     * See Intel spec. "33.14.1 Default Treatment of SMI Delivery" and Intel spec. 29.3 "VMX Instructions" for "VMXON".
9178 *
9179 * It is -NOT- blocked in VMX non-root operation so we can, in theory, still get these VM-exits.
9180 * See Intel spec. "23.8 Restrictions on VMX operation".
9181 */
9182 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9183 return VINF_SUCCESS;
9184}
9185
9186
9187/**
9188 * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional
9189 * VM-exit.
9190 */
9191HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9192{
9193 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9194 return VINF_EM_RESET;
9195}
9196
9197
9198/**
9199 * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit.
9200 */
9201HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9202{
9203 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9204 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT);
9205 int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
9206 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9207 AssertRCReturn(rc, rc);
9208
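    /* HLT is a single-byte instruction, so advancing RIP by one skips past it. */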
9209 pMixedCtx->rip++;
9210 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP;
9211 if (EMShouldContinueAfterHalt(pVCpu, pMixedCtx)) /* Requires eflags. */
9212 rc = VINF_SUCCESS;
9213 else
9214 rc = VINF_EM_HALT;
9215
9216 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
9217 return rc;
9218}
9219
9220
9221/**
9222 * VM-exit handler for instructions that result in a #UD exception delivered to
9223 * the guest.
9224 */
9225HMVMX_EXIT_DECL hmR0VmxExitSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9226{
9227 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9228 hmR0VmxSetPendingXcptUD(pVCpu, pMixedCtx);
9229 return VINF_SUCCESS;
9230}
9231
9232
9233/**
9234 * VM-exit handler for expiry of the VMX preemption timer.
9235 */
9236HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9237{
9238 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9239
9240 /* If the preemption-timer has expired, reinitialize the preemption timer on next VM-entry. */
9241 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
9242
9243 /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */
9244 PVM pVM = pVCpu->CTX_SUFF(pVM);
9245 bool fTimersPending = TMTimerPollBool(pVM, pVCpu);
9246 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer);
9247 return fTimersPending ? VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS;
9248}
9249
9250
9251/**
9252 * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit.
9253 */
9254HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9255{
9256 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9257
 9258    /* We expose XSETBV to the guest; fall back to the recompiler for emulation. */
9259 /** @todo check if XSETBV is supported by the recompiler. */
9260 return VERR_EM_INTERPRETER;
9261}
9262
9263
9264/**
9265 * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit.
9266 */
9267HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9268{
9269 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9270
 9271    /* The guest should not invalidate the host CPU's TLBs; fall back to the recompiler. */
9272 /** @todo implement EMInterpretInvpcid() */
9273 return VERR_EM_INTERPRETER;
9274}
9275
9276
9277/**
9278 * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE).
9279 * Error VM-exit.
9280 */
9281HMVMX_EXIT_DECL hmR0VmxExitErrInvalidGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9282{
9283 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9284 AssertRCReturn(rc, rc);
9285
9286 uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx);
9287 NOREF(uInvalidReason);
9288
9289#ifdef VBOX_STRICT
9290 uint32_t uIntrState;
9291 HMVMXHCUINTREG uHCReg;
9292 uint64_t u64Val;
9293 uint32_t u32Val;
9294
9295 rc = hmR0VmxReadEntryIntrInfoVmcs(pVmxTransient);
9296 rc |= hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient);
9297 rc |= hmR0VmxReadEntryInstrLenVmcs(pVCpu, pVmxTransient);
9298 rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState);
9299 AssertRCReturn(rc, rc);
9300
9301 Log4(("uInvalidReason %u\n", uInvalidReason));
9302 Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntrInfo));
9303 Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode));
9304 Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr));
9305 Log4(("VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE %#RX32\n", uIntrState));
9306
9307 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val); AssertRC(rc);
9308 Log4(("VMX_VMCS_GUEST_CR0 %#RX32\n", u32Val));
9309 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc);
9310 Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg));
9311 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc);
 9312    Log4(("VMX_VMCS_CTRL_CR0_READ_SHADOW %#RHr\n", uHCReg));
9313 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc);
9314 Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg));
9315 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc);
9316 Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
9317 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
9318 Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
9319#endif
9320
9321 PVM pVM = pVCpu->CTX_SUFF(pVM);
9322 HMDumpRegs(pVM, pVCpu, pMixedCtx);
9323
9324 return VERR_VMX_INVALID_GUEST_STATE;
9325}
9326
9327
9328/**
9329 * VM-exit handler for VM-entry failure due to an MSR-load
9330 * (VMX_EXIT_ERR_MSR_LOAD). Error VM-exit.
9331 */
9332HMVMX_EXIT_DECL hmR0VmxExitErrMsrLoad(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9333{
9334 AssertMsgFailed(("Unexpected MSR-load exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9335 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9336}
9337
9338
9339/**
9340 * VM-exit handler for VM-entry failure due to a machine-check event
9341 * (VMX_EXIT_ERR_MACHINE_CHECK). Error VM-exit.
9342 */
9343HMVMX_EXIT_DECL hmR0VmxExitErrMachineCheck(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9344{
9345 AssertMsgFailed(("Unexpected machine-check event exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9346 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9347}
9348
9349
9350/**
 9351 * VM-exit handler for all undefined reasons. Should never ever happen... in
 9352 * theory.
9353 */
9354HMVMX_EXIT_DECL hmR0VmxExitErrUndefined(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9355{
9356 AssertMsgFailed(("Huh!? Undefined VM-exit reason %d. pVCpu=%p pMixedCtx=%p\n", pVmxTransient->uExitReason, pVCpu, pMixedCtx));
9357 return VERR_VMX_UNDEFINED_EXIT_CODE;
9358}
9359
9360
9361/**
9362 * VM-exit handler for XDTR (LGDT, SGDT, LIDT, SIDT) accesses
9363 * (VMX_EXIT_XDTR_ACCESS) and LDT and TR access (LLDT, LTR, SLDT, STR).
9364 * Conditional VM-exit.
9365 */
9366HMVMX_EXIT_DECL hmR0VmxExitXdtrAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9367{
9368 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9369
9370 /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT. */
9371 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitXdtrAccess);
9372 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT)
9373 return VERR_EM_INTERPRETER;
9374 AssertMsgFailed(("Unexpected XDTR access. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9375 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9376}
9377
9378
9379/**
9380 * VM-exit handler for RDRAND (VMX_EXIT_RDRAND). Conditional VM-exit.
9381 */
9382HMVMX_EXIT_DECL hmR0VmxExitRdrand(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9383{
9384 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9385
9386 /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT. */
9387 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdrand);
9388 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT)
9389 return VERR_EM_INTERPRETER;
9390 AssertMsgFailed(("Unexpected RDRAND exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9391 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9392}
9393
9394
9395/**
9396 * VM-exit handler for RDMSR (VMX_EXIT_RDMSR).
9397 */
9398HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9399{
9400 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9401
9402 /* EMInterpretRdmsr() requires CR0, Eflags and SS segment register. */
9403 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9404 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9405 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9406 AssertRCReturn(rc, rc);
9407 Log4(("CS:RIP=%04x:%#RX64 ECX=%X\n", pMixedCtx->cs.Sel, pMixedCtx->rip, pMixedCtx->ecx));
9408
9409 PVM pVM = pVCpu->CTX_SUFF(pVM);
9410 rc = EMInterpretRdmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9411 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER,
9412 ("hmR0VmxExitRdmsr: failed, invalid error code %Rrc\n", rc));
9413 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
9414
9415 if (RT_LIKELY(rc == VINF_SUCCESS))
9416 {
9417 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9418 Assert(pVmxTransient->cbInstr == 2);
9419 }
9420 return rc;
9421}
9422
9423
9424/**
9425 * VM-exit handler for WRMSR (VMX_EXIT_WRMSR).
9426 */
9427HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9428{
9429 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9430 PVM pVM = pVCpu->CTX_SUFF(pVM);
9431 int rc = VINF_SUCCESS;
9432
9433 /* EMInterpretWrmsr() requires CR0, EFLAGS and SS segment register. */
9434 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9435 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9436 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9437 AssertRCReturn(rc, rc);
9438 Log4(("ecx=%#RX32\n", pMixedCtx->ecx));
9439
9440 rc = EMInterpretWrmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9441 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, ("hmR0VmxExitWrmsr: failed, invalid error code %Rrc\n", rc));
9442 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
9443
9444 if (RT_LIKELY(rc == VINF_SUCCESS))
9445 {
9446 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9447
9448 /* If this is an X2APIC WRMSR access, update the APIC state as well. */
9449 if ( pMixedCtx->ecx >= MSR_IA32_X2APIC_START
9450 && pMixedCtx->ecx <= MSR_IA32_X2APIC_END)
9451 {
9452 /* We've already saved the APIC related guest-state (TPR) in hmR0VmxPostRunGuest(). When full APIC register
9453 * virtualization is implemented we'll have to make sure APIC state is saved from the VMCS before
 9454             * EMInterpretWrmsr() changes it. */
9455 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_VMX_GUEST_APIC_STATE;
9456 }
9457 else if (pMixedCtx->ecx == MSR_K6_EFER) /* EFER is the only MSR we auto-load but don't allow write-passthrough. */
9458 {
9459 rc = hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx);
9460 AssertRCReturn(rc, rc);
9461 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_VMX_GUEST_AUTO_MSRS;
9462 }
9463 else if (pMixedCtx->ecx == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */
9464 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
9465
9466 /* Update MSRs that are part of the VMCS when MSR-bitmaps are not supported. */
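        /* Without MSR bitmaps every guest WRMSR exits and is emulated above, so the VMCS copies of these MSRs
           go stale; flag them to be rewritten from the guest context on the next VM-entry. */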
9467 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS))
9468 {
9469 switch (pMixedCtx->ecx)
9470 {
9471 case MSR_IA32_SYSENTER_CS: pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SYSENTER_CS_MSR; break;
9472 case MSR_IA32_SYSENTER_EIP: pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SYSENTER_EIP_MSR; break;
9473 case MSR_IA32_SYSENTER_ESP: pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SYSENTER_ESP_MSR; break;
9474 case MSR_K8_FS_BASE: /* no break */
9475 case MSR_K8_GS_BASE: pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SEGMENT_REGS; break;
9476 case MSR_K8_KERNEL_GS_BASE: pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_VMX_GUEST_AUTO_MSRS; break;
9477 }
9478 }
9479#ifdef VBOX_STRICT
9480 else
9481 {
9482 /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */
9483 switch (pMixedCtx->ecx)
9484 {
9485 case MSR_IA32_SYSENTER_CS:
9486 case MSR_IA32_SYSENTER_EIP:
9487 case MSR_IA32_SYSENTER_ESP:
9488 case MSR_K8_FS_BASE:
9489 case MSR_K8_GS_BASE:
9490 {
9491 AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", pMixedCtx->ecx));
9492 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9493 }
9494
9495 case MSR_K8_LSTAR:
9496 case MSR_K6_STAR:
9497 case MSR_K8_SF_MASK:
9498 case MSR_K8_TSC_AUX:
9499 case MSR_K8_KERNEL_GS_BASE:
9500 {
9501 AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n",
9502 pMixedCtx->ecx));
9503 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9504 }
9505 }
9506 }
9507#endif /* VBOX_STRICT */
9508 }
9509 return rc;
9510}
9511
9512
9513/**
9514 * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit.
9515 */
9516HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9517{
9518 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9519
9520 /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC_PAUSE_EXIT. */
9521 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPause);
9522 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_PAUSE_EXIT)
9523 return VERR_EM_INTERPRETER;
9524 AssertMsgFailed(("Unexpected PAUSE exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9525 return VERR_VMX_UNEXPECTED_EXIT_CODE;
9526}
9527
9528
9529/**
9530 * VM-exit handler for when the TPR value is lowered below the specified
9531 * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit.
9532 */
9533HMVMX_EXIT_DECL hmR0VmxExitTprBelowThreshold(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9534{
9535 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9536 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW);
9537
9538 /*
 9539     * The TPR has already been updated, see hmR0VmxPostRunGuest(). RIP is also updated as part of the VM-exit by VT-x. Update
9540 * the threshold in the VMCS, deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectEvent() and
9541 * resume guest execution.
9542 */
9543 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_VMX_GUEST_APIC_STATE;
9544 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold);
9545 return VINF_SUCCESS;
9546}
9547
9548
9549/**
9550 * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional
9551 * VM-exit.
9552 *
9553 * @retval VINF_SUCCESS when guest execution can continue.
9554 * @retval VINF_PGM_CHANGE_MODE when shadow paging mode changed, back to ring-3.
9555 * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3.
 9556 * @retval VERR_EM_INTERPRETER when something unexpected happened; fall back to
 9557 *         the recompiler.
9558 */
9559HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9560{
9561 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9562 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2);
9563 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9564 AssertRCReturn(rc, rc);
9565
9566 const RTGCUINTPTR uExitQualification = pVmxTransient->uExitQualification;
9567 const uint32_t uAccessType = VMX_EXIT_QUALIFICATION_CRX_ACCESS(uExitQualification);
9568 PVM pVM = pVCpu->CTX_SUFF(pVM);
9569 switch (uAccessType)
9570 {
9571 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE: /* MOV to CRx */
9572 {
9573#if 0
9574 /* EMInterpretCRxWrite() references a lot of guest state (EFER, RFLAGS, Segment Registers, etc.) Sync entire state */
9575 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9576#else
9577 rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
9578 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
9579 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9580#endif
9581 AssertRCReturn(rc, rc);
9582
9583 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
9584 VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification),
9585 VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification));
9586 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
9587
9588 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification))
9589 {
9590 case 0: /* CR0 */
9591 Log4(("CRX CR0 write rc=%d CR0=%#RX64\n", rc, pMixedCtx->cr0));
9592 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
9593 break;
 9594            case 2: /* CR2 */
 9595                /* Nothing to do here; CR2 is not part of the VMCS. */
9596 break;
9597 case 3: /* CR3 */
9598 Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestPagingEnabledEx(pMixedCtx));
9599 Log4(("CRX CR3 write rc=%d CR3=%#RX64\n", rc, pMixedCtx->cr3));
9600 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR3;
9601 break;
9602 case 4: /* CR4 */
9603 Log4(("CRX CR4 write rc=%d CR4=%#RX64\n", rc, pMixedCtx->cr4));
9604 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR4;
9605 break;
9606 case 8: /* CR8 */
9607 Assert(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW));
9608 /* CR8 contains the APIC TPR. Was updated by EMInterpretCRxWrite(). */
9609 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_VMX_GUEST_APIC_STATE;
9610 break;
9611 default:
9612 AssertMsgFailed(("Invalid CRx register %#x\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)));
9613 break;
9614 }
9615
9616 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]);
9617 break;
9618 }
9619
9620 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ: /* MOV from CRx */
9621 {
9622 /* EMInterpretCRxRead() requires EFER MSR, CS. */
9623 rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9624 AssertRCReturn(rc, rc);
9625 Assert( !pVM->hm.s.fNestedPaging
9626 || !CPUMIsGuestPagingEnabledEx(pMixedCtx)
9627 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 3);
9628
9629 /* CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */
9630 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 8
9631 || !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW));
9632
9633 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
9634 VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification),
9635 VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification));
9636 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
9637 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]);
9638 Log4(("CRX CR%d Read access rc=%d\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification), rc));
9639 break;
9640 }
9641
9642 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS: /* CLTS (Clear Task-Switch Flag in CR0) */
9643 {
9644 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9645 AssertRCReturn(rc, rc);
9646 rc = EMInterpretCLTS(pVM, pVCpu);
9647 AssertRCReturn(rc, rc);
9648 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
9649 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
9650 Log4(("CRX CLTS write rc=%d\n", rc));
9651 break;
9652 }
9653
9654 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */
9655 {
9656 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9657 AssertRCReturn(rc, rc);
9658 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(uExitQualification));
9659 if (RT_LIKELY(rc == VINF_SUCCESS))
9660 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
9661 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw);
9662 Log4(("CRX LMSW write rc=%d\n", rc));
9663 break;
9664 }
9665
9666 default:
9667 {
9668 AssertMsgFailed(("Invalid access-type in Mov CRx exit qualification %#x\n", uAccessType));
9669 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
9670 }
9671 }
9672
9673 /* Validate possible error codes. */
9674 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_CHANGE_MODE || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_SYNC_CR3
9675 || rc == VERR_VMX_UNEXPECTED_EXCEPTION);
9676 if (RT_SUCCESS(rc))
9677 {
9678 int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9679 AssertRCReturn(rc2, rc2);
9680 }
9681
9682 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2);
9683 return rc;
9684}
9685
9686
9687/**
9688 * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional
9689 * VM-exit.
9690 */
9691HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9692{
9693 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9694 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1);
9695
9696 int rc2 = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9697 rc2 |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
9698 rc2 |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
9699 rc2 |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* Eflag checks in EMInterpretDisasCurrent(). */
9700 rc2 |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); /* CR0 checks & PGM* in EMInterpretDisasCurrent(). */
9701 rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); /* SELM checks in EMInterpretDisasCurrent(). */
9702 /* EFER also required for longmode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */
9703 AssertRCReturn(rc2, rc2);
9704
 9705    /* Refer to Intel spec. Table 27-5 "Exit Qualifications for I/O Instructions" for the format. */
9706 uint32_t uIOPort = VMX_EXIT_QUALIFICATION_IO_PORT(pVmxTransient->uExitQualification);
9707 uint8_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(pVmxTransient->uExitQualification);
9708 bool fIOWrite = ( VMX_EXIT_QUALIFICATION_IO_DIRECTION(pVmxTransient->uExitQualification)
9709 == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
9710 bool fIOString = VMX_EXIT_QUALIFICATION_IO_IS_STRING(pVmxTransient->uExitQualification);
9711 AssertReturn(uIOWidth <= 3 && uIOWidth != 2, VERR_HMVMX_IPE_1);
9712
9713 /* I/O operation lookup arrays. */
9714 static const uint32_t s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses. */
9715 static const uint32_t s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving the result (in AL/AX/EAX). */
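    /* The exit-qualification width field encodes 0, 1 or 3 for 1-, 2- and 4-byte accesses (2 is not a defined
       encoding), which is why index 2 is zero in both tables and is rejected by the assertion above. */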
9716
9717 VBOXSTRICTRC rcStrict;
9718 const uint32_t cbValue = s_aIOSizes[uIOWidth];
9719 const uint32_t cbInstr = pVmxTransient->cbInstr;
9720 bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */
9721 PVM pVM = pVCpu->CTX_SUFF(pVM);
9722 if (fIOString)
9723 {
9724 /*
9725 * INS/OUTS - I/O String instruction.
9726 *
9727 * Use instruction-information if available, otherwise fall back on
9728 * interpreting the instruction.
9729 */
9730 Log4(("CS:RIP=%04x:%#RX64 %#06x/%u %c str\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
 9731#if 0 /* Not quite ready; have seen the iSegReg assertion trigger once... Do we perhaps need to always read that in the longjmp / preempt scenario? */
9732 AssertReturn(pMixedCtx->dx == uIOPort, VERR_HMVMX_IPE_2);
9733 if (MSR_IA32_VMX_BASIC_INFO_VMCS_INS_OUTS(pVM->hm.s.vmx.Msrs.u64BasicInfo))
9734 {
9735 rc2 = hmR0VmxReadExitIntrInfoVmcs(pVCpu, pVmxTransient);
9736 /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */
9737 rc2 |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9738 AssertRCReturn(rc2, rc2);
9739 AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_HMVMX_IPE_3);
9740 AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2);
9741 IEMMODE enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize;
9742 bool fRep = VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification);
9743 if (fIOWrite)
9744 {
9745 rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr,
9746 pVmxTransient->ExitInstrInfo.StrIo.iSegReg);
9747 //if (rcStrict == VINF_IOM_R3_IOPORT_WRITE)
9748 // hmR0SavePendingIOPortWriteStr(pVCpu, pMixedCtx->rip, cbValue, enmAddrMode, fRep, cbInstr,
9749 // pVmxTransient->ExitInstrInfo.StrIo.iSegReg);
9750 }
9751 else
9752 {
9753 AssertMsgReturn(pVmxTransient->ExitInstrInfo.StrIo.iSegReg == X86_SREG_ES,
9754 ("%#x (%#llx)\n", pVmxTransient->ExitInstrInfo.StrIo.iSegReg, pVmxTransient->ExitInstrInfo.u),
9755 VERR_HMVMX_IPE_4);
9756 rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr);
9757 //if (rcStrict == VINF_IOM_R3_IOPORT_READ)
9758 // hmR0SavePendingIOPortReadStr(pVCpu, pMixedCtx->rip, cbValue, enmAddrMode, fRep, cbInstr);
9759 }
9760 }
9761 else
9762 {
9763 /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */
9764 rc2 = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9765 AssertRCReturn(rc2, rc2);
9766 rcStrict = IEMExecOne(pVCpu);
9767 }
9768 /** @todo IEM needs to be setting these flags somehow. */
9769 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP;
9770 fUpdateRipAlready = true;
9771#else
9772 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
9773 rcStrict = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
9774 if (RT_SUCCESS(rcStrict))
9775 {
9776 if (fIOWrite)
9777 {
9778 rcStrict = IOMInterpretOUTSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix,
9779 (DISCPUMODE)pDis->uAddrMode, cbValue);
9780 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
9781 }
9782 else
9783 {
9784 rcStrict = IOMInterpretINSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix,
9785 (DISCPUMODE)pDis->uAddrMode, cbValue);
9786 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
9787 }
9788 }
9789 else
9790 {
9791 AssertMsg(rcStrict == VERR_EM_INTERPRETER, ("rcStrict=%Rrc RIP %#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pMixedCtx->rip));
9792 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
9793 }
9794#endif
9795 }
9796 else
9797 {
9798 /*
9799 * IN/OUT - I/O instruction.
9800 */
9801 Log4(("CS:RIP=%04x:%#RX64 %#06x/%u %c\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
9802 const uint32_t uAndVal = s_aIOOpAnd[uIOWidth];
9803 Assert(!VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification));
9804 if (fIOWrite)
9805 {
9806 rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pMixedCtx->eax & uAndVal, cbValue);
9807 if (rcStrict == VINF_IOM_R3_IOPORT_WRITE)
9808 HMR0SavePendingIOPortWrite(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue);
9809 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
9810 }
9811 else
9812 {
9813 uint32_t u32Result = 0;
9814 rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue);
9815 if (IOM_SUCCESS(rcStrict))
9816 {
9817 /* Save result of I/O IN instr. in AL/AX/EAX. */
9818 pMixedCtx->eax = (pMixedCtx->eax & ~uAndVal) | (u32Result & uAndVal);
9819 }
9820 else if (rcStrict == VINF_IOM_R3_IOPORT_READ)
9821 HMR0SavePendingIOPortRead(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue);
9822 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
9823 }
9824 }
9825
9826 if (IOM_SUCCESS(rcStrict))
9827 {
9828 if (!fUpdateRipAlready)
9829 {
9830 pMixedCtx->rip += cbInstr;
9831 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP;
9832 }
9833
9834 /*
9835 * If any I/O breakpoints are armed, we need to check if one triggered
9836 * and take appropriate action.
9837 * Note that the I/O breakpoint type is undefined if CR4.DE is 0.
9838 */
9839 rc2 = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
9840 AssertRCReturn(rc2, rc2);
9841
9842 /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the
9843 * execution engines about whether hyper BPs and such are pending. */
9844 uint32_t const uDr7 = pMixedCtx->dr[7];
9845 if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK)
9846 && X86_DR7_ANY_RW_IO(uDr7)
9847 && (pMixedCtx->cr4 & X86_CR4_DE))
9848 || DBGFBpIsHwIoArmed(pVM)))
9849 {
9850 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
9851
9852 /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
9853 VMMRZCallRing3Disable(pVCpu);
9854 HM_DISABLE_PREEMPT_IF_NEEDED();
9855
9856 bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /*fDr6*/);
9857
9858 VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pMixedCtx, uIOPort, cbValue);
9859 if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP)
9860 {
9861 /* Raise #DB. */
9862 if (fIsGuestDbgActive)
9863 ASMSetDR6(pMixedCtx->dr[6]);
9864 if (pMixedCtx->dr[7] != uDr7)
9865 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
9866
9867 hmR0VmxSetPendingXcptDB(pVCpu, pMixedCtx);
9868 }
9869 /* rcStrict is VINF_SUCCESS or in [VINF_EM_FIRST..VINF_EM_LAST]. */
9870 else if ( rcStrict2 != VINF_SUCCESS
9871 && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict))
9872 rcStrict = rcStrict2;
9873
9874 HM_RESTORE_PREEMPT_IF_NEEDED();
9875 VMMRZCallRing3Enable(pVCpu);
9876 }
9877 }
9878
9879#ifdef DEBUG
9880 if (rcStrict == VINF_IOM_R3_IOPORT_READ)
9881 Assert(!fIOWrite);
9882 else if (rcStrict == VINF_IOM_R3_IOPORT_WRITE)
9883 Assert(fIOWrite);
9884 else
9885 {
9886 /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST
 9887         * statuses that the VMM device and some others may return. See
9888 * IOM_SUCCESS() for guidance. */
9889 AssertMsg( RT_FAILURE(rcStrict)
9890 || rcStrict == VINF_SUCCESS
9891 || rcStrict == VINF_EM_RAW_EMULATE_INSTR
9892 || rcStrict == VINF_EM_DBG_BREAKPOINT
9893 || rcStrict == VINF_EM_RAW_GUEST_TRAP
9894 || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
9895 }
9896#endif
9897
9898 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1);
9899 return VBOXSTRICTRC_TODO(rcStrict);
9900}
9901
9902
9903/**
9904 * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional
9905 * VM-exit.
9906 */
9907HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9908{
9909 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9910
9911    /* Check if this task-switch occurred while delivering an event through the guest IDT. */
9912 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9913 AssertRCReturn(rc, rc);
9914 if (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
9915 {
9916 rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
9917 AssertRCReturn(rc, rc);
9918 if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo))
9919 {
9920 uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo);
9921
9922 /* Software interrupts and exceptions will be regenerated when the recompiler restarts the instruction. */
9923 if ( uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT
9924 && uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
9925 && uIntType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
9926 {
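                /* Hardware interrupts and exceptions delivered through the IDT would be lost when we return to
                   ring-3 to emulate the task switch, so save them as a pending event to be re-injected. */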
9927 uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo);
9928 bool fErrorCodeValid = !!VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo);
9929
9930 /* Save it as a pending event and it'll be converted to a TRPM event on the way out to ring-3. */
9931 Assert(!pVCpu->hm.s.Event.fPending);
9932 pVCpu->hm.s.Event.fPending = true;
9933 pVCpu->hm.s.Event.u64IntrInfo = pVmxTransient->uIdtVectoringInfo;
9934 rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
9935 AssertRCReturn(rc, rc);
9936 if (fErrorCodeValid)
9937 pVCpu->hm.s.Event.u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
9938 else
9939 pVCpu->hm.s.Event.u32ErrCode = 0;
9940 if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
9941 && uVector == X86_XCPT_PF)
9942 {
9943 pVCpu->hm.s.Event.GCPtrFaultAddress = pMixedCtx->cr2;
9944 }
9945
9946 Log4(("Pending event on TaskSwitch uIntType=%#x uVector=%#x\n", uIntType, uVector));
9947 }
9948 }
9949 }
9950
9951 /** @todo Emulate task switch someday, currently just going back to ring-3 for
9952 * emulation. */
9953 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
9954 return VERR_EM_INTERPRETER;
9955}
9956
9957
9958/**
9959 * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit.
9960 */
9961HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9962{
9963 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
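    /* Disarm the monitor-trap-flag processor-based VM-execution control and tell EM that a single step completed. */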
9964 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG);
9965 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG;
9966 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
9967 AssertRCReturn(rc, rc);
9968 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf);
9969 return VINF_EM_DBG_STEPPED;
9970}
9971
9972
9973/**
9974 * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit.
9975 */
9976HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9977{
9978 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9979
9980 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
9981 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
9982 if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT))
9983 return VINF_SUCCESS;
9984 else if (RT_UNLIKELY(rc == VINF_EM_RESET))
9985 return rc;
9986
9987#if 0
9988 /** @todo Investigate if IOMMMIOPhysHandler() requires a lot of state, for now
9989 * just sync the whole thing. */
9990 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9991#else
9992 /* Aggressive state sync. for now. */
9993 rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
9994 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
9995 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9996#endif
9997 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9998 AssertRCReturn(rc, rc);
9999
10000    /* See Intel spec. Table 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Physical Addresses". */
10001 uint32_t uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(pVmxTransient->uExitQualification);
10002 switch (uAccessType)
10003 {
10004 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
10005 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
10006 {
10007 if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
10008 && VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification) == 0x80)
10009 {
10010 AssertMsgFailed(("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n"));
10011 }
10012
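            /* Compute the physical address of the access within the APIC MMIO page and let IOM dispatch it to the
               registered MMIO handler, as if the guest had accessed the APIC page directly. */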
10013 RTGCPHYS GCPhys = pMixedCtx->msrApicBase; /* Always up-to-date, msrApicBase is not part of the VMCS. */
10014 GCPhys &= PAGE_BASE_GC_MASK;
10015 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification);
10016 PVM pVM = pVCpu->CTX_SUFF(pVM);
10017            Log4(("ApicAccess uAccessType=%#x GCPhys=%#RGp Off=%#x\n", uAccessType, GCPhys,
10018 VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification)));
10019
10020 VBOXSTRICTRC rc2 = IOMMMIOPhysHandler(pVM, pVCpu,
10021 (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
10022 CPUMCTX2CORE(pMixedCtx), GCPhys);
10023 rc = VBOXSTRICTRC_VAL(rc2);
10024 Log4(("ApicAccess rc=%d\n", rc));
10025 if ( rc == VINF_SUCCESS
10026 || rc == VERR_PAGE_TABLE_NOT_PRESENT
10027 || rc == VERR_PAGE_NOT_PRESENT)
10028 {
10029 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
10030 | HM_CHANGED_VMX_GUEST_APIC_STATE;
10031 rc = VINF_SUCCESS;
10032 }
10033 break;
10034 }
10035
10036 default:
10037 Log4(("ApicAccess uAccessType=%#x\n", uAccessType));
10038 rc = VINF_EM_RAW_EMULATE_INSTR;
10039 break;
10040 }
10041
10042 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess);
10043 return rc;
10044}
10045
10046
10047/**
10048 * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional
10049 * VM-exit.
10050 */
10051HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10052{
10053 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10054
10055 /* We should -not- get this VM-exit if the guest's debug registers were active. */
10056 if (pVmxTransient->fWasGuestDebugStateActive)
10057 {
10058 AssertMsgFailed(("Unexpected MOV DRx exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
10059 return VERR_VMX_UNEXPECTED_EXIT_CODE;
10060 }
10061
10062 int rc = VERR_INTERNAL_ERROR_5;
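    /* If no debugging is going on (guest or hypervisor), stop intercepting MOV DRx and #DB, load the guest debug
       state onto the CPU and restart the instruction; otherwise interpret this one access below. */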
10063 if ( !DBGFIsStepping(pVCpu)
10064 && !pVCpu->hm.s.fSingleInstruction
10065 && !pVmxTransient->fWasHyperDebugStateActive)
10066 {
10067 /* Don't intercept MOV DRx and #DB any more. */
10068 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
10069 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
10070 AssertRCReturn(rc, rc);
10071
10072 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
10073 {
10074#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
10075 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB);
10076 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
10077 AssertRCReturn(rc, rc);
10078#endif
10079 }
10080
10081 /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */
10082 VMMRZCallRing3Disable(pVCpu);
10083 HM_DISABLE_PREEMPT_IF_NEEDED();
10084
10085 /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
10086 PVM pVM = pVCpu->CTX_SUFF(pVM);
10087 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
10088 Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32);
10089
10090 HM_RESTORE_PREEMPT_IF_NEEDED();
10091 VMMRZCallRing3Enable(pVCpu);
10092
10093#ifdef VBOX_WITH_STATISTICS
10094 rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10095 AssertRCReturn(rc, rc);
10096 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
10097 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
10098 else
10099 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
10100#endif
10101 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
10102 return VINF_SUCCESS;
10103 }
10104
10105 /*
10106 * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode() which requires EFER, CS. EFER is always up-to-date, see
10107 * hmR0VmxSaveGuestAutoLoadStoreMsrs(). Update only the segment registers from the CPU.
10108 */
10109 rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10110 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10111 AssertRCReturn(rc, rc);
10112 Log4(("CS:RIP=%04x:%#RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip));
10113
10114 PVM pVM = pVCpu->CTX_SUFF(pVM);
10115 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
10116 {
10117 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
10118 VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification),
10119 VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification));
10120 if (RT_SUCCESS(rc))
10121 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
10122 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
10123 }
10124 else
10125 {
10126 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
10127 VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification),
10128 VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification));
10129 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
10130 }
10131
10132 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
10133 if (RT_SUCCESS(rc))
10134 {
10135 int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10136 AssertRCReturn(rc2, rc2);
10137 }
10138 return rc;
10139}
10140
10141
10142/**
10143 * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG).
10144 * Conditional VM-exit.
10145 */
10146HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10147{
10148 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10149 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
10150
10151 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
10152 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
10153 if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT))
10154 return VINF_SUCCESS;
10155 else if (RT_UNLIKELY(rc == VINF_EM_RESET))
10156 return rc;
10157
10158 RTGCPHYS GCPhys = 0;
10159 rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
10160
10161#if 0
10162 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */
10163#else
10164 /* Aggressive state sync. for now. */
10165 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
10166 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
10167 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10168#endif
10169 AssertRCReturn(rc, rc);
10170
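    /* MMIO pages are backed by intentionally misconfigured EPT entries when nested paging is used, so MMIO accesses
       surface as this VM-exit; PGMR0Trap0eHandlerNPMisconfig interprets the access and invokes the MMIO handler. */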
10171 /*
10172 * If we succeed, resume guest execution.
10173 * If we fail in interpreting the instruction because we couldn't get the guest physical address
10174 * of the page containing the instruction via the guest's page tables (we would invalidate the guest page
10175 * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this
10176 * weird case. See @bugref{6043}.
10177 */
10178 PVM pVM = pVCpu->CTX_SUFF(pVM);
10179 VBOXSTRICTRC rc2 = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pMixedCtx), GCPhys, UINT32_MAX);
10180 rc = VBOXSTRICTRC_VAL(rc2);
10181    Log4(("EPT misconfig at %#RGp RIP=%#RX64 rc=%d\n", GCPhys, pMixedCtx->rip, rc));
10182 if ( rc == VINF_SUCCESS
10183 || rc == VERR_PAGE_TABLE_NOT_PRESENT
10184 || rc == VERR_PAGE_NOT_PRESENT)
10185 {
10186 /* Successfully handled MMIO operation. */
10187 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
10188 | HM_CHANGED_VMX_GUEST_APIC_STATE;
10189 rc = VINF_SUCCESS;
10190 }
10191 return rc;
10192}
10193
10194
10195/**
10196 * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional
10197 * VM-exit.
10198 */
10199HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10200{
10201 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10202 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
10203
10204 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
10205 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
10206 if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT))
10207 return VINF_SUCCESS;
10208 else if (RT_UNLIKELY(rc == VINF_EM_RESET))
10209 return rc;
10210
10211 RTGCPHYS GCPhys = 0;
10212 rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
10213 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10214#if 0
10215 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */
10216#else
10217 /* Aggressive state sync. for now. */
10218 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
10219 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
10220 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10221#endif
10222 AssertRCReturn(rc, rc);
10223
10224 /* Intel spec. Table 27-7 "Exit Qualifications for EPT violations". */
10225 AssertMsg(((pVmxTransient->uExitQualification >> 7) & 3) != 2, ("%#RX64", pVmxTransient->uExitQualification));
10226
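    /* Fold the exit qualification bits into a #PF-style error code so PGM can treat the EPT violation like a
       page fault on the nested page tables. */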
10227 RTGCUINT uErrorCode = 0;
10228 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
10229 uErrorCode |= X86_TRAP_PF_ID;
10230 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
10231 uErrorCode |= X86_TRAP_PF_RW;
10232 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
10233 uErrorCode |= X86_TRAP_PF_P;
10234
10235 TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode);
10236
10237 Log4(("EPT violation %#x at %#RX64 ErrorCode %#x CS:EIP=%04x:%#RX64\n", pVmxTransient->uExitQualification, GCPhys,
10238 uErrorCode, pMixedCtx->cs.Sel, pMixedCtx->rip));
10239
10240 /* Handle the pagefault trap for the nested shadow table. */
10241 PVM pVM = pVCpu->CTX_SUFF(pVM);
10242 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pMixedCtx), GCPhys);
10243 TRPMResetTrap(pVCpu);
10244
10245 /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */
10246 if ( rc == VINF_SUCCESS
10247 || rc == VERR_PAGE_TABLE_NOT_PRESENT
10248 || rc == VERR_PAGE_NOT_PRESENT)
10249 {
10250 /* Successfully synced our nested page tables. */
10251 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
10252 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS;
10253 return VINF_SUCCESS;
10254 }
10255
10256    Log4(("EPT return to ring-3 rc=%d\n", rc));
10257 return rc;
10258}
10259
10260/** @} */
10261
10262/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */
10263/* -=-=-=-=-=-=-=-=-=- VM-exit Exception Handlers -=-=-=-=-=-=-=-=-=-=- */
10264/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */
10265
10266/** @name VM-exit exception handlers.
10267 * @{
10268 */
10269
10270/**
10271 * VM-exit exception handler for #MF (Math Fault: floating point exception).
10272 */
10273static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10274{
10275 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10276 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
10277
10278 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
10279 AssertRCReturn(rc, rc);
10280
10281 if (!(pMixedCtx->cr0 & X86_CR0_NE))
10282 {
10283 /* Old-style FPU error reporting needs some extra work. */
10284 /** @todo don't fall back to the recompiler, but do it manually. */
10285 return VERR_EM_INTERPRETER;
10286 }
10287
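    /* CR0.NE is set, so the guest reports FPU errors via #MF; simply re-inject the exception into the guest. */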
10288 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntrInfo),
10289 pVmxTransient->cbInstr, pVmxTransient->uExitIntrErrorCode, 0 /* GCPtrFaultAddress */);
10290 return rc;
10291}
10292
10293
10294/**
10295 * VM-exit exception handler for #BP (Breakpoint exception).
10296 */
10297static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10298{
10299 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10300 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
10301
10302 /** @todo Try optimize this by not saving the entire guest state unless
10303 * really needed. */
10304 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10305 AssertRCReturn(rc, rc);
10306
10307 PVM pVM = pVCpu->CTX_SUFF(pVM);
10308 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
10309 if (rc == VINF_EM_RAW_GUEST_TRAP)
10310 {
10311 rc = hmR0VmxReadExitIntrInfoVmcs(pVCpu, pVmxTransient);
10312 rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
10313 rc |= hmR0VmxReadExitIntrErrorCodeVmcs(pVCpu, pVmxTransient);
10314 AssertRCReturn(rc, rc);
10315
10316 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntrInfo),
10317 pVmxTransient->cbInstr, pVmxTransient->uExitIntrErrorCode, 0 /* GCPtrFaultAddress */);
10318 }
10319
10320 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_EM_DBG_BREAKPOINT);
10321 return rc;
10322}
10323
10324
10325/**
10326 * VM-exit exception handler for #DB (Debug exception).
10327 */
10328static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10329{
10330 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10331 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
10332 Log6(("XcptDB\n"));
10333
10334 /*
10335 * Get the DR6-like values from the exit qualification and pass it to DBGF
10336 * for processing.
10337 */
10338 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10339 AssertRCReturn(rc, rc);
10340
10341    /* See Intel spec. Table 27-1 "Exit Qualifications for debug exceptions" for the format. */
10342 uint64_t uDR6 = X86_DR6_INIT_VAL;
10343 uDR6 |= ( pVmxTransient->uExitQualification
10344 & (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3 | X86_DR6_BD | X86_DR6_BS));
10345
10346 rc = DBGFRZTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pMixedCtx), uDR6, pVCpu->hm.s.fSingleInstruction);
10347 if (rc == VINF_EM_RAW_GUEST_TRAP)
10348 {
10349 /*
10350 * The exception was for the guest. Update DR6, DR7.GD and
10351 * IA32_DEBUGCTL.LBR before forwarding it.
10352 * (See Intel spec. 27.1 "Architectural State before a VM-Exit".)
10353 */
10354 VMMRZCallRing3Disable(pVCpu);
10355 HM_DISABLE_PREEMPT_IF_NEEDED();
10356
10357 pMixedCtx->dr[6] &= ~X86_DR6_B_MASK;
10358 pMixedCtx->dr[6] |= uDR6;
10359 if (CPUMIsGuestDebugStateActive(pVCpu))
10360 ASMSetDR6(pMixedCtx->dr[6]);
10361
10362 HM_RESTORE_PREEMPT_IF_NEEDED();
10363 VMMRZCallRing3Enable(pVCpu);
10364
10365 rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
10366 AssertRCReturn(rc, rc);
10367
10368 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
10369 pMixedCtx->dr[7] &= ~X86_DR7_GD;
10370
10371 /* Paranoia. */
10372 pMixedCtx->dr[7] &= ~X86_DR7_RAZ_MASK;
10373 pMixedCtx->dr[7] |= X86_DR7_RA1_MASK;
10374
10375 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)pMixedCtx->dr[7]);
10376 AssertRCReturn(rc, rc);
10377
10378 /*
10379 * Raise #DB in the guest.
10380 */
10381 rc = hmR0VmxReadExitIntrInfoVmcs(pVCpu, pVmxTransient);
10382 rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
10383 rc |= hmR0VmxReadExitIntrErrorCodeVmcs(pVCpu, pVmxTransient);
10384 AssertRCReturn(rc, rc);
10385 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntrInfo),
10386 pVmxTransient->cbInstr, pVmxTransient->uExitIntrErrorCode, 0 /* GCPtrFaultAddress */);
10387 return VINF_SUCCESS;
10388 }
10389
10390 /*
10391 * Not a guest trap, must be a hypervisor related debug event then.
10392 * Update DR6 in case someone is interested in it.
10393 */
10394 AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc));
10395 AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5);
10396 CPUMSetHyperDR6(pVCpu, uDR6);
10397
10398 return rc;
10399}
10400
10401
10402/**
10403 * VM-exit exception handler for #NM (Device-not-available exception: floating
10404 * point exception).
10405 */
10406static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10407{
10408 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10409
10410 /* We require CR0 and EFER. EFER is always up-to-date. */
10411 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
10412 AssertRCReturn(rc, rc);
10413
10414    /* We're playing with the host CPU state here, make sure we can't preempt or longjmp to ring-3. */
10415 VMMRZCallRing3Disable(pVCpu);
10416 HM_DISABLE_PREEMPT_IF_NEEDED();
10417
10418 /* If the guest FPU was active at the time of the #NM exit, then it's a guest fault. */
10419 if (pVmxTransient->fWasGuestFPUStateActive)
10420 {
10421 rc = VINF_EM_RAW_GUEST_TRAP;
10422 Assert(CPUMIsGuestFPUStateActive(pVCpu) || (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0));
10423 }
10424 else
10425 {
10426#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
10427 Assert(!pVmxTransient->fWasGuestFPUStateActive);
10428#endif
10429 /* Lazy FPU loading; load the guest-FPU state transparently and continue execution of the guest. */
10430 rc = CPUMR0LoadGuestFPU(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx);
10431 Assert(rc == VINF_EM_RAW_GUEST_TRAP || (rc == VINF_SUCCESS && CPUMIsGuestFPUStateActive(pVCpu)));
10432 }
10433
10434 HM_RESTORE_PREEMPT_IF_NEEDED();
10435 VMMRZCallRing3Enable(pVCpu);
10436
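    /* VINF_SUCCESS means we just loaded the guest FPU state: flag CR0 for re-syncing (TS/MP handling changes) and
       resume the guest without injecting #NM; any other status means the #NM belongs to the guest. */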
10437 if (rc == VINF_SUCCESS)
10438 {
10439 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
10440 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
10441 }
10442 else
10443 {
10444 /* Forward #NM to the guest. */
10445 Assert(rc == VINF_EM_RAW_GUEST_TRAP);
10446 rc = hmR0VmxReadExitIntrInfoVmcs(pVCpu, pVmxTransient);
10447 AssertRCReturn(rc, rc);
10448 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntrInfo),
10449 pVmxTransient->cbInstr, 0 /* error code */, 0 /* GCPtrFaultAddress */);
10450 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
10451 }
10452
10453 return VINF_SUCCESS;
10454}
10455
10456
10457/**
10458 * VM-exit exception handler for #GP (General-protection exception).
10459 *
10460 * @remarks Requires pVmxTransient->uExitIntrInfo to be up-to-date.
10461 */
10462static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10463{
10464 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10465 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
10466
10467 int rc = VERR_INTERNAL_ERROR_5;
10468 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
10469 {
10470#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
10471 /* If the guest is not in real-mode or we have unrestricted execution support, reflect #GP to the guest. */
10472 rc = hmR0VmxReadExitIntrInfoVmcs(pVCpu, pVmxTransient);
10473 rc |= hmR0VmxReadExitIntrErrorCodeVmcs(pVCpu, pVmxTransient);
10474 rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
10475 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10476 AssertRCReturn(rc, rc);
10477 Log4(("#GP Gst: RIP %#RX64 ErrorCode=%#x CR0=%#RX64 CPL=%u\n", pMixedCtx->rip, pVmxTransient->uExitIntrErrorCode,
10478 pMixedCtx->cr0, CPUMGetGuestCPL(pVCpu)));
10479 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntrInfo),
10480 pVmxTransient->cbInstr, pVmxTransient->uExitIntrErrorCode, 0 /* GCPtrFaultAddress */);
10481 return rc;
10482#else
10483 /* We don't intercept #GP. */
10484 AssertMsgFailed(("Unexpected VM-exit caused by #GP exception\n"));
10485 return VERR_VMX_UNEXPECTED_EXCEPTION;
10486#endif
10487 }
10488
10489 Assert(CPUMIsGuestInRealModeEx(pMixedCtx));
10490 Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest);
10491
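    /* Real-on-V86 mode: real-mode code runs in virtual-8086 mode where privileged instructions raise #GP, so
       emulate the common ones inline here and hand everything else to the instruction interpreter. */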
10492 /* EMInterpretDisasCurrent() requires a lot of the state, save the entire state. */
10493 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10494 AssertRCReturn(rc, rc);
10495
10496 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
10497 uint32_t cbOp = 0;
10498 PVM pVM = pVCpu->CTX_SUFF(pVM);
10499 rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
10500 if (RT_SUCCESS(rc))
10501 {
10502 rc = VINF_SUCCESS;
10503 Assert(cbOp == pDis->cbInstr);
10504 Log4(("#GP Disas OpCode=%u CS:EIP %04x:%#RX64\n", pDis->pCurInstr->uOpcode, pMixedCtx->cs.Sel, pMixedCtx->rip));
10505 switch (pDis->pCurInstr->uOpcode)
10506 {
10507 case OP_CLI:
10508 {
10509 pMixedCtx->eflags.Bits.u1IF = 0;
10510 pMixedCtx->rip += pDis->cbInstr;
10511 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS;
10512 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli);
10513 break;
10514 }
10515
10516 case OP_STI:
10517 {
10518 pMixedCtx->eflags.Bits.u1IF = 1;
10519 pMixedCtx->rip += pDis->cbInstr;
10520 EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip);
10521 Assert(VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
10522 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS;
10523 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti);
10524 break;
10525 }
10526
10527 case OP_HLT:
10528 {
10529 rc = VINF_EM_HALT;
10530 pMixedCtx->rip += pDis->cbInstr;
10531 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP;
10532 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
10533 break;
10534 }
10535
10536 case OP_POPF:
10537 {
10538 Log4(("POPF CS:RIP %04x:%#RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip));
10539 uint32_t cbParm = 0;
10540 uint32_t uMask = 0;
10541 if (pDis->fPrefix & DISPREFIX_OPSIZE)
10542 {
10543 cbParm = 4;
10544 uMask = 0xffffffff;
10545 }
10546 else
10547 {
10548 cbParm = 2;
10549 uMask = 0xffff;
10550 }
10551
10552 /* Get the stack pointer & pop the contents of the stack onto Eflags. */
10553 RTGCPTR GCPtrStack = 0;
10554 X86EFLAGS Eflags;
10555 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0,
10556 &GCPtrStack);
10557 if (RT_SUCCESS(rc))
10558 {
10559 Assert(sizeof(Eflags.u32) >= cbParm);
10560 Eflags.u32 = 0;
10561 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u32, cbParm);
10562 }
10563 if (RT_FAILURE(rc))
10564 {
10565 rc = VERR_EM_INTERPRETER;
10566 break;
10567 }
10568 Log4(("POPF %#x -> %#RX64 mask=%#x RIP=%#RX64\n", Eflags.u, pMixedCtx->rsp, uMask, pMixedCtx->rip));
10569 pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~(X86_EFL_POPF_BITS & uMask))
10570 | (Eflags.u32 & X86_EFL_POPF_BITS & uMask);
10571 /* The RF bit is always cleared by POPF; see Intel Instruction reference for POPF. */
10572 pMixedCtx->eflags.Bits.u1RF = 0;
10573 pMixedCtx->esp += cbParm;
10574 pMixedCtx->esp &= uMask;
10575 pMixedCtx->rip += pDis->cbInstr;
10576 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS;
10577 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf);
10578 break;
10579 }
10580
10581 case OP_PUSHF:
10582 {
10583 uint32_t cbParm = 0;
10584 uint32_t uMask = 0;
10585 if (pDis->fPrefix & DISPREFIX_OPSIZE)
10586 {
10587 cbParm = 4;
10588 uMask = 0xffffffff;
10589 }
10590 else
10591 {
10592 cbParm = 2;
10593 uMask = 0xffff;
10594 }
10595
10596 /* Get the stack pointer & push the contents of eflags onto the stack. */
10597 RTGCPTR GCPtrStack = 0;
10598 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), (pMixedCtx->esp - cbParm) & uMask,
10599 SELMTOFLAT_FLAGS_CPL0, &GCPtrStack);
10600 if (RT_FAILURE(rc))
10601 {
10602 rc = VERR_EM_INTERPRETER;
10603 break;
10604 }
10605 X86EFLAGS Eflags = pMixedCtx->eflags;
10606 /* The RF & VM bits are cleared on image stored on stack; see Intel Instruction reference for PUSHF. */
10607 Eflags.Bits.u1RF = 0;
10608 Eflags.Bits.u1VM = 0;
10609
10610 rc = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u, cbParm);
10611 if (RT_FAILURE(rc))
10612 {
10613 rc = VERR_EM_INTERPRETER;
10614 break;
10615 }
10616 Log4(("PUSHF %#x -> %#RGv\n", Eflags.u, GCPtrStack));
10617 pMixedCtx->esp -= cbParm;
10618 pMixedCtx->esp &= uMask;
10619 pMixedCtx->rip += pDis->cbInstr;
10620 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP;
10621 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf);
10622 break;
10623 }
10624
10625 case OP_IRET:
10626 {
10627 /** @todo Handle 32-bit operand sizes and check stack limits. See Intel
10628 * instruction reference. */
10629 RTGCPTR GCPtrStack = 0;
10630 uint32_t uMask = 0xffff;
10631 uint16_t aIretFrame[3];
10632 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
10633 {
10634 rc = VERR_EM_INTERPRETER;
10635 break;
10636 }
10637 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0,
10638 &GCPtrStack);
10639 if (RT_SUCCESS(rc))
10640 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
10641 if (RT_FAILURE(rc))
10642 {
10643 rc = VERR_EM_INTERPRETER;
10644 break;
10645 }
10646 pMixedCtx->eip = 0;
10647 pMixedCtx->ip = aIretFrame[0];
10648 pMixedCtx->cs.Sel = aIretFrame[1];
10649 pMixedCtx->cs.ValidSel = aIretFrame[1];
10650 pMixedCtx->cs.u64Base = (uint64_t)pMixedCtx->cs.Sel << 4;
10651 pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~(X86_EFL_POPF_BITS & uMask))
10652 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
10653 pMixedCtx->sp += sizeof(aIretFrame);
10654 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_SEGMENT_REGS | HM_CHANGED_GUEST_RSP
10655 | HM_CHANGED_GUEST_RFLAGS;
10656                Log4(("IRET %#RGv to %04x:%x\n", GCPtrStack, pMixedCtx->cs.Sel, pMixedCtx->ip));
10657 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret);
10658 break;
10659 }
10660
10661 case OP_INT:
10662 {
10663 uint16_t uVector = pDis->Param1.uValue & 0xff;
10664 hmR0VmxSetPendingIntN(pVCpu, pMixedCtx, uVector, pDis->cbInstr);
10665 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
10666 break;
10667 }
10668
10669 case OP_INTO:
10670 {
10671 if (pMixedCtx->eflags.Bits.u1OF)
10672 {
10673 hmR0VmxSetPendingXcptOF(pVCpu, pMixedCtx, pDis->cbInstr);
10674 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
10675 }
10676 break;
10677 }
10678
10679 default:
10680 {
10681 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pMixedCtx), 0 /* pvFault */,
10682 EMCODETYPE_SUPERVISOR);
10683 rc = VBOXSTRICTRC_VAL(rc2);
10684 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL_GUEST;
10685 Log4(("#GP rc=%Rrc\n", rc));
10686 break;
10687 }
10688 }
10689 }
10690 else
10691 rc = VERR_EM_INTERPRETER;
10692
10693 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
10694 ("#GP Unexpected rc=%Rrc\n", rc));
10695 return rc;
10696}
10697
10698
10699/**
10700 * VM-exit exception handler wrapper for generic exceptions. Simply re-injects
10701 * the exception reported in the VMX transient structure back into the VM.
10702 *
10703 * @remarks Requires uExitIntrInfo in the VMX transient structure to be
10704 * up-to-date.
10705 */
10706static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10707{
10708 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10709
10710 /* Re-inject the exception into the guest. This cannot be a double-fault condition which would have been handled in
10711 hmR0VmxCheckExitDueToEventDelivery(). */
10712 int rc = hmR0VmxReadExitIntrErrorCodeVmcs(pVCpu, pVmxTransient);
10713 rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
10714 AssertRCReturn(rc, rc);
10715 Assert(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO);
10716
10717 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntrInfo),
10718 pVmxTransient->cbInstr, pVmxTransient->uExitIntrErrorCode, 0 /* GCPtrFaultAddress */);
10719 return VINF_SUCCESS;
10720}
10721
10722
10723/**
10724 * VM-exit exception handler for #PF (Page-fault exception).
10725 */
10726static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10727{
10728 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10729 PVM pVM = pVCpu->CTX_SUFF(pVM);
10730 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10731 rc |= hmR0VmxReadExitIntrInfoVmcs(pVCpu, pVmxTransient);
10732 rc |= hmR0VmxReadExitIntrErrorCodeVmcs(pVCpu, pVmxTransient);
10733 AssertRCReturn(rc, rc);
10734
10735#if defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) || defined(HMVMX_ALWAYS_TRAP_PF)
10736 if (pVM->hm.s.fNestedPaging)
10737 {
10738 pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
10739 if (RT_LIKELY(!pVmxTransient->fVectoringPF))
10740 {
10741 pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */
10742 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntrInfo),
10743 0 /* cbInstr */, pVmxTransient->uExitIntrErrorCode, pVmxTransient->uExitQualification);
10744 }
10745 else
10746 {
10747 /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
10748 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
10749 Log4(("Pending #DF due to vectoring #PF. NP\n"));
10750 }
10751 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
10752 return rc;
10753 }
10754#else
10755 Assert(!pVM->hm.s.fNestedPaging);
10756#endif
10757
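    /* No nested paging here: let PGM walk the guest page tables and either sync the shadow page tables, handle the
       MMIO/handler access, or tell us the fault must be reflected into the guest. */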
10758 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10759 AssertRCReturn(rc, rc);
10760
10761 Log4(("#PF: cr2=%#RX64 cs:rip=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", pVmxTransient->uExitQualification,
10762 pMixedCtx->cs.Sel, pMixedCtx->rip, pVmxTransient->uExitIntrErrorCode, pMixedCtx->cr3));
10763
10764 TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQualification, (RTGCUINT)pVmxTransient->uExitIntrErrorCode);
10765 rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntrErrorCode, CPUMCTX2CORE(pMixedCtx),
10766 (RTGCPTR)pVmxTransient->uExitQualification);
10767
10768 Log4(("#PF: rc=%Rrc\n", rc));
10769 if (rc == VINF_SUCCESS)
10770 {
10771        /* Successfully synced shadow page tables or emulated an MMIO instruction. */
10772 /** @todo this isn't quite right, what if guest does lgdt with some MMIO
10773 * memory? We don't update the whole state here... */
10774 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS
10775 | HM_CHANGED_VMX_GUEST_APIC_STATE;
10776 TRPMResetTrap(pVCpu);
10777 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
10778 return rc;
10779 }
10780 else if (rc == VINF_EM_RAW_GUEST_TRAP)
10781 {
10782 if (!pVmxTransient->fVectoringPF)
10783 {
10784 /* It's a guest page fault and needs to be reflected to the guest. */
10785 uint32_t uGstErrorCode = TRPMGetErrorCode(pVCpu);
10786 TRPMResetTrap(pVCpu);
10787 pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */
10788 pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */
10789 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntrInfo),
10790 0 /* cbInstr */, uGstErrorCode, pVmxTransient->uExitQualification);
10791 }
10792 else
10793 {
10794 /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
10795 TRPMResetTrap(pVCpu);
10796 pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */
10797 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
10798 Log4(("#PF: Pending #DF due to vectoring #PF\n"));
10799 }
10800
10801 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
10802 return VINF_SUCCESS;
10803 }
10804
10805 TRPMResetTrap(pVCpu);
10806 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
10807 return rc;
10808}
10809
10810/** @} */
10811