VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMVMXR0.cpp@49225

Last change on this file since 49225 was 49209, checked in by vboxsync, 11 years ago

VMM/HMVMXR0: Fix MSR_TSC_AUX, restore host MSR.

1/* $Id: HMVMXR0.cpp 49209 2013-10-21 11:09:42Z vboxsync $ */
2/** @file
3 * HM VMX (Intel VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2012-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_HM
22#include <iprt/asm-amd64-x86.h>
23#include <iprt/thread.h>
24#include <iprt/string.h>
25
26#include "HMInternal.h"
27#include <VBox/vmm/vm.h>
28#include "HMVMXR0.h"
29#include <VBox/vmm/pdmapi.h>
30#include <VBox/vmm/dbgf.h>
31#include <VBox/vmm/iem.h>
32#include <VBox/vmm/iom.h>
33#include <VBox/vmm/selm.h>
34#include <VBox/vmm/tm.h>
35#ifdef VBOX_WITH_REM
36# include <VBox/vmm/rem.h>
37#endif
38#ifdef DEBUG_ramshankar
39#define HMVMX_SAVE_FULL_GUEST_STATE
40#define HMVMX_SYNC_FULL_GUEST_STATE
41#define HMVMX_ALWAYS_CHECK_GUEST_STATE
42#define HMVMX_ALWAYS_TRAP_ALL_XCPTS
43#define HMVMX_ALWAYS_TRAP_PF
44#define HMVMX_ALWAYS_SWAP_FPU_STATE
45#endif
46
47
48/*******************************************************************************
49* Defined Constants And Macros *
50*******************************************************************************/
51#if defined(RT_ARCH_AMD64)
52# define HMVMX_IS_64BIT_HOST_MODE() (true)
53typedef RTHCUINTREG HMVMXHCUINTREG;
54#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
55extern "C" uint32_t g_fVMXIs64bitHost;
56# define HMVMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
57typedef uint64_t HMVMXHCUINTREG;
58#else
59# define HMVMX_IS_64BIT_HOST_MODE() (false)
60typedef RTHCUINTREG HMVMXHCUINTREG;
61#endif
62
63/** Use the function table. */
64#define HMVMX_USE_FUNCTION_TABLE
65
66/** Determine which tagged-TLB flush handler to use. */
67#define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0
68#define HMVMX_FLUSH_TAGGED_TLB_EPT 1
69#define HMVMX_FLUSH_TAGGED_TLB_VPID 2
70#define HMVMX_FLUSH_TAGGED_TLB_NONE 3
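/*
 * Illustrative sketch of how one of the above handlers is presumably selected (the actual
 * selection lives in hmR0VmxSetupTaggedTlb(), outside this excerpt; the field name
 * uFlushTaggedTlb is an assumption):
 *
 *     if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
 *         pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT_VPID;
 *     else if (pVM->hm.s.fNestedPaging)
 *         pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT;
 *     else if (pVM->hm.s.vmx.fVpid)
 *         pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_VPID;
 *     else
 *         pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_NONE;
 */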
71
72/** @name Updated-guest-state flags.
73 * @{ */
74#define HMVMX_UPDATED_GUEST_RIP RT_BIT(0)
75#define HMVMX_UPDATED_GUEST_RSP RT_BIT(1)
76#define HMVMX_UPDATED_GUEST_RFLAGS RT_BIT(2)
77#define HMVMX_UPDATED_GUEST_CR0 RT_BIT(3)
78#define HMVMX_UPDATED_GUEST_CR3 RT_BIT(4)
79#define HMVMX_UPDATED_GUEST_CR4 RT_BIT(5)
80#define HMVMX_UPDATED_GUEST_GDTR RT_BIT(6)
81#define HMVMX_UPDATED_GUEST_IDTR RT_BIT(7)
82#define HMVMX_UPDATED_GUEST_LDTR RT_BIT(8)
83#define HMVMX_UPDATED_GUEST_TR RT_BIT(9)
84#define HMVMX_UPDATED_GUEST_SEGMENT_REGS RT_BIT(10)
85#define HMVMX_UPDATED_GUEST_DEBUG RT_BIT(11)
86#define HMVMX_UPDATED_GUEST_FS_BASE_MSR RT_BIT(12)
87#define HMVMX_UPDATED_GUEST_GS_BASE_MSR RT_BIT(13)
88#define HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR RT_BIT(14)
89#define HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR RT_BIT(15)
90#define HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR RT_BIT(16)
91#define HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS RT_BIT(17)
92#define HMVMX_UPDATED_GUEST_ACTIVITY_STATE RT_BIT(18)
93#define HMVMX_UPDATED_GUEST_APIC_STATE RT_BIT(19)
94#define HMVMX_UPDATED_GUEST_ALL ( HMVMX_UPDATED_GUEST_RIP \
95 | HMVMX_UPDATED_GUEST_RSP \
96 | HMVMX_UPDATED_GUEST_RFLAGS \
97 | HMVMX_UPDATED_GUEST_CR0 \
98 | HMVMX_UPDATED_GUEST_CR3 \
99 | HMVMX_UPDATED_GUEST_CR4 \
100 | HMVMX_UPDATED_GUEST_GDTR \
101 | HMVMX_UPDATED_GUEST_IDTR \
102 | HMVMX_UPDATED_GUEST_LDTR \
103 | HMVMX_UPDATED_GUEST_TR \
104 | HMVMX_UPDATED_GUEST_SEGMENT_REGS \
105 | HMVMX_UPDATED_GUEST_DEBUG \
106 | HMVMX_UPDATED_GUEST_FS_BASE_MSR \
107 | HMVMX_UPDATED_GUEST_GS_BASE_MSR \
108 | HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR \
109 | HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR \
110 | HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR \
111 | HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS \
112 | HMVMX_UPDATED_GUEST_ACTIVITY_STATE \
113 | HMVMX_UPDATED_GUEST_APIC_STATE)
114/** @} */
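/*
 * Illustrative sketch of how these flags are used (a minimal example; the per-VCPU mask
 * name fUpdatedGuestState is an assumption based on usage elsewhere in VirtualBox): a
 * VMCS field is read at most once per VM-exit, after which its flag is marked updated.
 *
 *     if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RIP))
 *     {
 *         uint64_t u64Val;
 *         VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val);
 *         pMixedCtx->rip = u64Val;
 *         pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_RIP;
 *     }
 */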
115
116/** @name
117 * Flags to skip redundant reads of some common VMCS fields that are not part of
118 * the guest-CPU state but are in the transient structure.
119 * @{ */
120#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO RT_BIT(0)
121#define HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE RT_BIT(1)
122#define HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION RT_BIT(2)
123#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN RT_BIT(3)
124#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO RT_BIT(4)
125#define HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE RT_BIT(5)
126#define HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO RT_BIT(6)
127/** @} */
128
129/** @name
130 * States of the VMCS.
131 *
132 * This does not reflect all possible VMCS states but currently only those
133 * needed for maintaining the VMCS consistently even when thread-context hooks
134 * are used. Maybe later this can be extended (e.g. nested virtualization).
135 * @{ */
136#define HMVMX_VMCS_STATE_CLEAR RT_BIT(0)
137#define HMVMX_VMCS_STATE_ACTIVE RT_BIT(1)
138#define HMVMX_VMCS_STATE_LAUNCHED RT_BIT(2)
139/** @} */
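/*
 * Illustrative mapping of these states to the VMX instructions that produce them (based on
 * the Intel SDM VMCS life cycle, not on code in this excerpt):
 *
 *     VMCLEAR   -> HMVMX_VMCS_STATE_CLEAR      (inactive, safe to migrate to another CPU)
 *     VMPTRLD   -> HMVMX_VMCS_STATE_ACTIVE     (current VMCS on this CPU)
 *     VMLAUNCH  -> HMVMX_VMCS_STATE_LAUNCHED   (subsequent VM-entries must use VMRESUME)
 */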
140
141/**
142 * Exception bitmap mask for real-mode guests (real-on-v86).
143 *
144 * We need to intercept all exceptions manually (except #PF). #NM is also
145 * handled separately, see hmR0VmxLoadSharedCR0(). #PF need not be intercepted
146 * even in real-mode if we have Nested Paging support.
147 */
148#define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) | RT_BIT(X86_XCPT_DB) | RT_BIT(X86_XCPT_NMI) \
149 | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \
150 | RT_BIT(X86_XCPT_UD) /* RT_BIT(X86_XCPT_NM) */ | RT_BIT(X86_XCPT_DF) \
151 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \
152 | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \
153 | RT_BIT(X86_XCPT_MF) | RT_BIT(X86_XCPT_AC) | RT_BIT(X86_XCPT_MC) \
154 | RT_BIT(X86_XCPT_XF))
155
156/**
157 * Exception bitmap mask for all contributory exceptions.
158 *
159 * Page fault is deliberately excluded here as it's conditional as to whether
160 * it's contributory or benign. Page faults are handled separately.
161 */
162#define HMVMX_CONTRIBUTORY_XCPT_MASK ( RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_TS) \
163 | RT_BIT(X86_XCPT_DE))
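/*
 * Illustrative sketch of how this mask is typically consumed (the exact helper is outside
 * this excerpt; uIdtVector and uExitVector are hypothetical locals): if the exception that
 * was being delivered and the new exception are both contributory, a double fault (#DF) is
 * raised instead of delivering the second exception.
 *
 *     if (   (HMVMX_CONTRIBUTORY_XCPT_MASK & RT_BIT(uIdtVector))
 *         && (HMVMX_CONTRIBUTORY_XCPT_MASK & RT_BIT(uExitVector)))
 *     {
 *         // ... queue X86_XCPT_DF for injection instead ...
 *     }
 */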
164
165/** Maximum VM-instruction error number. */
166#define HMVMX_INSTR_ERROR_MAX 28
167
168/** Profiling macro. */
169#ifdef HM_PROFILE_EXIT_DISPATCH
170# define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed)
171# define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed)
172#else
173# define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0)
174# define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0)
175#endif
176
177/** Assert that preemption is disabled or covered by thread-context hooks. */
178#define HMVMX_ASSERT_PREEMPT_SAFE() Assert( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \
179 || !RTThreadPreemptIsEnabled(NIL_RTTHREAD));
180
181/** Assert that we haven't migrated CPUs when thread-context hooks are not
182 * used. */
183#define HMVMX_ASSERT_CPU_SAFE() AssertMsg( VMMR0ThreadCtxHooksAreRegistered(pVCpu) \
184 || pVCpu->hm.s.idEnteredCpu == RTMpCpuId(), \
185 ("Illegal migration! Entered on CPU %u Current %u\n", \
186 pVCpu->hm.s.idEnteredCpu, RTMpCpuId())); \
187
188/** Helper macro for VM-exit handlers called unexpectedly. */
189#define HMVMX_RETURN_UNEXPECTED_EXIT() \
190 do { \
191 pVCpu->hm.s.u32HMError = pVmxTransient->uExitReason; \
192 return VERR_VMX_UNEXPECTED_EXIT; \
193 } while (0)
194
195
196/*******************************************************************************
197* Structures and Typedefs *
198*******************************************************************************/
199/**
200 * VMX transient state.
201 *
202 * A state structure for holding miscellaneous information across
203 * VMX non-root operation and restored after the transition.
204 */
205typedef struct VMXTRANSIENT
206{
207 /** The host's rflags/eflags. */
208 RTCCUINTREG uEflags;
209#if HC_ARCH_BITS == 32
210 uint32_t u32Alignment0;
211#endif
212 /** The guest's LSTAR MSR value used for TPR patching for 32-bit guests. */
213 uint64_t u64LStarMsr;
214 /** The guest's TPR value used for TPR shadowing. */
215 uint8_t u8GuestTpr;
216 /** Alignment. */
217 uint8_t abAlignment0[7];
218
219 /** The basic VM-exit reason. */
220 uint16_t uExitReason;
221 /** Alignment. */
222 uint16_t u16Alignment0;
223 /** The VM-exit interruption error code. */
224 uint32_t uExitIntErrorCode;
225 /** The VM-exit exit qualification. */
226 uint64_t uExitQualification;
227
228 /** The VM-exit interruption-information field. */
229 uint32_t uExitIntInfo;
230 /** The VM-exit instruction-length field. */
231 uint32_t cbInstr;
232 /** The VM-exit instruction-information field. */
233 union
234 {
235 /** Plain unsigned int representation. */
236 uint32_t u;
237 /** INS and OUTS information. */
238 struct
239 {
240 uint32_t u6Reserved0 : 7;
241 /** The address size; 0=16-bit, 1=32-bit, 2=64-bit, rest undefined. */
242 uint32_t u3AddrSize : 3;
243 uint32_t u5Reserved1 : 5;
244 /** The segment register (X86_SREG_XXX). */
245 uint32_t iSegReg : 3;
246 uint32_t uReserved2 : 14;
247 } StrIo;
248 } ExitInstrInfo;
249 /** Whether the VM-entry failed or not. */
250 bool fVMEntryFailed;
251 /** Alignment. */
252 uint8_t abAlignment1[3];
253
254 /** The VM-entry interruption-information field. */
255 uint32_t uEntryIntInfo;
256 /** The VM-entry exception error code field. */
257 uint32_t uEntryXcptErrorCode;
258 /** The VM-entry instruction length field. */
259 uint32_t cbEntryInstr;
260
261 /** IDT-vectoring information field. */
262 uint32_t uIdtVectoringInfo;
263 /** IDT-vectoring error code. */
264 uint32_t uIdtVectoringErrorCode;
265
266 /** Mask of currently read VMCS fields; HMVMX_UPDATED_TRANSIENT_*. */
267 uint32_t fVmcsFieldsRead;
268
269 /** Whether the guest FPU was active at the time of VM-exit. */
270 bool fWasGuestFPUStateActive;
271 /** Whether the guest debug state was active at the time of VM-exit. */
272 bool fWasGuestDebugStateActive;
273 /** Whether the hyper debug state was active at the time of VM-exit. */
274 bool fWasHyperDebugStateActive;
275 /** Whether TSC-offsetting should be setup before VM-entry. */
276 bool fUpdateTscOffsettingAndPreemptTimer;
277 /** Whether the VM-exit was caused by a page-fault during delivery of a
278 * contributory exception or a page-fault. */
279 bool fVectoringPF;
280} VMXTRANSIENT;
281AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, sizeof(uint64_t));
282AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntInfo, sizeof(uint64_t));
283AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntInfo, sizeof(uint64_t));
284AssertCompileMemberAlignment(VMXTRANSIENT, fWasGuestFPUStateActive, sizeof(uint64_t));
285AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t));
286/** Pointer to VMX transient state. */
287typedef VMXTRANSIENT *PVMXTRANSIENT;
288
289
290/**
291 * MSR-bitmap read permissions.
292 */
293typedef enum VMXMSREXITREAD
294{
295 /** Reading this MSR causes a VM-exit. */
296 VMXMSREXIT_INTERCEPT_READ = 0xb,
297 /** Reading this MSR does not cause a VM-exit. */
298 VMXMSREXIT_PASSTHRU_READ
299} VMXMSREXITREAD;
300
301/**
302 * MSR-bitmap write permissions.
303 */
304typedef enum VMXMSREXITWRITE
305{
306 /** Writing to this MSR causes a VM-exit. */
307 VMXMSREXIT_INTERCEPT_WRITE = 0xd,
308 /** Writing to this MSR does not cause a VM-exit. */
309 VMXMSREXIT_PASSTHRU_WRITE
310} VMXMSREXITWRITE;
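/*
 * Illustrative use of the two enums with hmR0VmxSetMsrPermission() (defined further down);
 * a minimal example, not a call taken from this file:
 *
 *     hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS,
 *                             VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
 */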
311
312/**
313 * VMX VM-exit handler.
314 *
315 * @returns VBox status code.
316 * @param pVCpu Pointer to the VMCPU.
317 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
318 * out-of-sync. Make sure to update the required
319 * fields before using them.
320 * @param pVmxTransient Pointer to the VMX-transient structure.
321 */
322#ifndef HMVMX_USE_FUNCTION_TABLE
323typedef int FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
324#else
325typedef DECLCALLBACK(int) FNVMXEXITHANDLER(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
326/** Pointer to VM-exit handler. */
327typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER;
328#endif
329
330
331/*******************************************************************************
332* Internal Functions *
333*******************************************************************************/
334static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
335static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
336static void hmR0VmxClearEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx);
337static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntInfo, uint32_t cbInstr,
338 uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, uint32_t *puIntState);
339#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
340static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu);
341#endif
342#ifndef HMVMX_USE_FUNCTION_TABLE
343DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason);
344# define HMVMX_EXIT_DECL static int
345#else
346# define HMVMX_EXIT_DECL static DECLCALLBACK(int)
347#endif
348
349/** @name VM-exit handlers.
350 * @{
351 */
352static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi;
353static FNVMXEXITHANDLER hmR0VmxExitExtInt;
354static FNVMXEXITHANDLER hmR0VmxExitTripleFault;
355static FNVMXEXITHANDLER hmR0VmxExitInitSignal;
356static FNVMXEXITHANDLER hmR0VmxExitSipi;
357static FNVMXEXITHANDLER hmR0VmxExitIoSmi;
358static FNVMXEXITHANDLER hmR0VmxExitSmi;
359static FNVMXEXITHANDLER hmR0VmxExitIntWindow;
360static FNVMXEXITHANDLER hmR0VmxExitNmiWindow;
361static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch;
362static FNVMXEXITHANDLER hmR0VmxExitCpuid;
363static FNVMXEXITHANDLER hmR0VmxExitGetsec;
364static FNVMXEXITHANDLER hmR0VmxExitHlt;
365static FNVMXEXITHANDLER hmR0VmxExitInvd;
366static FNVMXEXITHANDLER hmR0VmxExitInvlpg;
367static FNVMXEXITHANDLER hmR0VmxExitRdpmc;
368static FNVMXEXITHANDLER hmR0VmxExitRdtsc;
369static FNVMXEXITHANDLER hmR0VmxExitRsm;
370static FNVMXEXITHANDLER hmR0VmxExitSetPendingXcptUD;
371static FNVMXEXITHANDLER hmR0VmxExitMovCRx;
372static FNVMXEXITHANDLER hmR0VmxExitMovDRx;
373static FNVMXEXITHANDLER hmR0VmxExitIoInstr;
374static FNVMXEXITHANDLER hmR0VmxExitRdmsr;
375static FNVMXEXITHANDLER hmR0VmxExitWrmsr;
376static FNVMXEXITHANDLER hmR0VmxExitErrInvalidGuestState;
377static FNVMXEXITHANDLER hmR0VmxExitErrMsrLoad;
378static FNVMXEXITHANDLER hmR0VmxExitErrUndefined;
379static FNVMXEXITHANDLER hmR0VmxExitMwait;
380static FNVMXEXITHANDLER hmR0VmxExitMtf;
381static FNVMXEXITHANDLER hmR0VmxExitMonitor;
382static FNVMXEXITHANDLER hmR0VmxExitPause;
383static FNVMXEXITHANDLER hmR0VmxExitErrMachineCheck;
384static FNVMXEXITHANDLER hmR0VmxExitTprBelowThreshold;
385static FNVMXEXITHANDLER hmR0VmxExitApicAccess;
386static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess;
387static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess;
388static FNVMXEXITHANDLER hmR0VmxExitEptViolation;
389static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig;
390static FNVMXEXITHANDLER hmR0VmxExitRdtscp;
391static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer;
392static FNVMXEXITHANDLER hmR0VmxExitWbinvd;
393static FNVMXEXITHANDLER hmR0VmxExitXsetbv;
394static FNVMXEXITHANDLER hmR0VmxExitRdrand;
395static FNVMXEXITHANDLER hmR0VmxExitInvpcid;
396/** @} */
397
398static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
399static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
400static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
401static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
402static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
403static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
404static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient);
405static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
406
407/*******************************************************************************
408* Global Variables *
409*******************************************************************************/
410#ifdef HMVMX_USE_FUNCTION_TABLE
411
412/**
413 * VMX_EXIT dispatch table.
414 */
415static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] =
416{
417 /* 00 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi,
418 /* 01 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt,
419 /* 02 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault,
420 /* 03 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitInitSignal,
421 /* 04 VMX_EXIT_SIPI */ hmR0VmxExitSipi,
422 /* 05 VMX_EXIT_IO_SMI */ hmR0VmxExitIoSmi,
423 /* 06 VMX_EXIT_SMI */ hmR0VmxExitSmi,
424 /* 07 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow,
425 /* 08 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow,
426 /* 09 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch,
427 /* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid,
428 /* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec,
429 /* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt,
430 /* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd,
431 /* 14 VMX_EXIT_INVLPG */ hmR0VmxExitInvlpg,
432 /* 15 VMX_EXIT_RDPMC */ hmR0VmxExitRdpmc,
433 /* 16 VMX_EXIT_RDTSC */ hmR0VmxExitRdtsc,
434 /* 17 VMX_EXIT_RSM */ hmR0VmxExitRsm,
435 /* 18 VMX_EXIT_VMCALL */ hmR0VmxExitSetPendingXcptUD,
436 /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitSetPendingXcptUD,
437 /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitSetPendingXcptUD,
438 /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitSetPendingXcptUD,
439 /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitSetPendingXcptUD,
440 /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitSetPendingXcptUD,
441 /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitSetPendingXcptUD,
442 /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitSetPendingXcptUD,
443 /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitSetPendingXcptUD,
444 /* 27 VMX_EXIT_VMXON */ hmR0VmxExitSetPendingXcptUD,
445 /* 28 VMX_EXIT_MOV_CRX */ hmR0VmxExitMovCRx,
446 /* 29 VMX_EXIT_MOV_DRX */ hmR0VmxExitMovDRx,
447 /* 30 VMX_EXIT_IO_INSTR */ hmR0VmxExitIoInstr,
448 /* 31 VMX_EXIT_RDMSR */ hmR0VmxExitRdmsr,
449 /* 32 VMX_EXIT_WRMSR */ hmR0VmxExitWrmsr,
450 /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ hmR0VmxExitErrInvalidGuestState,
451 /* 34 VMX_EXIT_ERR_MSR_LOAD */ hmR0VmxExitErrMsrLoad,
452 /* 35 UNDEFINED */ hmR0VmxExitErrUndefined,
453 /* 36 VMX_EXIT_MWAIT */ hmR0VmxExitMwait,
454 /* 37 VMX_EXIT_MTF */ hmR0VmxExitMtf,
455 /* 38 UNDEFINED */ hmR0VmxExitErrUndefined,
456 /* 39 VMX_EXIT_MONITOR */ hmR0VmxExitMonitor,
457 /* 40 VMX_EXIT_PAUSE */ hmR0VmxExitPause,
458 /* 41 VMX_EXIT_ERR_MACHINE_CHECK */ hmR0VmxExitErrMachineCheck,
459 /* 42 UNDEFINED */ hmR0VmxExitErrUndefined,
460 /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ hmR0VmxExitTprBelowThreshold,
461 /* 44 VMX_EXIT_APIC_ACCESS */ hmR0VmxExitApicAccess,
462 /* 45 UNDEFINED */ hmR0VmxExitErrUndefined,
463 /* 46 VMX_EXIT_XDTR_ACCESS */ hmR0VmxExitXdtrAccess,
464 /* 47 VMX_EXIT_TR_ACCESS */ hmR0VmxExitXdtrAccess,
465 /* 48 VMX_EXIT_EPT_VIOLATION */ hmR0VmxExitEptViolation,
466 /* 49 VMX_EXIT_EPT_MISCONFIG */ hmR0VmxExitEptMisconfig,
467 /* 50 VMX_EXIT_INVEPT */ hmR0VmxExitSetPendingXcptUD,
468 /* 51 VMX_EXIT_RDTSCP */ hmR0VmxExitRdtscp,
469 /* 52 VMX_EXIT_PREEMPT_TIMER */ hmR0VmxExitPreemptTimer,
470 /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitSetPendingXcptUD,
471 /* 54 VMX_EXIT_WBINVD */ hmR0VmxExitWbinvd,
472 /* 55 VMX_EXIT_XSETBV */ hmR0VmxExitXsetbv,
473 /* 56 UNDEFINED */ hmR0VmxExitErrUndefined,
474 /* 57 VMX_EXIT_RDRAND */ hmR0VmxExitRdrand,
475 /* 58 VMX_EXIT_INVPCID */ hmR0VmxExitInvpcid,
476 /* 59 VMX_EXIT_VMFUNC */ hmR0VmxExitSetPendingXcptUD
477};
478#endif /* HMVMX_USE_FUNCTION_TABLE */
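/*
 * Illustrative sketch of how the table is indexed by a dispatcher (the real dispatcher is
 * outside this excerpt; the defensive bounds check shown here is an assumption, not code
 * taken from this file):
 *
 *     if (RT_LIKELY(pVmxTransient->uExitReason <= VMX_EXIT_MAX))
 *         rc = g_apfnVMExitHandlers[pVmxTransient->uExitReason](pVCpu, pMixedCtx, pVmxTransient);
 *     else
 *         rc = hmR0VmxExitErrUndefined(pVCpu, pMixedCtx, pVmxTransient);
 */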
479
480#ifdef VBOX_STRICT
481static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] =
482{
483 /* 0 */ "(Not Used)",
484 /* 1 */ "VMCALL executed in VMX root operation.",
485 /* 2 */ "VMCLEAR with invalid physical address.",
486 /* 3 */ "VMCLEAR with VMXON pointer.",
487 /* 4 */ "VMLAUNCH with non-clear VMCS.",
488 /* 5 */ "VMRESUME with non-launched VMCS.",
489 /* 6 */ "VMRESUME after VMXOFF",
490 /* 7 */ "VM entry with invalid control fields.",
491 /* 8 */ "VM entry with invalid host state fields.",
492 /* 9 */ "VMPTRLD with invalid physical address.",
493 /* 10 */ "VMPTRLD with VMXON pointer.",
494 /* 11 */ "VMPTRLD with incorrect revision identifier.",
495 /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.",
496 /* 13 */ "VMWRITE to read-only VMCS component.",
497 /* 14 */ "(Not Used)",
498 /* 15 */ "VMXON executed in VMX root operation.",
499 /* 16 */ "VM entry with invalid executive-VMCS pointer.",
500 /* 17 */ "VM entry with non-launched executing VMCS.",
501 /* 18 */ "VM entry with executive-VMCS pointer not VMXON pointer.",
502 /* 19 */ "VMCALL with non-clear VMCS.",
503 /* 20 */ "VMCALL with invalid VM-exit control fields.",
504 /* 21 */ "(Not Used)",
505 /* 22 */ "VMCALL with incorrect MSEG revision identifier.",
506 /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.",
507 /* 24 */ "VMCALL with invalid SMM-monitor features.",
508 /* 25 */ "VM entry with invalid VM-execution control fields in executive VMCS.",
509 /* 26 */ "VM entry with events blocked by MOV SS.",
510 /* 27 */ "(Not Used)",
511 /* 28 */ "Invalid operand to INVEPT/INVVPID."
512};
513#endif /* VBOX_STRICT */
514
515
516
517/**
518 * Updates the VM's last error record. If there was a VMX instruction error,
519 * reads the error data from the VMCS and updates VCPU's last error record as
520 * well.
521 *
522 * @param pVM Pointer to the VM.
523 * @param pVCpu Pointer to the VMCPU (can be NULL if @a rc is not
524 * VERR_VMX_UNABLE_TO_START_VM or
525 * VERR_VMX_INVALID_VMCS_FIELD).
526 * @param rc The error code.
527 */
528static void hmR0VmxUpdateErrorRecord(PVM pVM, PVMCPU pVCpu, int rc)
529{
530 AssertPtr(pVM);
531 if ( rc == VERR_VMX_INVALID_VMCS_FIELD
532 || rc == VERR_VMX_UNABLE_TO_START_VM)
533 {
534 AssertPtrReturnVoid(pVCpu);
535 VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
536 }
537 pVM->hm.s.lLastError = rc;
538}
539
540
541/**
542 * Reads the VM-entry interruption-information field from the VMCS into the VMX
543 * transient structure.
544 *
545 * @returns VBox status code.
546 * @param pVmxTransient Pointer to the VMX transient structure.
547 *
548 * @remarks No-long-jump zone!!!
549 */
550DECLINLINE(int) hmR0VmxReadEntryIntInfoVmcs(PVMXTRANSIENT pVmxTransient)
551{
552 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntInfo);
553 AssertRCReturn(rc, rc);
554 return VINF_SUCCESS;
555}
556
557
558/**
559 * Reads the VM-entry exception error code field from the VMCS into
560 * the VMX transient structure.
561 *
562 * @returns VBox status code.
563 * @param pVmxTransient Pointer to the VMX transient structure.
564 *
565 * @remarks No-long-jump zone!!!
566 */
567DECLINLINE(int) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
568{
569 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode);
570 AssertRCReturn(rc, rc);
571 return VINF_SUCCESS;
572}
573
574
575/**
576 * Reads the VM-entry instruction length field from the VMCS into
577 * the VMX transient structure.
578 *
579 * @returns VBox status code.
580 * @param pVCpu Pointer to the VMCPU.
581 * @param pVmxTransient Pointer to the VMX transient structure.
582 *
583 * @remarks No-long-jump zone!!!
584 */
585DECLINLINE(int) hmR0VmxReadEntryInstrLenVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
586{
587 int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr);
588 AssertRCReturn(rc, rc);
589 return VINF_SUCCESS;
590}
591
592
593/**
594 * Reads the VM-exit interruption-information field from the VMCS into the VMX
595 * transient structure.
596 *
597 * @returns VBox status code.
598 * @param pVCpu Pointer to the VMCPU.
599 * @param pVmxTransient Pointer to the VMX transient structure.
600 */
601DECLINLINE(int) hmR0VmxReadExitIntInfoVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
602{
603 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO))
604 {
605 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo);
606 AssertRCReturn(rc, rc);
607 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO;
608 }
609 return VINF_SUCCESS;
610}
611
612
613/**
614 * Reads the VM-exit interruption error code from the VMCS into the VMX
615 * transient structure.
616 *
617 * @returns VBox status code.
618 * @param pVCpu Pointer to the VMCPU.
619 * @param pVmxTransient Pointer to the VMX transient structure.
620 */
621DECLINLINE(int) hmR0VmxReadExitIntErrorCodeVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
622{
623 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE))
624 {
625 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode);
626 AssertRCReturn(rc, rc);
627 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_ERROR_CODE;
628 }
629 return VINF_SUCCESS;
630}
631
632
633/**
634 * Reads the VM-exit instruction length field from the VMCS into the VMX
635 * transient structure.
636 *
637 * @returns VBox status code.
638 * @param pVCpu Pointer to the VMCPU.
639 * @param pVmxTransient Pointer to the VMX transient structure.
640 */
641DECLINLINE(int) hmR0VmxReadExitInstrLenVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
642{
643 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN))
644 {
645 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbInstr);
646 AssertRCReturn(rc, rc);
647 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_LEN;
648 }
649 return VINF_SUCCESS;
650}
651
652
653/**
654 * Reads the VM-exit instruction-information field from the VMCS into
655 * the VMX transient structure.
656 *
657 * @returns VBox status code.
658 * @param pVCpu The cross context per CPU structure.
659 * @param pVmxTransient Pointer to the VMX transient structure.
660 */
661DECLINLINE(int) hmR0VmxReadExitInstrInfoVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
662{
663 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO))
664 {
665 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u);
666 AssertRCReturn(rc, rc);
667 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_INSTR_INFO;
668 }
669 return VINF_SUCCESS;
670}
671
672
673/**
674 * Reads the exit qualification from the VMCS into the VMX transient structure.
675 *
676 * @returns VBox status code.
677 * @param pVCpu Pointer to the VMCPU.
678 * @param pVmxTransient Pointer to the VMX transient structure.
679 */
680DECLINLINE(int) hmR0VmxReadExitQualificationVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient)
681{
682 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION))
683 {
684 int rc = VMXReadVmcsGstN(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQualification);
685 AssertRCReturn(rc, rc);
686 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_EXIT_QUALIFICATION;
687 }
688 return VINF_SUCCESS;
689}
690
691
692/**
693 * Reads the IDT-vectoring information field from the VMCS into the VMX
694 * transient structure.
695 *
696 * @returns VBox status code.
697 * @param pVmxTransient Pointer to the VMX transient structure.
698 *
699 * @remarks No-long-jump zone!!!
700 */
701DECLINLINE(int) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient)
702{
703 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO))
704 {
705 int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_INFO, &pVmxTransient->uIdtVectoringInfo);
706 AssertRCReturn(rc, rc);
707 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_INFO;
708 }
709 return VINF_SUCCESS;
710}
711
712
713/**
714 * Reads the IDT-vectoring error code from the VMCS into the VMX
715 * transient structure.
716 *
717 * @returns VBox status code.
718 * @param pVmxTransient Pointer to the VMX transient structure.
719 */
720DECLINLINE(int) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient)
721{
722 if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE))
723 {
724 int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode);
725 AssertRCReturn(rc, rc);
726 pVmxTransient->fVmcsFieldsRead |= HMVMX_UPDATED_TRANSIENT_IDT_VECTORING_ERROR_CODE;
727 }
728 return VINF_SUCCESS;
729}
730
731
732/**
733 * Enters VMX root mode operation on the current CPU.
734 *
735 * @returns VBox status code.
736 * @param pVM Pointer to the VM (optional, can be NULL after
737 * a resume).
738 * @param HCPhysCpuPage Physical address of the VMXON region.
739 * @param pvCpuPage Pointer to the VMXON region.
740 */
741static int hmR0VmxEnterRootMode(PVM pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage)
742{
743 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
744 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
745 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
746
747 if (pVM)
748 {
749 /* Write the VMCS revision dword to the VMXON region. */
750 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo);
751 }
752
753 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
754 RTCCUINTREG uEflags = ASMIntDisableFlags();
755
756 /* Enable the VMX bit in CR4 if necessary. */
757 RTCCUINTREG uCr4 = ASMGetCR4();
758 if (!(uCr4 & X86_CR4_VMXE))
759 ASMSetCR4(uCr4 | X86_CR4_VMXE);
760
761 /* Enter VMX root mode. */
762 int rc = VMXEnable(HCPhysCpuPage);
763 if (RT_FAILURE(rc))
764 ASMSetCR4(uCr4);
765
766 /* Restore interrupts. */
767 ASMSetFlags(uEflags);
768 return rc;
769}
770
771
772/**
773 * Exits VMX root mode operation on the current CPU.
774 *
775 * @returns VBox status code.
776 */
777static int hmR0VmxLeaveRootMode(void)
778{
779 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
780
781 /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */
782 RTCCUINTREG uEflags = ASMIntDisableFlags();
783
784 /* If we're for some reason not in VMX root mode, then don't leave it. */
785 RTCCUINTREG uHostCR4 = ASMGetCR4();
786
787 int rc;
788 if (uHostCR4 & X86_CR4_VMXE)
789 {
790 /* Exit VMX root mode and clear the VMX bit in CR4. */
791 VMXDisable();
792 ASMSetCR4(uHostCR4 & ~X86_CR4_VMXE);
793 rc = VINF_SUCCESS;
794 }
795 else
796 rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE;
797
798 /* Restore interrupts. */
799 ASMSetFlags(uEflags);
800 return rc;
801}
802
803
804/**
805 * Allocates and maps one physically contiguous page. The allocated page is
806 * zeroed out. (Used by various VT-x structures).
807 *
808 * @returns IPRT status code.
809 * @param pMemObj Pointer to the ring-0 memory object.
810 * @param ppVirt Where to store the virtual address of the
811 * allocation.
812 * @param pHCPhys Where to store the physical address of the
813 * allocation.
814 */
815DECLINLINE(int) hmR0VmxPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
816{
817 AssertPtrReturn(pMemObj, VERR_INVALID_PARAMETER);
818 AssertPtrReturn(ppVirt, VERR_INVALID_PARAMETER);
819 AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER);
820
821 int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */);
822 if (RT_FAILURE(rc))
823 return rc;
824 *ppVirt = RTR0MemObjAddress(*pMemObj);
825 *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */);
826 ASMMemZero32(*ppVirt, PAGE_SIZE);
827 return VINF_SUCCESS;
828}
829
830
831/**
832 * Frees and unmaps an allocated physical page.
833 *
834 * @param pMemObj Pointer to the ring-0 memory object.
835 * @param ppVirt Where to re-initialize the virtual address of the
836 * allocation as 0.
837 * @param pHCPhys Where to re-initialize the physical address of the
838 * allocation as 0.
839 */
840DECLINLINE(void) hmR0VmxPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys)
841{
842 AssertPtr(pMemObj);
843 AssertPtr(ppVirt);
844 AssertPtr(pHCPhys);
845 if (*pMemObj != NIL_RTR0MEMOBJ)
846 {
847 int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */);
848 AssertRC(rc);
849 *pMemObj = NIL_RTR0MEMOBJ;
850 *ppVirt = 0;
851 *pHCPhys = 0;
852 }
853}
854
855
856/**
857 * Worker function to free VT-x related structures.
858 *
859 * @returns IPRT status code.
860 * @param pVM Pointer to the VM.
861 */
862static void hmR0VmxStructsFree(PVM pVM)
863{
864 for (VMCPUID i = 0; i < pVM->cCpus; i++)
865 {
866 PVMCPU pVCpu = &pVM->aCpus[i];
867 AssertPtr(pVCpu);
868
869#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
870 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
871 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
872#endif
873
874 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
875 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, &pVCpu->hm.s.vmx.HCPhysMsrBitmap);
876
877 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic, &pVCpu->hm.s.vmx.HCPhysVirtApic);
878 hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
879 }
880
881 hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, &pVM->hm.s.vmx.HCPhysApicAccess);
882#ifdef VBOX_WITH_CRASHDUMP_MAGIC
883 hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch);
884#endif
885}
886
887
888/**
889 * Worker function to allocate VT-x related VM structures.
890 *
891 * @returns IPRT status code.
892 * @param pVM Pointer to the VM.
893 */
894static int hmR0VmxStructsAlloc(PVM pVM)
895{
896 /*
897 * Initialize members up-front so we can cleanup properly on allocation failure.
898 */
899#define VMXLOCAL_INIT_VM_MEMOBJ(a_Name, a_VirtPrefix) \
900 pVM->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \
901 pVM->hm.s.vmx.a_VirtPrefix##a_Name = 0; \
902 pVM->hm.s.vmx.HCPhys##a_Name = 0;
903
904#define VMXLOCAL_INIT_VMCPU_MEMOBJ(a_Name, a_VirtPrefix) \
905 pVCpu->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \
906 pVCpu->hm.s.vmx.a_VirtPrefix##a_Name = 0; \
907 pVCpu->hm.s.vmx.HCPhys##a_Name = 0;
908
909#ifdef VBOX_WITH_CRASHDUMP_MAGIC
910 VMXLOCAL_INIT_VM_MEMOBJ(Scratch, pv);
911#endif
912 VMXLOCAL_INIT_VM_MEMOBJ(ApicAccess, pb);
913
914 AssertCompile(sizeof(VMCPUID) == sizeof(pVM->cCpus));
915 for (VMCPUID i = 0; i < pVM->cCpus; i++)
916 {
917 PVMCPU pVCpu = &pVM->aCpus[i];
918 VMXLOCAL_INIT_VMCPU_MEMOBJ(Vmcs, pv);
919 VMXLOCAL_INIT_VMCPU_MEMOBJ(VirtApic, pb);
920 VMXLOCAL_INIT_VMCPU_MEMOBJ(MsrBitmap, pv);
921#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
922 VMXLOCAL_INIT_VMCPU_MEMOBJ(GuestMsr, pv);
923 VMXLOCAL_INIT_VMCPU_MEMOBJ(HostMsr, pv);
924#endif
925 }
926#undef VMXLOCAL_INIT_VMCPU_MEMOBJ
927#undef VMXLOCAL_INIT_VM_MEMOBJ
928
929 /* The VMCS size cannot be more than 4096 bytes. See Intel spec. Appendix A.1 "Basic VMX Information". */
930 AssertReturnStmt(MSR_IA32_VMX_BASIC_INFO_VMCS_SIZE(pVM->hm.s.vmx.Msrs.u64BasicInfo) <= PAGE_SIZE,
931 (&pVM->aCpus[0])->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE,
932 VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO);
933
934 /*
935 * Allocate all the VT-x structures.
936 */
937 int rc = VINF_SUCCESS;
938#ifdef VBOX_WITH_CRASHDUMP_MAGIC
939 rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch);
940 if (RT_FAILURE(rc))
941 goto cleanup;
942 strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
943 *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef);
944#endif
945
946 /* Allocate the APIC-access page for trapping APIC accesses from the guest. */
947 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
948 {
949 rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess,
950 &pVM->hm.s.vmx.HCPhysApicAccess);
951 if (RT_FAILURE(rc))
952 goto cleanup;
953 }
954
955 /*
956 * Initialize per-VCPU VT-x structures.
957 */
958 for (VMCPUID i = 0; i < pVM->cCpus; i++)
959 {
960 PVMCPU pVCpu = &pVM->aCpus[i];
961 AssertPtr(pVCpu);
962
963 /* Allocate the VM control structure (VMCS). */
964 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs);
965 if (RT_FAILURE(rc))
966 goto cleanup;
967
968 /* Allocate the Virtual-APIC page for transparent TPR accesses. */
969 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
970 {
971 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic,
972 &pVCpu->hm.s.vmx.HCPhysVirtApic);
973 if (RT_FAILURE(rc))
974 goto cleanup;
975 }
976
977 /* Allocate the MSR-bitmap if supported by the CPU. The MSR-bitmap is for transparent accesses of specific MSRs. */
978 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
979 {
980 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap,
981 &pVCpu->hm.s.vmx.HCPhysMsrBitmap);
982 if (RT_FAILURE(rc))
983 goto cleanup;
984 memset(pVCpu->hm.s.vmx.pvMsrBitmap, 0xff, PAGE_SIZE);
985 }
986
987#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
988 /* Allocate the VM-entry MSR-load and VM-exit MSR-store page for the guest MSRs. */
989 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr);
990 if (RT_FAILURE(rc))
991 goto cleanup;
992
993 /* Allocate the VM-exit MSR-load page for the host MSRs. */
994 rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr);
995 if (RT_FAILURE(rc))
996 goto cleanup;
997#endif
998 }
999
1000 return VINF_SUCCESS;
1001
1002cleanup:
1003 hmR0VmxStructsFree(pVM);
1004 return rc;
1005}
1006
1007
1008/**
1009 * Does global VT-x initialization (called during module initialization).
1010 *
1011 * @returns VBox status code.
1012 */
1013VMMR0DECL(int) VMXR0GlobalInit(void)
1014{
1015#ifdef HMVMX_USE_FUNCTION_TABLE
1016 AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_apfnVMExitHandlers));
1017# ifdef VBOX_STRICT
1018 for (unsigned i = 0; i < RT_ELEMENTS(g_apfnVMExitHandlers); i++)
1019 Assert(g_apfnVMExitHandlers[i]);
1020# endif
1021#endif
1022 return VINF_SUCCESS;
1023}
1024
1025
1026/**
1027 * Does global VT-x termination (called during module termination).
1028 */
1029VMMR0DECL(void) VMXR0GlobalTerm()
1030{
1031 /* Nothing to do currently. */
1032}
1033
1034
1035/**
1036 * Sets up and activates VT-x on the current CPU.
1037 *
1038 * @returns VBox status code.
1039 * @param pCpu Pointer to the global CPU info struct.
1040 * @param pVM Pointer to the VM (can be NULL after a host resume
1041 * operation).
1042 * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a
1043 * fEnabledByHost is true).
1044 * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if
1045 * @a fEnabledByHost is true).
1046 * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to
1047 * enable VT-x on the host.
1048 * @param pvMsrs Opaque pointer to VMXMSRS struct.
1049 */
1050VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBALCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost,
1051 void *pvMsrs)
1052{
1053 AssertReturn(pCpu, VERR_INVALID_PARAMETER);
1054 AssertReturn(pvMsrs, VERR_INVALID_PARAMETER);
1055 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1056
1057 /* Enable VT-x if it's not already enabled by the host. */
1058 if (!fEnabledByHost)
1059 {
1060 int rc = hmR0VmxEnterRootMode(pVM, HCPhysCpuPage, pvCpuPage);
1061 if (RT_FAILURE(rc))
1062 return rc;
1063 }
1064
1065 /*
1066 * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor has been using EPTPs) so
1067 * we don't retain any stale guest-physical mappings which won't get invalidated when flushing by VPID.
1068 */
1069 PVMXMSRS pMsrs = (PVMXMSRS)pvMsrs;
1070 if (pMsrs->u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
1071 {
1072 hmR0VmxFlushEpt(NULL /* pVCpu */, VMX_FLUSH_EPT_ALL_CONTEXTS);
1073 pCpu->fFlushAsidBeforeUse = false;
1074 }
1075 else
1076 pCpu->fFlushAsidBeforeUse = true;
1077
1078 /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */
1079 ++pCpu->cTlbFlushes;
1080
1081 return VINF_SUCCESS;
1082}
1083
1084
1085/**
1086 * Deactivates VT-x on the current CPU.
1087 *
1088 * @returns VBox status code.
1089 * @param pCpu Pointer to the global CPU info struct.
1090 * @param pvCpuPage Pointer to the VMXON region.
1091 * @param HCPhysCpuPage Physical address of the VMXON region.
1092 *
1093 * @remarks This function should never be called when SUPR0EnableVTx() or
1094 * similar was used to enable VT-x on the host.
1095 */
1096VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBALCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
1097{
1098 NOREF(pCpu);
1099 NOREF(pvCpuPage);
1100 NOREF(HCPhysCpuPage);
1101
1102 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1103 return hmR0VmxLeaveRootMode();
1104}
1105
1106
1107/**
1108 * Sets the permission bits for the specified MSR in the MSR bitmap.
1109 *
1110 * @param pVCpu Pointer to the VMCPU.
1111 * @param uMSR The MSR value.
1112 * @param enmRead Whether reading this MSR causes a VM-exit.
1113 * @param enmWrite Whether writing this MSR causes a VM-exit.
1114 */
1115static void hmR0VmxSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, VMXMSREXITREAD enmRead, VMXMSREXITWRITE enmWrite)
1116{
1117 int32_t iBit;
1118 uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
1119
1120 /*
1121 * Layout:
1122 * 0x000 - 0x3ff - Low MSR read bits
1123 * 0x400 - 0x7ff - High MSR read bits
1124 * 0x800 - 0xbff - Low MSR write bits
1125 * 0xc00 - 0xfff - High MSR write bits
1126 */
1127 if (uMsr <= 0x00001FFF)
1128 iBit = uMsr;
1129 else if ( uMsr >= 0xC0000000
1130 && uMsr <= 0xC0001FFF)
1131 {
1132 iBit = (uMsr - 0xC0000000);
1133 pbMsrBitmap += 0x400;
1134 }
1135 else
1136 {
1137 AssertMsgFailed(("hmR0VmxSetMsrPermission: Invalid MSR %#RX32\n", uMsr));
1138 return;
1139 }
1140
1141 Assert(iBit <= 0x1fff);
1142 if (enmRead == VMXMSREXIT_INTERCEPT_READ)
1143 ASMBitSet(pbMsrBitmap, iBit);
1144 else
1145 ASMBitClear(pbMsrBitmap, iBit);
1146
1147 if (enmWrite == VMXMSREXIT_INTERCEPT_WRITE)
1148 ASMBitSet(pbMsrBitmap + 0x800, iBit);
1149 else
1150 ASMBitClear(pbMsrBitmap + 0x800, iBit);
1151}
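/*
 * Worked example for the bitmap layout documented above (illustrative only, not a call
 * made by this file): for the high-range MSR MSR_K8_LSTAR (0xC0000082), iBit becomes 0x82
 * and pbMsrBitmap is advanced by 0x400, so the read-permission bit lands in the
 * 0x400-0x7ff region and the write-permission bit (pbMsrBitmap + 0x800) in 0xc00-0xfff.
 *
 *     hmR0VmxSetMsrPermission(pVCpu, MSR_K8_LSTAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
 */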
1152
1153
1154/**
1155 * Flushes the TLB using EPT.
1156 *
1157 * @returns VBox status code.
1158 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
1159 * enmFlush).
1160 * @param enmFlush Type of flush.
1161 *
1162 * @remarks Caller is responsible for making sure this function is called only
1163 * when NestedPaging is supported and providing @a enmFlush that is
1164 * supported by the CPU.
1165 * @remarks Can be called with interrupts disabled.
1166 */
1167static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
1168{
1169 uint64_t au64Descriptor[2];
1170 if (enmFlush == VMX_FLUSH_EPT_ALL_CONTEXTS)
1171 au64Descriptor[0] = 0;
1172 else
1173 {
1174 Assert(pVCpu);
1175 au64Descriptor[0] = pVCpu->hm.s.vmx.HCPhysEPTP;
1176 }
1177 au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */
1178
1179 int rc = VMXR0InvEPT(enmFlush, &au64Descriptor[0]);
1180 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %RGv failed with %Rrc\n", enmFlush, pVCpu ? pVCpu->hm.s.vmx.HCPhysEPTP : 0,
1181 rc));
1182 if ( RT_SUCCESS(rc)
1183 && pVCpu)
1184 {
1185 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
1186 }
1187}
1188
1189
1190/**
1191 * Flushes the TLB using VPID.
1192 *
1193 * @returns VBox status code.
1194 * @param pVM Pointer to the VM.
1195 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
1196 * enmFlush).
1197 * @param enmFlush Type of flush.
1198 * @param GCPtr Virtual address of the page to flush (can be 0 depending
1199 * on @a enmFlush).
1200 *
1201 * @remarks Can be called with interrupts disabled.
1202 */
1203static void hmR0VmxFlushVpid(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
1204{
1205 AssertPtr(pVM);
1206 Assert(pVM->hm.s.vmx.fVpid);
1207
1208 uint64_t au64Descriptor[2];
1209 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
1210 {
1211 au64Descriptor[0] = 0;
1212 au64Descriptor[1] = 0;
1213 }
1214 else
1215 {
1216 AssertPtr(pVCpu);
1217 AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
1218 AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
1219 au64Descriptor[0] = pVCpu->hm.s.uCurrentAsid;
1220 au64Descriptor[1] = GCPtr;
1221 }
1222
1223 int rc = VMXR0InvVPID(enmFlush, &au64Descriptor[0]); NOREF(rc);
1224 AssertMsg(rc == VINF_SUCCESS,
1225 ("VMXR0InvVPID %#x %u %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
1226 if ( RT_SUCCESS(rc)
1227 && pVCpu)
1228 {
1229 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1230 }
1231}
1232
1233
1234/**
1235 * Invalidates a guest page by guest virtual address. Only relevant for
1236 * EPT/VPID, otherwise there is nothing really to invalidate.
1237 *
1238 * @returns VBox status code.
1239 * @param pVM Pointer to the VM.
1240 * @param pVCpu Pointer to the VMCPU.
1241 * @param GCVirt Guest virtual address of the page to invalidate.
1242 */
1243VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
1244{
1245 AssertPtr(pVM);
1246 AssertPtr(pVCpu);
1247 LogFlowFunc(("pVM=%p pVCpu=%p GCVirt=%RGv\n", pVM, pVCpu, GCVirt));
1248
1249 bool fFlushPending = VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_FLUSH);
1250 if (!fFlushPending)
1251 {
1252 /*
1253 * We must invalidate the guest TLB entry in either case; we cannot ignore it even for the EPT case.
1254 * See @bugref{6043} and @bugref{6177}.
1255 *
1256 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*() as this
1257 * function may be called in a loop with individual addresses.
1258 */
1259 if (pVM->hm.s.vmx.fVpid)
1260 {
1261 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1262 {
1263 hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
1264 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt);
1265 }
1266 else
1267 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1268 }
1269 else if (pVM->hm.s.fNestedPaging)
1270 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1271 }
1272
1273 return VINF_SUCCESS;
1274}
1275
1276
1277/**
1278 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
1279 * otherwise there is nothing really to invalidate.
1280 *
1281 * @returns VBox status code.
1282 * @param pVM Pointer to the VM.
1283 * @param pVCpu Pointer to the VMCPU.
1284 * @param GCPhys Guest physical address of the page to invalidate.
1285 */
1286VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
1287{
1288 LogFlowFunc(("%RGp\n", GCPhys));
1289
1290 /*
1291 * We cannot flush a page by guest-physical address. invvpid takes only a linear address while invept only flushes
1292 * by EPT not individual addresses. We update the force flag here and flush before the next VM-entry in hmR0VmxFlushTLB*().
1293 * This function might be called in a loop. This should cause a flush-by-EPT if EPT is in use. See @bugref{6568}.
1294 */
1295 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
1296 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgPhys);
1297 return VINF_SUCCESS;
1298}
1299
1300
1301/**
1302 * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the
1303 * case where neither EPT nor VPID is supported by the CPU.
1304 *
1305 * @param pVM Pointer to the VM.
1306 * @param pVCpu Pointer to the VMCPU.
1307 * @param pCpu Pointer to the global HM struct.
1308 *
1309 * @remarks Called with interrupts disabled.
1310 */
1311static void hmR0VmxFlushTaggedTlbNone(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1312{
1313 AssertPtr(pVCpu);
1314 AssertPtr(pCpu);
1315 NOREF(pVM);
1316
1317 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
1318
1319 /** @todo TLB shootdown is currently not used. See hmQueueInvlPage(). */
1320#if 0
1321 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1322 pVCpu->hm.s.TlbShootdown.cPages = 0;
1323#endif
1324
1325 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1326 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1327 pVCpu->hm.s.fForceTLBFlush = false;
1328 return;
1329}
1330
1331
1332/**
1333 * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary.
1334 *
1335 * @param pVM Pointer to the VM.
1336 * @param pVCpu Pointer to the VMCPU.
1337 * @param pCpu Pointer to the global HM CPU struct.
1338 * @remarks All references to "ASID" in this function pertain to "VPID" in
1339 * Intel's nomenclature. The reason is to avoid confusion in compare
1340 * statements, since the host-CPU copies are named "ASID".
1341 *
1342 * @remarks Called with interrupts disabled.
1343 */
1344static void hmR0VmxFlushTaggedTlbBoth(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1345{
1346#ifdef VBOX_WITH_STATISTICS
1347 bool fTlbFlushed = false;
1348# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0)
1349# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \
1350 if (!fTlbFlushed) \
1351 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \
1352 } while (0)
1353#else
1354# define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0)
1355# define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0)
1356#endif
1357
1358 AssertPtr(pVM);
1359 AssertPtr(pCpu);
1360 AssertPtr(pVCpu);
1361 AssertMsg(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid,
1362 ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled."
1363 "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hm.s.fNestedPaging, pVM->hm.s.vmx.fVpid));
1364
1365
1366 /*
1367 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1368 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
1369 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
1370 */
1371 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
1372 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1373 {
1374 ++pCpu->uCurrentAsid;
1375 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
1376 {
1377 pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */
1378 pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1379 pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1380 }
1381
1382 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
1383 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1384 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1385
1386 /*
1387 * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also
1388 * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}.
1389 */
1390 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1391 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1392 HMVMX_SET_TAGGED_TLB_FLUSHED();
1393 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); /* Already flushed-by-EPT, skip doing it again below. */
1394 }
1395
1396 /* Check for explicit TLB shootdowns. */
1397 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1398 {
1399 /*
1400 * Changes to the EPT paging structure by VMM requires flushing by EPT as the CPU creates
1401 * guest-physical (only EPT-tagged) mappings while traversing the EPT tables when EPT is in use.
1402 * Flushing by VPID will only flush linear (only VPID-tagged) and combined (EPT+VPID tagged) mappings
1403 * but not guest-physical mappings.
1404 * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information". See @bugref{6568}.
1405 */
1406 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1407 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1408 HMVMX_SET_TAGGED_TLB_FLUSHED();
1409 }
1410
1411 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage()
1412 * where it is commented out. Support individual entry flushing
1413 * someday. */
1414#if 0
1415 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
1416 {
1417 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
1418
1419 /*
1420 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
1421 * as supported by the CPU.
1422 */
1423 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1424 {
1425 for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
1426 hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
1427 }
1428 else
1429 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1430
1431 HMVMX_SET_TAGGED_TLB_FLUSHED();
1432 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1433 pVCpu->hm.s.TlbShootdown.cPages = 0;
1434 }
1435#endif
1436
1437 pVCpu->hm.s.fForceTLBFlush = false;
1438
1439 HMVMX_UPDATE_FLUSH_SKIPPED_STAT();
1440
1441 Assert(pVCpu->hm.s.idLastCpu == pCpu->idCpu);
1442 Assert(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes);
1443 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
1444 ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
1445 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
1446 ("cpu%d uCurrentAsid = %u\n", pCpu->idCpu, pCpu->uCurrentAsid));
1447 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
1448 ("cpu%d VM uCurrentAsid = %u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
1449
1450 /* Update VMCS with the VPID. */
1451 int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
1452 AssertRC(rc);
1453
1454#undef HMVMX_SET_TAGGED_TLB_FLUSHED
1455}
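/*
 * Worked example of the ASID/VPID wraparound above (numbers are illustrative): with
 * uMaxAsid = 16, ASIDs 1..15 are handed out (0 is reserved for the host). The next
 * assignment wraps uCurrentAsid back to 1, increments cTlbFlushes and sets
 * fFlushAsidBeforeUse, so every VCPU subsequently scheduled on this host CPU flushes its
 * reused VPID before running guest code.
 */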
1456
1457
1458/**
1459 * Flushes the tagged-TLB entries for EPT CPUs as necessary.
1460 *
1461 * @returns VBox status code.
1462 * @param pVM Pointer to the VM.
1463 * @param pVCpu Pointer to the VMCPU.
1464 * @param pCpu Pointer to the global HM CPU struct.
1465 *
1466 * @remarks Called with interrupts disabled.
1467 */
1468static void hmR0VmxFlushTaggedTlbEpt(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1469{
1470 AssertPtr(pVM);
1471 AssertPtr(pVCpu);
1472 AssertPtr(pCpu);
1473 AssertMsg(pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with NestedPaging disabled."));
1474 AssertMsg(!pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID enabled."));
1475
1476 /*
1477 * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last.
1478 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
1479 */
1480 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
1481 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1482 {
1483 pVCpu->hm.s.fForceTLBFlush = true;
1484 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1485 }
1486
1487 /* Check for explicit TLB shootdown flushes. */
1488 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1489 {
1490 pVCpu->hm.s.fForceTLBFlush = true;
1491 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1492 }
1493
1494 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1495 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1496
1497 if (pVCpu->hm.s.fForceTLBFlush)
1498 {
1499 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1500 pVCpu->hm.s.fForceTLBFlush = false;
1501 }
1502 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage()
1503 * where it is commented out. Support individual entry flushing
1504 * someday. */
1505#if 0
1506 else
1507 {
1508 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
1509 {
1510 /* We cannot flush individual entries without VPID support. Flush using EPT. */
1511 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
1512 hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmFlushEpt);
1513 }
1514 else
1515 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
1516
1517 pVCpu->hm.s.TlbShootdown.cPages = 0;
1518 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1519 }
1520#endif
1521}
1522
1523
1524/**
1525 * Flushes the tagged-TLB entries for VPID CPUs as necessary.
1526 *
1527 *
1528 * @param pVM Pointer to the VM.
1529 * @param pVCpu Pointer to the VMCPU.
1530 * @param pCpu Pointer to the global HM CPU struct.
1531 *
1532 * @remarks Called with interrupts disabled.
1533 */
1534static void hmR0VmxFlushTaggedTlbVpid(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1535{
1536 AssertPtr(pVM);
1537 AssertPtr(pVCpu);
1538 AssertPtr(pCpu);
1539 AssertMsg(pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbVpid cannot be invoked with VPID disabled."));
1540 AssertMsg(!pVM->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbVpid cannot be invoked with NestedPaging enabled."));
1541
1542 /*
1543 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
1544 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
1545 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
1546 */
1547 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
1548 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1549 {
1550 pVCpu->hm.s.fForceTLBFlush = true;
1551 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1552 }
1553
1554 /* Check for explicit TLB shootdown flushes. */
1555 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1556 {
1557 /*
1558 * If we ever support VPID flush combinations other than ALL-context or SINGLE-context (see hmR0VmxSetupTaggedTlb()),
1559 * we would need to explicitly flush in this case (add an fExplicitFlush = true here and extend the
1560 * pCpu->fFlushAsidBeforeUse check below to also consider fExplicitFlush) - an obscure corner case.
1561 */
1562 pVCpu->hm.s.fForceTLBFlush = true;
1563 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
1564 }
1565
1566 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1567 if (pVCpu->hm.s.fForceTLBFlush)
1568 {
1569 ++pCpu->uCurrentAsid;
1570 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
1571 {
1572 pCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */
1573 pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
1574 pCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */
1575 }
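 /*
  * Illustration with made-up numbers: with uMaxAsid = 16 and uCurrentAsid at 15, the increment above takes it
  * to 16, which is >= uMaxAsid, so it wraps back to 1 and bumps cTlbFlushes. That counter acts as a generation
  * number, so every other VCPU still carrying an ASID from the previous generation takes the fForceTLBFlush
  * path at the top of this function the next time it runs here instead of reusing a stale VPID.
  */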
1576
1577 pVCpu->hm.s.fForceTLBFlush = false;
1578 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1579 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
1580 if (pCpu->fFlushAsidBeforeUse)
1581 hmR0VmxFlushVpid(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
1582 }
1583 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere. See hmQueueInvlPage()
1584 * where it is commented out. Support individual entry flushing
1585 * someday. */
1586#if 0
1587 else
1588 {
1589 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
1590 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
1591 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
1592 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
1593
1594 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
1595 {
1596 /* Flush individual guest entries using VPID where the CPU supports it, otherwise fall back to a wider (single/all-context) VPID flush. */
1597 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1598 {
1599 for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
1600 hmR0VmxFlushVpid(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
1601 }
1602 else
1603 hmR0VmxFlushVpid(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
1604
1605 pVCpu->hm.s.TlbShootdown.cPages = 0;
1606 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1607 }
1608 else
1609 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
1610 }
1611#endif
1612
1613 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
1614 ("Flush count mismatch for cpu %d (%u vs %u)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
1615 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
1616 ("cpu%d uCurrentAsid = %u\n", pCpu->idCpu, pCpu->uCurrentAsid));
1617 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
1618 ("cpu%d VM uCurrentAsid = %u\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
1619
1620 int rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
1621 AssertRC(rc);
1622}
1623
1624
1625/**
1626 * Flushes the guest TLB entry based on CPU capabilities.
1627 *
1628 * @param pVCpu Pointer to the VMCPU.
1629 * @param pCpu Pointer to the global HM CPU struct.
1630 */
1631DECLINLINE(void) hmR0VmxFlushTaggedTlb(PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
1632{
1633 PVM pVM = pVCpu->CTX_SUFF(pVM);
1634 switch (pVM->hm.s.vmx.uFlushTaggedTlb)
1635 {
1636 case HMVMX_FLUSH_TAGGED_TLB_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pVM, pVCpu, pCpu); break;
1637 case HMVMX_FLUSH_TAGGED_TLB_EPT: hmR0VmxFlushTaggedTlbEpt(pVM, pVCpu, pCpu); break;
1638 case HMVMX_FLUSH_TAGGED_TLB_VPID: hmR0VmxFlushTaggedTlbVpid(pVM, pVCpu, pCpu); break;
1639 case HMVMX_FLUSH_TAGGED_TLB_NONE: hmR0VmxFlushTaggedTlbNone(pVM, pVCpu, pCpu); break;
1640 default:
1641 AssertMsgFailed(("Invalid flush-tag function identifier\n"));
1642 break;
1643 }
1644}
1645
1646
1647/**
1648 * Sets up the appropriate tagged TLB-flush level and handler for flushing guest
1649 * TLB entries from the host TLB before VM-entry.
1650 *
1651 * @returns VBox status code.
1652 * @param pVM Pointer to the VM.
1653 */
1654static int hmR0VmxSetupTaggedTlb(PVM pVM)
1655{
1656 /*
1657 * Determine optimal flush type for Nested Paging.
1658 * We cannot ignore EPT if no suitable flush type is supported by the CPU, as we've already set up unrestricted
1659 * guest execution (see hmR3InitFinalizeR0()).
1660 */
1661 if (pVM->hm.s.fNestedPaging)
1662 {
1663 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
1664 {
1665 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
1666 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_SINGLE_CONTEXT;
1667 else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
1668 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_ALL_CONTEXTS;
1669 else
1670 {
1671 /* Shouldn't happen. EPT is supported but no suitable flush types are supported. */
1672 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
1673 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1674 }
1675
1676 /* Make sure the write-back cacheable memory type for EPT is supported. */
1677 if (!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB))
1678 {
1679 LogRel(("hmR0VmxSetupTaggedTlb: Unsupported EPTP memory type %#x.\n", pVM->hm.s.vmx.Msrs.u64EptVpidCaps));
1680 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
1681 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1682 }
1683 }
1684 else
1685 {
1686 /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */
1687 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
1688 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1689 }
1690 }
1691
1692 /*
1693 * Determine optimal flush type for VPID.
1694 */
1695 if (pVM->hm.s.vmx.fVpid)
1696 {
1697 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
1698 {
1699 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
1700 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_SINGLE_CONTEXT;
1701 else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
1702 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_ALL_CONTEXTS;
1703 else
1704 {
1705 /* Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU. Ignore the VPID capability. */
1706 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
1707 LogRel(("hmR0VmxSetupTaggedTlb: Only INDIV_ADDR supported. Ignoring VPID.\n"));
1708 if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
1709 LogRel(("hmR0VmxSetupTaggedTlb: Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
1710 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
1711 pVM->hm.s.vmx.fVpid = false;
1712 }
1713 }
1714 else
1715 {
1716 /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */
1717 Log4(("hmR0VmxSetupTaggedTlb: VPID supported without INVEPT support. Ignoring VPID.\n"));
1718 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
1719 pVM->hm.s.vmx.fVpid = false;
1720 }
1721 }
1722
1723 /*
1724 * Setup the handler for flushing tagged-TLBs.
1725 */
1726 if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
1727 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT_VPID;
1728 else if (pVM->hm.s.fNestedPaging)
1729 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_EPT;
1730 else if (pVM->hm.s.vmx.fVpid)
1731 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_VPID;
1732 else
1733 pVM->hm.s.vmx.uFlushTaggedTlb = HMVMX_FLUSH_TAGGED_TLB_NONE;
1734 return VINF_SUCCESS;
1735}
1736
1737
1738/**
1739 * Sets up pin-based VM-execution controls in the VMCS.
1740 *
1741 * @returns VBox status code.
1742 * @param pVM Pointer to the VM.
1743 * @param pVCpu Pointer to the VMCPU.
1744 */
1745static int hmR0VmxSetupPinCtls(PVM pVM, PVMCPU pVCpu)
1746{
1747 AssertPtr(pVM);
1748 AssertPtr(pVCpu);
1749
1750 uint32_t val = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0; /* Bits set here must always be set. */
1751 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1; /* Bits cleared here must always be cleared. */
1752
1753 val |= VMX_VMCS_CTRL_PIN_EXEC_EXT_INT_EXIT /* External interrupts cause a VM-exit. */
1754 | VMX_VMCS_CTRL_PIN_EXEC_NMI_EXIT; /* Non-maskable interrupts cause a VM-exit. */
1755 Assert(!(val & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI));
1756
1757 /* Enable the VMX preemption timer. */
1758 if (pVM->hm.s.vmx.fUsePreemptTimer)
1759 {
1760 Assert(pVM->hm.s.vmx.Msrs.VmxPinCtls.n.allowed1 & VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER);
1761 val |= VMX_VMCS_CTRL_PIN_EXEC_PREEMPT_TIMER;
1762 }
1763
1764 if ((val & zap) != val)
1765 {
1766 LogRel(("hmR0VmxSetupPinCtls: invalid pin-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n",
1767 pVM->hm.s.vmx.Msrs.VmxPinCtls.n.disallowed0, val, zap));
1768 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC;
1769 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1770 }
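 /*
  * A purely illustrative example of the consistency check above (the values are invented, not from any real CPU):
  * with allowed1 = 0x0000007f and disallowed0 = 0x00000016, bits 1, 2 and 4 must be 1, bits 7 and up must be 0,
  * and only bits 0, 3, 5 and 6 are genuinely optional. Requesting a bit outside allowed1 makes
  * (val & zap) != val, so we fail with VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO instead of attempting a VM-entry
  * that the CPU would reject.
  */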
1771
1772 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, val);
1773 AssertRCReturn(rc, rc);
1774
1775 /* Update VCPU with the currently set pin-based VM-execution controls. */
1776 pVCpu->hm.s.vmx.u32PinCtls = val;
1777 return rc;
1778}
1779
1780
1781/**
1782 * Sets up processor-based VM-execution controls in the VMCS.
1783 *
1784 * @returns VBox status code.
1785 * @param pVM Pointer to the VM.
1786 * @param pVCpu Pointer to the VMCPU.
1787 */
1788static int hmR0VmxSetupProcCtls(PVM pVM, PVMCPU pVCpu)
1789{
1790 AssertPtr(pVM);
1791 AssertPtr(pVCpu);
1792
1793 int rc = VERR_INTERNAL_ERROR_5;
1794 uint32_t val = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0; /* Bits set here must be set in the VMCS. */
1795 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
1796
1797 val |= VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT /* HLT causes a VM-exit. */
1798 | VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING /* Use TSC-offsetting. */
1799 | VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */
1800 | VMX_VMCS_CTRL_PROC_EXEC_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */
1801 | VMX_VMCS_CTRL_PROC_EXEC_RDPMC_EXIT /* RDPMC causes a VM-exit. */
1802 | VMX_VMCS_CTRL_PROC_EXEC_MONITOR_EXIT /* MONITOR causes a VM-exit. */
1803 | VMX_VMCS_CTRL_PROC_EXEC_MWAIT_EXIT; /* MWAIT causes a VM-exit. */
1804
1805 /* We toggle VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT later; verify that it is not fixed to always-set or always-clear. */
1806 if ( !(pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT)
1807 || (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0 & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT))
1808 {
1809 LogRel(("hmR0VmxSetupProcCtls: unsupported VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT combo!"));
1810 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT;
1811 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1812 }
1813
1814 /* Without Nested Paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */
1815 if (!pVM->hm.s.fNestedPaging)
1816 {
1817 Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); /* Paranoia. */
1818 val |= VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT
1819 | VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
1820 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
1821 }
1822
1823 /* Use TPR shadowing if supported by the CPU. */
1824 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
1825 {
1826 Assert(pVCpu->hm.s.vmx.HCPhysVirtApic);
1827 Assert(!(pVCpu->hm.s.vmx.HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */
1828 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0);
1829 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic);
1830 AssertRCReturn(rc, rc);
1831
1832 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */
1833 /* CR8 writes cause a VM-exit based on the TPR threshold. */
1834 Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT));
1835 Assert(!(val & VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT));
1836 }
1837 else
1838 {
1839 /*
1840 * Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is invalid on 32-bit Intel CPUs.
1841 * Set this control only for 64-bit guests.
1842 */
1843 if (pVM->hm.s.fAllow64BitGuests)
1844 {
1845 val |= VMX_VMCS_CTRL_PROC_EXEC_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */
1846 | VMX_VMCS_CTRL_PROC_EXEC_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */
1847 }
1848 }
1849
1850 /* Use MSR-bitmaps if supported by the CPU. */
1851 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
1852 {
1853 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS;
1854
1855 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
1856 Assert(!(pVCpu->hm.s.vmx.HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */
1857 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap);
1858 AssertRCReturn(rc, rc);
1859
1860 /*
1861 * The guest can access the following MSRs (read, write) without causing VM-exits; they are loaded/stored
1862 * automatically (either as part of the MSR-load/store areas or dedicated fields in the VMCS).
1863 */
1864 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1865 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1866 hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1867 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_LSTAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1868 hmR0VmxSetMsrPermission(pVCpu, MSR_K6_STAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1869 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_SF_MASK, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1870 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1871 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1872 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_FS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
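 /*
  * Note on the calls above: each one clears the read and write intercept bits for that MSR in the MSR bitmap,
  * so for example a guest RDMSR of MSR_K8_LSTAR is satisfied directly by the CPU (using the value loaded from
  * the guest MSR area) without a VM-exit. MSRs not listed here keep their default exit-on-access behaviour.
  */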
1873 }
1874
1875 /* Use the secondary processor-based VM-execution controls if supported by the CPU. */
1876 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
1877 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
1878
1879 if ((val & zap) != val)
1880 {
1881 LogRel(("hmR0VmxSetupProcCtls: invalid processor-based VM-execution controls combo! cpu=%#RX64 val=%#RX64 zap=%#RX64\n",
1882 pVM->hm.s.vmx.Msrs.VmxProcCtls.n.disallowed0, val, zap));
1883 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC;
1884 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1885 }
1886
1887 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, val);
1888 AssertRCReturn(rc, rc);
1889
1890 /* Update VCPU with the currently set processor-based VM-execution controls. */
1891 pVCpu->hm.s.vmx.u32ProcCtls = val;
1892
1893 /*
1894 * Secondary processor-based VM-execution controls.
1895 */
1896 if (RT_LIKELY(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL))
1897 {
1898 val = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0; /* Bits set here must be set in the VMCS. */
1899 zap = pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
1900
1901 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT)
1902 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT; /* WBINVD causes a VM-exit. */
1903
1904 if (pVM->hm.s.fNestedPaging)
1905 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT; /* Enable EPT. */
1906 else
1907 {
1908 /*
1909 * Without Nested Paging, INVPCID should cause a VM-exit. Enabling this bit causes the CPU to refer to
1910 * VMX_VMCS_CTRL_PROC_EXEC_INVLPG_EXIT when INVPCID is executed by the guest.
1911 * See Intel spec. 25.4 "Changes to instruction behaviour in VMX non-root operation".
1912 */
1913 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_INVPCID)
1914 val |= VMX_VMCS_CTRL_PROC_EXEC2_INVPCID;
1915 }
1916
1917 if (pVM->hm.s.vmx.fVpid)
1918 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID; /* Enable VPID. */
1919
1920 if (pVM->hm.s.vmx.fUnrestrictedGuest)
1921 val |= VMX_VMCS_CTRL_PROC_EXEC2_UNRESTRICTED_GUEST; /* Enable Unrestricted Execution. */
1922
1923 /* Enable Virtual-APIC page accesses if supported by the CPU. This is essentially where the TPR shadow resides. */
1924 /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be
1925 * done dynamically. */
1926 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
1927 {
1928 Assert(pVM->hm.s.vmx.HCPhysApicAccess);
1929 Assert(!(pVM->hm.s.vmx.HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */
1930 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC; /* Virtualize APIC accesses. */
1931 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess);
1932 AssertRCReturn(rc, rc);
1933 }
1934
1935 if (pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
1936 {
1937 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP; /* Enable RDTSCP support. */
1938 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS)
1939 hmR0VmxSetMsrPermission(pVCpu, MSR_K8_TSC_AUX, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE);
1940 }
1941
1942 if ((val & zap) != val)
1943 {
1944 LogRel(("hmR0VmxSetupProcCtls: invalid secondary processor-based VM-execution controls combo! "
1945 "cpu=%#RX64 val=%#RX64 zap=%#RX64\n", pVM->hm.s.vmx.Msrs.VmxProcCtls2.n.disallowed0, val, zap));
1946 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1947 }
1948
1949 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, val);
1950 AssertRCReturn(rc, rc);
1951
1952 /* Update VCPU with the currently set secondary processor-based VM-execution controls. */
1953 pVCpu->hm.s.vmx.u32ProcCtls2 = val;
1954 }
1955 else if (RT_UNLIKELY(pVM->hm.s.vmx.fUnrestrictedGuest))
1956 {
1957 LogRel(("hmR0VmxSetupProcCtls: Unrestricted Guest set as true when secondary processor-based VM-execution controls not "
1958 "available\n"));
1959 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
1960 }
1961
1962 return VINF_SUCCESS;
1963}
1964
1965
1966/**
1967 * Sets up miscellaneous (everything other than Pin & Processor-based
1968 * VM-execution) control fields in the VMCS.
1969 *
1970 * @returns VBox status code.
1971 * @param pVM Pointer to the VM.
1972 * @param pVCpu Pointer to the VMCPU.
1973 */
1974static int hmR0VmxSetupMiscCtls(PVM pVM, PVMCPU pVCpu)
1975{
1976 AssertPtr(pVM);
1977 AssertPtr(pVCpu);
1978
1979 int rc = VERR_GENERAL_FAILURE;
1980
1981 /* All fields are zero-initialized during allocation; but don't remove the commented block below. */
1982#if 0
1983 /* All CR3 accesses cause VM-exits. Later we optimize CR3 accesses (see hmR0VmxLoadGuestCR3AndCR4())*/
1984 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0); AssertRCReturn(rc, rc);
1985 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0); AssertRCReturn(rc, rc);
1986
1987 /*
1988 * Set MASK & MATCH to 0. VMX checks whether (GuestPFErrCode & MASK) == MATCH. If they are equal (in our case they
1989 * always are) and the X86_XCPT_PF bit in the exception bitmap is set, a VM-exit occurs; if the bit is clear, it doesn't.
1990 * We thus use the exception bitmap alone to control #PF VM-exits rather than using both mechanisms.
1991 */
1992 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0); AssertRCReturn(rc, rc);
1993 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0); AssertRCReturn(rc, rc);
1994
1995 /** @todo Explore possibility of using IO-bitmaps. */
1996 /* All IO & IOIO instructions cause VM-exits. */
1997 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0); AssertRCReturn(rc, rc);
1998 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0); AssertRCReturn(rc, rc);
1999
2000 /* Initialize the MSR-bitmap area. */
2001 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc);
2002 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0); AssertRCReturn(rc, rc);
2003 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc);
2004#endif
2005
2006#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2007 /* Setup MSR autoloading/storing. */
2008 Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr);
2009 Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf)); /* Lower 4 bits MBZ. */
2010 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
2011 AssertRCReturn(rc, rc);
2012 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
2013 AssertRCReturn(rc, rc);
2014
2015 Assert(pVCpu->hm.s.vmx.HCPhysHostMsr);
2016 Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf)); /* Lower 4 bits MBZ. */
2017 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr);
2018 AssertRCReturn(rc, rc);
2019#endif
2020
2021 /* Set VMCS link pointer. Reserved for future use, must be -1. Intel spec. 24.4 "Guest-State Area". */
2022 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, UINT64_C(0xffffffffffffffff));
2023 AssertRCReturn(rc, rc);
2024
2025 /* All fields are zero-initialized during allocation; but don't remove the commented block below. */
2026#if 0
2027 /* Setup debug controls */
2028 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0); /** @todo We don't support IA32_DEBUGCTL MSR. Should we? */
2029 AssertRCReturn(rc, rc);
2030 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0);
2031 AssertRCReturn(rc, rc);
2032#endif
2033
2034 return rc;
2035}
2036
2037
2038/**
2039 * Sets up the initial exception bitmap in the VMCS based on static conditions
2040 * (i.e. conditions that cannot ever change after starting the VM).
2041 *
2042 * @returns VBox status code.
2043 * @param pVM Pointer to the VM.
2044 * @param pVCpu Pointer to the VMCPU.
2045 */
2046static int hmR0VmxInitXcptBitmap(PVM pVM, PVMCPU pVCpu)
2047{
2048 AssertPtr(pVM);
2049 AssertPtr(pVCpu);
2050
2051 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
2052
2053 uint32_t u32XcptBitmap = 0;
2054
2055 /* Without Nested Paging, #PF must cause a VM-exit so we can sync our shadow page tables. */
2056 if (!pVM->hm.s.fNestedPaging)
2057 u32XcptBitmap |= RT_BIT(X86_XCPT_PF);
2058
2059 pVCpu->hm.s.vmx.u32XcptBitmap = u32XcptBitmap;
2060 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32XcptBitmap);
2061 AssertRCReturn(rc, rc);
2062 return rc;
2063}
2064
2065
2066/**
2067 * Sets up the initial guest-state mask. The guest-state mask is consulted
2068 * before reading guest-state fields from the VMCS, as VMREADs can be expensive
2069 * in the nested-virtualization case (each one would cause a VM-exit).
2070 *
2071 * @param pVCpu Pointer to the VMCPU.
2072 */
2073static int hmR0VmxInitUpdatedGuestStateMask(PVMCPU pVCpu)
2074{
2075 /* Initially the guest-state is up-to-date as there is nothing in the VMCS. */
2076 pVCpu->hm.s.vmx.fUpdatedGuestState = HMVMX_UPDATED_GUEST_ALL;
2077 return VINF_SUCCESS;
2078}
2079
2080
2081/**
2082 * Does per-VM VT-x initialization.
2083 *
2084 * @returns VBox status code.
2085 * @param pVM Pointer to the VM.
2086 */
2087VMMR0DECL(int) VMXR0InitVM(PVM pVM)
2088{
2089 LogFlowFunc(("pVM=%p\n", pVM));
2090
2091 int rc = hmR0VmxStructsAlloc(pVM);
2092 if (RT_FAILURE(rc))
2093 {
2094 LogRel(("VMXR0InitVM: hmR0VmxStructsAlloc failed! rc=%Rrc\n", rc));
2095 return rc;
2096 }
2097
2098 return VINF_SUCCESS;
2099}
2100
2101
2102/**
2103 * Does per-VM VT-x termination.
2104 *
2105 * @returns VBox status code.
2106 * @param pVM Pointer to the VM.
2107 */
2108VMMR0DECL(int) VMXR0TermVM(PVM pVM)
2109{
2110 LogFlowFunc(("pVM=%p\n", pVM));
2111
2112#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2113 if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
2114 ASMMemZero32(pVM->hm.s.vmx.pvScratch, PAGE_SIZE);
2115#endif
2116 hmR0VmxStructsFree(pVM);
2117 return VINF_SUCCESS;
2118}
2119
2120
2121/**
2122 * Sets up the VM for execution under VT-x.
2123 * This function is only called once per-VM during initialization.
2124 *
2125 * @returns VBox status code.
2126 * @param pVM Pointer to the VM.
2127 */
2128VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
2129{
2130 AssertPtrReturn(pVM, VERR_INVALID_PARAMETER);
2131 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2132
2133 LogFlowFunc(("pVM=%p\n", pVM));
2134
2135 /*
2136 * Without UnrestrictedGuest, pRealModeTSS and pNonPagingModeEPTPageTable *must* always be allocated.
2137 * We no longer support the highly unlikely case of UnrestrictedGuest without pRealModeTSS. See hmR3InitFinalizeR0().
2138 */
2139 /* -XXX- change hmR3InitFinalizeR0Intel() to fail if pRealModeTSS alloc fails. */
2140 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
2141 && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable
2142 || !pVM->hm.s.vmx.pRealModeTSS))
2143 {
2144 LogRel(("VMXR0SetupVM: invalid real-on-v86 state.\n"));
2145 return VERR_INTERNAL_ERROR;
2146 }
2147
2148#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2149 /*
2150 * This is for the darwin 32-bit/PAE kernels trying to execute 64-bit guests. We don't bother with
2151 * the 32<->64 switcher in this case. This is a rare, legacy use-case with barely any test coverage.
2152 */
2153 if ( pVM->hm.s.fAllow64BitGuests
2154 && !HMVMX_IS_64BIT_HOST_MODE())
2155 {
2156 LogRel(("VMXR0SetupVM: Unsupported guest and host paging mode combination.\n"));
2157 return VERR_PGM_UNSUPPORTED_HOST_PAGING_MODE;
2158 }
2159#endif
2160
2161 /* Initialize these always, see hmR3InitFinalizeR0().*/
2162 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NONE;
2163 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NONE;
2164
2165 /* Setup the tagged-TLB flush handlers. */
2166 int rc = hmR0VmxSetupTaggedTlb(pVM);
2167 if (RT_FAILURE(rc))
2168 {
2169 LogRel(("VMXR0SetupVM: hmR0VmxSetupTaggedTlb failed! rc=%Rrc\n", rc));
2170 return rc;
2171 }
2172
2173 for (VMCPUID i = 0; i < pVM->cCpus; i++)
2174 {
2175 PVMCPU pVCpu = &pVM->aCpus[i];
2176 AssertPtr(pVCpu);
2177 AssertPtr(pVCpu->hm.s.vmx.pvVmcs);
2178
2179 /* Log the VCPU pointers, useful for debugging SMP VMs. */
2180 Log4(("VMXR0SetupVM: pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu));
2181
2182 /* Set revision dword at the beginning of the VMCS structure. */
2183 *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.Msrs.u64BasicInfo);
2184
2185 /* Initialize our VMCS region in memory, set the VMCS launch state to "clear". */
2186 rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2187 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2188 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2189
2190 /* Load this VMCS as the current VMCS. */
2191 rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2192 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXActivateVmcs failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2193 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2194
2195 rc = hmR0VmxSetupPinCtls(pVM, pVCpu);
2196 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupPinCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2197 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2198
2199 rc = hmR0VmxSetupProcCtls(pVM, pVCpu);
2200 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupProcCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2201 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2202
2203 rc = hmR0VmxSetupMiscCtls(pVM, pVCpu);
2204 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupMiscCtls failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2205 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2206
2207 rc = hmR0VmxInitXcptBitmap(pVM, pVCpu);
2208 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitXcptBitmap failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2209 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2210
2211 rc = hmR0VmxInitUpdatedGuestStateMask(pVCpu);
2212 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitUpdatedGuestStateMask failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2213 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2214
2215#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2216 rc = hmR0VmxInitVmcsReadCache(pVM, pVCpu);
2217 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitVmcsReadCache failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2218 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2219#endif
2220
2221 /* Re-sync the CPU's internal data into our VMCS memory region & reset the launch state to "clear". */
2222 rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
2223 AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs(2) failed! rc=%Rrc (pVM=%p)\n", rc, pVM),
2224 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc), rc);
2225
2226 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
2227
2228 hmR0VmxUpdateErrorRecord(pVM, pVCpu, rc);
2229 }
2230
2231 return VINF_SUCCESS;
2232}
2233
2234
2235/**
2236 * Saves the host control registers (CR0, CR3, CR4) into the host-state area in
2237 * the VMCS.
2238 *
2239 * @returns VBox status code.
2240 * @param pVM Pointer to the VM.
2241 * @param pVCpu Pointer to the VMCPU.
2242 */
2243DECLINLINE(int) hmR0VmxSaveHostControlRegs(PVM pVM, PVMCPU pVCpu)
2244{
2245 RTCCUINTREG uReg = ASMGetCR0();
2246 int rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR0, uReg);
2247 AssertRCReturn(rc, rc);
2248
2249#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2250 /* For the darwin 32-bit hybrid kernel, we need the 64-bit CR3 as it uses 64-bit paging. */
2251 if (HMVMX_IS_64BIT_HOST_MODE())
2252 {
2253 uint64_t uRegCR3 = HMR0Get64bitCR3();
2254 rc = VMXWriteVmcs64(VMX_VMCS_HOST_CR3, uRegCR3);
2255 }
2256 else
2257#endif
2258 {
2259 uReg = ASMGetCR3();
2260 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR3, uReg);
2261 }
2262 AssertRCReturn(rc, rc);
2263
2264 uReg = ASMGetCR4();
2265 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR4, uReg);
2266 AssertRCReturn(rc, rc);
2267 return rc;
2268}
2269
2270
2271#if HC_ARCH_BITS == 64
2272/**
2273 * Macro for adjusting host segment selectors to satisfy VT-x's VM-entry
2274 * requirements. See hmR0VmxSaveHostSegmentRegs().
2275 */
2276# define VMXLOCAL_ADJUST_HOST_SEG(seg, selValue) \
2277 if ((selValue) & (X86_SEL_RPL | X86_SEL_LDT)) \
2278 { \
2279 bool fValidSelector = true; \
2280 if ((selValue) & X86_SEL_LDT) \
2281 { \
2282 uint32_t uAttr = ASMGetSegAttr((selValue)); \
2283 fValidSelector = RT_BOOL(uAttr != ~0U && (uAttr & X86_DESC_P)); \
2284 } \
2285 if (fValidSelector) \
2286 { \
2287 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##seg; \
2288 pVCpu->hm.s.vmx.RestoreHost.uHostSel##seg = (selValue); \
2289 } \
2290 (selValue) = 0; \
2291 }
2292#endif
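/*
 * Example of what the macro above does (the selector value is illustrative only): a host DS of 0x002b has
 * RPL = 3, which the VM-entry checks reject for host selector fields (RPL and TI must be 0). The macro
 * therefore remembers 0x002b in RestoreHost.uHostSelDS, sets VMX_RESTORE_HOST_SEL_DS so the ring-0 restore
 * path reloads it after the VM-exit, and hands a zero selector to the VMCS instead.
 */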
2293
2294
2295/**
2296 * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into
2297 * the host-state area in the VMCS.
2298 *
2299 * @returns VBox status code.
2300 * @param pVM Pointer to the VM.
2301 * @param pVCpu Pointer to the VMCPU.
2302 */
2303DECLINLINE(int) hmR0VmxSaveHostSegmentRegs(PVM pVM, PVMCPU pVCpu)
2304{
2305 int rc = VERR_INTERNAL_ERROR_5;
2306
2307 /*
2308 * Host DS, ES, FS and GS segment registers.
2309 */
2310#if HC_ARCH_BITS == 64
2311 RTSEL uSelDS = ASMGetDS();
2312 RTSEL uSelES = ASMGetES();
2313 RTSEL uSelFS = ASMGetFS();
2314 RTSEL uSelGS = ASMGetGS();
2315#else
2316 RTSEL uSelDS = 0;
2317 RTSEL uSelES = 0;
2318 RTSEL uSelFS = 0;
2319 RTSEL uSelGS = 0;
2320#endif
2321
2322 /* Recalculate which host-state bits need to be manually restored. */
2323 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
2324
2325 /*
2326 * Host CS and SS segment registers.
2327 */
2328#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2329 RTSEL uSelCS;
2330 RTSEL uSelSS;
2331 if (HMVMX_IS_64BIT_HOST_MODE())
2332 {
2333 uSelCS = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
2334 uSelSS = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
2335 }
2336 else
2337 {
2338 /* Seems darwin uses the LDT (TI flag is set) in the CS & SS selectors which VT-x doesn't like. */
2339 uSelCS = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
2340 uSelSS = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
2341 }
2342#else
2343 RTSEL uSelCS = ASMGetCS();
2344 RTSEL uSelSS = ASMGetSS();
2345#endif
2346
2347 /*
2348 * Host TR segment register.
2349 */
2350 RTSEL uSelTR = ASMGetTR();
2351
2352#if HC_ARCH_BITS == 64
2353 /*
2354 * Determine if the host segment registers are suitable for VT-x. Otherwise load zero to satisfy the VM-entry checks and restore them
2355 * before we get preempted. See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers".
2356 */
2357 VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS);
2358 VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES);
2359 VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS);
2360 VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS);
2361# undef VMXLOCAL_ADJUST_HOST_SEG
2362#endif
2363
2364 /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */
2365 Assert(!(uSelCS & X86_SEL_RPL)); Assert(!(uSelCS & X86_SEL_LDT));
2366 Assert(!(uSelSS & X86_SEL_RPL)); Assert(!(uSelSS & X86_SEL_LDT));
2367 Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT));
2368 Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT));
2369 Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT));
2370 Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT));
2371 Assert(!(uSelTR & X86_SEL_RPL)); Assert(!(uSelTR & X86_SEL_LDT));
2372 Assert(uSelCS);
2373 Assert(uSelTR);
2374
2375 /* Assertion is right but we would not have updated u32ExitCtls yet. */
2376#if 0
2377 if (!(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE))
2378 Assert(uSelSS != 0);
2379#endif
2380
2381 /* Write these host selector fields into the host-state area in the VMCS. */
2382 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_CS, uSelCS); AssertRCReturn(rc, rc);
2383 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_SS, uSelSS); AssertRCReturn(rc, rc);
2384#if HC_ARCH_BITS == 64
2385 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_DS, uSelDS); AssertRCReturn(rc, rc);
2386 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_ES, uSelES); AssertRCReturn(rc, rc);
2387 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_FS, uSelFS); AssertRCReturn(rc, rc);
2388 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_GS, uSelGS); AssertRCReturn(rc, rc);
2389#endif
2390 rc = VMXWriteVmcs32(VMX_VMCS16_HOST_FIELD_TR, uSelTR); AssertRCReturn(rc, rc);
2391
2392 /*
2393 * Host GDTR and IDTR.
2394 */
2395 RTGDTR Gdtr;
2396 RT_ZERO(Gdtr);
2397#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2398 if (HMVMX_IS_64BIT_HOST_MODE())
2399 {
2400 X86XDTR64 Gdtr64;
2401 X86XDTR64 Idtr64;
2402 HMR0Get64bitGdtrAndIdtr(&Gdtr64, &Idtr64);
2403 rc = VMXWriteVmcs64(VMX_VMCS_HOST_GDTR_BASE, Gdtr64.uAddr); AssertRCReturn(rc, rc);
2404 rc = VMXWriteVmcs64(VMX_VMCS_HOST_IDTR_BASE, Idtr64.uAddr); AssertRCReturn(rc, rc);
2405
2406 Gdtr.cbGdt = Gdtr64.cb;
2407 Gdtr.pGdt = (uintptr_t)Gdtr64.uAddr;
2408 }
2409 else
2410#endif
2411 {
2412 RTIDTR Idtr;
2413 ASMGetGDTR(&Gdtr);
2414 ASMGetIDTR(&Idtr);
2415 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, Gdtr.pGdt); AssertRCReturn(rc, rc);
2416 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, Idtr.pIdt); AssertRCReturn(rc, rc);
2417
2418#if HC_ARCH_BITS == 64
2419 /*
2420 * Determine if we need to manually restore the GDTR and IDTR limits, as VT-x zaps them to the
2421 * maximum limit (0xffff) on every VM-exit.
2422 */
2423 if (Gdtr.cbGdt != 0xffff)
2424 {
2425 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR;
2426 AssertCompile(sizeof(Gdtr) == sizeof(X86XDTR64));
2427 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64));
2428 }
2429
2430 /*
2431 * The IDT limit is effectively 0xfff (256 vectors of 16 bytes each). Therefore, if the host limit is 0xfff, VT-x bloating
2432 * the limit to 0xffff is not a problem as the extra range cannot be reached anyway. See Intel spec. 6.14.1 "64-Bit Mode IDT" and
2433 * Intel spec. 6.2 "Exception and Interrupt Vectors".
2434 */
2435 if (Idtr.cbIdt < 0x0fff)
2436 {
2437 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR;
2438 AssertCompile(sizeof(Idtr) == sizeof(X86XDTR64));
2439 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostIdtr, &Idtr, sizeof(X86XDTR64));
2440 }
2441#endif
2442 }
2443
2444 /*
2445 * Host TR base. Verify that the TR selector doesn't point past the GDT. Masking off the TI and RPL bits (the low 3 bits)
2446 * yields the descriptor's byte offset into the GDT, which is what the CPU's "scaling by 8" of the index amounts to. TI is always 0 and RPL should be too in most cases.
2447 */
2448 if ((uSelTR & X86_SEL_MASK) > Gdtr.cbGdt)
2449 {
2450 AssertMsgFailed(("hmR0VmxSaveHostSegmentRegs: TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, Gdtr.cbGdt));
2451 return VERR_VMX_INVALID_HOST_STATE;
2452 }
2453
2454 PCX86DESCHC pDesc = (PCX86DESCHC)(Gdtr.pGdt + (uSelTR & X86_SEL_MASK));
2455#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2456 if (HMVMX_IS_64BIT_HOST_MODE())
2457 {
2458 /* We need the 64-bit TR base for hybrid darwin. */
2459 uint64_t u64TRBase = X86DESC64_BASE((PX86DESC64)pDesc);
2460 rc = VMXWriteVmcs64(VMX_VMCS_HOST_TR_BASE, u64TRBase);
2461 }
2462 else
2463#endif
2464 {
2465 uintptr_t uTRBase;
2466#if HC_ARCH_BITS == 64
2467 uTRBase = X86DESC64_BASE(pDesc);
2468
2469 /*
2470 * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on all VM-exits.
2471 * The type is the same for 64-bit busy TSS[1]. The limit needs manual restoration if the host has something else.
2472 * Task switching is not supported in 64-bit mode[2], but the limit still matters as IOPM is supported in 64-bit mode.
2473 * Restoring the limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0.
2474 *
2475 * [1] See Intel spec. 3.5 "System Descriptor Types".
2476 * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode".
2477 */
2478 Assert(pDesc->System.u4Type == 11);
2479 if ( pDesc->System.u16LimitLow != 0x67
2480 || pDesc->System.u4LimitHigh)
2481 {
2482 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR;
2483 pVCpu->hm.s.vmx.RestoreHost.uHostSelTR = uSelTR;
2484
2485 /* Store the GDTR here as we need it while restoring TR. */
2486 memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64));
2487 }
2488#else
2489 uTRBase = X86DESC_BASE(pDesc);
2490#endif
2491 rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_TR_BASE, uTRBase);
2492 }
2493 AssertRCReturn(rc, rc);
2494
2495 /*
2496 * Host FS base and GS base.
2497 */
2498#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2499 if (HMVMX_IS_64BIT_HOST_MODE())
2500 {
2501 uint64_t u64FSBase = ASMRdMsr(MSR_K8_FS_BASE);
2502 uint64_t u64GSBase = ASMRdMsr(MSR_K8_GS_BASE);
2503 rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, u64FSBase); AssertRCReturn(rc, rc);
2504 rc = VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, u64GSBase); AssertRCReturn(rc, rc);
2505
2506# if HC_ARCH_BITS == 64
2507 /* Store the base if we have to restore FS or GS manually as we need to restore the base as well. */
2508 if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_FS)
2509 pVCpu->hm.s.vmx.RestoreHost.uHostFSBase = u64FSBase;
2510 if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_GS)
2511 pVCpu->hm.s.vmx.RestoreHost.uHostGSBase = u64GSBase;
2512# endif
2513 }
2514#endif
2515 return rc;
2516}
2517
2518
2519/**
2520 * Saves certain host MSRs in the VM-Exit MSR-load area and some in the
2521 * host-state area of the VMCS. These MSRs will be automatically restored on
2522 * the host after every successful VM-exit.
2523 *
2524 * @returns VBox status code.
2525 * @param pVM Pointer to the VM.
2526 * @param pVCpu Pointer to the VMCPU.
2527 */
2528DECLINLINE(int) hmR0VmxSaveHostMsrs(PVM pVM, PVMCPU pVCpu)
2529{
2530 AssertPtr(pVCpu);
2531 AssertPtr(pVCpu->hm.s.vmx.pvHostMsr);
2532
2533 int rc = VINF_SUCCESS;
2534#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2535 PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr;
2536 uint32_t cHostMsrs = 0;
2537 uint32_t u32HostExtFeatures = pVM->hm.s.cpuid.u32AMDFeatureEDX;
2538
2539 if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2540 {
2541 uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER);
2542
2543# if HC_ARCH_BITS == 64
2544 /* Paranoia. 64-bit code requires these bits to be set always. */
2545 Assert((u64HostEfer & (MSR_K6_EFER_LMA | MSR_K6_EFER_LME)) == (MSR_K6_EFER_LMA | MSR_K6_EFER_LME));
2546
2547 /*
2548 * We currently do not save/restore host EFER, we just make sure it doesn't get modified by VT-x operation.
2549 * All guest accesses (read, write) on EFER cause VM-exits. If we are to conditionally load the guest EFER for
2550 * some reason (e.g. allow transparent reads) we would activate the code below.
2551 */
2552# if 0
2553 /* All our supported 64-bit host platforms must have NXE bit set. Otherwise we can change the below code to save EFER. */
2554 Assert(u64HostEfer & (MSR_K6_EFER_NXE));
2555 /* The SCE bit is only applicable in 64-bit mode. Save EFER if it doesn't match what the guest has.
2556 See Intel spec. 30.10.4.3 "Handling the SYSCALL and SYSRET Instructions". */
2557 if (CPUMIsGuestInLongMode(pVCpu))
2558 {
2559 uint64_t u64GuestEfer;
2560 rc = CPUMQueryGuestMsr(pVCpu, MSR_K6_EFER, &u64GuestEfer);
2561 AssertRC(rc);
2562
2563 if ((u64HostEfer & MSR_K6_EFER_SCE) != (u64GuestEfer & MSR_K6_EFER_SCE))
2564 {
2565 pHostMsr->u32Msr = MSR_K6_EFER;
2566 pHostMsr->u32Reserved = 0;
2567 pHostMsr->u64Value = u64HostEfer;
2568 pHostMsr++; cHostMsrs++;
2569 }
2570 }
2571# endif
2572# else /* HC_ARCH_BITS != 64 */
2573 pHostMsr->u32Msr = MSR_K6_EFER;
2574 pHostMsr->u32Reserved = 0;
2575# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2576 if (CPUMIsGuestInLongMode(pVCpu))
2577 {
2578 /* Must match the EFER value in our 64 bits switcher. */
2579 pHostMsr->u64Value = u64HostEfer | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
2580 }
2581 else
2582# endif
2583 pHostMsr->u64Value = u64HostEfer;
2584 pHostMsr++; cHostMsrs++;
2585# endif /* HC_ARCH_BITS == 64 */
2586 }
2587
2588# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2589 if (HMVMX_IS_64BIT_HOST_MODE())
2590 {
2591 pHostMsr->u32Msr = MSR_K6_STAR;
2592 pHostMsr->u32Reserved = 0;
2593 pHostMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
2594 pHostMsr++; cHostMsrs++;
2595 pHostMsr->u32Msr = MSR_K8_LSTAR;
2596 pHostMsr->u32Reserved = 0;
2597 pHostMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64-bit mode syscall rip */
2598 pHostMsr++; cHostMsrs++;
2599 pHostMsr->u32Msr = MSR_K8_SF_MASK;
2600 pHostMsr->u32Reserved = 0;
2601 pHostMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
2602 pHostMsr++; cHostMsrs++;
2603 pHostMsr->u32Msr = MSR_K8_KERNEL_GS_BASE;
2604 pHostMsr->u32Reserved = 0;
2605 pHostMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
2606 pHostMsr++; cHostMsrs++;
2607 }
2608# endif
2609
2610 /* Host TSC AUX MSR must be restored since we always load/store guest TSC AUX MSR. */
2611 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
2612 {
2613 pHostMsr->u32Msr = MSR_K8_TSC_AUX;
2614 pHostMsr->u32Reserved = 0;
2615 pHostMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX);
2616 pHostMsr++; cHostMsrs++;
2617 }
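 /*
  * Each entry appended above uses the VM-exit MSR-load area layout (MSR index, reserved dword, 64-bit value);
  * the CPU reloads every listed MSR with the stored value as part of the VM-exit, which is how MSRs like STAR,
  * LSTAR, SF_MASK, KERNEL_GS_BASE and TSC_AUX get their host values back without explicit WRMSRs on our side.
  */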
2618
2619 /* Shouldn't ever happen, but there -is- an architectural limit; we're well within the recommended maximum of 512 MSRs. */
2620 if (RT_UNLIKELY(cHostMsrs > MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc)))
2621 {
2622 LogRel(("cHostMsrs=%u Cpu=%u\n", cHostMsrs, (unsigned)MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc)));
2623 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_HOST_MSR_STORAGE;
2624 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2625 }
2626
2627 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cHostMsrs);
2628#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2629
2630 /*
2631 * Host Sysenter MSRs.
2632 */
2633 rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
2634 AssertRCReturn(rc, rc);
2635#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2636 if (HMVMX_IS_64BIT_HOST_MODE())
2637 {
2638 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
2639 AssertRCReturn(rc, rc);
2640 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
2641 }
2642 else
2643 {
2644 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
2645 AssertRCReturn(rc, rc);
2646 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
2647 }
2648#elif HC_ARCH_BITS == 32
2649 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
2650 AssertRCReturn(rc, rc);
2651 rc = VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
2652#else
2653 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
2654 AssertRCReturn(rc, rc);
2655 rc = VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
2656#endif
2657 AssertRCReturn(rc, rc);
2658
2659 /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT, IA32_EFER, also see
2660 * hmR0VmxSetupExitCtls() !! */
2661 return rc;
2662}
2663
2664
2665/**
2666 * Sets up VM-entry controls in the VMCS. These controls can affect things done
2667 * on VM-exit; e.g. "load debug controls", see Intel spec. 24.8.1 "VM-entry
2668 * controls".
2669 *
2670 * @returns VBox status code.
2671 * @param pVCpu Pointer to the VMCPU.
2672 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2673 * out-of-sync. Make sure to update the required fields
2674 * before using them.
2675 *
2676 * @remarks No-long-jump zone!!!
2677 */
2678DECLINLINE(int) hmR0VmxLoadGuestEntryCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2679{
2680 int rc = VINF_SUCCESS;
2681 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_VMX_ENTRY_CTLS))
2682 {
2683 PVM pVM = pVCpu->CTX_SUFF(pVM);
2684 uint32_t val = pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0; /* Bits set here must be set in the VMCS. */
2685 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxEntry.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2686
2687 /* Load debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x capable CPUs only supported the 1-setting of this bit. */
2688 val |= VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG;
2689
2690 /* Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry. */
2691 if (CPUMIsGuestInLongModeEx(pMixedCtx))
2692 val |= VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST;
2693 else
2694 Assert(!(val & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST));
2695
2696 /*
2697 * The following should -not- be set (since we're not in SMM mode):
2698 * - VMX_VMCS_CTRL_ENTRY_ENTRY_SMM
2699 * - VMX_VMCS_CTRL_ENTRY_DEACTIVATE_DUALMON
2700 */
2701
2702 /** @todo VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR,
2703 * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR,
2704 * VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR */
2705
2706 if ((val & zap) != val)
2707 {
2708 LogRel(("hmR0VmxLoadGuestEntryCtls: invalid VM-entry controls combo! cpu=%RX64 val=%RX64 zap=%RX64\n",
2709 pVM->hm.s.vmx.Msrs.VmxEntry.n.disallowed0, val, zap));
2710 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY;
2711 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2712 }
2713
2714 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, val);
2715 AssertRCReturn(rc, rc);
2716
2717 /* Update VCPU with the currently set VM-entry controls. */
2718 pVCpu->hm.s.vmx.u32EntryCtls = val;
2719 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_VMX_ENTRY_CTLS);
2720 }
2721 return rc;
2722}
2723
2724
2725/**
2726 * Sets up the VM-exit controls in the VMCS.
2727 *
2728 * @returns VBox status code.
2729 *
2730 * @param pVCpu Pointer to the VMCPU.
2731 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2732 * out-of-sync. Make sure to update the required fields
2733 * before using them.
2734 *
2735 * @remarks requires EFER.
2736 */
2737DECLINLINE(int) hmR0VmxLoadGuestExitCtls(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2738{
2739 int rc = VINF_SUCCESS;
2740 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_VMX_EXIT_CTLS))
2741 {
2742 PVM pVM = pVCpu->CTX_SUFF(pVM);
2743 uint32_t val = pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0; /* Bits set here must be set in the VMCS. */
2744 uint32_t zap = pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */
2745
2746 /* Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only supported the 1-setting of this bit. */
2747 val |= VMX_VMCS_CTRL_EXIT_SAVE_DEBUG;
2748
2749 /*
2750 * Set the host long mode active (EFER.LMA) bit (which Intel calls "Host address-space size") if necessary.
2751 * On VM-exit, VT-x sets both the host EFER.LMA and EFER.LME bit to this value. See assertion in hmR0VmxSaveHostMsrs().
2752 */
2753#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2754 if (HMVMX_IS_64BIT_HOST_MODE())
2755 val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE;
2756 else
2757 Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE));
2758#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
2759 if (CPUMIsGuestInLongModeEx(pMixedCtx))
2760 val |= VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE; /* The switcher goes to long mode. */
2761 else
2762 Assert(!(val & VMX_VMCS_CTRL_EXIT_HOST_ADDR_SPACE_SIZE));
2763#endif
2764
2765 /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */
2766 Assert(!(val & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT));
2767
2768 /** @todo VMX_VMCS_CTRL_EXIT_LOAD_PERF_MSR,
2769 * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_PAT_MSR,
2770 * VMX_VMCS_CTRL_EXIT_LOAD_HOST_PAT_MSR,
2771 * VMX_VMCS_CTRL_EXIT_SAVE_GUEST_EFER_MSR,
2772 * VMX_VMCS_CTRL_EXIT_LOAD_HOST_EFER_MSR. */
2773
2774 if (pVM->hm.s.vmx.Msrs.VmxExit.n.allowed1 & VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER)
2775 val |= VMX_VMCS_CTRL_EXIT_SAVE_VMX_PREEMPT_TIMER;
2776
2777 if ((val & zap) != val)
2778 {
2779 LogRel(("hmR0VmxSetupProcCtls: invalid VM-exit controls combo! cpu=%RX64 val=%RX64 zap=%RX64\n",
2780 pVM->hm.s.vmx.Msrs.VmxExit.n.disallowed0, val, zap));
2781 pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT;
2782 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
2783 }
2784
2785 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, val);
2786 AssertRCReturn(rc, rc);
2787
2788 /* Update VCPU with the currently set VM-exit controls. */
2789 pVCpu->hm.s.vmx.u32ExitCtls = val;
2790 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_VMX_EXIT_CTLS);
2791 }
2792 return rc;
2793}
2794
2795
2796/**
2797 * Loads the guest APIC and related state.
2798 *
2799 * @returns VBox status code.
2800 *
2801 * @param pVCpu Pointer to the VMCPU.
2802 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2803 * out-of-sync. Make sure to update the required fields
2804 * before using them.
2805 */
2806DECLINLINE(int) hmR0VmxLoadGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2807{
2808 int rc = VINF_SUCCESS;
2809 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE))
2810 {
2811 /* Setup TPR shadowing. Also setup TPR patching for 32-bit guests. */
2812 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
2813 {
2814 Assert(pVCpu->hm.s.vmx.HCPhysVirtApic);
2815
2816 bool fPendingIntr = false;
2817 uint8_t u8Tpr = 0;
2818 uint8_t u8PendingIntr = 0;
2819 rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr);
2820 AssertRCReturn(rc, rc);
2821
2822 /*
2823 * If there are external interrupts pending but masked by the TPR value, instruct VT-x to cause a VM-exit when
2824 * the guest lowers its TPR below the highest-priority pending interrupt and we can deliver the interrupt.
2825 * If there are no external interrupts pending, set threshold to 0 to not cause a VM-exit. We will eventually deliver
2826 * the interrupt when we VM-exit for other reasons.
2827 */
2828 pVCpu->hm.s.vmx.pbVirtApic[0x80] = u8Tpr; /* Offset 0x80 is TPR in the APIC MMIO range. */
2829 uint32_t u32TprThreshold = 0;
2830 if (fPendingIntr)
2831 {
2832 /* Bits 3-0 of the TPR threshold field correspond to bits 7-4 of the TPR (which is the Task-Priority Class). */
2833 const uint8_t u8PendingPriority = (u8PendingIntr >> 4);
2834 const uint8_t u8TprPriority = (u8Tpr >> 4) & 7;
2835 if (u8PendingPriority <= u8TprPriority)
2836 u32TprThreshold = u8PendingPriority;
2837 else
2838 u32TprThreshold = u8TprPriority; /* Required for Vista 64-bit guest, see @bugref{6398}. */
2839 }
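 /*
  * Worked example with made-up values: u8Tpr = 0x50 and u8PendingIntr = 0x41 give a TPR priority class of 5
  * and a pending priority class of 4. The pending interrupt is masked by the TPR, so the threshold becomes 4;
  * a VM-exit is then taken as soon as the guest drops its TPR class below 4, which is exactly the point where
  * the class-4 interrupt becomes deliverable. With nothing pending, the threshold stays 0 and CR8 writes never
  * cause a threshold-based VM-exit.
  */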
2840 Assert(!(u32TprThreshold & 0xfffffff0)); /* Bits 31:4 MBZ. */
2841
2842 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold);
2843 AssertRCReturn(rc, rc);
2844 }
2845
2846 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
2847 }
2848 return rc;
2849}
2850
2851
2852/**
2853 * Gets the guest's interruptibility-state ("interrupt shadow" as AMD calls it).
2854 *
2855 * @returns Guest's interruptibility-state.
2856 * @param pVCpu Pointer to the VMCPU.
2857 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2858 * out-of-sync. Make sure to update the required fields
2859 * before using them.
2860 *
2861 * @remarks No-long-jump zone!!!
2862 * @remarks May clear the VMCPU_FF_INHIBIT_INTERRUPTS force-flag as a side-effect.
2863 */
2864DECLINLINE(uint32_t) hmR0VmxGetGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2865{
2866 /*
2867 * Instructions like STI and MOV SS inhibit interrupts until the next instruction completes. Check if we should
2868 * inhibit interrupts or clear any existing interrupt inhibition.
2869 */
2870 uint32_t uIntrState = 0;
2871 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2872 {
2873 /* If inhibition is active, RIP & RFLAGS should've been accessed (i.e. read previously from the VMCS or from ring-3). */
2874 AssertMsg((pVCpu->hm.s.vmx.fUpdatedGuestState & (HMVMX_UPDATED_GUEST_RIP | HMVMX_UPDATED_GUEST_RFLAGS))
2875 == (HMVMX_UPDATED_GUEST_RIP | HMVMX_UPDATED_GUEST_RFLAGS), ("%#x\n", pVCpu->hm.s.vmx.fUpdatedGuestState));
2876 if (pMixedCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2877 {
2878 /*
2879 * We can clear the inhibit force-flag: even if we go back to the recompiler without executing guest code in
2880 * VT-x, the condition for clearing the flag has been met, so the cleared state is correct.
2881 */
2882 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2883 }
2884 else if (pMixedCtx->eflags.Bits.u1IF)
2885 uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI;
2886 else
2887 uIntrState = VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS;
2888 }
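 /*
  * Illustration: if the guest has just executed STI while RFLAGS.IF was previously clear, one more instruction
  * must retire before external interrupts may be delivered. Returning BLOCK_STI here makes the CPU enforce that
  * one-instruction shadow on VM-entry; BLOCK_MOVSS covers the analogous shadow after MOV SS / POP SS.
  */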
2889 return uIntrState;
2890}
2891
2892
2893/**
2894 * Loads the guest's interruptibility-state into the guest-state area in the
2895 * VMCS.
2896 *
2897 * @returns VBox status code.
2898 * @param pVCpu Pointer to the VMCPU.
2899 * @param uIntrState The interruptibility-state to set.
2900 */
2901static int hmR0VmxLoadGuestIntrState(PVMCPU pVCpu, uint32_t uIntrState)
2902{
2903 AssertMsg(!(uIntrState & 0xfffffff0), ("%#x\n", uIntrState)); /* Bits 31:4 MBZ. */
2904 Assert((uIntrState & 0x3) != 0x3); /* Block-by-STI and MOV SS cannot be simultaneously set. */
2905 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, uIntrState);
2906 AssertRCReturn(rc, rc);
2907 return rc;
2908}
2909
2910
2911/**
2912 * Loads the guest's RIP into the guest-state area in the VMCS.
2913 *
2914 * @returns VBox status code.
2915 * @param pVCpu Pointer to the VMCPU.
2916 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2917 * out-of-sync. Make sure to update the required fields
2918 * before using them.
2919 *
2920 * @remarks No-long-jump zone!!!
2921 */
2922static int hmR0VmxLoadGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2923{
2924 int rc = VINF_SUCCESS;
2925 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RIP))
2926 {
2927 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RIP, pMixedCtx->rip);
2928 AssertRCReturn(rc, rc);
2929
2930 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_RIP);
2931 Log4(("Load: VMX_VMCS_GUEST_RIP=%#RX64 fContextUseFlags=%#RX32\n", pMixedCtx->rip, VMCPU_HMCF_VALUE(pVCpu)));
2932 }
2933 return rc;
2934}
2935
2936
2937/**
2938 * Loads the guest's RSP into the guest-state area in the VMCS.
2939 *
2940 * @returns VBox status code.
2941 * @param pVCpu Pointer to the VMCPU.
2942 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2943 * out-of-sync. Make sure to update the required fields
2944 * before using them.
2945 *
2946 * @remarks No-long-jump zone!!!
2947 */
2948static int hmR0VmxLoadGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2949{
2950 int rc = VINF_SUCCESS;
2951 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RSP))
2952 {
2953 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RSP, pMixedCtx->rsp);
2954 AssertRCReturn(rc, rc);
2955
2956 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_RSP);
2957 Log4(("Load: VMX_VMCS_GUEST_RSP=%#RX64\n", pMixedCtx->rsp));
2958 }
2959 return rc;
2960}
2961
2962
2963/**
2964 * Loads the guest's RFLAGS into the guest-state area in the VMCS.
2965 *
2966 * @returns VBox status code.
2967 * @param pVCpu Pointer to the VMCPU.
2968 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2969 * out-of-sync. Make sure to update the required fields
2970 * before using them.
2971 *
2972 * @remarks No-long-jump zone!!!
2973 */
2974static int hmR0VmxLoadGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
2975{
2976 int rc = VINF_SUCCESS;
2977 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RFLAGS))
2978 {
2979 /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ).
2980 Let us assert it as such and use 32-bit VMWRITE. */
2981 Assert(!(pMixedCtx->rflags.u64 >> 32));
2982 X86EFLAGS Eflags = pMixedCtx->eflags;
2983 Eflags.u32 &= VMX_EFLAGS_RESERVED_0; /* Bits 22-31, 15, 5 & 3 MBZ. */
2984 Eflags.u32 |= VMX_EFLAGS_RESERVED_1; /* Bit 1 MB1. */
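        /* For example, an incoming eflags value of 0x202 (IF plus the always-one bit 1) passes through both masks
           unchanged, while any of the reserved bits 3, 5, 15 or 22-31 that happened to be set would be stripped. */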
2985
2986 /*
2987 * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so we can restore them on VM exit.
2988 * Modify the real-mode guest's eflags so that VT-x can run the real-mode guest code under Virtual 8086 mode.
2989 */
2990 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
2991 {
2992 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
2993 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
2994 pVCpu->hm.s.vmx.RealMode.Eflags.u32 = Eflags.u32; /* Save the original eflags of the real-mode guest. */
2995 Eflags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */
2996 Eflags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. */
2997 }
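        /* With IOPL forced to 0 in virtual-8086 mode, IOPL-sensitive instructions (CLI, STI, PUSHF, POPF, INT n,
           IRET) raise #GP instead of executing silently, giving the monitor a chance to emulate them correctly. */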
2998
2999 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_RFLAGS, Eflags.u32);
3000 AssertRCReturn(rc, rc);
3001
3002 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_RFLAGS);
3003 Log4(("Load: VMX_VMCS_GUEST_RFLAGS=%#RX32\n", Eflags.u32));
3004 }
3005 return rc;
3006}
3007
3008
3009/**
3010 * Loads the guest RIP, RSP and RFLAGS into the guest-state area in the VMCS.
3011 *
3012 * @returns VBox status code.
3013 * @param pVCpu Pointer to the VMCPU.
3014 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3015 * out-of-sync. Make sure to update the required fields
3016 * before using them.
3017 *
3018 * @remarks No-long-jump zone!!!
3019 */
3020DECLINLINE(int) hmR0VmxLoadGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3021{
3022 int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx);
3023 AssertRCReturn(rc, rc);
3024 rc = hmR0VmxLoadGuestRsp(pVCpu, pMixedCtx);
3025 AssertRCReturn(rc, rc);
3026 rc = hmR0VmxLoadGuestRflags(pVCpu, pMixedCtx);
3027 AssertRCReturn(rc, rc);
3028 return rc;
3029}
3030
3031
3032/**
3033 * Loads the guest CR0 control register into the guest-state area in the VMCS.
3034 * CR0 is partially shared with the host and we have to consider the FPU bits.
3035 *
3036 * @returns VBox status code.
3038 * @param pVCpu Pointer to the VMCPU.
3039 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3040 * out-of-sync. Make sure to update the required fields
3041 * before using them.
3042 *
3043 * @remarks No-long-jump zone!!!
3044 */
3045static int hmR0VmxLoadSharedCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3046{
3047 /*
3048 * Guest CR0.
3049 * Guest FPU.
3050 */
3051 int rc = VINF_SUCCESS;
3052 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0))
3053 {
3054 Assert(!(pMixedCtx->cr0 >> 32));
3055 uint32_t u32GuestCR0 = pMixedCtx->cr0;
3056 PVM pVM = pVCpu->CTX_SUFF(pVM);
3057
3058 /* The guest's view (read access) of its CR0 is unblemished. */
3059 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, u32GuestCR0);
3060 AssertRCReturn(rc, rc);
3061 Log4(("Load: VMX_VMCS_CTRL_CR0_READ_SHADOW=%#RX32\n", u32GuestCR0));
3062
3063 /* Setup VT-x's view of the guest CR0. */
3064 /* Minimize VM-exits due to CR3 changes when we have NestedPaging. */
3065 if (pVM->hm.s.fNestedPaging)
3066 {
3067 if (CPUMIsGuestPagingEnabledEx(pMixedCtx))
3068 {
3069 /* The guest has paging enabled, let it access CR3 without causing a VM exit if supported. */
3070 pVCpu->hm.s.vmx.u32ProcCtls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
3071 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT);
3072 }
3073 else
3074 {
 3075 /* The guest doesn't have paging enabled; make CR3 accesses cause VM-exits so we can update our shadow page tables. */
3076 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CR3_LOAD_EXIT
3077 | VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
3078 }
3079
3080 /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */
3081 if (pVM->hm.s.vmx.fUnrestrictedGuest)
3082 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CR3_STORE_EXIT;
3083
3084 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
3085 AssertRCReturn(rc, rc);
3086 }
3087 else
3088 u32GuestCR0 |= X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */
3089
3090 /*
3091 * Guest FPU bits.
 3092 * Intel spec. 23.8 "Restrictions on VMX operation" mentions that the CR0.NE bit must always be set on the first
 3093 * CPUs to support VT-x, while nothing is mentioned with regards to UX in the VM-entry checks.
3094 */
3095 u32GuestCR0 |= X86_CR0_NE;
3096 bool fInterceptNM = false;
3097 if (CPUMIsGuestFPUStateActive(pVCpu))
3098 {
3099 fInterceptNM = false; /* Guest FPU active, no need to VM-exit on #NM. */
 3100 /* The guest should still get #NM exceptions when it expects them, so we should not clear the TS & MP bits here.
 3101 We're only concerned about -us- not intercepting #NMs while the guest FPU state is active, not the guest itself! */
3102 }
3103 else
3104 {
3105 fInterceptNM = true; /* Guest FPU inactive, VM-exit on #NM for lazy FPU loading. */
3106 u32GuestCR0 |= X86_CR0_TS /* Guest can task switch quickly and do lazy FPU syncing. */
3107 | X86_CR0_MP; /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */
3108 }
3109
3110 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
3111 bool fInterceptMF = false;
3112 if (!(pMixedCtx->cr0 & X86_CR0_NE))
3113 fInterceptMF = true;
3114
3115 /* Finally, intercept all exceptions as we cannot directly inject them in real-mode, see hmR0VmxInjectEventVmcs(). */
3116 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3117 {
3118 Assert(PDMVmmDevHeapIsEnabled(pVM));
3119 Assert(pVM->hm.s.vmx.pRealModeTSS);
3120 pVCpu->hm.s.vmx.u32XcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK;
3121 fInterceptNM = true;
3122 fInterceptMF = true;
3123 }
3124 else
3125 pVCpu->hm.s.vmx.u32XcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK;
3126
3127 if (fInterceptNM)
3128 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_NM);
3129 else
3130 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_NM);
3131
3132 if (fInterceptMF)
3133 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_MF);
3134 else
3135 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_MF);
3136
3137 /* Additional intercepts for debugging, define these yourself explicitly. */
3138#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
3139 pVCpu->hm.s.vmx.u32XcptBitmap |= 0
3140 | RT_BIT(X86_XCPT_BP)
3141 | RT_BIT(X86_XCPT_DB)
3142 | RT_BIT(X86_XCPT_DE)
3143 | RT_BIT(X86_XCPT_NM)
3144 | RT_BIT(X86_XCPT_UD)
3145 | RT_BIT(X86_XCPT_NP)
3146 | RT_BIT(X86_XCPT_SS)
3147 | RT_BIT(X86_XCPT_GP)
3148 | RT_BIT(X86_XCPT_PF)
3149 | RT_BIT(X86_XCPT_MF)
3150 ;
3151#elif defined(HMVMX_ALWAYS_TRAP_PF)
3152 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_PF);
3153#endif
3154
3155 Assert(pVM->hm.s.fNestedPaging || (pVCpu->hm.s.vmx.u32XcptBitmap & RT_BIT(X86_XCPT_PF)));
3156
3157 /* Set/clear the CR0 specific bits along with their exceptions (PE, PG, CD, NW). */
3158 uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
3159 uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
3160 if (pVM->hm.s.vmx.fUnrestrictedGuest) /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). */
3161 uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG);
3162 else
3163 Assert((uSetCR0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG));
3164
3165 u32GuestCR0 |= uSetCR0;
3166 u32GuestCR0 &= uZapCR0;
3167 u32GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW); /* Always enable caching. */
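        /* Illustration (typical MSR values, they can differ per CPU): with MSR_IA32_VMX_CR0_FIXED0 = 0x80000021 and
           MSR_IA32_VMX_CR0_FIXED1 = 0xffffffff, uSetCR0 is 0x80000021 (PG, NE and PE must be 1) and uZapCR0 is
           0xffffffff (no bit forced to 0); the net effect is OR-ing PG/NE/PE into the CR0 VT-x sees, unless the
           unrestricted-guest exception above relaxed PE and PG. */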
3168
3169 /* Write VT-x's view of the guest CR0 into the VMCS and update the exception bitmap. */
3170 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR0, u32GuestCR0);
3171 AssertRCReturn(rc, rc);
3172 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
3173 AssertRCReturn(rc, rc);
3174 Log4(("Load: VMX_VMCS_GUEST_CR0=%#RX32 (uSetCR0=%#RX32 uZapCR0=%#RX32)\n", u32GuestCR0, uSetCR0, uZapCR0));
3175
3176 /*
3177 * CR0 is shared between host and guest along with a CR0 read shadow. Therefore, certain bits must not be changed
3178 * by the guest because VT-x ignores saving/restoring them (namely CD, ET, NW) and for certain other bits
3179 * we want to be notified immediately of guest CR0 changes (e.g. PG to update our shadow page tables).
3180 */
3181 uint32_t u32CR0Mask = 0;
3182 u32CR0Mask = X86_CR0_PE
3183 | X86_CR0_NE
3184 | X86_CR0_WP
3185 | X86_CR0_PG
3186 | X86_CR0_ET /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.ET */
3187 | X86_CR0_CD /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.CD */
3188 | X86_CR0_NW; /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.NW */
3189
3190 /** @todo Avoid intercepting CR0.PE with unrestricted guests. Fix PGM
3191 * enmGuestMode to be in-sync with the current mode. See @bugref{6398}
3192 * and @bugref{6944}. */
3193#if 0
3194 if (pVM->hm.s.vmx.fUnrestrictedGuest)
3195 u32CR0Mask &= ~X86_CR0_PE;
3196#endif
3197 if (pVM->hm.s.fNestedPaging)
3198 u32CR0Mask &= ~X86_CR0_WP;
3199
 3200 /* If the guest FPU state is active, we don't need to VM-exit on writes to the FPU related bits (TS, MP) in CR0. */
3201 if (fInterceptNM)
3202 {
3203 u32CR0Mask |= X86_CR0_TS
3204 | X86_CR0_MP;
3205 }
3206
3207 /* Write the CR0 mask into the VMCS and update the VCPU's copy of the current CR0 mask. */
3208 pVCpu->hm.s.vmx.u32CR0Mask = u32CR0Mask;
3209 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, u32CR0Mask);
3210 AssertRCReturn(rc, rc);
3211 Log4(("Load: VMX_VMCS_CTRL_CR0_MASK=%#RX32\n", u32CR0Mask));
3212
3213 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR0);
3214 }
3215 return rc;
3216}
3217
3218
3219/**
3220 * Loads the guest control registers (CR3, CR4) into the guest-state area
3221 * in the VMCS.
3222 *
3223 * @returns VBox status code.
3225 * @param pVCpu Pointer to the VMCPU.
3226 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3227 * out-of-sync. Make sure to update the required fields
3228 * before using them.
3229 *
3230 * @remarks No-long-jump zone!!!
3231 */
3232static int hmR0VmxLoadGuestCR3AndCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3233{
3234 int rc = VINF_SUCCESS;
3235 PVM pVM = pVCpu->CTX_SUFF(pVM);
3236
3237 /*
3238 * Guest CR2.
3239 * It's always loaded in the assembler code. Nothing to do here.
3240 */
3241
3242 /*
3243 * Guest CR3.
3244 */
3245 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR3))
3246 {
3247 RTGCPHYS GCPhysGuestCR3 = NIL_RTGCPHYS;
3248 if (pVM->hm.s.fNestedPaging)
3249 {
3250 pVCpu->hm.s.vmx.HCPhysEPTP = PGMGetHyperCR3(pVCpu);
3251
3252 /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */
3253 Assert(pVCpu->hm.s.vmx.HCPhysEPTP);
3254 Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & UINT64_C(0xfff0000000000000)));
3255 Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & 0xfff));
3256
3257 /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */
3258 pVCpu->hm.s.vmx.HCPhysEPTP |= VMX_EPT_MEMTYPE_WB
3259 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
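            /* Example (hypothetical address): an EPT PML4 table at host-physical 0x12345000 yields an EPTP of
               0x1234501e here, i.e. memory type WB (6) in bits 2:0 and a page-walk length of 4, encoded as 3,
               in bits 5:3. */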
3260
3261 /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */
3262 AssertMsg( ((pVCpu->hm.s.vmx.HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */
3263 && ((pVCpu->hm.s.vmx.HCPhysEPTP >> 6) & 0x3f) == 0, /* Bits 6:11 MBZ. */
3264 ("EPTP %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP));
3265
3266 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.HCPhysEPTP);
3267 AssertRCReturn(rc, rc);
3268 Log4(("Load: VMX_VMCS64_CTRL_EPTP_FULL=%#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP));
3269
3270 if ( pVM->hm.s.vmx.fUnrestrictedGuest
3271 || CPUMIsGuestPagingEnabledEx(pMixedCtx))
3272 {
3273 /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. */
3274 if (CPUMIsGuestInPAEModeEx(pMixedCtx))
3275 {
3276 rc = PGMGstGetPaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); AssertRCReturn(rc, rc);
3277 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc);
3278 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc);
3279 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc);
3280 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc);
3281 }
3282
 3283 /* With Nested Paging, the guest's view of its CR3 is unblemished when the guest is using paging, or when we
 3284 have Unrestricted Execution to handle the guest while it's not using paging. */
3285 GCPhysGuestCR3 = pMixedCtx->cr3;
3286 }
3287 else
3288 {
3289 /*
3290 * The guest is not using paging, but the CPU (VT-x) has to. While the guest thinks it accesses physical memory
3291 * directly, we use our identity-mapped page table to map guest-linear to guest-physical addresses.
3292 * EPT takes care of translating it to host-physical addresses.
3293 */
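            /* For instance, a guest-linear access to 0xb8000 walks this identity page table to guest-physical
               0xb8000, and EPT then maps that to wherever the host happens to back it. */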
3294 RTGCPHYS GCPhys;
3295 Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable);
3296 Assert(PDMVmmDevHeapIsEnabled(pVM));
3297
3298 /* We obtain it here every time as the guest could have relocated this PCI region. */
3299 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
3300 AssertRCReturn(rc, rc);
3301
3302 GCPhysGuestCR3 = GCPhys;
3303 }
3304
3305 Log4(("Load: VMX_VMCS_GUEST_CR3=%#RGv (GstN)\n", GCPhysGuestCR3));
3306 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_CR3, GCPhysGuestCR3);
3307 }
3308 else
3309 {
3310 /* Non-nested paging case, just use the hypervisor's CR3. */
3311 RTHCPHYS HCPhysGuestCR3 = PGMGetHyperCR3(pVCpu);
3312
3313 Log4(("Load: VMX_VMCS_GUEST_CR3=%#RHv (HstN)\n", HCPhysGuestCR3));
3314 rc = VMXWriteVmcsHstN(VMX_VMCS_GUEST_CR3, HCPhysGuestCR3);
3315 }
3316 AssertRCReturn(rc, rc);
3317
3318 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR3);
3319 }
3320
3321 /*
3322 * Guest CR4.
3323 */
3324 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR4))
3325 {
3326 Assert(!(pMixedCtx->cr4 >> 32));
3327 uint32_t u32GuestCR4 = pMixedCtx->cr4;
3328
3329 /* The guest's view of its CR4 is unblemished. */
3330 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, u32GuestCR4);
3331 AssertRCReturn(rc, rc);
3332 Log4(("Load: VMX_VMCS_CTRL_CR4_READ_SHADOW=%#RX32\n", u32GuestCR4));
3333
3334 /* Setup VT-x's view of the guest CR4. */
3335 /*
3336 * If we're emulating real-mode using virtual-8086 mode, we want to redirect software interrupts to the 8086 program
 3337 * interrupt handler. Clear the VME bit (the interrupt redirection bitmap is already all 0, see hmR3InitFinalizeR0()).
 3338 * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode".
3339 */
3340 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3341 {
3342 Assert(pVM->hm.s.vmx.pRealModeTSS);
3343 Assert(PDMVmmDevHeapIsEnabled(pVM));
3344 u32GuestCR4 &= ~X86_CR4_VME;
3345 }
3346
3347 if (pVM->hm.s.fNestedPaging)
3348 {
3349 if ( !CPUMIsGuestPagingEnabledEx(pMixedCtx)
3350 && !pVM->hm.s.vmx.fUnrestrictedGuest)
3351 {
3352 /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */
3353 u32GuestCR4 |= X86_CR4_PSE;
 3354 /* Our identity mapping is a 32-bit page directory. */
3355 u32GuestCR4 &= ~X86_CR4_PAE;
3356 }
3357 /* else use guest CR4.*/
3358 }
3359 else
3360 {
3361 /*
3362 * The shadow paging modes and guest paging modes are different, the shadow is in accordance with the host
3363 * paging mode and thus we need to adjust VT-x's view of CR4 depending on our shadow page tables.
3364 */
3365 switch (pVCpu->hm.s.enmShadowMode)
3366 {
3367 case PGMMODE_REAL: /* Real-mode. */
3368 case PGMMODE_PROTECTED: /* Protected mode without paging. */
3369 case PGMMODE_32_BIT: /* 32-bit paging. */
3370 {
3371 u32GuestCR4 &= ~X86_CR4_PAE;
3372 break;
3373 }
3374
3375 case PGMMODE_PAE: /* PAE paging. */
3376 case PGMMODE_PAE_NX: /* PAE paging with NX. */
3377 {
3378 u32GuestCR4 |= X86_CR4_PAE;
3379 break;
3380 }
3381
3382 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
3383 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
3384#ifdef VBOX_ENABLE_64_BITS_GUESTS
3385 break;
3386#endif
3387 default:
3388 AssertFailed();
3389 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
3390 }
3391 }
3392
3393 /* We need to set and clear the CR4 specific bits here (mainly the X86_CR4_VMXE bit). */
3394 uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
3395 uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
3396 u32GuestCR4 |= uSetCR4;
3397 u32GuestCR4 &= uZapCR4;
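        /* On many CPUs MSR_IA32_VMX_CR4_FIXED0 is 0x2000 (only VMXE must be 1) while CR4_FIXED1 allows every feature
           the CPU supports, so the usual net effect here is merely forcing CR4.VMXE on in the value VT-x sees; the
           guest's read shadow written above is left untouched. */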
3398
3399 /* Write VT-x's view of the guest CR4 into the VMCS. */
3400 Log4(("Load: VMX_VMCS_GUEST_CR4=%#RX32 (Set=%#RX32 Zap=%#RX32)\n", u32GuestCR4, uSetCR4, uZapCR4));
3401 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR4, u32GuestCR4);
3402 AssertRCReturn(rc, rc);
3403
 3404 /* Set up the CR4 mask. These CR4 flags are owned by the host; if the guest attempts to change them, a VM-exit occurs. */
3405 uint32_t u32CR4Mask = 0;
3406 u32CR4Mask = X86_CR4_VME
3407 | X86_CR4_PAE
3408 | X86_CR4_PGE
3409 | X86_CR4_PSE
3410 | X86_CR4_VMXE;
3411 pVCpu->hm.s.vmx.u32CR4Mask = u32CR4Mask;
3412 rc = VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, u32CR4Mask);
3413 AssertRCReturn(rc, rc);
3414
3415 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR4);
3416 }
3417 return rc;
3418}
3419
3420
3421/**
3422 * Loads the guest debug registers into the guest-state area in the VMCS.
3423 * This also sets up whether #DB and MOV DRx accesses cause VM exits.
3424 *
3425 * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3).
3426 *
3427 * @returns VBox status code.
3428 * @param pVCpu Pointer to the VMCPU.
3429 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3430 * out-of-sync. Make sure to update the required fields
3431 * before using them.
3432 *
3433 * @remarks No-long-jump zone!!!
3434 */
3435static int hmR0VmxLoadSharedDebugState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3436{
3437 if (!VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG))
3438 return VINF_SUCCESS;
3439
3440#ifdef VBOX_STRICT
3441 /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */
3442 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
3443 {
3444 /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */
3445 Assert((pMixedCtx->dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0); /* Bits 63:32, 15, 14, 12, 11 are reserved. */
3446 Assert((pMixedCtx->dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); /* Bit 10 is reserved (RA1). */
3447 }
3448#endif
3449
3450 int rc;
3451 PVM pVM = pVCpu->CTX_SUFF(pVM);
3452 bool fInterceptDB = false;
3453 bool fInterceptMovDRx = false;
3454 if (pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu))
3455 {
3456 /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */
3457 if (pVM->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG)
3458 {
3459 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG;
3460 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
3461 AssertRCReturn(rc, rc);
3462 Assert(fInterceptDB == false);
3463 }
3464 else
3465 {
3466 pMixedCtx->eflags.u32 |= X86_EFL_TF;
3467 pVCpu->hm.s.fClearTrapFlag = true;
3468 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RFLAGS);
3469 fInterceptDB = true;
3470 }
3471 }
3472
3473 if (fInterceptDB || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK))
3474 {
3475 /*
3476 * Use the combined guest and host DRx values found in the hypervisor
3477 * register set because the debugger has breakpoints active or someone
3478 * is single stepping on the host side without a monitor trap flag.
3479 *
3480 * Note! DBGF expects a clean DR6 state before executing guest code.
3481 */
3482#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3483 if ( CPUMIsGuestInLongModeEx(pMixedCtx)
3484 && !CPUMIsHyperDebugStateActivePending(pVCpu))
3485 {
3486 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3487 Assert(CPUMIsHyperDebugStateActivePending(pVCpu));
3488 Assert(!CPUMIsGuestDebugStateActivePending(pVCpu));
3489 }
3490 else
3491#endif
3492 if (!CPUMIsHyperDebugStateActive(pVCpu))
3493 {
3494 CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */);
3495 Assert(CPUMIsHyperDebugStateActive(pVCpu));
3496 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
3497 }
3498
3499 /* Update DR7. (The other DRx values are handled by CPUM one way or the other.) */
3500 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)CPUMGetHyperDR7(pVCpu));
3501 AssertRCReturn(rc, rc);
3502
3503 pVCpu->hm.s.fUsingHyperDR7 = true;
3504 fInterceptDB = true;
3505 fInterceptMovDRx = true;
3506 }
3507 else
3508 {
3509 /*
3510 * If the guest has enabled debug registers, we need to load them prior to
3511 * executing guest code so they'll trigger at the right time.
3512 */
3513 if (pMixedCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) /** @todo Why GD? */
3514 {
3515#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3516 if ( CPUMIsGuestInLongModeEx(pMixedCtx)
3517 && !CPUMIsGuestDebugStateActivePending(pVCpu))
3518 {
3519 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3520 Assert(CPUMIsGuestDebugStateActivePending(pVCpu));
3521 Assert(!CPUMIsHyperDebugStateActivePending(pVCpu));
3522 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3523 }
3524 else
3525#endif
 3526 if (!CPUMIsGuestDebugStateActive(pVCpu))
3527 {
3528 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
3529 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3530 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
3531 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
3532 }
3533 }
3534 /*
 3535 * If no debugging is enabled, we'll lazily load DR0-3. Unlike on AMD-V, we
3536 * must intercept #DB in order to maintain a correct DR6 guest value.
3537 */
3538#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3539 else if ( ( CPUMIsGuestInLongModeEx(pMixedCtx)
3540 && !CPUMIsGuestDebugStateActivePending(pVCpu))
3541 || !CPUMIsGuestDebugStateActive(pVCpu))
3542#else
3543 else if (!CPUMIsGuestDebugStateActive(pVCpu))
3544#endif
3545 {
3546 fInterceptMovDRx = true;
3547 fInterceptDB = true;
3548 }
3549
3550 /* Update guest DR7. */
3551 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, pMixedCtx->dr[7]);
3552 AssertRCReturn(rc, rc);
3553
3554 pVCpu->hm.s.fUsingHyperDR7 = false;
3555 }
3556
3557 /*
3558 * Update the exception bitmap regarding intercepting #DB generated by the guest.
3559 */
3560 if (fInterceptDB)
3561 pVCpu->hm.s.vmx.u32XcptBitmap |= RT_BIT(X86_XCPT_DB);
3562 else if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3563 {
3564#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
3565 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB);
3566#endif
3567 }
3568 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
3569 AssertRCReturn(rc, rc);
3570
3571 /*
3572 * Update the processor-based VM-execution controls regarding intercepting MOV DRx instructions.
3573 */
3574 if (fInterceptMovDRx)
3575 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
3576 else
3577 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
3578 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
3579 AssertRCReturn(rc, rc);
3580
3581 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_DEBUG);
3582 return VINF_SUCCESS;
3583}
3584
3585
3586#ifdef VBOX_STRICT
3587/**
3588 * Strict function to validate segment registers.
3589 *
3590 * @remarks ASSUMES CR0 is up to date.
3591 */
3592static void hmR0VmxValidateSegmentRegs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
3593{
3594 /* Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". */
 3595 /* NOTE: The reason we check for attribute value 0 and not just the unusable bit here is that hmR0VmxWriteSegmentReg()
 3596 * only updates the VMCS' copy of the value with the unusable bit and doesn't change the guest-context value. */
3597 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
3598 && ( !CPUMIsGuestInRealModeEx(pCtx)
3599 && !CPUMIsGuestInV86ModeEx(pCtx)))
3600 {
3601 /* Protected mode checks */
3602 /* CS */
3603 Assert(pCtx->cs.Attr.n.u1Present);
3604 Assert(!(pCtx->cs.Attr.u & 0xf00));
3605 Assert(!(pCtx->cs.Attr.u & 0xfffe0000));
3606 Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff
3607 || !(pCtx->cs.Attr.n.u1Granularity));
3608 Assert( !(pCtx->cs.u32Limit & 0xfff00000)
3609 || (pCtx->cs.Attr.n.u1Granularity));
3610 /* CS cannot be loaded with NULL in protected mode. */
3611 Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS?!? */
3612 if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
3613 Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl);
3614 else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
3615 Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl);
3616 else
3617 AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u2Dpl));
3618 /* SS */
3619 Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
3620 Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL));
3621 if ( !(pCtx->cr0 & X86_CR0_PE)
3622 || pCtx->cs.Attr.n.u4Type == 3)
3623 {
3624 Assert(!pCtx->ss.Attr.n.u2Dpl);
3625 }
3626 if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
3627 {
3628 Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL));
3629 Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7);
3630 Assert(pCtx->ss.Attr.n.u1Present);
3631 Assert(!(pCtx->ss.Attr.u & 0xf00));
3632 Assert(!(pCtx->ss.Attr.u & 0xfffe0000));
3633 Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff
3634 || !(pCtx->ss.Attr.n.u1Granularity));
3635 Assert( !(pCtx->ss.u32Limit & 0xfff00000)
3636 || (pCtx->ss.Attr.n.u1Granularity));
3637 }
3638 /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). */
3639 if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
3640 {
3641 Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
3642 Assert(pCtx->ds.Attr.n.u1Present);
3643 Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL));
3644 Assert(!(pCtx->ds.Attr.u & 0xf00));
3645 Assert(!(pCtx->ds.Attr.u & 0xfffe0000));
3646 Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff
3647 || !(pCtx->ds.Attr.n.u1Granularity));
3648 Assert( !(pCtx->ds.u32Limit & 0xfff00000)
3649 || (pCtx->ds.Attr.n.u1Granularity));
3650 Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
3651 || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ));
3652 }
3653 if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
3654 {
3655 Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
3656 Assert(pCtx->es.Attr.n.u1Present);
3657 Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL));
3658 Assert(!(pCtx->es.Attr.u & 0xf00));
3659 Assert(!(pCtx->es.Attr.u & 0xfffe0000));
3660 Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff
3661 || !(pCtx->es.Attr.n.u1Granularity));
3662 Assert( !(pCtx->es.u32Limit & 0xfff00000)
3663 || (pCtx->es.Attr.n.u1Granularity));
3664 Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
3665 || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ));
3666 }
3667 if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
3668 {
3669 Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
3670 Assert(pCtx->fs.Attr.n.u1Present);
3671 Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL));
3672 Assert(!(pCtx->fs.Attr.u & 0xf00));
3673 Assert(!(pCtx->fs.Attr.u & 0xfffe0000));
3674 Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff
3675 || !(pCtx->fs.Attr.n.u1Granularity));
3676 Assert( !(pCtx->fs.u32Limit & 0xfff00000)
3677 || (pCtx->fs.Attr.n.u1Granularity));
3678 Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
3679 || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ));
3680 }
3681 if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
3682 {
3683 Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED);
3684 Assert(pCtx->gs.Attr.n.u1Present);
3685 Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL));
3686 Assert(!(pCtx->gs.Attr.u & 0xf00));
3687 Assert(!(pCtx->gs.Attr.u & 0xfffe0000));
3688 Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff
3689 || !(pCtx->gs.Attr.n.u1Granularity));
3690 Assert( !(pCtx->gs.u32Limit & 0xfff00000)
3691 || (pCtx->gs.Attr.n.u1Granularity));
3692 Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
3693 || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ));
3694 }
3695 /* 64-bit capable CPUs. */
3696# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3697 Assert(!(pCtx->cs.u64Base >> 32));
3698 Assert(!pCtx->ss.Attr.u || !(pCtx->ss.u64Base >> 32));
3699 Assert(!pCtx->ds.Attr.u || !(pCtx->ds.u64Base >> 32));
3700 Assert(!pCtx->es.Attr.u || !(pCtx->es.u64Base >> 32));
3701# endif
3702 }
3703 else if ( CPUMIsGuestInV86ModeEx(pCtx)
3704 || ( CPUMIsGuestInRealModeEx(pCtx)
3705 && !pVM->hm.s.vmx.fUnrestrictedGuest))
3706 {
3707 /* Real and v86 mode checks. */
 3708 /* hmR0VmxWriteSegmentReg() writes the modified value into the VMCS. We want what we're feeding to VT-x. */
3709 uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
3710 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3711 {
3712 u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3; u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3;
3713 }
3714 else
3715 {
3716 u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u;
3717 u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
3718 }
3719
3720 /* CS */
3721 AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel));
3722 Assert(pCtx->cs.u32Limit == 0xffff);
3723 Assert(u32CSAttr == 0xf3);
3724 /* SS */
3725 Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4);
3726 Assert(pCtx->ss.u32Limit == 0xffff);
3727 Assert(u32SSAttr == 0xf3);
3728 /* DS */
3729 Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4);
3730 Assert(pCtx->ds.u32Limit == 0xffff);
3731 Assert(u32DSAttr == 0xf3);
3732 /* ES */
3733 Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4);
3734 Assert(pCtx->es.u32Limit == 0xffff);
3735 Assert(u32ESAttr == 0xf3);
3736 /* FS */
3737 Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4);
3738 Assert(pCtx->fs.u32Limit == 0xffff);
3739 Assert(u32FSAttr == 0xf3);
3740 /* GS */
3741 Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4);
3742 Assert(pCtx->gs.u32Limit == 0xffff);
3743 Assert(u32GSAttr == 0xf3);
3744 /* 64-bit capable CPUs. */
3745# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3746 Assert(!(pCtx->cs.u64Base >> 32));
3747 Assert(!u32SSAttr || !(pCtx->ss.u64Base >> 32));
3748 Assert(!u32DSAttr || !(pCtx->ds.u64Base >> 32));
3749 Assert(!u32ESAttr || !(pCtx->es.u64Base >> 32));
3750# endif
3751 }
3752}
3753#endif /* VBOX_STRICT */
3754
3755
3756/**
3757 * Writes a guest segment register into the guest-state area in the VMCS.
3758 *
3759 * @returns VBox status code.
3760 * @param pVCpu Pointer to the VMCPU.
3761 * @param idxSel Index of the selector in the VMCS.
3762 * @param idxLimit Index of the segment limit in the VMCS.
3763 * @param idxBase Index of the segment base in the VMCS.
3764 * @param idxAccess Index of the access rights of the segment in the VMCS.
3765 * @param pSelReg Pointer to the segment selector.
3766 * @param pCtx Pointer to the guest-CPU context.
3767 *
3768 * @remarks No-long-jump zone!!!
3769 */
3770static int hmR0VmxWriteSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase,
3771 uint32_t idxAccess, PCPUMSELREG pSelReg, PCPUMCTX pCtx)
3772{
3773 int rc = VMXWriteVmcs32(idxSel, pSelReg->Sel); /* 16-bit guest selector field. */
3774 AssertRCReturn(rc, rc);
3775 rc = VMXWriteVmcs32(idxLimit, pSelReg->u32Limit); /* 32-bit guest segment limit field. */
3776 AssertRCReturn(rc, rc);
3777 rc = VMXWriteVmcsGstN(idxBase, pSelReg->u64Base); /* Natural width guest segment base field.*/
3778 AssertRCReturn(rc, rc);
3779
3780 uint32_t u32Access = pSelReg->Attr.u;
3781 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3782 {
3783 /* VT-x requires our real-using-v86 mode hack to override the segment access-right bits. */
3784 u32Access = 0xf3;
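        /* 0xf3 decodes to: type 3 (read/write, accessed data), S=1, DPL=3, present; these are the flat ring-3 style
           attributes the VM-entry checks require of all segment registers while in virtual-8086 mode. */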
3785 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
3786 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
3787 }
3788 else
3789 {
3790 /*
 3791 * The way to differentiate whether this is really a null selector or just a selector loaded with 0 in
 3792 * real-mode is by using the segment attributes. A selector loaded in real-mode with the value 0 is valid and usable in
 3793 * protected-mode and we should -not- mark it as an unusable segment. Both the recompiler & VT-x ensure that NULL selectors
 3794 * loaded in protected-mode have their attributes as 0.
3795 */
3796 if (!u32Access)
3797 u32Access = X86DESCATTR_UNUSABLE;
3798 }
3799
3800 /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */
3801 AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED),
3802 ("Access bit not set for usable segment. idx=%#x sel=%#x attr %#x\n", idxBase, pSelReg, pSelReg->Attr.u));
3803
3804 rc = VMXWriteVmcs32(idxAccess, u32Access); /* 32-bit guest segment access-rights field. */
3805 AssertRCReturn(rc, rc);
3806 return rc;
3807}
3808
3809
3810/**
3811 * Loads the guest segment registers, GDTR, IDTR, LDTR, (TR, FS and GS bases)
3812 * into the guest-state area in the VMCS.
3813 *
3814 * @returns VBox status code.
 3815 * @param pVCpu Pointer to the VMCPU.
3817 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
3818 * out-of-sync. Make sure to update the required fields
3819 * before using them.
3820 *
3821 * @remarks ASSUMES pMixedCtx->cr0 is up to date (strict builds validation).
3822 * @remarks No-long-jump zone!!!
3823 */
3824static int hmR0VmxLoadGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
3825{
3826 int rc = VERR_INTERNAL_ERROR_5;
3827 PVM pVM = pVCpu->CTX_SUFF(pVM);
3828
3829 /*
3830 * Guest Segment registers: CS, SS, DS, ES, FS, GS.
3831 */
3832 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS))
3833 {
3834 /* Save the segment attributes for real-on-v86 mode hack, so we can restore them on VM-exit. */
3835 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3836 {
3837 pVCpu->hm.s.vmx.RealMode.AttrCS.u = pMixedCtx->cs.Attr.u;
3838 pVCpu->hm.s.vmx.RealMode.AttrSS.u = pMixedCtx->ss.Attr.u;
3839 pVCpu->hm.s.vmx.RealMode.AttrDS.u = pMixedCtx->ds.Attr.u;
3840 pVCpu->hm.s.vmx.RealMode.AttrES.u = pMixedCtx->es.Attr.u;
3841 pVCpu->hm.s.vmx.RealMode.AttrFS.u = pMixedCtx->fs.Attr.u;
3842 pVCpu->hm.s.vmx.RealMode.AttrGS.u = pMixedCtx->gs.Attr.u;
3843 }
3844
3845#ifdef VBOX_WITH_REM
3846 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
3847 {
3848 Assert(pVM->hm.s.vmx.pRealModeTSS);
3849 AssertCompile(PGMMODE_REAL < PGMMODE_PROTECTED);
3850 if ( pVCpu->hm.s.vmx.fWasInRealMode
3851 && PGMGetGuestMode(pVCpu) >= PGMMODE_PROTECTED)
3852 {
3853 /* Signal that the recompiler must flush its code-cache as the guest -may- rewrite code it will later execute
3854 in real-mode (e.g. OpenBSD 4.0) */
3855 REMFlushTBs(pVM);
3856 Log4(("Load: Switch to protected mode detected!\n"));
3857 pVCpu->hm.s.vmx.fWasInRealMode = false;
3858 }
3859 }
3860#endif
3861 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_CS, VMX_VMCS32_GUEST_CS_LIMIT, VMX_VMCS_GUEST_CS_BASE,
3862 VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS, &pMixedCtx->cs, pMixedCtx);
3863 AssertRCReturn(rc, rc);
3864 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_SS, VMX_VMCS32_GUEST_SS_LIMIT, VMX_VMCS_GUEST_SS_BASE,
3865 VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS, &pMixedCtx->ss, pMixedCtx);
3866 AssertRCReturn(rc, rc);
3867 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_DS, VMX_VMCS32_GUEST_DS_LIMIT, VMX_VMCS_GUEST_DS_BASE,
3868 VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS, &pMixedCtx->ds, pMixedCtx);
3869 AssertRCReturn(rc, rc);
3870 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_ES, VMX_VMCS32_GUEST_ES_LIMIT, VMX_VMCS_GUEST_ES_BASE,
3871 VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS, &pMixedCtx->es, pMixedCtx);
3872 AssertRCReturn(rc, rc);
3873 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_FS, VMX_VMCS32_GUEST_FS_LIMIT, VMX_VMCS_GUEST_FS_BASE,
3874 VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS, &pMixedCtx->fs, pMixedCtx);
3875 AssertRCReturn(rc, rc);
3876 rc = hmR0VmxWriteSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_GS, VMX_VMCS32_GUEST_GS_LIMIT, VMX_VMCS_GUEST_GS_BASE,
3877 VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS, &pMixedCtx->gs, pMixedCtx);
3878 AssertRCReturn(rc, rc);
3879
3880#ifdef VBOX_STRICT
3881 /* Validate. */
3882 hmR0VmxValidateSegmentRegs(pVM, pVCpu, pMixedCtx);
3883#endif
3884
3885 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS);
3886 Log4(("Load: CS=%#RX16 Base=%#RX64 Limit=%#RX32 Attr=%#RX32\n", pMixedCtx->cs.Sel, pMixedCtx->cs.u64Base,
3887 pMixedCtx->cs.u32Limit, pMixedCtx->cs.Attr.u));
3888 }
3889
3890 /*
3891 * Guest TR.
3892 */
3893 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_TR))
3894 {
3895 /*
3896 * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is achieved
3897 * using the interrupt redirection bitmap (all bits cleared to let the guest handle INT-n's) in the TSS.
 3898 * See hmR3InitFinalizeR0() to see how pRealModeTSS is set up.
3899 */
3900 uint16_t u16Sel = 0;
3901 uint32_t u32Limit = 0;
3902 uint64_t u64Base = 0;
3903 uint32_t u32AccessRights = 0;
3904
3905 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
3906 {
3907 u16Sel = pMixedCtx->tr.Sel;
3908 u32Limit = pMixedCtx->tr.u32Limit;
3909 u64Base = pMixedCtx->tr.u64Base;
3910 u32AccessRights = pMixedCtx->tr.Attr.u;
3911 }
3912 else
3913 {
3914 Assert(pVM->hm.s.vmx.pRealModeTSS);
3915 Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMR3CanExecuteGuest() -XXX- what about inner loop changes? */
3916
3917 /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */
3918 RTGCPHYS GCPhys;
3919 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
3920 AssertRCReturn(rc, rc);
3921
3922 X86DESCATTR DescAttr;
3923 DescAttr.u = 0;
3924 DescAttr.n.u1Present = 1;
3925 DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
3926
3927 u16Sel = 0;
3928 u32Limit = HM_VTX_TSS_SIZE;
3929 u64Base = GCPhys; /* in real-mode phys = virt. */
3930 u32AccessRights = DescAttr.u;
3931 }
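        /* In this real-on-v86 case the constructed access rights come to 0x8b: a present, busy 32-bit TSS, which
           satisfies the VM-entry checks on TR (see the asserts below). */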
3932
3933 /* Validate. */
3934 Assert(!(u16Sel & RT_BIT(2)));
3935 AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY
3936 || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights));
3937 AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights));
3938 Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/
3939 Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/
3940 Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. */
3941 Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */
3942 Assert( (u32Limit & 0xfff) == 0xfff
3943 || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */
3944 Assert( !(pMixedCtx->tr.u32Limit & 0xfff00000)
3945 || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */
3946
3947 rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_TR, u16Sel); AssertRCReturn(rc, rc);
3948 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit); AssertRCReturn(rc, rc);
3949 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_TR_BASE, u64Base); AssertRCReturn(rc, rc);
3950 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights); AssertRCReturn(rc, rc);
3951
3952 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_TR);
3953 Log4(("Load: VMX_VMCS_GUEST_TR_BASE=%#RX64\n", u64Base));
3954 }
3955
3956 /*
3957 * Guest GDTR.
3958 */
3959 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_GDTR))
3960 {
3961 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pMixedCtx->gdtr.cbGdt); AssertRCReturn(rc, rc);
3962 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, pMixedCtx->gdtr.pGdt); AssertRCReturn(rc, rc);
3963
3964 /* Validate. */
3965 Assert(!(pMixedCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */
3966
3967 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_GDTR);
3968 Log4(("Load: VMX_VMCS_GUEST_GDTR_BASE=%#RX64\n", pMixedCtx->gdtr.pGdt));
3969 }
3970
3971 /*
3972 * Guest LDTR.
3973 */
3974 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_LDTR))
3975 {
3976 /* The unusable bit is specific to VT-x, if it's a null selector mark it as an unusable segment. */
3977 uint32_t u32Access = 0;
3978 if (!pMixedCtx->ldtr.Attr.u)
3979 u32Access = X86DESCATTR_UNUSABLE;
3980 else
3981 u32Access = pMixedCtx->ldtr.Attr.u;
3982
3983 rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_FIELD_LDTR, pMixedCtx->ldtr.Sel); AssertRCReturn(rc, rc);
3984 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pMixedCtx->ldtr.u32Limit); AssertRCReturn(rc, rc);
3985 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_LDTR_BASE, pMixedCtx->ldtr.u64Base); AssertRCReturn(rc, rc);
3986 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access); AssertRCReturn(rc, rc);
3987
3988 /* Validate. */
3989 if (!(u32Access & X86DESCATTR_UNUSABLE))
3990 {
3991 Assert(!(pMixedCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */
3992 Assert(pMixedCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */
3993 Assert(!pMixedCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */
3994 Assert(pMixedCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */
3995 Assert(!pMixedCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */
3996 Assert(!(pMixedCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */
3997 Assert( (pMixedCtx->ldtr.u32Limit & 0xfff) == 0xfff
3998 || !pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */
3999 Assert( !(pMixedCtx->ldtr.u32Limit & 0xfff00000)
4000 || pMixedCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */
4001 }
4002
4003 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_LDTR);
4004 Log4(("Load: VMX_VMCS_GUEST_LDTR_BASE=%#RX64\n", pMixedCtx->ldtr.u64Base));
4005 }
4006
4007 /*
4008 * Guest IDTR.
4009 */
4010 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_IDTR))
4011 {
4012 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pMixedCtx->idtr.cbIdt); AssertRCReturn(rc, rc);
4013 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, pMixedCtx->idtr.pIdt); AssertRCReturn(rc, rc);
4014
4015 /* Validate. */
4016 Assert(!(pMixedCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */
4017
4018 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_IDTR);
4019 Log4(("Load: VMX_VMCS_GUEST_IDTR_BASE=%#RX64\n", pMixedCtx->idtr.pIdt));
4020 }
4021
4022 return VINF_SUCCESS;
4023}
4024
4025
4026/**
4027 * Loads certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store
4028 * areas. These MSRs will automatically be loaded to the host CPU on every
4029 * successful VM entry and stored from the host CPU on every successful VM exit.
4030 * Also loads the sysenter MSRs into the guest-state area in the VMCS.
4031 *
4032 * @returns VBox status code.
4033 * @param pVCpu Pointer to the VMCPU.
4034 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4035 * out-of-sync. Make sure to update the required fields
4036 * before using them.
4037 *
4038 * @remarks No-long-jump zone!!!
4039 */
4040static int hmR0VmxLoadGuestMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4041{
4042 AssertPtr(pVCpu);
4043 AssertPtr(pVCpu->hm.s.vmx.pvGuestMsr);
4044
4045 /*
 4046 * MSRs covered by Auto-load/store: LSTAR, STAR, SF_MASK, KERNEL_GS_BASE and TSC_AUX (RDTSCP).
4047 */
4048 int rc = VINF_SUCCESS;
4049 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS))
4050 {
4051#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
4052 PVM pVM = pVCpu->CTX_SUFF(pVM);
4053 PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
4054 uint32_t cGuestMsrs = 0;
4055
4056 /* See Intel spec. 4.1.4 "Enumeration of Paging Features by CPUID". */
 4057 /** @todo r=ramshankar: Optimize this further to do lazy restoration and to only
 4058 * load these MSRs when the guest really is in 64-bit mode. */
4059 bool fSupportsLongMode = CPUMGetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_LONG_MODE);
4060 if (fSupportsLongMode)
4061 {
4062 pGuestMsr->u32Msr = MSR_K8_LSTAR;
4063 pGuestMsr->u32Reserved = 0;
4064 pGuestMsr->u64Value = pMixedCtx->msrLSTAR; /* 64 bits mode syscall rip */
4065 pGuestMsr++; cGuestMsrs++;
4066 pGuestMsr->u32Msr = MSR_K6_STAR;
4067 pGuestMsr->u32Reserved = 0;
4068 pGuestMsr->u64Value = pMixedCtx->msrSTAR; /* legacy syscall eip, cs & ss */
4069 pGuestMsr++; cGuestMsrs++;
4070 pGuestMsr->u32Msr = MSR_K8_SF_MASK;
4071 pGuestMsr->u32Reserved = 0;
4072 pGuestMsr->u64Value = pMixedCtx->msrSFMASK; /* syscall flag mask */
4073 pGuestMsr++; cGuestMsrs++;
4074 pGuestMsr->u32Msr = MSR_K8_KERNEL_GS_BASE;
4075 pGuestMsr->u32Reserved = 0;
4076 pGuestMsr->u64Value = pMixedCtx->msrKERNELGSBASE; /* swapgs exchange value */
4077 pGuestMsr++; cGuestMsrs++;
4078 }
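        /* Each VMXAUTOMSR entry built above is 16 bytes: a 32-bit MSR index, a 32-bit reserved field (must be zero)
           and the 64-bit value; the same count is written below for both the VM-entry MSR-load and the VM-exit
           MSR-store lists. */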
4079
4080 /*
4081 * RDTSCP requires the TSC_AUX MSR. Host and guest share the physical MSR. So we have to
4082 * load the guest's copy always (since the MSR bitmap allows passthru unconditionally).
4083 */
4084 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
4085 {
4086 pGuestMsr->u32Msr = MSR_K8_TSC_AUX;
4087 pGuestMsr->u32Reserved = 0;
4088 rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pGuestMsr->u64Value);
4089 AssertRCReturn(rc, rc);
4090 pGuestMsr++; cGuestMsrs++;
4091 }
4092
4093 /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */
4094 if (cGuestMsrs > MSR_IA32_VMX_MISC_MAX_MSR(pVM->hm.s.vmx.Msrs.u64Misc))
4095 {
4096 LogRel(("CPU autoload/store MSR count in VMCS exceeded cGuestMsrs=%u.\n", cGuestMsrs));
4097 pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE;
4098 return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO;
4099 }
4100
4101 /* Update the VCPU's copy of the guest MSR count. */
4102 pVCpu->hm.s.vmx.cGuestMsrs = cGuestMsrs;
4103 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cGuestMsrs); AssertRCReturn(rc, rc);
4104 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cGuestMsrs); AssertRCReturn(rc, rc);
4105#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
4106
4107 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS);
4108 }
4109
4110 /*
4111 * Guest Sysenter MSRs.
4112 * These flags are only set when MSR-bitmaps are not supported by the CPU and we cause
4113 * VM-exits on WRMSRs for these MSRs.
4114 */
4115 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR))
4116 {
4117 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pMixedCtx->SysEnter.cs); AssertRCReturn(rc, rc);
4118 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR);
4119 }
4120
4121 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR))
4122 {
4123 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, pMixedCtx->SysEnter.eip); AssertRCReturn(rc, rc);
4124 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR);
4125 }
4126
4127 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR))
4128 {
4129 rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, pMixedCtx->SysEnter.esp); AssertRCReturn(rc, rc);
4130 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
4131 }
4132
4133 return rc;
4134}
4135
4136
4137/**
4138 * Loads the guest activity state into the guest-state area in the VMCS.
4139 *
4140 * @returns VBox status code.
4141 * @param pVCpu Pointer to the VMCPU.
4142 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4143 * out-of-sync. Make sure to update the required fields
4144 * before using them.
4145 *
4146 * @remarks No-long-jump zone!!!
4147 */
4148static int hmR0VmxLoadGuestActivityState(PVMCPU pVCpu, PCPUMCTX pCtx)
4149{
4150 /** @todo See if we can make use of other states, e.g.
4151 * VMX_VMCS_GUEST_ACTIVITY_SHUTDOWN or HLT. */
4152 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_VMX_GUEST_ACTIVITY_STATE))
4153 {
4154 int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_VMCS_GUEST_ACTIVITY_ACTIVE);
4155 AssertRCReturn(rc, rc);
4156
4157 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_VMX_GUEST_ACTIVITY_STATE);
4158 }
4159 return VINF_SUCCESS;
4160}
4161
4162
4163/**
4164 * Sets up the appropriate function to run guest code.
4165 *
4166 * @returns VBox status code.
4167 * @param pVCpu Pointer to the VMCPU.
4168 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4169 * out-of-sync. Make sure to update the required fields
4170 * before using them.
4171 *
4172 * @remarks No-long-jump zone!!!
4173 */
4174static int hmR0VmxSetupVMRunHandler(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4175{
4176 if (CPUMIsGuestInLongModeEx(pMixedCtx))
4177 {
4178#ifndef VBOX_ENABLE_64_BITS_GUESTS
4179 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
4180#endif
4181 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */
4182#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4183 /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */
4184 if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64)
4185 {
4186 pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
4187 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS);
4188 }
4189#else
4190 /* 64-bit host or hybrid host. */
4191 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64;
4192#endif
4193 }
4194 else
4195 {
4196 /* Guest is not in long mode, use the 32-bit handler. */
4197#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4198 if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32)
4199 {
4200 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
4201 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS);
4202 }
4203#else
4204 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
4205#endif
4206 }
4207 Assert(pVCpu->hm.s.vmx.pfnStartVM);
4208 return VINF_SUCCESS;
4209}
4210
4211
4212/**
4213 * Wrapper for running the guest code in VT-x.
4214 *
4215 * @returns VBox strict status code.
4216 * @param pVM Pointer to the VM.
4217 * @param pVCpu Pointer to the VMCPU.
4218 * @param pCtx Pointer to the guest-CPU context.
4219 *
4220 * @remarks No-long-jump zone!!!
4221 */
4222DECLINLINE(int) hmR0VmxRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4223{
4224 /*
4225 * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses floating-point operations
4226 * using SSE instructions. Some XMM registers (XMM6-XMM15) are callee-saved and thus the need for this XMM wrapper.
4227 * Refer MSDN docs. "Configuring Programs for 64-bit / x64 Software Conventions / Register Usage" for details.
4228 */
4229 const bool fResumeVM = RT_BOOL(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED);
4230 /** @todo Add stats for resume vs launch. */
4231#ifdef VBOX_WITH_KERNEL_USING_XMM
4232 return HMR0VMXStartVMWrapXMM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM);
4233#else
4234 return pVCpu->hm.s.vmx.pfnStartVM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu);
4235#endif
4236}
4237
4238
4239/**
4240 * Reports world-switch error and dumps some useful debug info.
4241 *
4242 * @param pVM Pointer to the VM.
4243 * @param pVCpu Pointer to the VMCPU.
4244 * @param rcVMRun The return code from VMLAUNCH/VMRESUME.
4245 * @param pCtx Pointer to the guest-CPU context.
4246 * @param pVmxTransient Pointer to the VMX transient structure (only
4247 * exitReason updated).
4248 */
4249static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rcVMRun, PCPUMCTX pCtx, PVMXTRANSIENT pVmxTransient)
4250{
4251 Assert(pVM);
4252 Assert(pVCpu);
4253 Assert(pCtx);
4254 Assert(pVmxTransient);
4255 HMVMX_ASSERT_PREEMPT_SAFE();
4256
4257 Log4(("VM-entry failure: %Rrc\n", rcVMRun));
4258 switch (rcVMRun)
4259 {
4260 case VERR_VMX_INVALID_VMXON_PTR:
4261 AssertFailed();
4262 break;
4263 case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */
4264 case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */
4265 {
4266 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason);
4267 rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError);
4268 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
4269 AssertRC(rc);
4270
4271 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
4272 /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted().
4273 Cannot do it here as we may have been long preempted. */
4274
4275#ifdef VBOX_STRICT
4276 Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason,
4277 pVmxTransient->uExitReason));
4278 Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQualification));
4279 Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError));
4280 if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX)
4281 Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError]));
4282 else
4283 Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX));
4284 Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu));
4285 Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu));
4286
4287 /* VMX control bits. */
4288 uint32_t u32Val;
4289 uint64_t u64Val;
4290 HMVMXHCUINTREG uHCReg;
4291 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val); AssertRC(rc);
4292 Log4(("VMX_VMCS32_CTRL_PIN_EXEC %#RX32\n", u32Val));
4293 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val); AssertRC(rc);
4294 Log4(("VMX_VMCS32_CTRL_PROC_EXEC %#RX32\n", u32Val));
4295 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val); AssertRC(rc);
4296 Log4(("VMX_VMCS32_CTRL_PROC_EXEC2 %#RX32\n", u32Val));
4297 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRC(rc);
4298 Log4(("VMX_VMCS32_CTRL_ENTRY %#RX32\n", u32Val));
4299 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val); AssertRC(rc);
4300 Log4(("VMX_VMCS32_CTRL_EXIT %#RX32\n", u32Val));
4301 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, &u32Val); AssertRC(rc);
4302 Log4(("VMX_VMCS32_CTRL_CR3_TARGET_COUNT %#RX32\n", u32Val));
4303 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32Val); AssertRC(rc);
4304 Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", u32Val));
4305 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &u32Val); AssertRC(rc);
4306 Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", u32Val));
4307 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &u32Val); AssertRC(rc);
4308 Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %u\n", u32Val));
4309 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, &u32Val); AssertRC(rc);
4310 Log4(("VMX_VMCS32_CTRL_TPR_THRESHOLD %u\n", u32Val));
4311 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &u32Val); AssertRC(rc);
4312 Log4(("VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT %u (guest MSRs)\n", u32Val));
4313 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &u32Val); AssertRC(rc);
4314 Log4(("VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT %u (host MSRs)\n", u32Val));
4315 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &u32Val); AssertRC(rc);
4316 Log4(("VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT %u (guest MSRs)\n", u32Val));
4317 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val); AssertRC(rc);
4318 Log4(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP %#RX32\n", u32Val));
4319 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, &u32Val); AssertRC(rc);
4320 Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK %#RX32\n", u32Val));
4321 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, &u32Val); AssertRC(rc);
4322 Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH %#RX32\n", u32Val));
4323 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc);
4324 Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg));
4325 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc);
4326 Log4(("VMX_VMCS_CTRL_CR0_READ_SHADOW         %#RHr\n", uHCReg));
4327 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc);
4328 Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg));
4329 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc);
4330 Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
4331 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
4332 Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
4333
4334 /* Guest bits. */
4335 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); AssertRC(rc);
4336 Log4(("Old Guest Rip %#RX64 New %#RX64\n", pCtx->rip, u64Val));
4337 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); AssertRC(rc);
4338 Log4(("Old Guest Rsp %#RX64 New %#RX64\n", pCtx->rsp, u64Val));
4339 rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val); AssertRC(rc);
4340 Log4(("Old Guest Rflags %#RX32 New %#RX32\n", pCtx->eflags.u32, u32Val));
4341 rc = VMXReadVmcs32(VMX_VMCS16_GUEST_FIELD_VPID, &u32Val); AssertRC(rc);
4342 Log4(("VMX_VMCS16_GUEST_FIELD_VPID %u\n", u32Val));
4343
4344 /* Host bits. */
4345 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR0, &uHCReg); AssertRC(rc);
4346 Log4(("Host CR0 %#RHr\n", uHCReg));
4347 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR3, &uHCReg); AssertRC(rc);
4348 Log4(("Host CR3 %#RHr\n", uHCReg));
4349 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR4, &uHCReg); AssertRC(rc);
4350 Log4(("Host CR4 %#RHr\n", uHCReg));
4351
4352 RTGDTR HostGdtr;
4353 PCX86DESCHC pDesc;
4354 ASMGetGDTR(&HostGdtr);
4355 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_CS, &u32Val); AssertRC(rc);
4356 Log4(("Host CS %#08x\n", u32Val));
4357 if (u32Val < HostGdtr.cbGdt)
4358 {
4359 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4360 HMR0DumpDescriptor(pDesc, u32Val, "CS: ");
4361 }
4362
4363 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_DS, &u32Val); AssertRC(rc);
4364 Log4(("Host DS %#08x\n", u32Val));
4365 if (u32Val < HostGdtr.cbGdt)
4366 {
4367 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4368 HMR0DumpDescriptor(pDesc, u32Val, "DS: ");
4369 }
4370
4371 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_ES, &u32Val); AssertRC(rc);
4372 Log4(("Host ES %#08x\n", u32Val));
4373 if (u32Val < HostGdtr.cbGdt)
4374 {
4375 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4376 HMR0DumpDescriptor(pDesc, u32Val, "ES: ");
4377 }
4378
4379 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_FS, &u32Val); AssertRC(rc);
4380 Log4(("Host FS %#08x\n", u32Val));
4381 if (u32Val < HostGdtr.cbGdt)
4382 {
4383 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4384 HMR0DumpDescriptor(pDesc, u32Val, "FS: ");
4385 }
4386
4387 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_GS, &u32Val); AssertRC(rc);
4388 Log4(("Host GS %#08x\n", u32Val));
4389 if (u32Val < HostGdtr.cbGdt)
4390 {
4391 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4392 HMR0DumpDescriptor(pDesc, u32Val, "GS: ");
4393 }
4394
4395 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_SS, &u32Val); AssertRC(rc);
4396 Log4(("Host SS %#08x\n", u32Val));
4397 if (u32Val < HostGdtr.cbGdt)
4398 {
4399 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4400 HMR0DumpDescriptor(pDesc, u32Val, "SS: ");
4401 }
4402
4403 rc = VMXReadVmcs32(VMX_VMCS16_HOST_FIELD_TR, &u32Val); AssertRC(rc);
4404 Log4(("Host TR %#08x\n", u32Val));
4405 if (u32Val < HostGdtr.cbGdt)
4406 {
4407 pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK));
4408 HMR0DumpDescriptor(pDesc, u32Val, "TR: ");
4409 }
4410
4411 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_TR_BASE, &uHCReg); AssertRC(rc);
4412 Log4(("Host TR Base %#RHv\n", uHCReg));
4413 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, &uHCReg); AssertRC(rc);
4414 Log4(("Host GDTR Base %#RHv\n", uHCReg));
4415 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, &uHCReg); AssertRC(rc);
4416 Log4(("Host IDTR Base %#RHv\n", uHCReg));
4417 rc = VMXReadVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, &u32Val); AssertRC(rc);
4418 Log4(("Host SYSENTER CS %#08x\n", u32Val));
4419 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_EIP, &uHCReg); AssertRC(rc);
4420 Log4(("Host SYSENTER EIP %#RHv\n", uHCReg));
4421 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_ESP, &uHCReg); AssertRC(rc);
4422 Log4(("Host SYSENTER ESP %#RHv\n", uHCReg));
4423 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RSP, &uHCReg); AssertRC(rc);
4424 Log4(("Host RSP %#RHv\n", uHCReg));
4425 rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RIP, &uHCReg); AssertRC(rc);
4426 Log4(("Host RIP %#RHv\n", uHCReg));
4427# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4428 if (HMVMX_IS_64BIT_HOST_MODE())
4429 {
4430 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER)));
4431 Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR)));
4432 Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4433 Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4434 Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4435 Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
4436 }
4437# endif
4438#endif /* VBOX_STRICT */
4439 break;
4440 }
4441
4442 default:
4443 /* Impossible */
4444 AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun));
4445 break;
4446 }
4447 NOREF(pVM);
4448}
4449
4450
4451#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4452#ifndef VMX_USE_CACHED_VMCS_ACCESSES
4453# error "VMX_USE_CACHED_VMCS_ACCESSES not defined when it should be!"
4454#endif
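/* Note: the VBOX_STRICT validators below check that only fields which may legally live
   in the VMCS read/write cache are queued, i.e. the natural-width guest fields (plus
   the exit qualification on the read side) that have no 32-bit FULL/HIGH halves and
   therefore must be accessed via the 32->64 switcher; see VMXWriteVmcs64Ex() and
   hmR0VmxInitVmcsReadCache() below. */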
4455#ifdef VBOX_STRICT
4456static bool hmR0VmxIsValidWriteField(uint32_t idxField)
4457{
4458 switch (idxField)
4459 {
4460 case VMX_VMCS_GUEST_RIP:
4461 case VMX_VMCS_GUEST_RSP:
4462 case VMX_VMCS_GUEST_SYSENTER_EIP:
4463 case VMX_VMCS_GUEST_SYSENTER_ESP:
4464 case VMX_VMCS_GUEST_GDTR_BASE:
4465 case VMX_VMCS_GUEST_IDTR_BASE:
4466 case VMX_VMCS_GUEST_CS_BASE:
4467 case VMX_VMCS_GUEST_DS_BASE:
4468 case VMX_VMCS_GUEST_ES_BASE:
4469 case VMX_VMCS_GUEST_FS_BASE:
4470 case VMX_VMCS_GUEST_GS_BASE:
4471 case VMX_VMCS_GUEST_SS_BASE:
4472 case VMX_VMCS_GUEST_LDTR_BASE:
4473 case VMX_VMCS_GUEST_TR_BASE:
4474 case VMX_VMCS_GUEST_CR3:
4475 return true;
4476 }
4477 return false;
4478}
4479
4480static bool hmR0VmxIsValidReadField(uint32_t idxField)
4481{
4482 switch (idxField)
4483 {
4484 /* Read-only fields. */
4485 case VMX_VMCS_RO_EXIT_QUALIFICATION:
4486 return true;
4487 }
4488 /* Remaining readable fields should also be writable. */
4489 return hmR0VmxIsValidWriteField(idxField);
4490}
4491#endif /* VBOX_STRICT */
4492
4493
4494/**
4495 * Executes the specified handler in 64-bit mode.
4496 *
4497 * @returns VBox status code.
4498 * @param pVM Pointer to the VM.
4499 * @param pVCpu Pointer to the VMCPU.
4500 * @param pCtx Pointer to the guest CPU context.
4501 * @param enmOp The operation to perform.
4502 * @param cbParam Number of 32-bit parameters in paParam.
4503 * @param paParam Array of 32-bit parameters.
4504 */
4505VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp, uint32_t cbParam,
4506 uint32_t *paParam)
4507{
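    /*
     * Rough flow of the code below: interrupts are disabled, the current VMCS is
     * cleared, VMX root mode is left and CR4.VMXE cleared, the 32-bit parameters are
     * pushed onto the hypervisor stack and the 32->64 switcher is called; afterwards
     * CR4.VMXE is restored, VMX root mode is re-entered and the VMCS re-activated.
     */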
4508 int rc, rc2;
4509 PHMGLOBALCPUINFO pCpu;
4510 RTHCPHYS HCPhysCpuPage;
4511 RTCCUINTREG uOldEflags;
4512
4513 AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
4514 Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END);
4515 Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField));
4516 Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField));
4517
4518#ifdef VBOX_STRICT
4519 for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries; i++)
4520 Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i]));
4521
4522 for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries; i++)
4523 Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i]));
4524#endif
4525
4526 /* Disable interrupts. */
4527 uOldEflags = ASMIntDisableFlags();
4528
4529#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
4530 RTCPUID idHostCpu = RTMpCpuId();
4531 CPUMR0SetLApic(pVCpu, idHostCpu);
4532#endif
4533
4534 pCpu = HMR0GetCurrentCpu();
4535 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
4536
4537 /* Clear the VMCS: this marks it inactive, clears implementation-specific data and writes the VMCS data back to memory. */
4538 VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
4539
4540 /* Leave VMX Root Mode. */
4541 VMXDisable();
4542
4543 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
4544
4545 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
4546 CPUMSetHyperEIP(pVCpu, enmOp);
4547 for (int i = (int)cbParam - 1; i >= 0; i--)
4548 CPUMPushHyper(pVCpu, paParam[i]);
4549
4550 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
4551
4552 /* Call the switcher. */
4553 rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
4554 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
4555
4556 /** @todo replace with hmR0VmxEnterRootMode() and hmR0VmxLeaveRootMode(). */
4557 /* Make sure the VMX instructions don't cause #UD faults. */
4558 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
4559
4560 /* Re-enter VMX Root Mode */
4561 rc2 = VMXEnable(HCPhysCpuPage);
4562 if (RT_FAILURE(rc2))
4563 {
4564 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
4565 ASMSetFlags(uOldEflags);
4566 return rc2;
4567 }
4568
4569 rc2 = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
4570 AssertRC(rc2);
4571 Assert(!(ASMGetFlags() & X86_EFL_IF));
4572 ASMSetFlags(uOldEflags);
4573 return rc;
4574}
4575
4576
4577/**
4578 * Prepares for and executes VMLAUNCH (64 bits guests) for 32-bit hosts
4579 * supporting 64-bit guests.
4580 *
4581 * @returns VBox status code.
4582 * @param fResume Whether to VMLAUNCH or VMRESUME.
4583 * @param pCtx Pointer to the guest-CPU context.
4584 * @param pCache Pointer to the VMCS cache.
4585 * @param pVM Pointer to the VM.
4586 * @param pVCpu Pointer to the VMCPU.
4587 */
4588DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
4589{
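    /*
     * Pack the parameters for the 64-bit world: the VMXON and VMCS physical addresses
     * are split into 32-bit lo/hi halves, the raw-mode context address of the VMCS
     * cache is passed along, and HM64ON32OP_VMXRCStartVM64 is dispatched through
     * VMXR0Execute64BitsHandler() below.
     */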
4590 uint32_t aParam[6];
4591 PHMGLOBALCPUINFO pCpu = NULL;
4592 RTHCPHYS HCPhysCpuPage = 0;
4593 int rc = VERR_INTERNAL_ERROR_5;
4594
4595 pCpu = HMR0GetCurrentCpu();
4596 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
4597
4598#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4599 pCache->uPos = 1;
4600 pCache->interPD = PGMGetInterPaeCR3(pVM);
4601 pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0;
4602#endif
4603
4604#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES)
4605 pCache->TestIn.HCPhysCpuPage = 0;
4606 pCache->TestIn.HCPhysVmcs = 0;
4607 pCache->TestIn.pCache = 0;
4608 pCache->TestOut.HCPhysVmcs = 0;
4609 pCache->TestOut.pCache = 0;
4610 pCache->TestOut.pCtx = 0;
4611 pCache->TestOut.eflags = 0;
4612#endif
4613
4614 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
4615 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
4616 aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */
4617 aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs >> 32); /* Param 2: VMCS physical address - Hi. */
4618 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache);
4619 aParam[5] = 0;
4620
4621#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4622 pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8;
4623 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1;
4624#endif
4625 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_VMXRCStartVM64, 6, &aParam[0]);
4626
4627#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4628 Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5);
4629 Assert(pCtx->dr[4] == 10);
4630 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff;
4631#endif
4632
4633#if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES)
4634 AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
4635 AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
4636 pVCpu->hm.s.vmx.HCPhysVmcs));
4637 AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
4638 pCache->TestOut.HCPhysVmcs));
4639 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
4640 pCache->TestOut.pCache));
4641 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache),
4642 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache)));
4643 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
4644 pCache->TestOut.pCtx));
4645 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
4646#endif
4647 return rc;
4648}
4649
4650
4651/**
4652 * Initialize the VMCS-Read cache. The VMCS cache is used for 32-bit hosts
4653 * running 64-bit guests (except 32-bit Darwin which runs with 64-bit paging in
4654 * 32-bit mode) for 64-bit fields that cannot be accessed in 32-bit mode. Some
4655 * 64-bit fields -can- be accessed (those that have a 32-bit FULL & HIGH part).
4656 *
4657 * @returns VBox status code.
4658 * @param pVM Pointer to the VM.
4659 * @param pVCpu Pointer to the VMCPU.
4660 */
4661static int hmR0VmxInitVmcsReadCache(PVM pVM, PVMCPU pVCpu)
4662{
4663#define VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, idxField) \
4664{ \
4665 Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \
4666 pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \
4667 pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \
4668 ++cReadFields; \
4669}
4670
4671 AssertPtr(pVM);
4672 AssertPtr(pVCpu);
4673 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
4674 uint32_t cReadFields = 0;
4675
4676 /*
4677 * Don't remove the #if 0'd fields in this code. They're listed here for consistency
4678 * and serve to indicate exceptions to the rules.
4679 */
4680
4681 /* Guest-natural selector base fields. */
4682#if 0
4683 /* These are 32-bit in practice. See Intel spec. 2.5 "Control Registers". */
4684 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR0);
4685 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR4);
4686#endif
4687 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_ES_BASE);
4688 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CS_BASE);
4689 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SS_BASE);
4690 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_DS_BASE);
4691 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_FS_BASE);
4692 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GS_BASE);
4693 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_LDTR_BASE);
4694 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_TR_BASE);
4695 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GDTR_BASE);
4696 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_IDTR_BASE);
4697 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RSP);
4698 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RIP);
4699#if 0
4700 /* Unused natural width guest-state fields. */
4701 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS);
4702 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); /* Handled in Nested Paging case */
4703#endif
4704 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_ESP);
4705 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_EIP);
4706
4707 /* 64-bit guest-state fields; unused as we use two 32-bit VMREADs for these 64-bit fields (using "FULL" and "HIGH" fields). */
4708#if 0
4709 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL);
4710 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_DEBUGCTL_FULL);
4711 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PAT_FULL);
4712 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_EFER_FULL);
4713 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL);
4714 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE0_FULL);
4715 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE1_FULL);
4716 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE2_FULL);
4717 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE3_FULL);
4718#endif
4719
4720 /* Natural width guest-state fields. */
4721 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
4722#if 0
4723 /* Currently unused field. */
4724 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_GUEST_LINEAR_ADDR);
4725#endif
4726
4727 if (pVM->hm.s.fNestedPaging)
4728 {
4729 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3);
4730 AssertMsg(cReadFields == VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields,
4731 VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX));
4732 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
4733 }
4734 else
4735 {
4736 AssertMsg(cReadFields == VMX_VMCS_MAX_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, VMX_VMCS_MAX_CACHE_IDX));
4737 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
4738 }
4739
4740#undef VMXLOCAL_INIT_READ_CACHE_FIELD
4741 return VINF_SUCCESS;
4742}
4743
4744
4745/**
4746 * Writes a field into the VMCS. This can either directly invoke a VMWRITE or
4747 * queue up the VMWRITE by using the VMCS write cache (on 32-bit hosts, except
4748 * darwin, running 64-bit guests).
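 *
 * Illustration (not actual code, the field name is just one example from the cases
 * below): a 64-bit field with FULL and HIGH halves is written as two 32-bit
 * VMWRITEs, the HIGH encoding being FULL + 1:
 *     VMXWriteVmcs32(VMX_VMCS64_CTRL_TSC_OFFSET_FULL,     (uint32_t)u64Val);
 *     VMXWriteVmcs32(VMX_VMCS64_CTRL_TSC_OFFSET_FULL + 1, (uint32_t)(u64Val >> 32));
 * whereas natural-width guest fields with bits set above bit 31 are queued in the
 * write cache instead.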
4749 *
4750 * @returns VBox status code.
4751 * @param pVCpu Pointer to the VMCPU.
4752 * @param idxField The VMCS field encoding.
4753 * @param u64Val A 16, 32 or 64-bit value.
4754 */
4755VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4756{
4757 int rc;
4758 switch (idxField)
4759 {
4760 /*
4761 * These fields consist of a "FULL" and a "HIGH" part which can be written to individually.
4762 */
4763 /* 64-bit Control fields. */
4764 case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL:
4765 case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL:
4766 case VMX_VMCS64_CTRL_MSR_BITMAP_FULL:
4767 case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL:
4768 case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL:
4769 case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL:
4770 case VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL:
4771 case VMX_VMCS64_CTRL_TSC_OFFSET_FULL:
4772 case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL:
4773 case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL:
4774 case VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL:
4775 case VMX_VMCS64_CTRL_EPTP_FULL:
4776 case VMX_VMCS64_CTRL_EPTP_LIST_FULL:
4777 /* 64-bit Guest-state fields. */
4778 case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL:
4779 case VMX_VMCS64_GUEST_DEBUGCTL_FULL:
4780 case VMX_VMCS64_GUEST_PAT_FULL:
4781 case VMX_VMCS64_GUEST_EFER_FULL:
4782 case VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL:
4783 case VMX_VMCS64_GUEST_PDPTE0_FULL:
4784 case VMX_VMCS64_GUEST_PDPTE1_FULL:
4785 case VMX_VMCS64_GUEST_PDPTE2_FULL:
4786 case VMX_VMCS64_GUEST_PDPTE3_FULL:
4787 /* 64-bit Host-state fields. */
4788 case VMX_VMCS64_HOST_FIELD_PAT_FULL:
4789 case VMX_VMCS64_HOST_FIELD_EFER_FULL:
4790 case VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL:
4791 {
4792 rc = VMXWriteVmcs32(idxField, u64Val);
4793 rc |= VMXWriteVmcs32(idxField + 1, (uint32_t)(u64Val >> 32));
4794 break;
4795 }
4796
4797 /*
4798 * These fields do not have high and low parts. Queue up the VMWRITE by using the VMCS write-cache (for 64-bit
4799 * values). When we switch the host to 64-bit mode for running 64-bit guests, these VMWRITEs are executed at that point.
4800 */
4801 /* Natural-width Guest-state fields. */
4802 case VMX_VMCS_GUEST_CR3:
4803 case VMX_VMCS_GUEST_ES_BASE:
4804 case VMX_VMCS_GUEST_CS_BASE:
4805 case VMX_VMCS_GUEST_SS_BASE:
4806 case VMX_VMCS_GUEST_DS_BASE:
4807 case VMX_VMCS_GUEST_FS_BASE:
4808 case VMX_VMCS_GUEST_GS_BASE:
4809 case VMX_VMCS_GUEST_LDTR_BASE:
4810 case VMX_VMCS_GUEST_TR_BASE:
4811 case VMX_VMCS_GUEST_GDTR_BASE:
4812 case VMX_VMCS_GUEST_IDTR_BASE:
4813 case VMX_VMCS_GUEST_RSP:
4814 case VMX_VMCS_GUEST_RIP:
4815 case VMX_VMCS_GUEST_SYSENTER_ESP:
4816 case VMX_VMCS_GUEST_SYSENTER_EIP:
4817 {
4818 if (!(u64Val >> 32))
4819 {
4820 /* If this field is 64-bit, VT-x will zero out the top bits. */
4821 rc = VMXWriteVmcs32(idxField, (uint32_t)u64Val);
4822 }
4823 else
4824 {
4825 /* Assert that only the 32->64 switcher case should ever come here. */
4826 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests);
4827 rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val);
4828 }
4829 break;
4830 }
4831
4832 default:
4833 {
4834 AssertMsgFailed(("VMXWriteVmcs64Ex: Invalid field %#RX32 (pVCpu=%p u64Val=%#RX64)\n", idxField, pVCpu, u64Val));
4835 rc = VERR_INVALID_PARAMETER;
4836 break;
4837 }
4838 }
4839 AssertRCReturn(rc, rc);
4840 return rc;
4841}
4842
4843
4844/**
4845 * Queue up a VMWRITE by using the VMCS write cache. This is only used on 32-bit
4846 * hosts (except darwin) for 64-bit guests.
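 *
 * Usage sketch (illustrative only, uGuestFsBase being a made-up 64-bit local):
 * queue a natural-width guest field whose value has bits set above bit 31 so the
 * switcher can perform the VMWRITE once it is back in 64-bit mode:
 *     VMXWriteCachedVmcsEx(pVCpu, VMX_VMCS_GUEST_FS_BASE, uGuestFsBase);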
4847 *
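 * @returns VBox status code.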
4848 * @param pVCpu Pointer to the VMCPU.
4849 * @param idxField The VMCS field encoding.
4850 * @param u64Val A 16, 32 or 64-bit value.
4851 */
4852VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4853{
4854 AssertPtr(pVCpu);
4855 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
4856
4857 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
4858 ("entries=%u\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
4859
4860 /* Make sure there are no duplicates. */
4861 for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++)
4862 {
4863 if (pCache->Write.aField[i] == idxField)
4864 {
4865 pCache->Write.aFieldVal[i] = u64Val;
4866 return VINF_SUCCESS;
4867 }
4868 }
4869
4870 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
4871 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
4872 pCache->Write.cValidEntries++;
4873 return VINF_SUCCESS;
4874}
4875
4876/* Enable later when the assembly code uses these as callbacks. */
4877#if 0
4878/*
4879 * Loads the VMCS write-cache into the CPU (by executing VMWRITEs).
4880 *
4881 * @param pVCpu Pointer to the VMCPU.
4882 * @param pCache Pointer to the VMCS cache.
4883 *
4884 * @remarks No-long-jump zone!!!
4885 */
4886VMMR0DECL(void) VMXWriteCachedVmcsLoad(PVMCPU pVCpu, PVMCSCACHE pCache)
4887{
4888 AssertPtr(pCache);
4889 for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++)
4890 {
4891 int rc = VMXWriteVmcs64(pCache->Write.aField[i], pCache->Write.aFieldVal[i]);
4892 AssertRC(rc);
4893 }
4894 pCache->Write.cValidEntries = 0;
4895}
4896
4897
4898/**
4899 * Stores the VMCS read-cache from the CPU (by executing VMREADs).
4900 *
4901 * @param pVCpu Pointer to the VMCPU.
4902 * @param pCache Pointer to the VMCS cache.
4903 *
4904 * @remarks No-long-jump zone!!!
4905 */
4906VMMR0DECL(void) VMXReadCachedVmcsStore(PVMCPU pVCpu, PVMCSCACHE pCache)
4907{
4908 AssertPtr(pCache);
4909 for (uint32_t i = 0; i < pCache->Read.cValidEntries; i++)
4910 {
4911 int rc = VMXReadVmcs64(pCache->Read.aField[i], &pCache->Read.aFieldVal[i]);
4912 AssertRC(rc);
4913 }
4914}
4915#endif
4916#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
4917
4918
4919/**
4920 * Sets up the usage of TSC-offsetting and updates the VMCS. If offsetting is
4921 * not possible, cause VM-exits on RDTSC(P)s. Also sets up the VMX preemption
4922 * timer.
4923 *
4924 * @returns VBox status code.
4925 * @param pVCpu Pointer to the VMCPU.
4926 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
4927 * out-of-sync. Make sure to update the required fields
4928 * before using them.
4929 * @remarks No-long-jump zone!!!
4930 */
4931static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
4932{
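    /*
     * With TSC offsetting enabled and RDTSC(P) exiting clear, a guest RDTSC returns
     * host-TSC + VMX_VMCS64_CTRL_TSC_OFFSET_FULL. The VMX-preemption timer counts
     * down at the TSC rate divided by 2^cPreemptTimerShift (the shift advertised by
     * the CPU), hence the clamping and right shift below. Illustrative numbers: with
     * a 2 GHz TSC, a deadline 10 ms away and a shift of 5, the deadline is 20,000,000
     * TSC ticks (within the [u64CpuHz/2048, u64CpuHz/64] clamp), i.e. a preemption
     * timer value of 20,000,000 >> 5 = 625,000.
     */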
4933 int rc = VERR_INTERNAL_ERROR_5;
4934 bool fOffsettedTsc = false;
4935 PVM pVM = pVCpu->CTX_SUFF(pVM);
4936 if (pVM->hm.s.vmx.fUsePreemptTimer)
4937 {
4938 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset);
4939
4940 /* Make sure the returned values have sane upper and lower boundaries. */
4941 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
4942 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second */
4943 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
4944 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
4945
4946 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
4947 rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_PREEMPT_TIMER_VALUE, cPreemptionTickCount); AssertRC(rc);
4948 }
4949 else
4950 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset);
4951
4952 if (fOffsettedTsc)
4953 {
4954 uint64_t u64CurTSC = ASMReadTSC();
4955 if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
4956 {
4957 /* Note: VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
4958 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset); AssertRC(rc);
4959
4960 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT;
4961 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc);
4962 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
4963 }
4964 else
4965 {
4966 /* VM-exit on RDTSC(P) as we would otherwise pass decreasing TSC values to the guest. */
4967 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT;
4968 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc);
4969 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
4970 }
4971 }
4972 else
4973 {
4974 /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */
4975 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT;
4976 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc);
4977 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
4978 }
4979}
4980
4981
4982/**
4983 * Determines if an exception is a contributory exception. Contributory
4984 * exceptions are ones which can cause double-faults. Page-fault is
4985 * intentionally not included here as it's a conditional contributory exception.
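 *
 * As used by hmR0VmxCheckExitDueToEventDelivery() below: a contributory exception
 * raised while delivering another contributory exception (or a #PF) is promoted to
 * a #DF, and a further exception raised while delivering a #DF means a triple fault.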
4986 *
4987 * @returns true if the exception is contributory, false otherwise.
4988 * @param uVector The exception vector.
4989 */
4990DECLINLINE(bool) hmR0VmxIsContributoryXcpt(const uint32_t uVector)
4991{
4992 switch (uVector)
4993 {
4994 case X86_XCPT_GP:
4995 case X86_XCPT_SS:
4996 case X86_XCPT_NP:
4997 case X86_XCPT_TS:
4998 case X86_XCPT_DE:
4999 return true;
5000 default:
5001 break;
5002 }
5003 return false;
5004}
5005
5006
5007/**
5008 * Sets an event as a pending event to be injected into the guest.
5009 *
5010 * @param pVCpu Pointer to the VMCPU.
5011 * @param u32IntInfo The VM-entry interruption-information field.
5012 * @param cbInstr The VM-entry instruction length in bytes (for software
5013 * interrupts, exceptions and privileged software
5014 * exceptions).
5015 * @param u32ErrCode The VM-entry exception error code.
5016 * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
5017 * page-fault.
5018 *
5019 * @remarks Statistics counter assumes this is a guest event being injected or
5020 * re-injected into the guest, i.e. 'StatInjectPendingReflect' is
5021 * always incremented.
5022 */
5023DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPU pVCpu, uint32_t u32IntInfo, uint32_t cbInstr, uint32_t u32ErrCode,
5024 RTGCUINTPTR GCPtrFaultAddress)
5025{
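    /*
     * Only record the event here; it is injected on the next VM-entry by the
     * event-injection path, which copies these values into the VM-entry
     * interruption-information, exception error-code and instruction-length fields.
     */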
5026 Assert(!pVCpu->hm.s.Event.fPending);
5027 pVCpu->hm.s.Event.fPending = true;
5028 pVCpu->hm.s.Event.u64IntInfo = u32IntInfo;
5029 pVCpu->hm.s.Event.u32ErrCode = u32ErrCode;
5030 pVCpu->hm.s.Event.cbInstr = cbInstr;
5031 pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
5032
5033 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect);
5034}
5035
5036
5037/**
5038 * Sets a double-fault (#DF) exception as pending-for-injection into the VM.
5039 *
5040 * @param pVCpu Pointer to the VMCPU.
5041 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5042 * out-of-sync. Make sure to update the required fields
5043 * before using them.
5044 */
5045DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5046{
5047 uint32_t u32IntInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID;
5048 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
5049 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
5050 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
5051}
5052
5053
5054/**
5055 * Handle a condition that occurred while delivering an event through the guest
5056 * IDT.
5057 *
5058 * @returns VBox status code (informational error codes included).
5059 * @retval VINF_SUCCESS if we should continue handling the VM-exit.
5060 * @retval VINF_HM_DOUBLE_FAULT if a #DF condition was detected and we ought to
5061 * continue execution of the guest which will deliver the #DF.
5062 * @retval VINF_EM_RESET if we detected a triple-fault condition.
5063 *
5064 * @param pVCpu Pointer to the VMCPU.
5065 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5066 * out-of-sync. Make sure to update the required fields
5067 * before using them.
5068 * @param pVmxTransient Pointer to the VMX transient structure.
5069 *
5070 * @remarks No-long-jump zone!!!
5071 */
5072static int hmR0VmxCheckExitDueToEventDelivery(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
5073{
5074 int rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
5075 AssertRCReturn(rc, rc);
5076 if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo))
5077 {
5078 rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient);
5079 AssertRCReturn(rc, rc);
5080
5081 uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo);
5082 uint32_t uExitVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVmxTransient->uExitIntInfo);
5083 uint32_t uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo);
5084
5085 typedef enum
5086 {
5087 VMXREFLECTXCPT_XCPT, /* Reflect the exception to the guest or for further evaluation by VMM. */
5088 VMXREFLECTXCPT_DF, /* Reflect the exception as a double-fault to the guest. */
5089 VMXREFLECTXCPT_TF, /* Indicate a triple faulted state to the VMM. */
5090 VMXREFLECTXCPT_NONE /* Nothing to reflect. */
5091 } VMXREFLECTXCPT;
5092
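        /*
         * Summary of the decision below: a #PF hit while delivering a #PF is flagged as
         * a vectoring #PF; a contributory exception hit while delivering a contributory
         * exception or a #PF is promoted to a #DF; a further hardware exception hit
         * while delivering a #DF yields a triple fault; everything else is re-injected
         * as-is.
         */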
5093 /* See Intel spec. 30.7.1.1 "Reflecting Exceptions to Guest Software". */
5094 VMXREFLECTXCPT enmReflect = VMXREFLECTXCPT_NONE;
5095 if (VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo))
5096 {
5097 if (uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT)
5098 {
5099 enmReflect = VMXREFLECTXCPT_XCPT;
5100#ifdef VBOX_STRICT
5101 if ( hmR0VmxIsContributoryXcpt(uIdtVector)
5102 && uExitVector == X86_XCPT_PF)
5103 {
5104 Log4(("IDT: vcpu[%RU32] Contributory #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2));
5105 }
5106#endif
5107 if ( uExitVector == X86_XCPT_PF
5108 && uIdtVector == X86_XCPT_PF)
5109 {
5110 pVmxTransient->fVectoringPF = true;
5111 Log4(("IDT: vcpu[%RU32] Vectoring #PF uCR2=%#RX64\n", pVCpu->idCpu, pMixedCtx->cr2));
5112 }
5113 else if ( (pVCpu->hm.s.vmx.u32XcptBitmap & HMVMX_CONTRIBUTORY_XCPT_MASK)
5114 && hmR0VmxIsContributoryXcpt(uExitVector)
5115 && ( hmR0VmxIsContributoryXcpt(uIdtVector)
5116 || uIdtVector == X86_XCPT_PF))
5117 {
5118 enmReflect = VMXREFLECTXCPT_DF;
5119 }
5120 else if (uIdtVector == X86_XCPT_DF)
5121 enmReflect = VMXREFLECTXCPT_TF;
5122 }
5123 else if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
5124 || uIntType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT
5125 || uIntType == VMX_IDT_VECTORING_INFO_TYPE_NMI)
5126 {
5127 /*
5128 * Ignore software interrupts (INT n), software exceptions (#BP, #OF) and privileged software exceptions
5129 * (whatever they are) as they reoccur when restarting the instruction.
5130 */
5131 enmReflect = VMXREFLECTXCPT_XCPT;
5132 }
5133 }
5134 else
5135 {
5136 /*
5137 * If event delivery caused an EPT violation/misconfig or APIC access VM-exit, then the VM-exit
5138 * interruption-information will not be valid and we end up here. In such cases, it is sufficient to reflect the
5139 * original exception to the guest after handling the VM-exit.
5140 */
5141 if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
5142 || uIntType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT
5143 || uIntType == VMX_IDT_VECTORING_INFO_TYPE_NMI)
5144 {
5145 enmReflect = VMXREFLECTXCPT_XCPT;
5146 }
5147 }
5148
5149 switch (enmReflect)
5150 {
5151 case VMXREFLECTXCPT_XCPT:
5152 {
5153 Assert( uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT
5154 && uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
5155 && uIntType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT);
5156
5157 uint32_t u32ErrCode = 0;
5158 if (VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo))
5159 {
5160 rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
5161 AssertRCReturn(rc, rc);
5162 u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
5163 }
5164
5165 /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF. See hmR0VmxExitXcptPF(). */
5166 hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo),
5167 0 /* cbInstr */, u32ErrCode, pMixedCtx->cr2);
5168 rc = VINF_SUCCESS;
5169 Log4(("IDT: vcpu[%RU32] Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->idCpu,
5170 pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.u32ErrCode));
5171
5172 break;
5173 }
5174
5175 case VMXREFLECTXCPT_DF:
5176 {
5177 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
5178 rc = VINF_HM_DOUBLE_FAULT;
5179 Log4(("IDT: vcpu[%RU32] Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->idCpu,
5180 pVCpu->hm.s.Event.u64IntInfo, uIdtVector, uExitVector));
5181
5182 break;
5183 }
5184
5185 case VMXREFLECTXCPT_TF:
5186 {
5187 rc = VINF_EM_RESET;
5188 Log4(("IDT: vcpu[%RU32] Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", pVCpu->idCpu, uIdtVector,
5189 uExitVector));
5190 break;
5191 }
5192
5193 default:
5194 Assert(rc == VINF_SUCCESS);
5195 break;
5196 }
5197 }
5198 Assert(rc == VINF_SUCCESS || rc == VINF_HM_DOUBLE_FAULT || rc == VINF_EM_RESET);
5199 return rc;
5200}
5201
5202
5203/**
5204 * Saves the guest's CR0 register from the VMCS into the guest-CPU context.
5205 *
5206 * @returns VBox status code.
5207 * @param pVCpu Pointer to the VMCPU.
5208 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5209 * out-of-sync. Make sure to update the required fields
5210 * before using them.
5211 *
5212 * @remarks No-long-jump zone!!!
5213 */
5214static int hmR0VmxSaveGuestCR0(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5215{
5216 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR0))
5217 {
5218 uint32_t uVal = 0;
5219 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &uVal);
5220 AssertRCReturn(rc, rc);
5221
5222 uint32_t uShadow = 0;
5223 rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uShadow);
5224 AssertRCReturn(rc, rc);
5225
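        /* Reconstruct the guest-visible CR0: bits owned by the host (set in u32CR0Mask)
           are taken from the read shadow (what the guest last wrote), the remaining bits
           from the real VMCS guest CR0. */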
5226 uVal = (uShadow & pVCpu->hm.s.vmx.u32CR0Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR0Mask);
5227 CPUMSetGuestCR0(pVCpu, uVal);
5228 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_CR0;
5229 }
5230 return VINF_SUCCESS;
5231}
5232
5233
5234/**
5235 * Saves the guest's CR4 register from the VMCS into the guest-CPU context.
5236 *
5237 * @returns VBox status code.
5238 * @param pVCpu Pointer to the VMCPU.
5239 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5240 * out-of-sync. Make sure to update the required fields
5241 * before using them.
5242 *
5243 * @remarks No-long-jump zone!!!
5244 */
5245static int hmR0VmxSaveGuestCR4(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5246{
5247 int rc = VINF_SUCCESS;
5248 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR4))
5249 {
5250 uint32_t uVal = 0;
5251 uint32_t uShadow = 0;
5252 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &uVal);
5253 AssertRCReturn(rc, rc);
5254 rc = VMXReadVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uShadow);
5255 AssertRCReturn(rc, rc);
5256
5257 uVal = (uShadow & pVCpu->hm.s.vmx.u32CR4Mask) | (uVal & ~pVCpu->hm.s.vmx.u32CR4Mask);
5258 CPUMSetGuestCR4(pVCpu, uVal);
5259 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_CR4;
5260 }
5261 return rc;
5262}
5263
5264
5265/**
5266 * Saves the guest's RIP register from the VMCS into the guest-CPU context.
5267 *
5268 * @returns VBox status code.
5269 * @param pVCpu Pointer to the VMCPU.
5270 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5271 * out-of-sync. Make sure to update the required fields
5272 * before using them.
5273 *
5274 * @remarks No-long-jump zone!!!
5275 */
5276static int hmR0VmxSaveGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5277{
5278 int rc = VINF_SUCCESS;
5279 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RIP))
5280 {
5281 uint64_t u64Val = 0;
5282 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val);
5283 AssertRCReturn(rc, rc);
5284
5285 pMixedCtx->rip = u64Val;
5286 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_RIP;
5287 }
5288 return rc;
5289}
5290
5291
5292/**
5293 * Saves the guest's RSP register from the VMCS into the guest-CPU context.
5294 *
5295 * @returns VBox status code.
5296 * @param pVCpu Pointer to the VMCPU.
5297 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5298 * out-of-sync. Make sure to update the required fields
5299 * before using them.
5300 *
5301 * @remarks No-long-jump zone!!!
5302 */
5303static int hmR0VmxSaveGuestRsp(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5304{
5305 int rc = VINF_SUCCESS;
5306 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RSP))
5307 {
5308 uint64_t u64Val = 0;
5309 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val);
5310 AssertRCReturn(rc, rc);
5311
5312 pMixedCtx->rsp = u64Val;
5313 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_RSP;
5314 }
5315 return rc;
5316}
5317
5318
5319/**
5320 * Saves the guest's RFLAGS from the VMCS into the guest-CPU context.
5321 *
5322 * @returns VBox status code.
5323 * @param pVCpu Pointer to the VMCPU.
5324 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5325 * out-of-sync. Make sure to update the required fields
5326 * before using them.
5327 *
5328 * @remarks No-long-jump zone!!!
5329 */
5330static int hmR0VmxSaveGuestRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5331{
5332 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS))
5333 {
5334 uint32_t uVal = 0;
5335 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &uVal);
5336 AssertRCReturn(rc, rc);
5337
5338 pMixedCtx->eflags.u32 = uVal;
5339 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) /* Undo our real-on-v86-mode changes to eflags if necessary. */
5340 {
5341 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
5342 Log4(("Saving real-mode EFLAGS VT-x view=%#RX32\n", pMixedCtx->eflags.u32));
5343
5344 pMixedCtx->eflags.Bits.u1VM = 0;
5345 pMixedCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.Eflags.Bits.u2IOPL;
5346 }
5347
5348 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_RFLAGS;
5349 }
5350 return VINF_SUCCESS;
5351}
5352
5353
5354/**
5355 * Wrapper for saving the guest's RIP, RSP and RFLAGS from the VMCS into the
5356 * guest-CPU context.
5357 */
5358DECLINLINE(int) hmR0VmxSaveGuestRipRspRflags(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5359{
5360 int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
5361 rc |= hmR0VmxSaveGuestRsp(pVCpu, pMixedCtx);
5362 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
5363 return rc;
5364}
5365
5366
5367/**
5368 * Saves the guest's interruptibility-state ("interrupt shadow" as AMD calls it)
5369 * from the guest-state area in the VMCS.
5370 *
5371 * @param pVCpu Pointer to the VMCPU.
5372 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5373 * out-of-sync. Make sure to update the required fields
5374 * before using them.
5375 *
5376 * @remarks No-long-jump zone!!!
5377 */
5378static void hmR0VmxSaveGuestIntrState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5379{
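    /*
     * VT-x reports the interrupt shadow (blocking by STI or MOV SS/POP SS) in the
     * guest interruptibility-state field; translate it into VMCPU_FF_INHIBIT_INTERRUPTS
     * together with the RIP at which the inhibition applies.
     */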
5380 uint32_t uIntrState = 0;
5381 int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState);
5382 AssertRC(rc);
5383
5384 if (!uIntrState)
5385 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
5386 else
5387 {
5388 Assert( uIntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
5389 || uIntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
5390 rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
5391 AssertRC(rc);
5392 rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* for hmR0VmxGetGuestIntrState(). */
5393 AssertRC(rc);
5394
5395 EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip);
5396 Assert(VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
5397 }
5398}
5399
5400
5401/**
5402 * Saves the guest's activity state.
5403 *
5404 * @returns VBox status code.
5405 * @param pVCpu Pointer to the VMCPU.
5406 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5407 * out-of-sync. Make sure to update the required fields
5408 * before using them.
5409 *
5410 * @remarks No-long-jump zone!!!
5411 */
5412static int hmR0VmxSaveGuestActivityState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5413{
5414 /* Nothing to do for now until we make use of different guest-CPU activity state. Just update the flag. */
5415 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_ACTIVITY_STATE;
5416 return VINF_SUCCESS;
5417}
5418
5419
5420/**
5421 * Saves the guest SYSENTER MSRs (SYSENTER_CS, SYSENTER_EIP, SYSENTER_ESP) from
5422 * the current VMCS into the guest-CPU context.
5423 *
5424 * @returns VBox status code.
5425 * @param pVCpu Pointer to the VMCPU.
5426 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5427 * out-of-sync. Make sure to update the required fields
5428 * before using them.
5429 *
5430 * @remarks No-long-jump zone!!!
5431 */
5432static int hmR0VmxSaveGuestSysenterMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5433{
5434 int rc = VINF_SUCCESS;
5435 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR))
5436 {
5437 uint32_t u32Val = 0;
5438 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); AssertRCReturn(rc, rc);
5439 pMixedCtx->SysEnter.cs = u32Val;
5440 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_SYSENTER_CS_MSR;
5441 }
5442
5443 uint64_t u64Val = 0;
5444 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR))
5445 {
5446 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, &u64Val); AssertRCReturn(rc, rc);
5447 pMixedCtx->SysEnter.eip = u64Val;
5448 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_SYSENTER_EIP_MSR;
5449 }
5450 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR))
5451 {
5452 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, &u64Val); AssertRCReturn(rc, rc);
5453 pMixedCtx->SysEnter.esp = u64Val;
5454 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_SYSENTER_ESP_MSR;
5455 }
5456 return rc;
5457}
5458
5459
5460/**
5461 * Saves the guest FS_BASE MSRs from the current VMCS into the guest-CPU
5462 * context.
5463 *
5464 * @returns VBox status code.
5465 * @param pVCpu Pointer to the VMCPU.
5466 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5467 * out-of-sync. Make sure to update the required fields
5468 * before using them.
5469 *
5470 * @remarks No-long-jump zone!!!
5471 */
5472static int hmR0VmxSaveGuestFSBaseMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5473{
5474 int rc = VINF_SUCCESS;
5475 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_FS_BASE_MSR))
5476 {
5477 uint64_t u64Val = 0;
5478 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_FS_BASE, &u64Val); AssertRCReturn(rc, rc);
5479 pMixedCtx->fs.u64Base = u64Val;
5480 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_FS_BASE_MSR;
5481 }
5482 return rc;
5483}
5484
5485
5486/**
5487 * Saves the guest GS_BASE MSRs from the current VMCS into the guest-CPU
5488 * context.
5489 *
5490 * @returns VBox status code.
5491 * @param pVCpu Pointer to the VMCPU.
5492 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5493 * out-of-sync. Make sure to update the required fields
5494 * before using them.
5495 *
5496 * @remarks No-long-jump zone!!!
5497 */
5498static int hmR0VmxSaveGuestGSBaseMsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5499{
5500 int rc = VINF_SUCCESS;
5501 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_GS_BASE_MSR))
5502 {
5503 uint64_t u64Val = 0;
5504 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GS_BASE, &u64Val); AssertRCReturn(rc, rc);
5505 pMixedCtx->gs.u64Base = u64Val;
5506 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_GS_BASE_MSR;
5507 }
5508 return rc;
5509}
5510
5511
5512/**
5513 * Saves the auto load/store'd guest MSRs from the current VMCS into the
5514 * guest-CPU context. Currently these are LSTAR, STAR, SFMASK, KERNEL-GS BASE
5515 * and TSC_AUX.
5516 *
5517 * @returns VBox status code.
5518 * @param pVCpu Pointer to the VMCPU.
5519 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5520 * out-of-sync. Make sure to update the required fields
5521 * before using them.
5522 *
5523 * @remarks No-long-jump zone!!!
5524 */
5525static int hmR0VmxSaveGuestAutoLoadStoreMsrs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5526{
5527 if (pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS)
5528 return VINF_SUCCESS;
5529
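    /*
     * Each entry in the area is a VMXAUTOMSR (MSR index plus 64-bit value, mirroring
     * the VMX MSR-entry format); copy the values the CPU stored there on VM-exit back
     * into the guest context / CPUM.
     */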
5530#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
5531 for (uint32_t i = 0; i < pVCpu->hm.s.vmx.cGuestMsrs; i++)
5532 {
5533 PVMXAUTOMSR pMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr;
5534 pMsr += i;
5535 switch (pMsr->u32Msr)
5536 {
5537 case MSR_K8_LSTAR: pMixedCtx->msrLSTAR = pMsr->u64Value; break;
5538 case MSR_K6_STAR: pMixedCtx->msrSTAR = pMsr->u64Value; break;
5539 case MSR_K8_SF_MASK: pMixedCtx->msrSFMASK = pMsr->u64Value; break;
5540 case MSR_K8_TSC_AUX: CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value); break;
5541 case MSR_K8_KERNEL_GS_BASE: pMixedCtx->msrKERNELGSBASE = pMsr->u64Value; break;
5542 case MSR_K6_EFER: /* EFER can't be changed without causing a VM-exit. */ break;
5543 default:
5544 {
5545 AssertFailed();
5546 return VERR_HM_UNEXPECTED_LD_ST_MSR;
5547 }
5548 }
5549 }
5550#endif
5551
5552 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_AUTO_LOAD_STORE_MSRS;
5553 return VINF_SUCCESS;
5554}
5555
5556
5557/**
5558 * Saves the guest control registers from the current VMCS into the guest-CPU
5559 * context.
5560 *
5561 * @returns VBox status code.
5562 * @param pVCpu Pointer to the VMCPU.
5563 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5564 * out-of-sync. Make sure to update the required fields
5565 * before using them.
5566 *
5567 * @remarks No-long-jump zone!!!
5568 */
5569static int hmR0VmxSaveGuestControlRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5570{
5571 /* Guest CR0. Guest FPU. */
5572 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
5573 AssertRCReturn(rc, rc);
5574
5575 /* Guest CR4. */
5576 rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx);
5577 AssertRCReturn(rc, rc);
5578
5579 /* Guest CR2 - updated always during the world-switch or in #PF. */
5580 /* Guest CR3. Only changes with Nested Paging. This must be done -after- saving CR0 and CR4 from the guest! */
5581 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR3))
5582 {
5583 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR0);
5584 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR4);
5585
5586 PVM pVM = pVCpu->CTX_SUFF(pVM);
5587 if ( pVM->hm.s.vmx.fUnrestrictedGuest
5588 || ( pVM->hm.s.fNestedPaging
5589 && CPUMIsGuestPagingEnabledEx(pMixedCtx)))
5590 {
5591 uint64_t u64Val = 0;
5592 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_CR3, &u64Val);
5593 if (pMixedCtx->cr3 != u64Val)
5594 {
5595 CPUMSetGuestCR3(pVCpu, u64Val);
5596 if (VMMRZCallRing3IsEnabled(pVCpu))
5597 {
5598 PGMUpdateCR3(pVCpu, u64Val);
5599 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5600 }
5601 else
5602 {
5603 /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMUpdateCR3().*/
5604 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3);
5605 }
5606 }
5607
5608 /* If the guest is in PAE mode, sync back the PDPE's into the guest state. */
5609 if (CPUMIsGuestInPAEModeEx(pMixedCtx)) /* Reads CR0, CR4 and EFER MSR (EFER is always up-to-date). */
5610 {
5611 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &pVCpu->hm.s.aPdpes[0].u); AssertRCReturn(rc, rc);
5612 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &pVCpu->hm.s.aPdpes[1].u); AssertRCReturn(rc, rc);
5613 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &pVCpu->hm.s.aPdpes[2].u); AssertRCReturn(rc, rc);
5614 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc);
5615
5616 if (VMMRZCallRing3IsEnabled(pVCpu))
5617 {
5618 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
5619 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
5620 }
5621 else
5622 {
5623 /* Set the force flag to inform PGM about it when necessary. It is cleared by PGMGstUpdatePaePdpes(). */
5624 VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES);
5625 }
5626 }
5627 }
5628
5629 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_CR3;
5630 }
5631
5632 /*
5633 * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> hmR0VmxCallRing3Callback()
5634 * -> VMMRZCallRing3Disable() -> hmR0VmxSaveGuestState() -> Set VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp
5635 * -> continue with VM-exit handling -> hmR0VmxSaveGuestControlRegs() and here we are.
5636 *
5637 * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus
5638 * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that
5639 * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should
5640 * -NOT- check if HMVMX_UPDATED_GUEST_CR3 is already set or not!
5641 *
5642 * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here.
5643 */
5644 if (VMMRZCallRing3IsEnabled(pVCpu))
5645 {
5646 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
5647 PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu));
5648
5649 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
5650 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
5651
5652 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
5653 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
5654 }
5655
5656 return rc;
5657}
5658
5659
5660/**
5661 * Reads a guest segment register from the current VMCS into the guest-CPU
5662 * context.
5663 *
5664 * @returns VBox status code.
5665 * @param pVCpu Pointer to the VMCPU.
5666 * @param idxSel Index of the selector in the VMCS.
5667 * @param idxLimit Index of the segment limit in the VMCS.
5668 * @param idxBase Index of the segment base in the VMCS.
5669 * @param idxAccess Index of the access rights of the segment in the VMCS.
5670 * @param pSelReg Pointer to the segment selector.
5671 *
5672 * @remarks No-long-jump zone!!!
5673 * @remarks Never call this function directly!!! Use the VMXLOCAL_READ_SEG()
5674 * macro as that takes care of whether to read from the VMCS cache or
5675 * not.
5676 */
5677DECLINLINE(int) hmR0VmxReadSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess,
5678 PCPUMSELREG pSelReg)
5679{
5680 uint32_t u32Val = 0;
5681 int rc = VMXReadVmcs32(idxSel, &u32Val);
5682 AssertRCReturn(rc, rc);
5683 pSelReg->Sel = (uint16_t)u32Val;
5684 pSelReg->ValidSel = (uint16_t)u32Val;
5685 pSelReg->fFlags = CPUMSELREG_FLAGS_VALID;
5686
5687 rc = VMXReadVmcs32(idxLimit, &u32Val);
5688 AssertRCReturn(rc, rc);
5689 pSelReg->u32Limit = u32Val;
5690
5691 uint64_t u64Val = 0;
5692 rc = VMXReadVmcsGstNByIdxVal(idxBase, &u64Val);
5693 AssertRCReturn(rc, rc);
5694 pSelReg->u64Base = u64Val;
5695
5696 rc = VMXReadVmcs32(idxAccess, &u32Val);
5697 AssertRCReturn(rc, rc);
5698 pSelReg->Attr.u = u32Val;
5699
5700 /*
5701 * If VT-x marks the segment as unusable, most other bits remain undefined:
5702 * - For CS the L, D and G bits have meaning.
5703 * - For SS the DPL has meaning (it -is- the CPL for Intel and VBox).
5704 * - For the remaining data segments no bits are defined.
5705 *
5706 * The present bit and the unusable bit have been observed to be set at the
5707 * same time (the selector was supposed to be invalid as we started executing
5708 * a V8086 interrupt in ring-0).
5709 *
5710 * What is important for the rest of the VBox code is that the P bit is
5711 * cleared. Some of the other VBox code recognizes the unusable bit, but
5712 * AMD-V certainly doesn't, and REM doesn't really either. So, to be on the
5713 * safe side here, we'll strip off P and other bits we don't care about. If
5714 * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed.
5715 *
5716 * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers".
5717 */
5718 if (pSelReg->Attr.u & X86DESCATTR_UNUSABLE)
5719 {
5720 Assert(idxSel != VMX_VMCS16_GUEST_FIELD_TR); /* TR is the only selector that can never be unusable. */
5721
5722 /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */
5723 pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G
5724 | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT;
5725
5726 Log4(("hmR0VmxReadSegmentReg: Unusable idxSel=%#x attr=%#x -> %#x\n", idxSel, u32Val, pSelReg->Attr.u));
5727#ifdef DEBUG_bird
5728 AssertMsg((u32Val & ~X86DESCATTR_P) == pSelReg->Attr.u,
5729 ("%#x: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n",
5730 idxSel, u32Val, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit));
5731#endif
5732 }
5733 return VINF_SUCCESS;
5734}
5735
5736
5737#ifdef VMX_USE_CACHED_VMCS_ACCESSES
5738# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
5739 hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
5740 VMX_VMCS_GUEST_##Sel##_BASE_CACHE_IDX, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
5741#else
5742# define VMXLOCAL_READ_SEG(Sel, CtxSel) \
5743 hmR0VmxReadSegmentReg(pVCpu, VMX_VMCS16_GUEST_FIELD_##Sel, VMX_VMCS32_GUEST_##Sel##_LIMIT, \
5744 VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, &pMixedCtx->CtxSel)
5745#endif
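/* With VMX_USE_CACHED_VMCS_ACCESSES the segment base is read via its VMCS-cache index
   (set up by hmR0VmxInitVmcsReadCache() above) rather than directly from the VMCS,
   which is why hmR0VmxReadSegmentReg() must only ever be reached through this macro. */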
5746
5747
5748/**
5749 * Saves the guest segment registers from the current VMCS into the guest-CPU
5750 * context.
5751 *
5752 * @returns VBox status code.
5753 * @param pVCpu Pointer to the VMCPU.
5754 * @param pMixedCtx   Pointer to the guest-CPU context. The data may be
5755 * out-of-sync. Make sure to update the required fields
5756 * before using them.
5757 *
5758 * @remarks No-long-jump zone!!!
5759 */
5760static int hmR0VmxSaveGuestSegmentRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5761{
5762 /* Guest segment registers. */
5763 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_SEGMENT_REGS))
5764 {
5765 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); AssertRCReturn(rc, rc);
5766 rc = VMXLOCAL_READ_SEG(CS, cs); AssertRCReturn(rc, rc);
5767 rc = VMXLOCAL_READ_SEG(SS, ss); AssertRCReturn(rc, rc);
5768 rc = VMXLOCAL_READ_SEG(DS, ds); AssertRCReturn(rc, rc);
5769 rc = VMXLOCAL_READ_SEG(ES, es); AssertRCReturn(rc, rc);
5770 rc = VMXLOCAL_READ_SEG(FS, fs); AssertRCReturn(rc, rc);
5771 rc = VMXLOCAL_READ_SEG(GS, gs); AssertRCReturn(rc, rc);
5772
5773 /* Restore segment attributes for real-on-v86 mode hack. */
5774 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
5775 {
5776 pMixedCtx->cs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrCS.u;
5777 pMixedCtx->ss.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrSS.u;
5778 pMixedCtx->ds.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrDS.u;
5779 pMixedCtx->es.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrES.u;
5780 pMixedCtx->fs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrFS.u;
5781 pMixedCtx->gs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrGS.u;
5782 }
5783 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_SEGMENT_REGS;
5784 }
5785
5786 return VINF_SUCCESS;
5787}
5788
5789
5790/**
5791 * Saves the guest descriptor table registers and task register from the current
5792 * VMCS into the guest-CPU context.
5793 *
5794 * @returns VBox status code.
5795 * @param pVCpu Pointer to the VMCPU.
5796 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5797 * out-of-sync. Make sure to update the required fields
5798 * before using them.
5799 *
5800 * @remarks No-long-jump zone!!!
5801 */
5802static int hmR0VmxSaveGuestTableRegs(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5803{
5804 int rc = VINF_SUCCESS;
5805
5806 /* Guest LDTR. */
5807 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_LDTR))
5808 {
5809 rc = VMXLOCAL_READ_SEG(LDTR, ldtr);
5810 AssertRCReturn(rc, rc);
5811 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_LDTR;
5812 }
5813
5814 /* Guest GDTR. */
5815 uint64_t u64Val = 0;
5816 uint32_t u32Val = 0;
5817 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_GDTR))
5818 {
5819 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); AssertRCReturn(rc, rc);
5820 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc);
5821 pMixedCtx->gdtr.pGdt = u64Val;
5822 pMixedCtx->gdtr.cbGdt = u32Val;
5823 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_GDTR;
5824 }
5825
5826 /* Guest IDTR. */
5827 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_IDTR))
5828 {
5829 rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); AssertRCReturn(rc, rc);
5830 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRCReturn(rc, rc);
5831 pMixedCtx->idtr.pIdt = u64Val;
5832 pMixedCtx->idtr.cbIdt = u32Val;
5833 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_IDTR;
5834 }
5835
5836 /* Guest TR. */
5837 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_TR))
5838 {
5839 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
5840 AssertRCReturn(rc, rc);
5841
5842 /* For real-mode emulation using virtual-8086 mode we have the fake TSS (pRealModeTSS) in TR; don't save the fake one. */
5843 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
5844 {
5845 rc = VMXLOCAL_READ_SEG(TR, tr);
5846 AssertRCReturn(rc, rc);
5847 }
5848 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_TR;
5849 }
5850 return rc;
5851}
5852
5853#undef VMXLOCAL_READ_SEG
5854
5855
5856/**
5857 * Saves the guest debug-register DR7 from the current VMCS into the guest-CPU
5858 * context.
5859 *
5860 * @returns VBox status code.
5861 * @param pVCpu Pointer to the VMCPU.
5862 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5863 * out-of-sync. Make sure to update the required fields
5864 * before using them.
5865 *
5866 * @remarks No-long-jump zone!!!
5867 */
5868static int hmR0VmxSaveGuestDR7(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5869{
5870 if (!(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_DEBUG))
5871 {
5872 if (!pVCpu->hm.s.fUsingHyperDR7)
5873 {
5874 /* Upper 32-bits are always zero. See Intel spec. 2.7.3 "Loading and Storing Debug Registers". */
5875 uint32_t u32Val;
5876 int rc = VMXReadVmcs32(VMX_VMCS_GUEST_DR7, &u32Val); AssertRCReturn(rc, rc);
5877 pMixedCtx->dr[7] = u32Val;
5878 }
5879
5880 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_DEBUG;
5881 }
5882 return VINF_SUCCESS;
5883}
5884
5885
5886/**
5887 * Saves the guest APIC state from the current VMCS into the guest-CPU context.
5888 *
5889 * @returns VBox status code.
5890 * @param pVCpu Pointer to the VMCPU.
5891 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5892 * out-of-sync. Make sure to update the required fields
5893 * before using them.
5894 *
5895 * @remarks No-long-jump zone!!!
5896 */
5897static int hmR0VmxSaveGuestApicState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5898{
5899 /* Updating TPR is already done in hmR0VmxPostRunGuest(). Just update the flag. */
5900 pVCpu->hm.s.vmx.fUpdatedGuestState |= HMVMX_UPDATED_GUEST_APIC_STATE;
5901 return VINF_SUCCESS;
5902}
5903
5904
5905/**
5906 * Saves the entire guest state from the currently active VMCS into the
5907 * guest-CPU context. This essentially VMREADs all guest data.
5908 *
5909 * @returns VBox status code.
5910 * @param pVCpu Pointer to the VMCPU.
5911 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5912 * out-of-sync. Make sure to update the required fields
5913 * before using them.
5914 */
5915static int hmR0VmxSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5916{
5917 Assert(pVCpu);
5918 Assert(pMixedCtx);
5919
5920 if (pVCpu->hm.s.vmx.fUpdatedGuestState == HMVMX_UPDATED_GUEST_ALL)
5921 return VINF_SUCCESS;
5922
5923 /* Though we can longjmp to ring-3 due to log-flushes here and get recalled
5924 again on the ring-3 callback path, there is no real need to do so. */
5925 if (VMMRZCallRing3IsEnabled(pVCpu))
5926 VMMR0LogFlushDisable(pVCpu);
5927 else
5928 Assert(VMMR0IsLogFlushDisabled(pVCpu));
5929 Log4Func(("vcpu[%RU32]\n", pVCpu->idCpu));
5930
5931 int rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
5932 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestRipRspRflags failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5933
5934 rc = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
5935 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestControlRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5936
5937 rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
5938 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSegmentRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5939
5940 rc = hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx);
5941 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestTableRegs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5942
5943 rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
5944 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestDR7 failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5945
5946 rc = hmR0VmxSaveGuestSysenterMsrs(pVCpu, pMixedCtx);
5947 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestSysenterMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5948
5949 rc = hmR0VmxSaveGuestFSBaseMsr(pVCpu, pMixedCtx);
5950 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestFSBaseMsr failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5951
5952 rc = hmR0VmxSaveGuestGSBaseMsr(pVCpu, pMixedCtx);
5953 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestGSBaseMsr failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5954
5955 rc = hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx);
5956 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestAutoLoadStoreMsrs failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5957
5958 rc = hmR0VmxSaveGuestActivityState(pVCpu, pMixedCtx);
5959 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestActivityState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5960
5961 rc = hmR0VmxSaveGuestApicState(pVCpu, pMixedCtx);
5962 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveGuestApicState failed! rc=%Rrc (pVCpu=%p)\n", rc, pVCpu), rc);
5963
5964 AssertMsg(pVCpu->hm.s.vmx.fUpdatedGuestState == HMVMX_UPDATED_GUEST_ALL,
5965 ("Missed guest state bits while saving state; residue %RX32\n", pVCpu->hm.s.vmx.fUpdatedGuestState));
5966
5967 if (VMMRZCallRing3IsEnabled(pVCpu))
5968 VMMR0LogFlushEnable(pVCpu);
5969
5970 return rc;
5971}
5972
5973
5974/**
5975 * Checks per-VM and per-VCPU force flag actions that require us to go back to
5976 * ring-3 for one reason or another.
5977 *
5978 * @returns VBox status code (informational status codes included).
5979 * @retval VINF_SUCCESS if we don't have any actions that require going back to
5980 * ring-3.
5981 * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
5982 * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
5983 * interrupts).
5984 * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
5985 * all EMTs to be in ring-3.
5986 * @retval VINF_EM_RAW_TO_R3 if there are pending DMA requests.
5987 * @retval VINF_EM_NO_MEMORY if PGM is out of memory; we need to return
5988 * to the EM loop.
5989 *
5990 * @param pVM Pointer to the VM.
5991 * @param pVCpu Pointer to the VMCPU.
5992 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
5993 * out-of-sync. Make sure to update the required fields
5994 * before using them.
5995 */
5996static int hmR0VmxCheckForceFlags(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
5997{
5998 Assert(VMMRZCallRing3IsEnabled(pVCpu));
5999
6000 if ( VM_FF_IS_PENDING(pVM, !pVCpu->hm.s.fSingleInstruction
6001 ? VM_FF_HP_R0_PRE_HM_MASK : VM_FF_HP_R0_PRE_HM_STEP_MASK)
6002 || VMCPU_FF_IS_PENDING(pVCpu, !pVCpu->hm.s.fSingleInstruction
6003 ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
6004 {
6005 /* We need the control registers now, make sure the guest-CPU context is updated. */
6006 int rc3 = hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
6007 AssertRCReturn(rc3, rc3);
6008
6009 /* Pending HM CR3 sync. */
6010 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
6011 {
6012 int rc2 = PGMUpdateCR3(pVCpu, pMixedCtx->cr3);
6013 AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3,
6014 ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS);
6015 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
6016 }
6017
6018 /* Pending HM PAE PDPEs. */
6019 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES))
6020 {
6021 PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]);
6022 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES));
6023 }
6024
6025 /* Pending PGM CR3 sync. */
6026 if (VMCPU_FF_IS_PENDING(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
6027 {
6028 int rc2 = PGMSyncCR3(pVCpu, pMixedCtx->cr0, pMixedCtx->cr3, pMixedCtx->cr4,
6029 VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
6030 if (rc2 != VINF_SUCCESS)
6031 {
6032 AssertRC(rc2);
6033 Log4(("hmR0VmxCheckForceFlags: PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", rc2));
6034 return rc2;
6035 }
6036 }
6037
6038 /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
6039 if ( VM_FF_IS_PENDING(pVM, VM_FF_HM_TO_R3_MASK)
6040 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
6041 {
6042 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
6043 int rc2 = RT_UNLIKELY(VM_FF_IS_PENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
6044 Log4(("hmR0VmxCheckForceFlags: HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc2));
6045 return rc2;
6046 }
6047
6048 /* Pending VM request packets, such as hardware interrupts. */
6049 if ( VM_FF_IS_PENDING(pVM, VM_FF_REQUEST)
6050 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_REQUEST))
6051 {
6052 Log4(("hmR0VmxCheckForceFlags: Pending VM request forcing us back to ring-3\n"));
6053 return VINF_EM_PENDING_REQUEST;
6054 }
6055
6056 /* Pending PGM pool flushes. */
6057 if (VM_FF_IS_PENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
6058 {
6059 Log4(("hmR0VmxCheckForceFlags: PGM pool flush pending forcing us back to ring-3\n"));
6060 return VINF_PGM_POOL_FLUSH_PENDING;
6061 }
6062
6063 /* Pending DMA requests. */
6064 if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_DMA))
6065 {
6066 Log4(("hmR0VmxCheckForceFlags: Pending DMA request forcing us back to ring-3\n"));
6067 return VINF_EM_RAW_TO_R3;
6068 }
6069 }
6070
6071 return VINF_SUCCESS;
6072}
6073
6074
6075/**
6076 * Converts any TRPM trap into a pending HM event. This is typically used when
6077 * entering from ring-3 (not longjmp returns).
6078 *
6079 * @param pVCpu Pointer to the VMCPU.
6080 */
6081static void hmR0VmxTrpmTrapToPendingEvent(PVMCPU pVCpu)
6082{
6083 Assert(TRPMHasTrap(pVCpu));
6084 Assert(!pVCpu->hm.s.Event.fPending);
6085
6086 uint8_t uVector;
6087 TRPMEVENT enmTrpmEvent;
6088 RTGCUINT uErrCode;
6089 RTGCUINTPTR GCPtrFaultAddress;
6090 uint8_t cbInstr;
6091
6092 int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr);
6093 AssertRC(rc);
6094
6095 /* See Intel spec. 24.8.3 "VM-entry Controls for Event Injection" for the format of u32IntInfo. */
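/* Layout of the interruption-information value assembled below (Intel spec. 24.8.3):
     bits  7:0 - vector,
     bits 10:8 - interruption type (external interrupt, NMI, HW/SW exception, SW interrupt),
     bit  11   - deliver error code,
     bit  31   - valid. */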
6096 uint32_t u32IntInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID;
6097 if (enmTrpmEvent == TRPM_TRAP)
6098 {
6099 switch (uVector)
6100 {
6101 case X86_XCPT_BP:
6102 case X86_XCPT_OF:
6103 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6104 break;
6105
6106 case X86_XCPT_PF:
6107 case X86_XCPT_DF:
6108 case X86_XCPT_TS:
6109 case X86_XCPT_NP:
6110 case X86_XCPT_SS:
6111 case X86_XCPT_GP:
6112 case X86_XCPT_AC:
6113 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
6114 /* no break! */
6115 default:
6116 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6117 break;
6118 }
6119 }
6120 else if (enmTrpmEvent == TRPM_HARDWARE_INT)
6121 {
6122 if (uVector == X86_XCPT_NMI)
6123 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6124 else
6125 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6126 }
6127 else if (enmTrpmEvent == TRPM_SOFTWARE_INT)
6128 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6129 else
6130 AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent));
6131
6132 rc = TRPMResetTrap(pVCpu);
6133 AssertRC(rc);
6134 Log4(("TRPM->HM event: u32IntInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n",
6135 u32IntInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress));
6136
6137 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, uErrCode, GCPtrFaultAddress);
6138 STAM_COUNTER_DEC(&pVCpu->hm.s.StatInjectPendingReflect);
6139}
6140
6141
6142/**
6143 * Converts any pending HM event into a TRPM trap. Typically used when leaving
6144 * VT-x to execute any instruction.
6145 *
6146 * @param pVCpu Pointer to the VMCPU.
6147 */
6148static void hmR0VmxPendingEventToTrpmTrap(PVMCPU pVCpu)
6149{
6150 Assert(pVCpu->hm.s.Event.fPending);
6151
6152 uint32_t uVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo);
6153 uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVCpu->hm.s.Event.u64IntInfo);
6154 bool fErrorCodeValid = !!VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntInfo);
6155 uint32_t uErrorCode = pVCpu->hm.s.Event.u32ErrCode;
6156
6157 /* If a trap was already pending, we did something wrong! */
6158 Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
6159
6160 TRPMEVENT enmTrapType;
6161 switch (uVectorType)
6162 {
6163 case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT:
6164 case VMX_IDT_VECTORING_INFO_TYPE_NMI:
6165 enmTrapType = TRPM_HARDWARE_INT;
6166 break;
6167
6168 case VMX_IDT_VECTORING_INFO_TYPE_SW_INT:
6169 enmTrapType = TRPM_SOFTWARE_INT;
6170 break;
6171
6172 case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT:
6173 case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT: /* #BP and #OF */
6174 case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT:
6175 enmTrapType = TRPM_TRAP;
6176 break;
6177
6178 default:
6179 AssertMsgFailed(("Invalid trap type %#x\n", uVectorType));
6180 enmTrapType = TRPM_32BIT_HACK;
6181 break;
6182 }
6183
6184 Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType));
6185
6186 int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
6187 AssertRC(rc);
6188
6189 if (fErrorCodeValid)
6190 TRPMSetErrorCode(pVCpu, uErrorCode);
6191
6192 if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
6193 && uVector == X86_XCPT_PF)
6194 {
6195 TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
6196 }
6197 else if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
6198 || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
6199 || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
6200 {
6201 AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT
6202 || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF),
6203 ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType));
6204 TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
6205 }
6206 pVCpu->hm.s.Event.fPending = false;
6207}
6208
6209
6210/**
6211 * Does the necessary state syncing before returning to ring-3 for any reason
6212 * (longjmp, preemption, voluntary exits to ring-3) from VT-x.
6213 *
6214 * @returns VBox status code.
6215 * @param pVM Pointer to the VM.
6216 * @param pVCpu Pointer to the VMCPU.
6217 * @param pMixedCtx Pointer to the guest-CPU context. The data may
6218 * be out-of-sync. Make sure to update the required
6219 * fields before using them.
6220 * @param fSaveGuestState Whether to save the guest state or not.
6221 *
6222 * @remarks If you modify code here, make sure to check whether
6223 * hmR0VmxCallRing3Callback() needs to be updated too.
6224 * @remarks No-long-jmp zone!!!
6225 */
6226static int hmR0VmxLeave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fSaveGuestState)
6227{
6228 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6229 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6230
6231 RTCPUID idCpu = RTMpCpuId();
6232 Log4Func(("HostCpuId=%u\n", idCpu));
6233
6234 /* Save the guest state if necessary. */
6235 if ( fSaveGuestState
6236 && pVCpu->hm.s.vmx.fUpdatedGuestState != HMVMX_UPDATED_GUEST_ALL)
6237 {
6238 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
6239 AssertRCReturn(rc, rc);
6240 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState == HMVMX_UPDATED_GUEST_ALL);
6241 }
6242
6243 /* Restore host FPU state if necessary and resync on next R0 reentry. */
6244 if (CPUMIsGuestFPUStateActive(pVCpu))
6245 {
6246 /* We shouldn't reload CR0 without saving it first. */
6247 if (!fSaveGuestState)
6248 {
6249 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
6250 AssertRCReturn(rc, rc);
6251 }
6252 CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx);
6253 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
6254 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
6255 }
6256
6257 /* Restore host debug registers if necessary and resync on next R0 reentry. */
6258#ifdef VBOX_STRICT
6259 if (CPUMIsHyperDebugStateActive(pVCpu))
6260 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT);
6261#endif
6262 if (CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */))
6263 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG);
6264 Assert(!CPUMIsGuestDebugStateActive(pVCpu) && !CPUMIsGuestDebugStateActivePending(pVCpu));
6265 Assert(!CPUMIsHyperDebugStateActive(pVCpu) && !CPUMIsHyperDebugStateActivePending(pVCpu));
6266
6267#if HC_ARCH_BITS == 64
6268 /* Restore host-state bits that VT-x only restores partially. */
6269 if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
6270 && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
6271 {
6272 Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, idCpu));
6273 VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
6274 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
6275 }
6276#endif
6277
6278 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry);
6279 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatLoadGuestState);
6280 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1);
6281 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2);
6282 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO);
6283 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx);
6284 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi);
6285 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
6286
6287 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
6288
6289 /** @todo This kinda defeats the purpose of having preemption hooks.
6290 * The problem is, deregistering the hooks should be moved to a place that
6291 * lasts until the EMT is about to be destroyed not everytime while leaving HM
6292 * lasts until the EMT is about to be destroyed, not every time we leave HM
6293 * context.
6294 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE)
6295 {
6296 int rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
6297 AssertRCReturn(rc, rc);
6298
6299 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
6300 Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu));
6301 }
6302 Assert(!(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_LAUNCHED));
6303 NOREF(idCpu);
6304
6305 return VINF_SUCCESS;
6306}
6307
6308
6309/**
6310 * Leaves the VT-x session.
6311 *
6312 * @returns VBox status code.
6313 * @param pVM Pointer to the VM.
6314 * @param pVCpu Pointer to the VMCPU.
6315 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6316 * out-of-sync. Make sure to update the required fields
6317 * before using them.
6318 *
6319 * @remarks No-long-jmp zone!!!
6320 */
6321DECLINLINE(int) hmR0VmxLeaveSession(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6322{
6323 HM_DISABLE_PREEMPT_IF_NEEDED();
6324 HMVMX_ASSERT_CPU_SAFE();
6325 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
6326 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
6327
6328 /* When thread-context hooks are used, we can avoid doing the leave again if we were preempted
6329 before and have already done it from the VMXR0ThreadCtxCallback(). */
6330 if (!pVCpu->hm.s.fLeaveDone)
6331 {
6332 int rc2 = hmR0VmxLeave(pVM, pVCpu, pMixedCtx, true /* fSaveGuestState */);
6333 AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT_IF_NEEDED(), rc2);
6334 pVCpu->hm.s.fLeaveDone = true;
6335 }
6336
6337 /* Deregister hook now that we've left HM context before re-enabling preemption. */
6338 /** @todo This is bad. Deregistering here means we need to VMCLEAR always
6339 * (longjmp/exit-to-r3) in VT-x which is not efficient. */
6340 if (VMMR0ThreadCtxHooksAreRegistered(pVCpu))
6341 VMMR0ThreadCtxHooksDeregister(pVCpu);
6342
6343 /* Leave HM context. This takes care of local init (term). */
6344 int rc = HMR0LeaveCpu(pVCpu);
6345
6346 HM_RESTORE_PREEMPT_IF_NEEDED();
6347
6348 return rc;
6349}
6350
6351
6352/**
6353 * Does the necessary state syncing before doing a longjmp to ring-3.
6354 *
6355 * @returns VBox status code.
6356 * @param pVM Pointer to the VM.
6357 * @param pVCpu Pointer to the VMCPU.
6358 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6359 * out-of-sync. Make sure to update the required fields
6360 * before using them.
6361 *
6362 * @remarks No-long-jmp zone!!!
6363 */
6364DECLINLINE(int) hmR0VmxLongJmpToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6365{
6366 return hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx);
6367}
6368
6369
6370/**
6371 * Takes necessary actions before going back to ring-3.
6372 *
6373 * An action requires us to go back to ring-3. This function does the necessary
6374 * steps before we can safely return to ring-3. This is not the same as longjmps
6375 * to ring-3; this is voluntary and prepares the guest so it may continue
6376 * executing outside HM (recompiler/IEM).
6377 *
6378 * @returns VBox status code.
6379 * @param pVM Pointer to the VM.
6380 * @param pVCpu Pointer to the VMCPU.
6381 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6382 * out-of-sync. Make sure to update the required fields
6383 * before using them.
6384 * @param rcExit The reason for exiting to ring-3. Can be
6385 * VINF_VMM_UNKNOWN_RING3_CALL.
6386 */
6387static int hmR0VmxExitToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, int rcExit)
6388{
6389 Assert(pVM);
6390 Assert(pVCpu);
6391 Assert(pMixedCtx);
6392 HMVMX_ASSERT_PREEMPT_SAFE();
6393
6394 if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR))
6395 {
6396 VMXGetActivatedVmcs(&pVCpu->hm.s.vmx.LastError.u64VMCSPhys);
6397 pVCpu->hm.s.vmx.LastError.u32VMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs;
6398 pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
6399 /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */
6400 }
6401
6402 /* Please, no longjumps here (any log flush shouldn't jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
6403 VMMRZCallRing3Disable(pVCpu);
6404 Log4(("hmR0VmxExitToRing3: pVCpu=%p idCpu=%RU32 rcExit=%d\n", pVCpu, pVCpu->idCpu, rcExit));
6405
6406 /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring-3. */
6407 if (pVCpu->hm.s.Event.fPending)
6408 {
6409 hmR0VmxPendingEventToTrpmTrap(pVCpu);
6410 Assert(!pVCpu->hm.s.Event.fPending);
6411 }
6412
6413 /* Save guest state and restore host state bits. */
6414 int rc = hmR0VmxLeaveSession(pVM, pVCpu, pMixedCtx);
6415 AssertRCReturn(rc, rc);
6416 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
6417
6418 /* Sync recompiler state. */
6419 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
6420 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
6421 | CPUM_CHANGED_LDTR
6422 | CPUM_CHANGED_GDTR
6423 | CPUM_CHANGED_IDTR
6424 | CPUM_CHANGED_TR
6425 | CPUM_CHANGED_HIDDEN_SEL_REGS);
6426 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_CR0);
6427 if ( pVM->hm.s.fNestedPaging
6428 && CPUMIsGuestPagingEnabledEx(pMixedCtx))
6429 {
6430 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH);
6431 }
6432
6433 Assert(!pVCpu->hm.s.fClearTrapFlag);
6434
6435 /* On our way back from ring-3, reload the guest state if there is a possibility that it has been changed. */
6436 if (rcExit != VINF_EM_RAW_INTERRUPT)
6437 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
6438
6439 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
6440
6441 /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */
6442 VMMRZCallRing3RemoveNotification(pVCpu);
6443 VMMRZCallRing3Enable(pVCpu);
6444
6445 return rc;
6446}
6447
6448
6449/**
6450 * VMMRZCallRing3() callback wrapper which saves the guest state before we
6451 * longjump to ring-3 and possibly get preempted.
6452 *
6453 * @returns VBox status code.
6454 * @param pVCpu Pointer to the VMCPU.
6455 * @param enmOperation The operation causing the ring-3 longjump.
6456 * @param pvUser Opaque pointer to the guest-CPU context. The data
6457 * may be out-of-sync. Make sure to update the required
6458 * fields before using them.
6459 * @remarks If you modify code here, make sure to check whether
6460 * hmR0VmxLeave() needs to be updated too.
6461 */
6462DECLCALLBACK(int) hmR0VmxCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser)
6463{
6464 if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION)
6465 {
6466 VMMRZCallRing3RemoveNotification(pVCpu);
6467 HM_DISABLE_PREEMPT_IF_NEEDED();
6468
6469 /* If anything here asserts or fails, good luck. */
6470 if (CPUMIsGuestFPUStateActive(pVCpu))
6471 CPUMR0SaveGuestFPU(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser);
6472
6473 CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */);
6474
6475#if HC_ARCH_BITS == 64
6476 /* Restore host-state bits that VT-x only restores partially. */
6477 if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED)
6478 && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED))
6479 {
6480 VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost);
6481 pVCpu->hm.s.vmx.fRestoreHostFlags = 0;
6482 }
6483#endif
6484 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
6485 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_ACTIVE)
6486 {
6487 VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
6488 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_CLEAR;
6489 }
6490
6491 if (VMMR0ThreadCtxHooksAreRegistered(pVCpu))
6492 VMMR0ThreadCtxHooksDeregister(pVCpu);
6493
6494 HMR0LeaveCpu(pVCpu);
6495 HM_RESTORE_PREEMPT_IF_NEEDED();
6496 return VINF_SUCCESS;
6497 }
6498
6499 Assert(pVCpu);
6500 Assert(pvUser);
6501 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6502 HMVMX_ASSERT_PREEMPT_SAFE();
6503
6504 VMMRZCallRing3Disable(pVCpu);
6505 Assert(VMMR0IsLogFlushDisabled(pVCpu));
6506
6507 Log4(("hmR0VmxCallRing3Callback->hmR0VmxLongJmpToRing3 pVCpu=%p idCpu=%RU32 enmOperation=%d\n", pVCpu, pVCpu->idCpu,
6508 enmOperation));
6509
6510 int rc = hmR0VmxLongJmpToRing3(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser);
6511 AssertRCReturn(rc, rc);
6512
6513 VMMRZCallRing3Enable(pVCpu);
6514 return VINF_SUCCESS;
6515}
6516
6517
6518/**
6519 * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to
6520 * cause a VM-exit as soon as the guest is in a state to receive interrupts.
6521 *
6522 * @param pVCpu Pointer to the VMCPU.
6523 */
6524DECLINLINE(void) hmR0VmxSetIntWindowExitVmcs(PVMCPU pVCpu)
6525{
6526 if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.VmxProcCtls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT))
6527 {
6528 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT))
6529 {
6530 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
6531 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
6532 AssertRC(rc);
6533 }
6534 } /* else we will deliver interrupts whenever the guest exits next and is in a state to receive events. */
6535}
6536
6537
6538/**
6539 * Evaluates the event to be delivered to the guest and sets it as the pending
6540 * event.
6541 *
6542 * @param pVCpu Pointer to the VMCPU.
6543 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6544 * out-of-sync. Make sure to update the required fields
6545 * before using them.
6546 */
6547static void hmR0VmxEvaluatePendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6548{
6549 Assert(!pVCpu->hm.s.Event.fPending);
6550
6551 /* Get the current interruptibility-state of the guest and then figure out what can be injected. */
6552 uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx);
6553 bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
6554 bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
6555
6556 Assert(!fBlockSti || (pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS));
6557 Assert( !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI) /* We don't support block-by-NMI and SMI yet.*/
6558 && !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI));
6559 Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
6560 Assert(!TRPMHasTrap(pVCpu));
6561
6562 /** @todo SMI. SMIs take priority over NMIs. */
6563 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts. */
6564 {
6565 /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */
6566 if ( !fBlockMovSS
6567 && !fBlockSti)
6568 {
6570 Log4(("Pending NMI vcpu[%RU32]\n", pVCpu->idCpu));
6571 uint32_t u32IntInfo = X86_XCPT_NMI | VMX_EXIT_INTERRUPTION_INFO_VALID;
6572 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6573
6574 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddres */);
6575 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
6576 }
6577 else
6578 hmR0VmxSetIntWindowExitVmcs(pVCpu);
6579 }
6580 else if ( VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
6581 && !pVCpu->hm.s.fSingleInstruction)
6582 {
6583 /*
6584 * Check if the guest can receive external interrupts (PIC/APIC). Once we do PDMGetInterrupt() we -must- deliver
6585 * the interrupt ASAP. We must not execute any guest code until we have injected the interrupt, which is why it is
6586 * evaluated here rather than merely set as pending based solely on the force-flags.
6587 */
6588 int rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
6589 AssertRC(rc);
6590 const bool fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF);
6591 if ( !fBlockInt
6592 && !fBlockSti
6593 && !fBlockMovSS)
6594 {
6595 uint8_t u8Interrupt;
6596 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
6597 if (RT_SUCCESS(rc))
6598 {
6599 Log4(("Pending interrupt vcpu[%RU32] u8Interrupt=%#x \n", pVCpu->idCpu, u8Interrupt));
6600 uint32_t u32IntInfo = u8Interrupt | VMX_EXIT_INTERRUPTION_INFO_VALID;
6601 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6602
6603 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrfaultAddress */);
6604 }
6605 else
6606 {
6607 /** @todo Does this actually happen? If not turn it into an assertion. */
6608 Assert(!VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)));
6609 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
6610 }
6611 }
6612 else
6613 hmR0VmxSetIntWindowExitVmcs(pVCpu);
6614 }
6615}
6616
6617
6618/**
6619 * Injects any pending events into the guest if the guest is in a state to
6620 * receive them.
6621 *
6622 * @returns VBox status code (informational status codes included).
6623 * @param pVCpu Pointer to the VMCPU.
6624 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6625 * out-of-sync. Make sure to update the required fields
6626 * before using them.
6627 */
6628static int hmR0VmxInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6629{
6630 HMVMX_ASSERT_PREEMPT_SAFE();
6631 Assert(VMMRZCallRing3IsEnabled(pVCpu));
6632
6633 /* Get the current interruptibility-state of the guest and then figure out what can be injected. */
6634 uint32_t uIntrState = hmR0VmxGetGuestIntrState(pVCpu, pMixedCtx);
6635 bool fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
6636 bool fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
6637
6638 Assert(!fBlockSti || (pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS));
6639 Assert( !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI) /* We don't support block-by-NMI and SMI yet.*/
6640 && !(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI));
6641 Assert(!fBlockSti || pMixedCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */
6642 Assert(!TRPMHasTrap(pVCpu));
6643
6644 int rc = VINF_SUCCESS;
6645 if (pVCpu->hm.s.Event.fPending)
6646 {
6647#if defined(VBOX_STRICT) || defined(VBOX_WITH_STATISTICS)
6648 uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo);
6649 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
6650 {
6651 rc = hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
6652 AssertRCReturn(rc, rc);
6653 const bool fBlockInt = !(pMixedCtx->eflags.u32 & X86_EFL_IF);
6654 Assert(!fBlockInt);
6655 Assert(!fBlockSti);
6656 Assert(!fBlockMovSS);
6657 }
6658 else if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
6659 {
6660 Assert(!fBlockSti);
6661 Assert(!fBlockMovSS);
6662 }
6663#endif
6664 Log4(("Injecting pending event vcpu[%RU32] u64IntInfo=%#RX64\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntInfo));
6665 rc = hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.cbInstr,
6666 pVCpu->hm.s.Event.u32ErrCode, pVCpu->hm.s.Event.GCPtrFaultAddress, &uIntrState);
6667 AssertRCReturn(rc, rc);
6668
6669 /* Update the interruptibility-state as it could have been changed by
6670 hmR0VmxInjectEventVmcs() (e.g. real-on-v86 guest injecting software interrupts) */
6671 fBlockMovSS = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS);
6672 fBlockSti = RT_BOOL(uIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
6673
6674#ifdef VBOX_WITH_STATISTICS
6675 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
6676 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt);
6677 else
6678 STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt);
6679#endif
6680 }
6681
6682 /* Deliver the pending debug exception if the guest is single-stepping. Evaluate and set the BS bit. */
6683 int rc2 = VINF_SUCCESS;
6684 if ( fBlockSti
6685 || fBlockMovSS)
6686 {
6687 if (!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu))
6688 {
6689 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RFLAGS);
6690 if (pMixedCtx->eflags.Bits.u1TF) /* We don't have any IA32_DEBUGCTL MSR for guests. Treat as all bits 0. */
6691 {
6692 /*
6693 * The pending-debug exceptions field is cleared on all VM-exits except VMX_EXIT_TPR_BELOW_THRESHOLD,
6694 * VMX_EXIT_MTF, VMX_EXIT_APIC_WRITE and VMX_EXIT_VIRTUALIZED_EOI.
6695 * See Intel spec. 27.3.4 "Saving Non-Register State".
6696 */
6697 rc2 = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, VMX_VMCS_GUEST_DEBUG_EXCEPTIONS_BS);
6698 AssertRCReturn(rc2, rc2);
6699 }
6700 }
6701 else
6702 {
6703 /* We are single-stepping in the hypervisor debugger, clear interrupt inhibition as setting the BS bit would mean
6704 delivering a #DB to the guest upon VM-entry when it shouldn't be. */
6705 uIntrState = 0;
6706 }
6707 }
6708
6709 /*
6710 * There's no need to clear the VM-entry interruption-information field here if we're not injecting anything.
6711 * VT-x clears the valid bit on every VM-exit. See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection".
6712 */
6713 rc2 = hmR0VmxLoadGuestIntrState(pVCpu, uIntrState);
6714 AssertRC(rc2);
6715
6716 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET);
6717 return rc;
6718}
6719
6720
6721/**
6722 * Sets an invalid-opcode (#UD) exception as pending-for-injection into the VM.
6723 *
6724 * @param pVCpu Pointer to the VMCPU.
6725 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6726 * out-of-sync. Make sure to update the required fields
6727 * before using them.
6728 */
6729DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6730{
6731 uint32_t u32IntInfo = X86_XCPT_UD | VMX_EXIT_INTERRUPTION_INFO_VALID;
6732 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
6733}
6734
6735
6736/**
6737 * Injects a double-fault (#DF) exception into the VM.
6738 *
6739 * @returns VBox status code (informational status code included).
6740 * @param pVCpu Pointer to the VMCPU.
6741 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6742 * out-of-sync. Make sure to update the required fields
6743 * before using them.
 * @param puIntrState Pointer to the current guest interruptibility-state; will be
 * updated if necessary.
6744 */
6745DECLINLINE(int) hmR0VmxInjectXcptDF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t *puIntrState)
6746{
6747 uint32_t u32IntInfo = X86_XCPT_DF | VMX_EXIT_INTERRUPTION_INFO_VALID;
6748 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6749 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
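/* Note: #DF always has an error code of zero, which is why 0 is passed as u32ErrCode below. */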
6750 return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */,
6751 puIntrState);
6752}
6753
6754
6755/**
6756 * Sets a debug (#DB) exception as pending-for-injection into the VM.
6757 *
6758 * @param pVCpu Pointer to the VMCPU.
6759 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6760 * out-of-sync. Make sure to update the required fields
6761 * before using them.
6762 */
6763DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
6764{
6765 uint32_t u32IntInfo = X86_XCPT_DB | VMX_EXIT_INTERRUPTION_INFO_VALID;
6766 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6767 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
6768}
6769
6770
6771/**
6772 * Sets an overflow (#OF) exception as pending-for-injection into the VM.
6773 *
6774 * @param pVCpu Pointer to the VMCPU.
6775 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6776 * out-of-sync. Make sure to update the required fields
6777 * before using them.
6778 * @param cbInstr The instruction length in bytes, used to compute the
6779 * return RIP that is pushed on the guest stack.
6780 */
6781DECLINLINE(void) hmR0VmxSetPendingXcptOF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint32_t cbInstr)
6782{
6783 uint32_t u32IntInfo = X86_XCPT_OF | VMX_EXIT_INTERRUPTION_INFO_VALID;
6784 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6785 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
6786}
6787
6788
6789/**
6790 * Injects a general-protection (#GP) fault into the VM.
6791 *
6792 * @returns VBox status code (informational status code included).
6793 * @param pVCpu Pointer to the VMCPU.
6794 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6795 * out-of-sync. Make sure to update the required fields
6796 * before using them.
 * @param fErrorCodeValid Whether to set the error-code-valid bit of the
 * VM-entry interruption-information field.
6797 * @param u32ErrorCode The error code associated with the #GP.
 * @param puIntrState Pointer to the current guest interruptibility-state; will be
 * updated if necessary.
6798 */
6799DECLINLINE(int) hmR0VmxInjectXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, bool fErrorCodeValid, uint32_t u32ErrorCode,
6800 uint32_t *puIntrState)
6801{
6802 uint32_t u32IntInfo = X86_XCPT_GP | VMX_EXIT_INTERRUPTION_INFO_VALID;
6803 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6804 if (fErrorCodeValid)
6805 u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
6806 return hmR0VmxInjectEventVmcs(pVCpu, pMixedCtx, u32IntInfo, 0 /* cbInstr */, u32ErrorCode, 0 /* GCPtrFaultAddress */,
6807 puIntrState);
6808}
6809
6810
6811/**
6812 * Sets a software interrupt (INTn) as pending-for-injection into the VM.
6813 *
6814 * @param pVCpu Pointer to the VMCPU.
6815 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
6816 * out-of-sync. Make sure to update the required fields
6817 * before using them.
6818 * @param uVector The software interrupt vector number.
6819 * @param cbInstr The instruction length in bytes, used to compute the
6820 * return RIP that is pushed on the guest stack.
6821 */
6822DECLINLINE(void) hmR0VmxSetPendingIntN(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint16_t uVector, uint32_t cbInstr)
6823{
6824 uint32_t u32IntInfo = uVector | VMX_EXIT_INTERRUPTION_INFO_VALID;
6825 if ( uVector == X86_XCPT_BP
6826 || uVector == X86_XCPT_OF)
6827 {
6828 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6829 }
6830 else
6831 u32IntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
6832 hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */);
6833}
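/* Illustrative (hypothetical) use: for a guest "INT 21h" instruction that is 2 bytes long, a caller
   would queue it with hmR0VmxSetPendingIntN(pVCpu, pMixedCtx, 0x21, 2), so that on injection the
   return RIP pushed on the guest stack points past the INT instruction. */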
6834
6835
6836/**
6837 * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's
6838 * stack.
6839 *
6840 * @returns VBox status code (informational status codes included).
6841 * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault.
6842 * @param pVM Pointer to the VM.
6843 * @param pMixedCtx Pointer to the guest-CPU context.
6844 * @param uValue The value to push to the guest stack.
6845 */
6846DECLINLINE(int) hmR0VmxRealModeGuestStackPush(PVM pVM, PCPUMCTX pMixedCtx, uint16_t uValue)
6847{
6848 /*
6849 * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in
6850 * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers".
6851 * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound".
6852 */
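/* Pushing a 16-bit value with SP == 1 would wrap around the stack segment mid-write (see the spec
   references above); the code below treats that case as a triple fault. */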
6853 if (pMixedCtx->sp == 1)
6854 return VINF_EM_RESET;
6855 pMixedCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */
6856 int rc = PGMPhysSimpleWriteGCPhys(pVM, pMixedCtx->ss.u64Base + pMixedCtx->sp, &uValue, sizeof(uint16_t));
6857 AssertRCReturn(rc, rc);
6858 return rc;
6859}
6860
6861
6862/**
6863 * Injects an event into the guest upon VM-entry by updating the relevant fields
6864 * in the VM-entry area in the VMCS.
6865 *
6866 * @returns VBox status code (informational error codes included).
6867 * @retval VINF_SUCCESS if the event is successfully injected into the VMCS.
6868 * @retval VINF_EM_RESET if event injection resulted in a triple-fault.
6869 *
6870 * @param pVCpu Pointer to the VMCPU.
6871 * @param pMixedCtx Pointer to the guest-CPU context. The data may
6872 * be out-of-sync. Make sure to update the required
6873 * fields before using them.
6874 * @param u64IntInfo The VM-entry interruption-information field.
6875 * @param cbInstr The VM-entry instruction length in bytes (for
6876 * software interrupts, exceptions and privileged
6877 * software exceptions).
6878 * @param u32ErrCode The VM-entry exception error code.
6879 * @param GCPtrFaultAddress The page-fault address for #PF exceptions.
6880 * @param puIntrState Pointer to the current guest interruptibility-state.
6881 * This interruptibility-state will be updated if
6882 * necessary. This cannot be NULL.
6883 *
6884 * @remarks Requires CR0!
6885 * @remarks No-long-jump zone!!!
6886 */
6887static int hmR0VmxInjectEventVmcs(PVMCPU pVCpu, PCPUMCTX pMixedCtx, uint64_t u64IntInfo, uint32_t cbInstr,
6888 uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, uint32_t *puIntrState)
6889{
6890 /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */
6891 AssertMsg(u64IntInfo >> 32 == 0, ("%#RX64\n", u64IntInfo));
6892 Assert(puIntrState);
6893 uint32_t u32IntInfo = (uint32_t)u64IntInfo;
6894
6895 const uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(u32IntInfo);
6896 const uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntInfo);
6897
6898#ifdef VBOX_STRICT
6899 /* Validate the error-code-valid bit for hardware exceptions. */
6900 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT)
6901 {
6902 switch (uVector)
6903 {
6904 case X86_XCPT_PF:
6905 case X86_XCPT_DF:
6906 case X86_XCPT_TS:
6907 case X86_XCPT_NP:
6908 case X86_XCPT_SS:
6909 case X86_XCPT_GP:
6910 case X86_XCPT_AC:
6911 AssertMsg(VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntInfo),
6912 ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector));
6913 /* fallthru */
6914 default:
6915 break;
6916 }
6917 }
6918#endif
6919
6920 /* Cannot inject an NMI when block-by-MOV SS is in effect. */
6921 Assert( uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI
6922 || !(*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS));
6923
6924 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[uVector & MASK_INJECT_IRQ_STAT]);
6925
6926 /* We require CR0 to check if the guest is in real-mode. */
6927 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
6928 AssertRCReturn(rc, rc);
6929
6930 /*
6931 * Hardware interrupts & exceptions cannot be delivered through the software interrupt redirection bitmap to the real
6932 * mode task in virtual-8086 mode. We must jump to the interrupt handler in the (real-mode) guest.
6933 * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode" for interrupt & exception classes.
6934 * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling.
6935 */
6936 if (CPUMIsGuestInRealModeEx(pMixedCtx))
6937 {
6938 PVM pVM = pVCpu->CTX_SUFF(pVM);
6939 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
6940 {
6941 Assert(PDMVmmDevHeapIsEnabled(pVM));
6942 Assert(pVM->hm.s.vmx.pRealModeTSS);
6943
6944 /* We require RIP, RSP, RFLAGS, CS, IDTR. Save the required ones from the VMCS. */
6945 rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
6946 rc |= hmR0VmxSaveGuestTableRegs(pVCpu, pMixedCtx);
6947 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
6948 AssertRCReturn(rc, rc);
6949 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState & HMVMX_UPDATED_GUEST_RIP);
6950
6951 /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */
6952 const size_t cbIdtEntry = sizeof(X86IDTR16);
6953 if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pMixedCtx->idtr.cbIdt)
6954 {
6955 /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */
6956 if (uVector == X86_XCPT_DF)
6957 return VINF_EM_RESET;
6958 else if (uVector == X86_XCPT_GP)
6959 {
6960 /* If we're injecting a #GP with no valid IDT entry, inject a double-fault. */
6961 return hmR0VmxInjectXcptDF(pVCpu, pMixedCtx, puIntrState);
6962 }
6963
6964 /* If we're injecting an interrupt/exception with no valid IDT entry, inject a general-protection fault. */
6965 /* No error codes for exceptions in real-mode. See Intel spec. 20.1.4 "Interrupt and Exception Handling" */
6966 return hmR0VmxInjectXcptGP(pVCpu, pMixedCtx, false /* fErrCodeValid */, 0 /* u32ErrCode */, puIntrState);
6967 }
6968
6969 /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */
6970 uint16_t uGuestIp = pMixedCtx->ip;
6971 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT)
6972 {
6973 Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF);
6974 /* #BP and #OF are both benign traps; we need to resume at the next instruction. */
6975 uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr;
6976 }
6977 else if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT)
6978 uGuestIp = pMixedCtx->ip + (uint16_t)cbInstr;
6979
6980 /* Get the code segment selector and offset from the IDT entry for the interrupt handler. */
6981 X86IDTR16 IdtEntry;
6982 RTGCPHYS GCPhysIdtEntry = (RTGCPHYS)pMixedCtx->idtr.pIdt + uVector * cbIdtEntry;
6983 rc = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry);
6984 AssertRCReturn(rc, rc);
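/* Each 4-byte entry of the real-mode IVT holds the handler address as offset:segment (offset in
   the low word, segment in the high word); it was read into IdtEntry above. */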
6985
6986 /* Construct the stack frame for the interrupt/exception handler. */
6987 rc = hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->eflags.u32);
6988 rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, pMixedCtx->cs.Sel);
6989 rc |= hmR0VmxRealModeGuestStackPush(pVM, pMixedCtx, uGuestIp);
6990 AssertRCReturn(rc, rc);
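/* The pushes above build the standard real-mode interrupt frame: FLAGS at SS:SP+4, CS at SS:SP+2
   and the return IP at SS:SP, which is what a real-mode IRET expects. */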
6991
6992 /* Clear the required eflag bits and jump to the interrupt/exception handler. */
6993 if (rc == VINF_SUCCESS)
6994 {
6995 pMixedCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
6996 pMixedCtx->rip = IdtEntry.offSel;
6997 pMixedCtx->cs.Sel = IdtEntry.uSel;
6998 pMixedCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry;
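/* Real-mode code segment base = selector << 4; using cbIdtEntry (== 4, the size of an X86IDTR16
   entry) as the shift count happens to yield the same result. */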
6999 if ( uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT
7000 && uVector == X86_XCPT_PF)
7001 {
7002 pMixedCtx->cr2 = GCPtrFaultAddress;
7003 }
7004
7005 /* If any other guest-state bits are changed here, make sure to update
7006 hmR0VmxPreRunGuestCommitted() when thread-context hooks are used. */
7007 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS
7008 | HM_CHANGED_GUEST_RIP
7009 | HM_CHANGED_GUEST_RFLAGS
7010 | HM_CHANGED_GUEST_RSP);
7011
7012 /* We're clearing interrupts, which means no block-by-STI interrupt-inhibition. */
7013 if (*puIntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
7014 {
7015 Assert( uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI
7016 && uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT);
7017 Log4(("Clearing inhibition due to STI.\n"));
7018 *puIntrState &= ~VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI;
7019 }
7020 Log4(("Injecting real-mode: u32IntInfo=%#x u32ErrCode=%#x instrlen=%#x\n", u32IntInfo, u32ErrCode, cbInstr));
7021
7022 /* The event has been truly dispatched. Mark it as no longer pending so we don't attempt to 'undo'
7023 it, if we are returning to ring-3 before executing guest code. */
7024 pVCpu->hm.s.Event.fPending = false;
7025 }
7026 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RESET);
7027 return rc;
7028 }
7029 else
7030 {
7031 /*
7032 * For unrestricted execution enabled CPUs running real-mode guests, we must not set the deliver-error-code bit.
7033 * See Intel spec. 26.2.1.3 "VM-Entry Control Fields".
7034 */
7035 u32IntInfo &= ~VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
7036 }
7037 }
7038
7039 /* Validate. */
7040 Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(u32IntInfo)); /* Bit 31 (Valid bit) must be set by caller. */
7041 Assert(!VMX_EXIT_INTERRUPTION_INFO_NMI_UNBLOCK(u32IntInfo)); /* Bit 12 MBZ. */
7042 Assert(!(u32IntInfo & 0x7ffff000)); /* Bits 30:12 MBZ. */
7043
7044 /* Inject. */
7045 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntInfo);
7046 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(u32IntInfo))
7047 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode);
7048 rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
7049
7050 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(u32IntInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT
7051 && uVector == X86_XCPT_PF)
7052 {
7053 pMixedCtx->cr2 = GCPtrFaultAddress;
7054 }
7055
7056 Log4(("Injecting vcpu[%RU32] u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x pMixedCtx->uCR2=%#RX64\n", pVCpu->idCpu,
7057 u32IntInfo, u32ErrCode, cbInstr, pMixedCtx->cr2));
7058
7059 AssertRCReturn(rc, rc);
7060 return rc;
7061}
7062
7063
7064/**
7065 * Clears the interrupt-window exiting control in the VMCS and if necessary
7066 * clears the current event in the VMCS as well.
7067 *
7068 * @returns VBox status code.
7069 * @param pVCpu Pointer to the VMCPU.
7070 *
7071 * @remarks Use this function only to clear events that have not yet been
7072 * delivered to the guest but have been injected into the VMCS!
7073 * @remarks No-long-jump zone!!!
7074 */
7075static void hmR0VmxClearEventVmcs(PVMCPU pVCpu)
7076{
7077 int rc;
7078 Log4Func(("vcpu[%d]\n", pVCpu->idCpu));
7079
7080 /* Clear interrupt-window exiting control. */
7081 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT)
7082 {
7083 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
7084 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
7085 AssertRC(rc);
7086 }
7087
7088 if (!pVCpu->hm.s.Event.fPending)
7089 return;
7090
7091#ifdef VBOX_STRICT
7092 uint32_t u32EntryInfo;
7093 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo);
7094 AssertRC(rc);
7095 Assert(VMX_ENTRY_INTERRUPTION_INFO_VALID(u32EntryInfo));
7096#endif
7097
7098 /* Clear the entry-interruption field (including the valid bit). */
7099 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0);
7100 AssertRC(rc);
7101
7102 /* Clear the pending debug exception field. */
7103 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0);
7104 AssertRC(rc);
7105}
7106
7107
7108/**
7109 * Enters the VT-x session.
7110 *
7111 * @returns VBox status code.
7112 * @param pVM Pointer to the VM.
7113 * @param pVCpu Pointer to the VMCPU.
7114 * @param pCpu Pointer to the CPU info struct.
7115 */
7116VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBALCPUINFO pCpu)
7117{
7118 AssertPtr(pVM);
7119 AssertPtr(pVCpu);
7120 Assert(pVM->hm.s.vmx.fSupported);
7121 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7122 NOREF(pCpu);
7123
7124 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
7125 Assert(VMCPU_HMCF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE));
7126
7127#ifdef VBOX_STRICT
7128 /* Make sure we're in VMX root mode. */
7129 RTCCUINTREG u32HostCR4 = ASMGetCR4();
7130 if (!(u32HostCR4 & X86_CR4_VMXE))
7131 {
7132 LogRel(("VMXR0Enter: X86_CR4_VMXE bit in CR4 is not set!\n"));
7133 return VERR_VMX_X86_CR4_VMXE_CLEARED;
7134 }
7135#endif
7136
7137 /*
7138 * Load the VCPU's VMCS as the current (and active) one.
7139 */
7140 Assert(pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR);
7141 int rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
7142 if (RT_FAILURE(rc))
7143 return rc;
7144
7145 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE;
7146 pVCpu->hm.s.fLeaveDone = false;
7147 Log4Func(("Activated Vmcs. HostCpuId=%u\n", RTMpCpuId()));
7148
7149 return VINF_SUCCESS;
7150}
7151
7152
7153/**
7154 * The thread-context callback (only on platforms which support it).
7155 *
7156 * @param enmEvent The thread-context event.
7157 * @param pVCpu Pointer to the VMCPU.
7158 * @param fGlobalInit Whether global VT-x/AMD-V init. was used.
7159 * @thread EMT(pVCpu)
7160 */
7161VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit)
7162{
7163 switch (enmEvent)
7164 {
7165 case RTTHREADCTXEVENT_PREEMPTING:
7166 {
7167 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7168 Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu));
7169 VMCPU_ASSERT_EMT(pVCpu);
7170
7171 PVM pVM = pVCpu->CTX_SUFF(pVM);
7172 PCPUMCTX pMixedCtx = CPUMQueryGuestCtxPtr(pVCpu);
7173
7174 /* No longjmps (logger flushes, locks) in this fragile context. */
7175 VMMRZCallRing3Disable(pVCpu);
7176 Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId()));
7177
7178 /*
7179 * Restore host-state (FPU, debug etc.)
7180 */
7181 if (!pVCpu->hm.s.fLeaveDone)
7182 {
7183 /* Do -not- save guest-state here as we might already be in the middle of saving it (esp. bad if we are
7184 holding the PGM lock while saving the guest state, see hmR0VmxSaveGuestControlRegs()). */
7185 hmR0VmxLeave(pVM, pVCpu, pMixedCtx, false /* fSaveGuestState */);
7186 pVCpu->hm.s.fLeaveDone = true;
7187 }
7188
7189 /* Leave HM context, takes care of local init (term). */
7190 int rc = HMR0LeaveCpu(pVCpu);
7191 AssertRC(rc); NOREF(rc);
7192
7193 /* Restore longjmp state. */
7194 VMMRZCallRing3Enable(pVCpu);
7195 STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptPreempting);
7196 break;
7197 }
7198
7199 case RTTHREADCTXEVENT_RESUMED:
7200 {
7201 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7202 Assert(VMMR0ThreadCtxHooksAreRegistered(pVCpu));
7203 VMCPU_ASSERT_EMT(pVCpu);
7204
7205 /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */
7206 VMMRZCallRing3Disable(pVCpu);
7207 Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId()));
7208
7209 /* Initialize the bare minimum state required for HM. This takes care of
7210 initializing VT-x if necessary (onlined CPUs, local init etc.) */
7211 int rc = HMR0EnterCpu(pVCpu);
7212 AssertRC(rc);
7213 Assert(VMCPU_HMCF_IS_SET(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE));
7214
7215 /* Load the active VMCS as the current one. */
7216 if (pVCpu->hm.s.vmx.uVmcsState & HMVMX_VMCS_STATE_CLEAR)
7217 {
7218 rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs);
7219 AssertRC(rc); NOREF(rc);
7220 pVCpu->hm.s.vmx.uVmcsState = HMVMX_VMCS_STATE_ACTIVE;
7221 Log4Func(("Resumed: Activated Vmcs. HostCpuId=%u\n", RTMpCpuId()));
7222 }
7223 pVCpu->hm.s.fLeaveDone = false;
7224
7225 /* Restore longjmp state. */
7226 VMMRZCallRing3Enable(pVCpu);
7227 break;
7228 }
7229
7230 default:
7231 break;
7232 }
7233}
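/*
 * Invariants worth keeping in mind for the callback above: fLeaveDone ensures the host state
 * (FPU, debug state etc.) is restored at most once per stay on a host CPU; the RESUMED case may
 * run on a different host CPU than the PREEMPTING case did and therefore re-activates the VMCS
 * lazily; and ring-3 calls/longjmps stay disabled for the duration of each case since the
 * callback runs with preemption disabled.
 */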
7234
7235
7236/**
7237 * Saves the host state in the VMCS host-state.
7238 * Sets up the VM-exit MSR-load area.
7239 *
7240 * The CPU state will be loaded from these fields on every successful VM-exit.
7241 *
7242 * @returns VBox status code.
7243 * @param pVM Pointer to the VM.
7244 * @param pVCpu Pointer to the VMCPU.
7245 *
7246 * @remarks No-long-jump zone!!!
7247 */
7248static int hmR0VmxSaveHostState(PVM pVM, PVMCPU pVCpu)
7249{
7250 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7251
7252 if (!VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT))
7253 return VINF_SUCCESS;
7254
7255 int rc = hmR0VmxSaveHostControlRegs(pVM, pVCpu);
7256 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostControlRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7257
7258 rc = hmR0VmxSaveHostSegmentRegs(pVM, pVCpu);
7259 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostSegmentRegisters failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7260
7261 rc = hmR0VmxSaveHostMsrs(pVM, pVCpu);
7262 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSaveHostMsrs failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7263
7264 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_HOST_CONTEXT);
7265 return rc;
7266}
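/*
 * hmR0VmxSaveHostState() above is a typical use of the HM_CHANGED_* dirty-flag scheme: do the
 * (relatively expensive) VMCS writes only when the corresponding flag is pending and clear it
 * afterwards. A minimal, self-contained sketch of the pattern (names below are made up for
 * illustration; VMCPU_HMCF_IS_PENDING/VMCPU_HMCF_CLEAR are the real macros used above):
 *
 *     static int syncIfDirty(uint32_t *pfDirtyFlags, uint32_t fFlag, int (*pfnSync)(void))
 *     {
 *         if (!(*pfDirtyFlags & fFlag))   // nothing changed, skip the VMCS writes
 *             return VINF_SUCCESS;
 *         int rc = pfnSync();             // write the affected VMCS fields
 *         if (RT_SUCCESS(rc))
 *             *pfDirtyFlags &= ~fFlag;    // mark this part of the state as in sync again
 *         return rc;
 *     }
 */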
7267
7268
7269/**
7270 * Saves the host state in the VMCS host-state.
7271 *
7272 * @returns VBox status code.
7273 * @param pVM Pointer to the VM.
7274 * @param pVCpu Pointer to the VMCPU.
7275 *
7276 * @remarks No-long-jump zone!!!
7277 */
7278VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
7279{
7280 AssertPtr(pVM);
7281 AssertPtr(pVCpu);
7282
7283 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
7284
7285 /* Save the host state here while entering HM context. When thread-context hooks are used, we might get preempted
7286 and have to resave the host state but most of the time we won't be, so do it here before we disable interrupts. */
7287 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7288 return hmR0VmxSaveHostState(pVM, pVCpu);
7289}
7290
7291
7292/**
7293 * Loads the guest state into the VMCS guest-state area. The CPU state will be
7294 * loaded from these fields on every successful VM-entry.
7295 *
7296 * Sets up the VM-entry MSR-load and VM-exit MSR-store areas.
7297 * Sets up the VM-entry controls.
7298 * Sets up the appropriate VMX non-root function to execute guest code based on
7299 * the guest CPU mode.
7300 *
7301 * @returns VBox status code.
7302 * @param pVM Pointer to the VM.
7303 * @param pVCpu Pointer to the VMCPU.
7304 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7305 * out-of-sync. Make sure to update the required fields
7306 * before using them.
7307 *
7308 * @remarks No-long-jump zone!!!
7309 */
7310static int hmR0VmxLoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7311{
7312 AssertPtr(pVM);
7313 AssertPtr(pVCpu);
7314 AssertPtr(pMixedCtx);
7315 HMVMX_ASSERT_PREEMPT_SAFE();
7316
7317#ifdef LOG_ENABLED
7318 /** @todo r=ramshankar: I'm not able to use VMMRZCallRing3Disable() here,
7319 * probably not initialized yet? Anyway this will do for now.
7320 *
7321 * Update: Should be possible once VMXR0LoadGuestState() is removed as an
7322 * interface and disable ring-3 calls when thread-context hooks are not
7323 * available. */
7324 bool fCallerDisabledLogFlush = VMMR0IsLogFlushDisabled(pVCpu);
7325 VMMR0LogFlushDisable(pVCpu);
7326#endif
7327
7328 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
7329
7330 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestState, x);
7331
7332 /* Determine real-on-v86 mode. */
7333 pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false;
7334 if ( !pVM->hm.s.vmx.fUnrestrictedGuest
7335 && CPUMIsGuestInRealModeEx(pMixedCtx))
7336 {
7337 pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = true;
7338 }
7339
7340 /*
7341 * Load the guest-state into the VMCS.
7342 * Any ordering dependency among the sub-functions below must be explicitly stated using comments.
7343 * Ideally, assert that the cross-dependent bits are up to date at the point of using it.
7344 */
7345 int rc = hmR0VmxSetupVMRunHandler(pVCpu, pMixedCtx);
7346 AssertLogRelMsgRCReturn(rc, ("hmR0VmxSetupVMRunHandler! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7347
7348 /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-entry control updates. */
7349 rc = hmR0VmxLoadGuestEntryCtls(pVCpu, pMixedCtx);
7350 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestEntryCtls! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7351
7352 /* This needs to be done after hmR0VmxSetupVMRunHandler() as changing pfnStartVM may require VM-exit control updates. */
7353 rc = hmR0VmxLoadGuestExitCtls(pVCpu, pMixedCtx);
7354 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestExitCtls failed! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7355
7356 rc = hmR0VmxLoadGuestActivityState(pVCpu, pMixedCtx);
7357 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestActivityState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7358
7359 rc = hmR0VmxLoadGuestCR3AndCR4(pVCpu, pMixedCtx);
7360 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestCR3AndCR4: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7361
7362 /* Assumes pMixedCtx->cr0 is up-to-date (strict builds require CR0 for segment register validation checks). */
7363 rc = hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx);
7364 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestSegmentRegs: rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7365
7366 rc = hmR0VmxLoadGuestMsrs(pVCpu, pMixedCtx);
7367 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestMsrs! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7368
7369 rc = hmR0VmxLoadGuestApicState(pVCpu, pMixedCtx);
7370 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestApicState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7371
7372 /*
7373 * Loading Rflags here is fine, even though Rflags.TF might depend on guest debug state (which is not loaded here).
7374 * It is re-evaluated and updated if necessary in hmR0VmxLoadSharedState().
7375 */
7376 rc = hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx);
7377 AssertLogRelMsgRCReturn(rc, ("hmR0VmxLoadGuestRipRspRflags! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
7378
7379 /* Clear any unused and reserved bits. */
7380 VMCPU_HMCF_CLEAR(pVCpu, HM_CHANGED_GUEST_CR2);
7381
7382#ifdef LOG_ENABLED
7383 /* Only reenable log-flushing if the caller has it enabled. */
7384 if (!fCallerDisabledLogFlush)
7385 VMMR0LogFlushEnable(pVCpu);
7386#endif
7387
7388 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestState, x);
7389 return rc;
7390}
7391
7392
7393/**
7394 * Loads the state shared between the host and guest into the VMCS.
7395 *
7396 * @param pVM Pointer to the VM.
7397 * @param pVCpu Pointer to the VMCPU.
7398 * @param pCtx Pointer to the guest-CPU context.
7399 *
7400 * @remarks No-long-jump zone!!!
7401 */
7402static void hmR0VmxLoadSharedState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
7403{
7404 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7405 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
7406
7407 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0))
7408 {
7409 int rc = hmR0VmxLoadSharedCR0(pVCpu, pCtx);
7410 AssertRC(rc);
7411 }
7412
7413 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_DEBUG))
7414 {
7415 int rc = hmR0VmxLoadSharedDebugState(pVCpu, pCtx);
7416 AssertRC(rc);
7417
7418 /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */
7419 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_RFLAGS))
7420 {
7421 rc = hmR0VmxLoadGuestRflags(pVCpu, pCtx);
7422 AssertRC(rc);
7423 }
7424 }
7425
7426 AssertMsg(!VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE),
7427 ("fContextUseFlags=%#RX32\n", VMCPU_HMCF_VALUE(pVCpu)));
7428}
7429
7430
7431/**
7432 * Worker for loading the guest-state bits in the inner VT-x execution loop.
7433 *
7434 * @param pVM Pointer to the VM.
7435 * @param pVCpu Pointer to the VMCPU.
7436 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7437 * out-of-sync. Make sure to update the required fields
7438 * before using them.
7439 */
7440DECLINLINE(void) hmR0VmxLoadGuestStateOptimal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx)
7441{
7442 HMVMX_ASSERT_PREEMPT_SAFE();
7443
7444 Log5(("LoadFlags=%#RX32\n", VMCPU_HMCF_VALUE(pVCpu)));
7445#ifdef HMVMX_SYNC_FULL_GUEST_STATE
7446 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
7447#endif
7448
7449 if (VMCPU_HMCF_IS_SET_ONLY(pVCpu, HM_CHANGED_GUEST_RIP))
7450 {
7451 int rc = hmR0VmxLoadGuestRip(pVCpu, pMixedCtx);
7452 AssertRC(rc);
7453 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal);
7454 }
7455 else if (VMCPU_HMCF_VALUE(pVCpu))
7456 {
7457 int rc = hmR0VmxLoadGuestState(pVM, pVCpu, pMixedCtx);
7458 AssertRC(rc);
7459 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
7460 }
7461
7462 /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */
7463 AssertMsg( !VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_ALL_GUEST)
7464 || VMCPU_HMCF_IS_PENDING_ONLY(pVCpu, HM_CHANGED_HOST_CONTEXT | HM_CHANGED_HOST_GUEST_SHARED_STATE),
7465 ("fContextUseFlags=%#RX32\n", VMCPU_HMCF_VALUE(pVCpu)));
7466
7467#ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE
7468 uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVM, pVCpu, pMixedCtx);
7469 if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND)
7470 Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason));
7471#endif
7472}
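/*
 * The RIP-only special case above is purely an optimization: many VM-exits (I/O, CPUID etc.)
 * only advance RIP, so rewriting just the guest RIP field is much cheaper than a full
 * hmR0VmxLoadGuestState() pass. Condensed sketch of the decision (illustrative only; the macros
 * and counters are the ones used above):
 *
 *     if (VMCPU_HMCF_IS_SET_ONLY(pVCpu, HM_CHANGED_GUEST_RIP))   // only RIP is dirty
 *         hmR0VmxLoadGuestRip(pVCpu, pMixedCtx);                 // -> StatLoadMinimal
 *     else if (VMCPU_HMCF_VALUE(pVCpu))                          // something else is dirty too
 *         hmR0VmxLoadGuestState(pVM, pVCpu, pMixedCtx);          // -> StatLoadFull
 *     // else: nothing changed since the last VM-entry, the VMCS contents are reused as-is.
 */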
7473
7474
7475/**
7476 * Does the preparations before executing guest code in VT-x.
7477 *
7478 * This may cause longjmps to ring-3 and may even result in rescheduling to the
7479 * recompiler. We must be cautious about committing guest-state information
7480 * into the VMCS here on the assumption that we will definitely execute the
7481 * guest in VT-x mode. If we fall back to the recompiler after updating the VMCS
7482 * and clearing the common state (TRPM/force-flags), we must undo those changes
7483 * so that the recompiler can (and should) use them when it resumes guest
7484 * execution. Otherwise, such operations must be deferred until we can no longer
7485 * exit to ring-3.
7486 *
7487 * @returns Strict VBox status code.
7488 * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts
7489 * have been disabled.
7490 * @retval VINF_EM_RESET if a triple-fault occurs while injecting a
7491 * double-fault into the guest.
7492 * @retval VINF_* scheduling changes, we have to go back to ring-3.
7493 *
7494 * @param pVM Pointer to the VM.
7495 * @param pVCpu Pointer to the VMCPU.
7496 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7497 * out-of-sync. Make sure to update the required fields
7498 * before using them.
7499 * @param pVmxTransient Pointer to the VMX transient structure.
7500 *
7501 * @remarks Called with preemption disabled. In the VINF_SUCCESS return case
7502 * interrupts will be disabled.
7503 */
7504static int hmR0VmxPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
7505{
7506 Assert(VMMRZCallRing3IsEnabled(pVCpu));
7507
7508#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
7509 PGMRZDynMapFlushAutoSet(pVCpu);
7510#endif
7511
7512 /* Check force flag actions that might require us to go back to ring-3. */
7513 int rc = hmR0VmxCheckForceFlags(pVM, pVCpu, pMixedCtx);
7514 if (rc != VINF_SUCCESS)
7515 return rc;
7516
7517#ifndef IEM_VERIFICATION_MODE_FULL
7518 /* Set up the virtualized APIC accesses. pMixedCtx->msrApicBase is always up-to-date. It's not part of the VMCS. */
7519 if ( pVCpu->hm.s.vmx.u64MsrApicBase != pMixedCtx->msrApicBase
7520 && (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
7521 {
7522 Assert(pVM->hm.s.vmx.HCPhysApicAccess);
7523 RTGCPHYS GCPhysApicBase;
7524 GCPhysApicBase = pMixedCtx->msrApicBase;
7525 GCPhysApicBase &= PAGE_BASE_GC_MASK;
7526
7527 /* Unalias any existing mapping. */
7528 rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase);
7529 AssertRCReturn(rc, rc);
7530
7531 /* Map the HC APIC-access page into the GC space, this also updates the shadow page tables if necessary. */
7532 Log4(("Mapped HC APIC-access page into GC: GCPhysApicBase=%#RGv\n", GCPhysApicBase));
7533 rc = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
7534 AssertRCReturn(rc, rc);
7535
7536 pVCpu->hm.s.vmx.u64MsrApicBase = pMixedCtx->msrApicBase;
7537 }
7538#endif /* !IEM_VERIFICATION_MODE_FULL */
7539
7540 /* Load the guest state bits, we can handle longjmps/getting preempted here. */
7541 hmR0VmxLoadGuestStateOptimal(pVM, pVCpu, pMixedCtx);
7542
7543 /*
7544 * Evaluate events as pending-for-injection into the guest. Toggling of force-flags here is safe as long as
7545 * we update TRPM on premature exits to ring-3 before executing guest code. We must NOT restore the force-flags.
7546 */
7547 if (TRPMHasTrap(pVCpu))
7548 hmR0VmxTrpmTrapToPendingEvent(pVCpu);
7549 else if (!pVCpu->hm.s.Event.fPending)
7550 hmR0VmxEvaluatePendingEvent(pVCpu, pMixedCtx);
7551
7552 /*
7553 * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus needs to be done with
7554 * longjmps or interrupts + preemption enabled. Event injection might also result in triple-faulting the VM.
7555 */
7556 rc = hmR0VmxInjectPendingEvent(pVCpu, pMixedCtx);
7557 if (RT_UNLIKELY(rc != VINF_SUCCESS))
7558 {
7559 Assert(rc == VINF_EM_RESET);
7560 return rc;
7561 }
7562
7563 /*
7564 * No longjmps to ring-3 from this point on!!!
7565 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
7566 * This also disables flushing of the R0-logger instance (if any).
7567 */
7568 VMMRZCallRing3Disable(pVCpu);
7569
7570 /*
7571 * We disable interrupts so that we don't miss any interrupts that would flag preemption (IPI/timers etc.)
7572 * when thread-context hooks aren't used and we've been running with preemption disabled for a while.
7573 *
7574 * We need to check for force-flags that could've possibly been altered since we last checked them (e.g.
7575 * by PDMGetInterrupt() leaving the PDM critical section, see @bugref{6398}).
7576 *
7577 * We also check a couple of other force-flags as a last opportunity to get the EMT back to ring-3 before
7578 * executing guest code.
7579 */
7580 pVmxTransient->uEflags = ASMIntDisableFlags();
7581 if ( VM_FF_IS_PENDING(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
7582 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
7583 {
7584 hmR0VmxClearEventVmcs(pVCpu);
7585 ASMSetFlags(pVmxTransient->uEflags);
7586 VMMRZCallRing3Enable(pVCpu);
7587 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
7588 return VINF_EM_RAW_TO_R3;
7589 }
7590 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
7591 {
7592 hmR0VmxClearEventVmcs(pVCpu);
7593 ASMSetFlags(pVmxTransient->uEflags);
7594 VMMRZCallRing3Enable(pVCpu);
7595 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
7596 return VINF_EM_RAW_INTERRUPT;
7597 }
7598
7599 /* We've injected any pending events. This is really the point of no return (to ring-3). */
7600 pVCpu->hm.s.Event.fPending = false;
7601
7602 return VINF_SUCCESS;
7603}
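/*
 * The tail of hmR0VmxPreRunGuest() is a classic double-check: force-flags are evaluated once
 * while interrupts are still enabled and then re-checked after ASMIntDisableFlags(), because
 * PDMGetInterrupt(), timers or an IPI may have raised new ones in between (see @bugref{6398}).
 * Only when the second check is clean do we proceed with interrupts disabled, which is why both
 * early-return paths above must undo the event injection (hmR0VmxClearEventVmcs), restore
 * EFLAGS and re-enable ring-3 calls.
 */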
7604
7605
7606/**
7607 * Prepares to run guest code in VT-x after we have committed to doing so. This
7608 * means there is no backing out to ring-3 or anywhere else at this
7609 * point.
7610 *
7611 * @param pVM Pointer to the VM.
7612 * @param pVCpu Pointer to the VMCPU.
7613 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7614 * out-of-sync. Make sure to update the required fields
7615 * before using them.
7616 * @param pVmxTransient Pointer to the VMX transient structure.
7617 *
7618 * @remarks Called with preemption disabled.
7619 * @remarks No-long-jump zone!!!
7620 */
7621static void hmR0VmxPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
7622{
7623 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
7624 Assert(VMMR0IsLogFlushDisabled(pVCpu));
7625 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
7626
7627 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
7628 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); /* Indicate the start of guest execution. */
7629
7630 /*
7631 * If we are injecting events to a real-on-v86 mode guest, we may have to update
7632 * RIP and some other registers, i.e. hmR0VmxInjectPendingEvent()->hmR0VmxInjectEventVmcs().
7633 * Reload only the necessary state; the assertion below will catch it if other parts of
7634 * the code change.
7635 */
7636 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
7637 {
7638 hmR0VmxLoadGuestRipRspRflags(pVCpu, pMixedCtx);
7639 hmR0VmxLoadGuestSegmentRegs(pVCpu, pMixedCtx);
7640 }
7641
7642#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE
7643 if (!CPUMIsGuestFPUStateActive(pVCpu))
7644 CPUMR0LoadGuestFPU(pVM, pVCpu, pMixedCtx);
7645 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
7646#endif
7647
7648 /*
7649 * Load the host state bits as we may've been preempted (only happens when
7650 * thread-context hooks are used or when hmR0VmxSetupVMRunHandler() changes pfnStartVM).
7651 */
7652 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT))
7653 {
7654 /* This ASSUMES that pfnStartVM has been set up already. */
7655 int rc = hmR0VmxSaveHostState(pVM, pVCpu);
7656 AssertRC(rc);
7657 STAM_COUNTER_INC(&pVCpu->hm.s.StatPreemptSaveHostState);
7658 }
7659 Assert(!VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_HOST_CONTEXT));
7660
7661 /*
7662 * Load the state shared between host and guest (FPU, debug).
7663 */
7664 if (VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_HOST_GUEST_SHARED_STATE))
7665 hmR0VmxLoadSharedState(pVM, pVCpu, pMixedCtx);
7666 AssertMsg(!VMCPU_HMCF_VALUE(pVCpu), ("fContextUseFlags=%#RX32\n", VMCPU_HMCF_VALUE(pVCpu)));
7667
7668 /* Store status of the shared guest-host state at the time of VM-entry. */
7669#if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
7670 if (CPUMIsGuestInLongModeEx(pMixedCtx))
7671 {
7672 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu);
7673 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu);
7674 }
7675 else
7676#endif
7677 {
7678 pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu);
7679 pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu);
7680 }
7681 pVmxTransient->fWasGuestFPUStateActive = CPUMIsGuestFPUStateActive(pVCpu);
7682
7683 /*
7684 * Cache the TPR-shadow for checking on every VM-exit if it might have changed.
7685 */
7686 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
7687 pVmxTransient->u8GuestTpr = pVCpu->hm.s.vmx.pbVirtApic[0x80];
7688
7689 PHMGLOBALCPUINFO pCpu = HMR0GetCurrentCpu();
7690 RTCPUID idCurrentCpu = pCpu->idCpu;
7691 if ( pVmxTransient->fUpdateTscOffsettingAndPreemptTimer
7692 || idCurrentCpu != pVCpu->hm.s.idLastCpu)
7693 {
7694 hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu, pMixedCtx);
7695 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = false;
7696 }
7697
7698 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB-shootdowns, set this across the world switch. */
7699 hmR0VmxFlushTaggedTlb(pVCpu, pCpu); /* Invalidate the appropriate guest entries from the TLB. */
7700 Assert(idCurrentCpu == pVCpu->hm.s.idLastCpu);
7701 pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Update the error reporting info. with the current host CPU. */
7702
7703 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
7704
7705 TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about
7706 to start executing. */
7707
7708#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
7709 /*
7710 * Save the current host TSC_AUX and write the guest TSC_AUX to the host, so that
7711 * RDTSCPs (that don't cause VM-exits) read the guest MSR. See @bugref{3324}.
7712 */
7713 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
7714 {
7715 pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX);
7716 uint64_t u64GuestTscAux = 0;
7717 int rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTscAux);
7718 AssertRC(rc2);
7719 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTscAux);
7720 }
7721#endif
7722}
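/*
 * Note on the TSC_AUX handling above: when the auto-load/store MSR area is not used
 * (VBOX_WITH_AUTO_MSR_LOAD_RESTORE undefined), the guest TSC_AUX is installed by hand right
 * before the world switch and the saved pVCpu->hm.s.u64HostTscAux is written back in
 * hmR0VmxPostRunGuest(), so a guest RDTSCP that does not cause a VM-exit reads the guest's
 * TSC_AUX rather than the host's (see @bugref{3324}).
 */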
7723
7724
7725/**
7726 * Performs some essential restoration of state after running guest code in
7727 * VT-x.
7728 *
7729 * @param pVM Pointer to the VM.
7730 * @param pVCpu Pointer to the VMCPU.
7731 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
7732 * out-of-sync. Make sure to update the required fields
7733 * before using them.
7734 * @param pVmxTransient Pointer to the VMX transient structure.
7735 * @param rcVMRun Return code of VMLAUNCH/VMRESUME.
7736 *
7737 * @remarks Called with interrupts disabled, and returns with interrupts enabled!
7738 *
7739 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
7740 * unconditionally when it is safe to do so.
7741 */
7742static void hmR0VmxPostRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, int rcVMRun)
7743{
7744 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
7745
7746 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB-shootdowns. */
7747 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for TLB-shootdowns. */
7748 pVCpu->hm.s.vmx.fUpdatedGuestState = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */
7749 pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */
7750 pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */
7751
7752 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_RDTSC_EXIT))
7753 {
7754#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
7755 /* Restore host's TSC_AUX. */
7756 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
7757 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux);
7758#endif
7759 /** @todo Find a way to fix hardcoding a guestimate. */
7760 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC()
7761 + pVCpu->hm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
7762 }
7763
7764 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
7765 TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */
7766 Assert(!(ASMGetFlags() & X86_EFL_IF));
7767 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
7768
7769#ifdef HMVMX_ALWAYS_SWAP_FPU_STATE
7770 if (CPUMIsGuestFPUStateActive(pVCpu))
7771 {
7772 hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
7773 CPUMR0SaveGuestFPU(pVM, pVCpu, pMixedCtx);
7774 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
7775 }
7776#endif
7777
7778 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Host state messed up by VT-x, we must restore. */
7779 pVCpu->hm.s.vmx.uVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */
7780 ASMSetFlags(pVmxTransient->uEflags); /* Enable interrupts. */
7781 VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */
7782
7783 /* Save the basic VM-exit reason. Refer Intel spec. 24.9.1 "Basic VM-exit Information". */
7784 uint32_t uExitReason;
7785 int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason);
7786 rc |= hmR0VmxReadEntryIntInfoVmcs(pVmxTransient);
7787 AssertRC(rc);
7788 pVmxTransient->uExitReason = (uint16_t)VMX_EXIT_REASON_BASIC(uExitReason);
7789 pVmxTransient->fVMEntryFailed = !!VMX_ENTRY_INTERRUPTION_INFO_VALID(pVmxTransient->uEntryIntInfo);
7790
7791 /* If the VMLAUNCH/VMRESUME failed, we can bail out early. This does -not- cover VMX_EXIT_ERR_*. */
7792 if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS))
7793 {
7794 Log4(("VM-entry failure: pVCpu=%p idCpu=%RU32 rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", pVCpu, pVCpu->idCpu, rcVMRun,
7795 pVmxTransient->fVMEntryFailed));
7796 return;
7797 }
7798
7799 if (RT_LIKELY(!pVmxTransient->fVMEntryFailed))
7800 {
7801 /* Update the guest interruptibility-state from the VMCS. */
7802 hmR0VmxSaveGuestIntrState(pVCpu, pMixedCtx);
7803#if defined(HMVMX_SYNC_FULL_GUEST_STATE) || defined(HMVMX_SAVE_FULL_GUEST_STATE)
7804 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
7805 AssertRC(rc);
7806#endif
7807 /*
7808 * If the TPR was raised by the guest, it wouldn't cause a VM-exit immediately. Instead we sync the TPR lazily whenever
7809 * we eventually get a VM-exit for any reason. This may be expensive as PDMApicSetTPR() can longjmp to ring-3, which is
7810 * why it's done here: it's easier and no less efficient to deal with it here than to make hmR0VmxSaveGuestState()
7811 * cope with longjmps safely (see VMCPU_FF_HM_UPDATE_CR3 handling).
7812 */
7813 if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
7814 && pVmxTransient->u8GuestTpr != pVCpu->hm.s.vmx.pbVirtApic[0x80])
7815 {
7816 rc = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[0x80]);
7817 AssertRC(rc);
7818 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
7819 }
7820 }
7821}
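/*
 * Worked example for the TMCpuTickSetLastSeen() call above: with TSC offsetting the guest reads
 * host-TSC + u64TSCOffset. If ASMReadTSC() returns 5,000,000 here and u64TSCOffset corresponds
 * to -100,000, the guest TSC is currently about 4,900,000; subtracting the ~0x400-tick
 * world-switch guestimate records roughly the highest value the guest could have read before
 * the VM-exit, keeping the virtual TSC from appearing to jump backwards on the next read.
 */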
7822
7823
7824
7825/**
7826 * Runs the guest code using VT-x the normal way.
7827 *
7828 * @returns VBox status code.
7829 * @param pVM Pointer to the VM.
7830 * @param pVCpu Pointer to the VMCPU.
7831 * @param pCtx Pointer to the guest-CPU context.
7832 *
7833 * @note Mostly the same as hmR0VmxRunGuestCodeStep.
7834 * @remarks Called with preemption disabled.
7835 */
7836static int hmR0VmxRunGuestCodeNormal(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
7837{
7838 VMXTRANSIENT VmxTransient;
7839 VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true;
7840 int rc = VERR_INTERNAL_ERROR_5;
7841 uint32_t cLoops = 0;
7842
7843 for (;; cLoops++)
7844 {
7845 Assert(!HMR0SuspendPending());
7846 HMVMX_ASSERT_CPU_SAFE();
7847
7848 /* Preparatory work for running guest code, this may force us to return
7849 to ring-3. This bugger disables interrupts on VINF_SUCCESS! */
7850 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7851 rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient);
7852 if (rc != VINF_SUCCESS)
7853 break;
7854
7855 hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient);
7856 rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx);
7857 /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */
7858
7859 /* Restore any residual host-state and save any bits shared between host
7860 and guest into the guest-CPU state. Re-enables interrupts! */
7861 hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc);
7862
7863 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7864 if (RT_UNLIKELY(rc != VINF_SUCCESS))
7865 {
7866 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
7867 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient);
7868 return rc;
7869 }
7870
7871 /* Handle the VM-exit. */
7872 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7873 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
7874 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7875 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
7876 HMVMX_START_EXIT_DISPATCH_PROF();
7877#ifdef HMVMX_USE_FUNCTION_TABLE
7878 rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient);
7879#else
7880 rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason);
7881#endif
7882 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
7883 if (rc != VINF_SUCCESS)
7884 break;
7885 else if (cLoops > pVM->hm.s.cMaxResumeLoops)
7886 {
7887 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
7888 rc = VINF_EM_RAW_INTERRUPT;
7889 break;
7890 }
7891 }
7892
7893 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7894 return rc;
7895}
7896
7897
7898/**
7899 * Single steps guest code using VT-x.
7900 *
7901 * @returns VBox status code.
7902 * @param pVM Pointer to the VM.
7903 * @param pVCpu Pointer to the VMCPU.
7904 * @param pCtx Pointer to the guest-CPU context.
7905 *
7906 * @note Mostly the same as hmR0VmxRunGuestCodeNormal.
7907 * @remarks Called with preemption disabled.
7908 */
7909static int hmR0VmxRunGuestCodeStep(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
7910{
7911 VMXTRANSIENT VmxTransient;
7912 VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true;
7913 int rc = VERR_INTERNAL_ERROR_5;
7914 uint32_t cLoops = 0;
7915 uint16_t uCsStart = pCtx->cs.Sel;
7916 uint64_t uRipStart = pCtx->rip;
7917
7918 for (;; cLoops++)
7919 {
7920 Assert(!HMR0SuspendPending());
7921 HMVMX_ASSERT_CPU_SAFE();
7922
7923 /* Preparatory work for running guest code, this may force us to return
7924 to ring-3. This bugger disables interrupts on VINF_SUCCESS! */
7925 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
7926 rc = hmR0VmxPreRunGuest(pVM, pVCpu, pCtx, &VmxTransient);
7927 if (rc != VINF_SUCCESS)
7928 break;
7929
7930 hmR0VmxPreRunGuestCommitted(pVM, pVCpu, pCtx, &VmxTransient);
7931 rc = hmR0VmxRunGuest(pVM, pVCpu, pCtx);
7932 /* The guest-CPU context is now outdated, 'pCtx' is to be treated as 'pMixedCtx' from this point on!!! */
7933
7934 /* Restore any residual host-state and save any bits shared between host
7935 and guest into the guest-CPU state. Re-enables interrupts! */
7936 hmR0VmxPostRunGuest(pVM, pVCpu, pCtx, &VmxTransient, rc);
7937
7938 /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */
7939 if (RT_UNLIKELY(rc != VINF_SUCCESS))
7940 {
7941 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
7942 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx, &VmxTransient);
7943 return rc;
7944 }
7945
7946 /* Handle the VM-exit. */
7947 AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason));
7948 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll);
7949 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]);
7950 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
7951 HMVMX_START_EXIT_DISPATCH_PROF();
7952#ifdef HMVMX_USE_FUNCTION_TABLE
7953 rc = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, pCtx, &VmxTransient);
7954#else
7955 rc = hmR0VmxHandleExit(pVCpu, pCtx, &VmxTransient, VmxTransient.uExitReason);
7956#endif
7957 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
7958 if (rc != VINF_SUCCESS)
7959 break;
7960 else if (cLoops > pVM->hm.s.cMaxResumeLoops)
7961 {
7962 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
7963 rc = VINF_EM_RAW_INTERRUPT;
7964 break;
7965 }
7966
7967 /*
7968 * Did the RIP change? If so, consider it a single step.
7969 * Otherwise, make sure one of the TFs gets set.
7970 */
7971 int rc2 = hmR0VmxSaveGuestRip(pVCpu, pCtx);
7972 rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pCtx);
7973 AssertRCReturn(rc2, rc2);
7974 if ( pCtx->rip != uRipStart
7975 || pCtx->cs.Sel != uCsStart)
7976 {
7977 rc = VINF_EM_DBG_STEPPED;
7978 break;
7979 }
7980 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG);
7981 }
7982
7983 /*
7984 * Clear the X86_EFL_TF if necessary.
7985 */
7986 if (pVCpu->hm.s.fClearTrapFlag)
7987 {
7988 int rc2 = hmR0VmxSaveGuestRflags(pVCpu, pCtx);
7989 AssertRCReturn(rc2, rc2);
7990 pVCpu->hm.s.fClearTrapFlag = false;
7991 pCtx->eflags.Bits.u1TF = 0;
7992 }
7993 /** @todo There seem to be issues with the resume flag when the monitor trap
7994 * flag is pending without being used. Seen early in BIOS init when
7995 * accessing the APIC page in protected mode. */
7996
7997 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
7998 return rc;
7999}
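/*
 * The single-stepping loop above does not use a dedicated VT-x stepping facility: after every
 * VM-exit it re-reads RIP and CS from the VMCS and returns VINF_EM_DBG_STEPPED as soon as
 * either differs from the values captured at entry (uRipStart/uCsStart); otherwise it re-marks
 * HM_CHANGED_GUEST_DEBUG so the trap-flag/debug state is re-applied on the next iteration, and
 * any trap flag HM itself forced is cleared again at the end (fClearTrapFlag).
 */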
8000
8001
8002/**
8003 * Runs the guest code using VT-x.
8004 *
8005 * @returns VBox status code.
8006 * @param pVM Pointer to the VM.
8007 * @param pVCpu Pointer to the VMCPU.
8008 * @param pCtx Pointer to the guest-CPU context.
8009 *
8010 * @remarks Called with preemption disabled.
8011 */
8012VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
8013{
8014 Assert(VMMRZCallRing3IsEnabled(pVCpu));
8015 Assert(pVCpu->hm.s.vmx.fUpdatedGuestState == HMVMX_UPDATED_GUEST_ALL);
8016 HMVMX_ASSERT_PREEMPT_SAFE();
8017
8018 VMMRZCallRing3SetNotification(pVCpu, hmR0VmxCallRing3Callback, pCtx);
8019
8020 int rc;
8021 if (!pVCpu->hm.s.fSingleInstruction && !DBGFIsStepping(pVCpu))
8022 rc = hmR0VmxRunGuestCodeNormal(pVM, pVCpu, pCtx);
8023 else
8024 rc = hmR0VmxRunGuestCodeStep(pVM, pVCpu, pCtx);
8025
8026 if (rc == VERR_EM_INTERPRETER)
8027 rc = VINF_EM_RAW_EMULATE_INSTR;
8028 else if (rc == VINF_EM_RESET)
8029 rc = VINF_EM_TRIPLE_FAULT;
8030
8031 int rc2 = hmR0VmxExitToRing3(pVM, pVCpu, pCtx, rc);
8032 if (RT_FAILURE(rc2))
8033 {
8034 pVCpu->hm.s.u32HMError = rc;
8035 rc = rc2;
8036 }
8037 Assert(!VMMRZCallRing3IsNotificationSet(pVCpu));
8038 return rc;
8039}
8040
8041
8042#ifndef HMVMX_USE_FUNCTION_TABLE
8043DECLINLINE(int) hmR0VmxHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient, uint32_t rcReason)
8044{
8045 int rc;
8046 switch (rcReason)
8047 {
8048 case VMX_EXIT_EPT_MISCONFIG: rc = hmR0VmxExitEptMisconfig(pVCpu, pMixedCtx, pVmxTransient); break;
8049 case VMX_EXIT_EPT_VIOLATION: rc = hmR0VmxExitEptViolation(pVCpu, pMixedCtx, pVmxTransient); break;
8050 case VMX_EXIT_IO_INSTR: rc = hmR0VmxExitIoInstr(pVCpu, pMixedCtx, pVmxTransient); break;
8051 case VMX_EXIT_CPUID: rc = hmR0VmxExitCpuid(pVCpu, pMixedCtx, pVmxTransient); break;
8052 case VMX_EXIT_RDTSC: rc = hmR0VmxExitRdtsc(pVCpu, pMixedCtx, pVmxTransient); break;
8053 case VMX_EXIT_RDTSCP: rc = hmR0VmxExitRdtscp(pVCpu, pMixedCtx, pVmxTransient); break;
8054 case VMX_EXIT_APIC_ACCESS: rc = hmR0VmxExitApicAccess(pVCpu, pMixedCtx, pVmxTransient); break;
8055 case VMX_EXIT_XCPT_OR_NMI: rc = hmR0VmxExitXcptOrNmi(pVCpu, pMixedCtx, pVmxTransient); break;
8056 case VMX_EXIT_MOV_CRX: rc = hmR0VmxExitMovCRx(pVCpu, pMixedCtx, pVmxTransient); break;
8057 case VMX_EXIT_EXT_INT: rc = hmR0VmxExitExtInt(pVCpu, pMixedCtx, pVmxTransient); break;
8058 case VMX_EXIT_INT_WINDOW: rc = hmR0VmxExitIntWindow(pVCpu, pMixedCtx, pVmxTransient); break;
8059 case VMX_EXIT_MWAIT: rc = hmR0VmxExitMwait(pVCpu, pMixedCtx, pVmxTransient); break;
8060 case VMX_EXIT_MONITOR: rc = hmR0VmxExitMonitor(pVCpu, pMixedCtx, pVmxTransient); break;
8061 case VMX_EXIT_TASK_SWITCH: rc = hmR0VmxExitTaskSwitch(pVCpu, pMixedCtx, pVmxTransient); break;
8062 case VMX_EXIT_PREEMPT_TIMER: rc = hmR0VmxExitPreemptTimer(pVCpu, pMixedCtx, pVmxTransient); break;
8063 case VMX_EXIT_RDMSR: rc = hmR0VmxExitRdmsr(pVCpu, pMixedCtx, pVmxTransient); break;
8064 case VMX_EXIT_WRMSR: rc = hmR0VmxExitWrmsr(pVCpu, pMixedCtx, pVmxTransient); break;
8065 case VMX_EXIT_MOV_DRX: rc = hmR0VmxExitMovDRx(pVCpu, pMixedCtx, pVmxTransient); break;
8066 case VMX_EXIT_TPR_BELOW_THRESHOLD: rc = hmR0VmxExitTprBelowThreshold(pVCpu, pMixedCtx, pVmxTransient); break;
8067 case VMX_EXIT_HLT: rc = hmR0VmxExitHlt(pVCpu, pMixedCtx, pVmxTransient); break;
8068 case VMX_EXIT_INVD: rc = hmR0VmxExitInvd(pVCpu, pMixedCtx, pVmxTransient); break;
8069 case VMX_EXIT_INVLPG: rc = hmR0VmxExitInvlpg(pVCpu, pMixedCtx, pVmxTransient); break;
8070 case VMX_EXIT_RSM: rc = hmR0VmxExitRsm(pVCpu, pMixedCtx, pVmxTransient); break;
8071 case VMX_EXIT_MTF: rc = hmR0VmxExitMtf(pVCpu, pMixedCtx, pVmxTransient); break;
8072 case VMX_EXIT_PAUSE: rc = hmR0VmxExitPause(pVCpu, pMixedCtx, pVmxTransient); break;
8073 case VMX_EXIT_XDTR_ACCESS: rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); break;
8074 case VMX_EXIT_TR_ACCESS: rc = hmR0VmxExitXdtrAccess(pVCpu, pMixedCtx, pVmxTransient); break;
8075 case VMX_EXIT_WBINVD: rc = hmR0VmxExitWbinvd(pVCpu, pMixedCtx, pVmxTransient); break;
8076 case VMX_EXIT_XSETBV: rc = hmR0VmxExitXsetbv(pVCpu, pMixedCtx, pVmxTransient); break;
8077 case VMX_EXIT_RDRAND: rc = hmR0VmxExitRdrand(pVCpu, pMixedCtx, pVmxTransient); break;
8078 case VMX_EXIT_INVPCID: rc = hmR0VmxExitInvpcid(pVCpu, pMixedCtx, pVmxTransient); break;
8079 case VMX_EXIT_GETSEC: rc = hmR0VmxExitGetsec(pVCpu, pMixedCtx, pVmxTransient); break;
8080 case VMX_EXIT_RDPMC: rc = hmR0VmxExitRdpmc(pVCpu, pMixedCtx, pVmxTransient); break;
8081
8082 case VMX_EXIT_TRIPLE_FAULT: rc = hmR0VmxExitTripleFault(pVCpu, pMixedCtx, pVmxTransient); break;
8083 case VMX_EXIT_NMI_WINDOW: rc = hmR0VmxExitNmiWindow(pVCpu, pMixedCtx, pVmxTransient); break;
8084 case VMX_EXIT_INIT_SIGNAL: rc = hmR0VmxExitInitSignal(pVCpu, pMixedCtx, pVmxTransient); break;
8085 case VMX_EXIT_SIPI: rc = hmR0VmxExitSipi(pVCpu, pMixedCtx, pVmxTransient); break;
8086 case VMX_EXIT_IO_SMI: rc = hmR0VmxExitIoSmi(pVCpu, pMixedCtx, pVmxTransient); break;
8087 case VMX_EXIT_SMI: rc = hmR0VmxExitSmi(pVCpu, pMixedCtx, pVmxTransient); break;
8088 case VMX_EXIT_ERR_MSR_LOAD: rc = hmR0VmxExitErrMsrLoad(pVCpu, pMixedCtx, pVmxTransient); break;
8089 case VMX_EXIT_ERR_INVALID_GUEST_STATE: rc = hmR0VmxExitErrInvalidGuestState(pVCpu, pMixedCtx, pVmxTransient); break;
8090 case VMX_EXIT_ERR_MACHINE_CHECK: rc = hmR0VmxExitErrMachineCheck(pVCpu, pMixedCtx, pVmxTransient); break;
8091
8092 case VMX_EXIT_VMCALL:
8093 case VMX_EXIT_VMCLEAR:
8094 case VMX_EXIT_VMLAUNCH:
8095 case VMX_EXIT_VMPTRLD:
8096 case VMX_EXIT_VMPTRST:
8097 case VMX_EXIT_VMREAD:
8098 case VMX_EXIT_VMRESUME:
8099 case VMX_EXIT_VMWRITE:
8100 case VMX_EXIT_VMXOFF:
8101 case VMX_EXIT_VMXON:
8102 case VMX_EXIT_INVEPT:
8103 case VMX_EXIT_INVVPID:
8104 case VMX_EXIT_VMFUNC:
8105 rc = hmR0VmxExitSetPendingXcptUD(pVCpu, pMixedCtx, pVmxTransient);
8106 break;
8107 default:
8108 rc = hmR0VmxExitErrUndefined(pVCpu, pMixedCtx, pVmxTransient);
8109 break;
8110 }
8111 return rc;
8112}
8113#endif
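/*
 * The switch above is compiled only when HMVMX_USE_FUNCTION_TABLE is not defined and must be
 * kept in sync with the g_apfnVMExitHandlers table used by the run loops; both map the basic
 * exit reason to the same hmR0VmxExit* handlers and are simply alternative dispatch mechanisms
 * selected at compile time.
 */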
8114
8115#ifdef DEBUG
8116/* Is there some generic IPRT define for this that is not in Runtime/internal/\* ?? */
8117# define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \
8118 RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
8119
8120# define HMVMX_ASSERT_PREEMPT_CPUID() \
8121 do \
8122 { \
8123 RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
8124 AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
8125 } while (0)
8126
8127# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() \
8128 do { \
8129 AssertPtr(pVCpu); \
8130 AssertPtr(pMixedCtx); \
8131 AssertPtr(pVmxTransient); \
8132 Assert(pVmxTransient->fVMEntryFailed == false); \
8133 Assert(ASMIntAreEnabled()); \
8134 HMVMX_ASSERT_PREEMPT_SAFE(); \
8135 HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \
8136 Log4Func(("vcpu[%RU32] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v\n", pVCpu->idCpu)); \
8137 HMVMX_ASSERT_PREEMPT_SAFE(); \
8138 if (VMMR0IsLogFlushDisabled(pVCpu)) \
8139 HMVMX_ASSERT_PREEMPT_CPUID(); \
8140 HMVMX_STOP_EXIT_DISPATCH_PROF(); \
8141 } while (0)
8142
8143# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() \
8144 do { \
8145 Log4Func(("\n")); \
8146 } while(0)
8147#else /* Release builds */
8148# define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS() do { HMVMX_STOP_EXIT_DISPATCH_PROF(); } while(0)
8149# define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS() do { } while(0)
8150#endif
8151
8152
8153/**
8154 * Advances the guest RIP after reading it from the VMCS.
8155 *
8156 * @returns VBox status code.
8157 * @param pVCpu Pointer to the VMCPU.
8158 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
8159 * out-of-sync. Make sure to update the required fields
8160 * before using them.
8161 * @param pVmxTransient Pointer to the VMX transient structure.
8162 *
8163 * @remarks No-long-jump zone!!!
8164 */
8165DECLINLINE(int) hmR0VmxAdvanceGuestRip(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8166{
8167 int rc = hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
8168 rc |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
8169 AssertRCReturn(rc, rc);
8170
8171 pMixedCtx->rip += pVmxTransient->cbInstr;
8172 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
8173 return rc;
8174}
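/*
 * Example for hmR0VmxAdvanceGuestRip(): for a CPUID exit at guest RIP 0x1000 the VM-exit
 * instruction-length field reads 2, so execution resumes at 0x1002 and only HM_CHANGED_GUEST_RIP
 * needs re-syncing before the next VM-entry.
 */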
8175
8176
8177/**
8178 * Tries to determine what part of the guest-state VT-x has deemed as invalid
8179 * and update error record fields accordingly.
8180 *
8181 * @return VMX_IGS_* return codes.
8182 * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything
8183 * wrong with the guest state.
8184 *
8185 * @param pVM Pointer to the VM.
8186 * @param pVCpu Pointer to the VMCPU.
8187 * @param pCtx Pointer to the guest-CPU state.
8188 */
8189static uint32_t hmR0VmxCheckGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
8190{
8191#define HMVMX_ERROR_BREAK(err) { uError = (err); break; }
8192#define HMVMX_CHECK_BREAK(expr, err) if (!(expr)) { \
8193 uError = (err); \
8194 break; \
8195 } else do {} while (0)
8196/* Duplicate of IEM_IS_CANONICAL(). */
8197#define HMVMX_IS_CANONICAL(a_u64Addr) ((uint64_t)(a_u64Addr) + UINT64_C(0x800000000000) < UINT64_C(0x1000000000000))
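/* Worked example for HMVMX_IS_CANONICAL(): with 48 implemented linear-address bits, adding 2^47
   (0x800000000000) maps the two canonical ranges [0, 2^47) and [2^64 - 2^47, 2^64) onto the
   single range [0, 2^48), so one unsigned compare suffices. E.g. 0x00007fffffffffff and
   0xffff800000000000 pass, while 0x0000800000000000 sums to exactly 2^48 and fails. */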
8198
8199 int rc;
8200 uint32_t uError = VMX_IGS_ERROR;
8201 uint32_t u32Val;
8202 bool fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuest;
8203
8204 do
8205 {
8206 /*
8207 * CR0.
8208 */
8209 uint32_t uSetCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
8210 uint32_t uZapCR0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1);
8211 /* Exceptions for unrestricted guests for the fixed CR0 bits (PE, PG).
8212 See Intel spec. 26.3.1 "Checks on Guest Control Registers, Debug Registers, and MSRs." */
8213 if (fUnrestrictedGuest)
8214 uSetCR0 &= ~(X86_CR0_PE | X86_CR0_PG);
8215
8216 uint32_t u32GuestCR0;
8217 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32GuestCR0);
8218 AssertRCBreak(rc);
8219 HMVMX_CHECK_BREAK((u32GuestCR0 & uSetCR0) == uSetCR0, VMX_IGS_CR0_FIXED1);
8220 HMVMX_CHECK_BREAK(!(u32GuestCR0 & ~uZapCR0), VMX_IGS_CR0_FIXED0);
8221 if ( !fUnrestrictedGuest
8222 && (u32GuestCR0 & X86_CR0_PG)
8223 && !(u32GuestCR0 & X86_CR0_PE))
8224 {
8225 HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO);
8226 }
8227
8228 /*
8229 * CR4.
8230 */
8231 uint64_t uSetCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
8232 uint64_t uZapCR4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1);
8233
8234 uint32_t u32GuestCR4;
8235 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32GuestCR4);
8236 AssertRCBreak(rc);
8237 HMVMX_CHECK_BREAK((u32GuestCR4 & uSetCR4) == uSetCR4, VMX_IGS_CR4_FIXED1);
8238 HMVMX_CHECK_BREAK(!(u32GuestCR4 & ~uZapCR4), VMX_IGS_CR4_FIXED0);
8239
8240 /*
8241 * IA32_DEBUGCTL MSR.
8242 */
8243 uint64_t u64Val;
8244 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val);
8245 AssertRCBreak(rc);
8246 if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
8247 && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. */
8248 {
8249 HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED);
8250 }
8251 uint64_t u64DebugCtlMsr = u64Val;
8252
8253#ifdef VBOX_STRICT
8254 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val);
8255 AssertRCBreak(rc);
8256 Assert(u32Val == pVCpu->hm.s.vmx.u32EntryCtls);
8257#endif
8258 bool const fLongModeGuest = RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST);
8259
8260 /*
8261 * RIP and RFLAGS.
8262 */
8263 uint32_t u32Eflags;
8264#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8265 if (HMVMX_IS_64BIT_HOST_MODE())
8266 {
8267 rc = VMXReadVmcs64(VMX_VMCS_GUEST_RIP, &u64Val);
8268 AssertRCBreak(rc);
8269 /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */
8270 if ( !fLongModeGuest
8271 || !pCtx->cs.Attr.n.u1Long)
8272 {
8273 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID);
8274 }
8275 /** @todo If the processor supports N < 64 linear-address bits, bits 63:N
8276 * must be identical if the "IA32e mode guest" VM-entry control is 1
8277 * and CS.L is 1. No check applies if the CPU supports 64
8278 * linear-address bits. */
8279
8280 /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */
8281 rc = VMXReadVmcs64(VMX_VMCS_GUEST_RFLAGS, &u64Val);
8282 AssertRCBreak(rc);
8283 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */
8284 VMX_IGS_RFLAGS_RESERVED);
8285 HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
8286 u32Eflags = u64Val;
8287 }
8288 else
8289#endif
8290 {
8291 rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Eflags);
8292 AssertRCBreak(rc);
8293 HMVMX_CHECK_BREAK(!(u32Eflags & 0xffc08028), VMX_IGS_RFLAGS_RESERVED); /* Bit 31:22, Bit 15, 5, 3 MBZ. */
8294 HMVMX_CHECK_BREAK((u32Eflags & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */
8295 }
8296
8297 if ( fLongModeGuest
8298 || ( fUnrestrictedGuest
8299 && !(u32GuestCR0 & X86_CR0_PE)))
8300 {
8301 HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID);
8302 }
8303
8304 uint32_t u32EntryInfo;
8305 rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo);
8306 AssertRCBreak(rc);
8307 if ( VMX_ENTRY_INTERRUPTION_INFO_VALID(u32EntryInfo)
8308 && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
8309 {
8310 HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID);
8311 }
8312
8313 /*
8314 * 64-bit checks.
8315 */
8316#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8317 if (HMVMX_IS_64BIT_HOST_MODE())
8318 {
8319 if ( fLongModeGuest
8320 && !fUnrestrictedGuest)
8321 {
8322 HMVMX_CHECK_BREAK(u32GuestCR0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE);
8323 HMVMX_CHECK_BREAK(u32GuestCR4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE);
8324 }
8325
8326 if ( !fLongModeGuest
8327 && (u32GuestCR4 & X86_CR4_PCIDE))
8328 {
8329 HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE);
8330 }
8331
8332 /** @todo CR3 field must be such that bits 63:52 and bits in the range
8333 * 51:32 beyond the processor's physical-address width are 0. */
8334
8335 if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_DEBUG)
8336 && (pCtx->dr[7] & X86_DR7_MBZ_MASK))
8337 {
8338 HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED);
8339 }
8340
8341 rc = VMXReadVmcs64(VMX_VMCS_GUEST_SYSENTER_ESP, &u64Val);
8342 AssertRCBreak(rc);
8343 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL);
8344
8345 rc = VMXReadVmcs64(VMX_VMCS_GUEST_SYSENTER_EIP, &u64Val);
8346 AssertRCBreak(rc);
8347 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL);
8348 }
8349#endif
8350
8351 /*
8352 * PERF_GLOBAL MSR.
8353 */
8354 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PERF_MSR)
8355 {
8356 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val);
8357 AssertRCBreak(rc);
8358 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)),
8359 VMX_IGS_PERF_GLOBAL_MSR_RESERVED); /* Bits 63:35, bits 31:2 MBZ. */
8360 }
8361
8362 /*
8363 * PAT MSR.
8364 */
8365 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_PAT_MSR)
8366 {
8367 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val);
8368 AssertRCBreak(rc);
8369 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xf8f8f8f8f8f8f8f8)), VMX_IGS_PAT_MSR_RESERVED); /* Bits 7:3 of each PAT type MBZ. */
8370 for (unsigned i = 0; i < 8; i++)
8371 {
8372 uint8_t u8Val = (u64Val & 0xff);
8373 if ( u8Val != 0 /* UC */
8374 && u8Val != 1 /* WC */
8375 && u8Val != 4 /* WT */
8376 && u8Val != 5 /* WP */
8377 && u8Val != 6 /* WB */
8378 && u8Val != 7 /* UC- */)
8379 {
8380 HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID);
8381 }
8382 u64Val >>= 8;
8383 }
8384 }
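/* Example: the architectural power-on PAT value 0x0007040600070406 decodes, from byte 0
   upwards, to WB, WT, UC-, UC, WB, WT, UC-, UC; every byte is one of the permitted memory
   types, so the loop above accepts it, whereas a byte value of 2 or 3 is rejected as
   VMX_IGS_PAT_MSR_INVALID. */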
8385
8386 /*
8387 * EFER MSR.
8388 */
8389 if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_LOAD_GUEST_EFER_MSR)
8390 {
8391 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val);
8392 AssertRCBreak(rc);
8393 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)),
8394 VMX_IGS_EFER_MSR_RESERVED); /* Bits 63:12, bit 9, bits 7:1 MBZ. */
8395 HMVMX_CHECK_BREAK(RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_IA32E_MODE_GUEST),
8396 VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH);
8397 HMVMX_CHECK_BREAK( fUnrestrictedGuest
8398 || RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL(u32GuestCR0 & X86_CR0_PG), VMX_IGS_EFER_LMA_PG_MISMATCH);
8399 }
8400
8401 /*
8402 * Segment registers.
8403 */
8404 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
8405 || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID);
8406 if (!(u32Eflags & X86_EFL_VM))
8407 {
8408 /* CS */
8409 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID);
8410 HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED);
8411 HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED);
8412 HMVMX_CHECK_BREAK( (pCtx->cs.u32Limit & 0xfff) == 0xfff
8413 || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
8414 HMVMX_CHECK_BREAK( !(pCtx->cs.u32Limit & 0xfff00000)
8415 || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID);
8416 /* CS cannot be loaded with NULL in protected mode. */
8417 HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE);
8418 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID);
8419 if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11)
8420 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL);
8421 else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15)
8422 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH);
8423 else if (pVM->hm.s.vmx.fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3)
8424 HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID);
8425 else
8426 HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID);
8427
8428 /* SS */
8429 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8430 || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL);
8431 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL);
8432 if ( !(pCtx->cr0 & X86_CR0_PE)
8433 || pCtx->cs.Attr.n.u4Type == 3)
8434 {
8435 HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID);
8436 }
8437 if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE))
8438 {
8439 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID);
8440 HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID);
8441 HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED);
8442 HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED);
8443 HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff
8444 || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
8445 HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000)
8446 || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID);
8447 }
8448
8449 /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxWriteSegmentReg(). */
8450 if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE))
8451 {
8452 HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID);
8453 HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID);
8454 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8455 || pCtx->ds.Attr.n.u4Type > 11
8456 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
8457 HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED);
8458 HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED);
8459 HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff
8460 || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
8461 HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000)
8462 || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID);
8463 HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE)
8464 || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID);
8465 }
8466 if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE))
8467 {
8468 HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID);
8469 HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID);
8470 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8471 || pCtx->es.Attr.n.u4Type > 11
8472 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL);
8473 HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED);
8474 HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED);
8475 HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff
8476 || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
8477 HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000)
8478 || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID);
8479 HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE)
8480 || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID);
8481 }
8482 if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE))
8483 {
8484 HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID);
8485 HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID);
8486 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8487 || pCtx->fs.Attr.n.u4Type > 11
8488 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL);
8489 HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED);
8490 HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED);
8491 HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff
8492 || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
8493 HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000)
8494 || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID);
8495 HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
8496 || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID);
8497 }
8498 if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE))
8499 {
8500 HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID);
8501 HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID);
8502 HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest
8503 || pCtx->gs.Attr.n.u4Type > 11
8504 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL);
8505 HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED);
8506 HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED);
8507 HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff
8508 || !(pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
8509 HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000)
8510 || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID);
8511 HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE)
8512 || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID);
8513 }
8514 /* 64-bit capable CPUs. */
8515#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8516 if (HMVMX_IS_64BIT_HOST_MODE())
8517 {
8518 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
8519 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
8520 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
8521 || HMVMX_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
8522 HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID);
8523 HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32),
8524 VMX_IGS_LONGMODE_SS_BASE_INVALID);
8525 HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32),
8526 VMX_IGS_LONGMODE_DS_BASE_INVALID);
8527 HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32),
8528 VMX_IGS_LONGMODE_ES_BASE_INVALID);
8529 }
8530#endif
8531 }
8532 else
8533 {
8534 /* V86 mode checks. */
8535 uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr;
8536 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
8537 {
8538 u32CSAttr = 0xf3; u32SSAttr = 0xf3;
8539 u32DSAttr = 0xf3; u32ESAttr = 0xf3;
8540 u32FSAttr = 0xf3; u32GSAttr = 0xf3;
8541 }
8542 else
8543 {
8544 u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u;
8545 u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u;
8546 u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u;
8547 }
8548
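        /* Note: 0xf3 is the only segment attribute value VT-x accepts for CS/SS/DS/ES/FS/GS in
           virtual-8086 mode: present (P=1), DPL=3, non-system (S=1), type 3 (read/write data, accessed). */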
8549 /* CS */
8550 HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID);
8551 HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID);
8552 HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID);
8553 /* SS */
8554 HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID);
8555 HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID);
8556 HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID);
8557 /* DS */
8558 HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID);
8559 HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID);
8560 HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID);
8561 /* ES */
8562 HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID);
8563 HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID);
8564 HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID);
8565 /* FS */
8566 HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID);
8567 HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID);
8568 HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID);
8569 /* GS */
8570 HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID);
8571 HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID);
8572 HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID);
8573 /* 64-bit capable CPUs. */
8574#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8575 if (HMVMX_IS_64BIT_HOST_MODE())
8576 {
8577 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL);
8578 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL);
8579 HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE)
8580 || HMVMX_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL);
8581 HMVMX_CHECK_BREAK(!(pCtx->cs.u64Base >> 32), VMX_IGS_LONGMODE_CS_BASE_INVALID);
8582 HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ss.u64Base >> 32),
8583 VMX_IGS_LONGMODE_SS_BASE_INVALID);
8584 HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ds.u64Base >> 32),
8585 VMX_IGS_LONGMODE_DS_BASE_INVALID);
8586 HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->es.u64Base >> 32),
8587 VMX_IGS_LONGMODE_ES_BASE_INVALID);
8588 }
8589#endif
8590 }
8591
8592 /*
8593 * TR.
8594 */
8595 HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID);
8596 /* 64-bit capable CPUs. */
8597#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8598 if (HMVMX_IS_64BIT_HOST_MODE())
8599 {
8600 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL);
8601 }
8602#endif
8603 if (fLongModeGuest)
8604 {
8605 HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */
8606 VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID);
8607 }
8608 else
8609 {
8610 HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */
8611 || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS.*/
8612 VMX_IGS_TR_ATTR_TYPE_INVALID);
8613 }
8614 HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID);
8615 HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID);
8616 HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */
8617 HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff
8618 || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
8619 HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000)
8620 || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID);
8621 HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE);
8622
8623 /*
8624 * GDTR and IDTR.
8625 */
8626#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
8627 if (HMVMX_IS_64BIT_HOST_MODE())
8628 {
8629 rc = VMXReadVmcs64(VMX_VMCS_GUEST_GDTR_BASE, &u64Val);
8630 AssertRCBreak(rc);
8631 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL);
8632
8633 rc = VMXReadVmcs64(VMX_VMCS_GUEST_IDTR_BASE, &u64Val);
8634 AssertRCBreak(rc);
8635 HMVMX_CHECK_BREAK(HMVMX_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL);
8636 }
8637#endif
8638
8639 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val);
8640 AssertRCBreak(rc);
8641 HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
8642
8643 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val);
8644 AssertRCBreak(rc);
8645 HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */
8646
8647 /*
8648 * Guest Non-Register State.
8649 */
8650 /* Activity State. */
8651 uint32_t u32ActivityState;
8652 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState);
8653 AssertRCBreak(rc);
8654 HMVMX_CHECK_BREAK( !u32ActivityState
8655 || (u32ActivityState & MSR_IA32_VMX_MISC_ACTIVITY_STATES(pVM->hm.s.vmx.Msrs.u64Misc)),
8656 VMX_IGS_ACTIVITY_STATE_INVALID);
8657 HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl)
8658 || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID);
8659 uint32_t u32IntrState;
8660 rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &u32IntrState);
8661 AssertRCBreak(rc);
8662 if ( u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS
8663 || u32IntrState == VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
8664 {
8665 HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID);
8666 }
8667
8668    /** @todo Activity state and injecting interrupts. Left as a todo since we
8669     *        currently don't use any activity state other than ACTIVE. */
8670
8671 HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM)
8672 || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID);
8673
8674 /* Guest interruptibility-state. */
8675 HMVMX_CHECK_BREAK(!(u32IntrState & 0xfffffff0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED);
8676 HMVMX_CHECK_BREAK((u32IntrState & ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
8677 | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS))
8678 != ( VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI
8679 | VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
8680 VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID);
8681 HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF)
8682 || !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI),
8683 VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID);
8684 if (VMX_ENTRY_INTERRUPTION_INFO_VALID(u32EntryInfo))
8685 {
8686 if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT)
8687 {
8688 HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
8689 && !(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
8690 VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID);
8691 }
8692 else if (VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
8693 {
8694 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS),
8695 VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID);
8696 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI),
8697 VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID);
8698 }
8699 }
8700 /** @todo Assumes the processor is not in SMM. */
8701 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI),
8702 VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID);
8703 HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_VMCS_CTRL_ENTRY_ENTRY_SMM)
8704 || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_SMI),
8705 VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID);
8706 if ( (pVCpu->hm.s.vmx.u32PinCtls & VMX_VMCS_CTRL_PIN_EXEC_VIRTUAL_NMI)
8707 && VMX_ENTRY_INTERRUPTION_INFO_VALID(u32EntryInfo)
8708 && VMX_ENTRY_INTERRUPTION_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
8709 {
8710 HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_NMI),
8711 VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID);
8712 }
8713
8714 /* Pending debug exceptions. */
8715 if (HMVMX_IS_64BIT_HOST_MODE())
8716 {
8717 rc = VMXReadVmcs64(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u64Val);
8718 AssertRCBreak(rc);
8719 /* Bits 63:15, Bit 13, Bits 11:4 MBZ. */
8720 HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED);
8721 u32Val = u64Val; /* For pending debug exceptions checks below. */
8722 }
8723 else
8724 {
8725 rc = VMXReadVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, &u32Val);
8726 AssertRCBreak(rc);
8727 /* Bits 31:15, Bit 13, Bits 11:4 MBZ. */
8728        HMVMX_CHECK_BREAK(!(u32Val & 0xffffaff0), VMX_IGS_PENDING_DEBUG_RESERVED);
8729 }
8730
8731 if ( (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI)
8732 || (u32IntrState & VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_MOVSS)
8733 || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT)
8734 {
8735 if ( (u32Eflags & X86_EFL_TF)
8736 && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
8737 {
8738 /* Bit 14 is PendingDebug.BS. */
8739 HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET);
8740 }
8741 if ( !(u32Eflags & X86_EFL_TF)
8742 || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */
8743 {
8744 /* Bit 14 is PendingDebug.BS. */
8745 HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR);
8746 }
8747 }
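    /* In short: while interrupts are inhibited by STI/MOV-SS or the guest is in the HLT activity
       state, the pending-debug BS bit must be set exactly when a single-step trap is armed,
       i.e. when EFLAGS.TF=1 and IA32_DEBUGCTL.BTF=0 (Intel spec., checks on guest non-register state). */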
8748
8749 /* VMCS link pointer. */
8750 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val);
8751 AssertRCBreak(rc);
8752 if (u64Val != UINT64_C(0xffffffffffffffff))
8753 {
8754 HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED);
8755 /** @todo Bits beyond the processor's physical-address width MBZ. */
8756 /** @todo 32-bit located in memory referenced by value of this field (as a
8757 * physical address) must contain the processor's VMCS revision ID. */
8758 /** @todo SMM checks. */
8759 }
8760
8761 /** @todo Checks on Guest Page-Directory-Pointer-Table Entries. */
8762
8763 /* Shouldn't happen but distinguish it from AssertRCBreak() errors. */
8764 if (uError == VMX_IGS_ERROR)
8765 uError = VMX_IGS_REASON_NOT_FOUND;
8766 } while (0);
8767
8768 pVCpu->hm.s.u32HMError = uError;
8769 return uError;
8770
8771#undef HMVMX_ERROR_BREAK
8772#undef HMVMX_CHECK_BREAK
8773#undef HMVMX_IS_CANONICAL
8774}
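/* The VMX_IGS_* code computed above is stored in pVCpu->hm.s.u32HMError; hmR0VmxExitErrInvalidGuestState()
   below re-runs these checks, so a failed VM-entry can be narrowed down to the exact guest-state check
   that tripped. */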
8775
8776/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
8777/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
8778/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
8779
8780/** @name VM-exit handlers.
8781 * @{
8782 */
8783
8784/**
8785 * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT).
8786 */
8787HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8788{
8789 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8790 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
8791    /* 32-bit Windows hosts (4 cores) have trouble with this; it causes higher interrupt latency. */
8792#if HC_ARCH_BITS == 64
8793 Assert(ASMIntAreEnabled());
8794 if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUsePreemptTimer)
8795 return VINF_SUCCESS;
8796#endif
8797 return VINF_EM_RAW_INTERRUPT;
8798}
8799
8800
8801/**
8802 * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI).
8803 */
8804HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8805{
8806 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8807 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3);
8808
8809 int rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient);
8810 AssertRCReturn(rc, rc);
8811
8812 uint32_t uIntType = VMX_EXIT_INTERRUPTION_INFO_TYPE(pVmxTransient->uExitIntInfo);
8813 Assert( !(pVCpu->hm.s.vmx.u32ExitCtls & VMX_VMCS_CTRL_EXIT_ACK_EXT_INT)
8814 && uIntType != VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT);
8815 Assert(VMX_EXIT_INTERRUPTION_INFO_IS_VALID(pVmxTransient->uExitIntInfo));
8816
8817 if (uIntType == VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI)
8818 {
8819 /*
8820         * This cannot be a guest NMI: the only way for the guest to receive an NMI is if we injected it ourselves, and
8821         * anything we inject never causes a VM-exit directly for the event being injected.
8822         * See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery".
8823         *
8824         * The NMI is therefore meant for the host; dispatch it there. See Intel spec. 27.5.5 "Updating Non-Register State".
8825 */
8826 VMXDispatchHostNmi();
8827 STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC);
8828 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
8829 return VINF_SUCCESS;
8830 }
8831
8832 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
8833 rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
8834 if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT))
8835 {
8836 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
8837 return VINF_SUCCESS;
8838 }
8839 else if (RT_UNLIKELY(rc == VINF_EM_RESET))
8840 {
8841 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
8842 return rc;
8843 }
8844
8845 uint32_t uExitIntInfo = pVmxTransient->uExitIntInfo;
8846 uint32_t uVector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(uExitIntInfo);
8847 switch (uIntType)
8848 {
8849 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /* Software exception. (#BP or #OF) */
8850 Assert(uVector == X86_XCPT_DB || uVector == X86_XCPT_BP || uVector == X86_XCPT_OF);
8851 /* no break */
8852 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT:
8853 {
8854 switch (uVector)
8855 {
8856 case X86_XCPT_PF: rc = hmR0VmxExitXcptPF(pVCpu, pMixedCtx, pVmxTransient); break;
8857 case X86_XCPT_GP: rc = hmR0VmxExitXcptGP(pVCpu, pMixedCtx, pVmxTransient); break;
8858 case X86_XCPT_NM: rc = hmR0VmxExitXcptNM(pVCpu, pMixedCtx, pVmxTransient); break;
8859 case X86_XCPT_MF: rc = hmR0VmxExitXcptMF(pVCpu, pMixedCtx, pVmxTransient); break;
8860 case X86_XCPT_DB: rc = hmR0VmxExitXcptDB(pVCpu, pMixedCtx, pVmxTransient); break;
8861 case X86_XCPT_BP: rc = hmR0VmxExitXcptBP(pVCpu, pMixedCtx, pVmxTransient); break;
8862#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
8863 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF);
8864 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8865 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE);
8866 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8867 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD);
8868 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8869 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS);
8870 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8871 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP);
8872 rc = hmR0VmxExitXcptGeneric(pVCpu, pMixedCtx, pVmxTransient); break;
8873#endif
8874 default:
8875 {
8876 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
8877 AssertRCReturn(rc, rc);
8878
8879 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
8880 if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
8881 {
8882 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS);
8883 Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM)));
8884 Assert(CPUMIsGuestInRealModeEx(pMixedCtx));
8885
8886 rc = hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
8887 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
8888 AssertRCReturn(rc, rc);
8889 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(uExitIntInfo),
8890 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode,
8891 0 /* GCPtrFaultAddress */);
8892 AssertRCReturn(rc, rc);
8893 }
8894 else
8895 {
8896 AssertMsgFailed(("Unexpected VM-exit caused by exception %#x\n", uVector));
8897 pVCpu->hm.s.u32HMError = uVector;
8898 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
8899 }
8900 break;
8901 }
8902 }
8903 break;
8904 }
8905
8906 default:
8907 {
8908 pVCpu->hm.s.u32HMError = uExitIntInfo;
8909 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE;
8910 AssertMsgFailed(("Unexpected interruption info %#x\n", VMX_EXIT_INTERRUPTION_INFO_TYPE(uExitIntInfo)));
8911 break;
8912 }
8913 }
8914 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3);
8915 return rc;
8916}
8917
8918
8919/**
8920 * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW).
8921 */
8922HMVMX_EXIT_DECL hmR0VmxExitIntWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8923{
8924 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8925
8926    /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts; it is now ready. */
8927 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT);
8928 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
8929 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
8930 AssertRCReturn(rc, rc);
8931
8932 /* Deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectEvent() and resume guest execution. */
8933 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
8934 return VINF_SUCCESS;
8935}
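/* For reference, the converse operation (enabling the interrupt-window exit when an event is pending
   but the guest cannot yet accept it) follows the same pattern. Illustrative sketch only; the actual
   code lives in the event-injection path elsewhere in this file:
       pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_INT_WINDOW_EXIT;
       rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); */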
8936
8937
8938/**
8939 * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW).
8940 */
8941HMVMX_EXIT_DECL hmR0VmxExitNmiWindow(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8942{
8943 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8944 AssertMsgFailed(("Unexpected NMI-window exit.\n"));
8945 HMVMX_RETURN_UNEXPECTED_EXIT();
8946}
8947
8948
8949/**
8950 * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit.
8951 */
8952HMVMX_EXIT_DECL hmR0VmxExitWbinvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8953{
8954 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8955 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWbinvd);
8956 return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
8957}
8958
8959
8960/**
8961 * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit.
8962 */
8963HMVMX_EXIT_DECL hmR0VmxExitInvd(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8964{
8965 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8966 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
8967 return hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
8968}
8969
8970
8971/**
8972 * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit.
8973 */
8974HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8975{
8976 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
8977 PVM pVM = pVCpu->CTX_SUFF(pVM);
8978 int rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
8979 if (RT_LIKELY(rc == VINF_SUCCESS))
8980 {
8981 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
8982 Assert(pVmxTransient->cbInstr == 2);
8983 }
8984 else
8985 {
8986 AssertMsgFailed(("hmR0VmxExitCpuid: EMInterpretCpuId failed with %Rrc\n", rc));
8987 rc = VERR_EM_INTERPRETER;
8988 }
8989 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
8990 return rc;
8991}
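/* Note: CPUID has a fixed 2-byte encoding (0F A2), hence the cbInstr == 2 assertion in the handler
   above; the same reasoning applies to the RDTSC/RDPMC/RDMSR/WRMSR asserts below (all 2-byte opcodes)
   and to the 3-byte RDTSCP (0F 01 F9). */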
8992
8993
8994/**
8995 * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit.
8996 */
8997HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
8998{
8999 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9000 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx);
9001 AssertRCReturn(rc, rc);
9002
9003 if (pMixedCtx->cr4 & X86_CR4_SMXE)
9004 return VINF_EM_RAW_EMULATE_INSTR;
9005
9006 AssertMsgFailed(("hmR0VmxExitGetsec: unexpected VM-exit when CR4.SMXE is 0.\n"));
9007 HMVMX_RETURN_UNEXPECTED_EXIT();
9008}
9009
9010
9011/**
9012 * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit.
9013 */
9014HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9015{
9016 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9017 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
9018 AssertRCReturn(rc, rc);
9019
9020 PVM pVM = pVCpu->CTX_SUFF(pVM);
9021 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9022 if (RT_LIKELY(rc == VINF_SUCCESS))
9023 {
9024 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9025 Assert(pVmxTransient->cbInstr == 2);
9026 /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */
9027 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING)
9028 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
9029 }
9030 else
9031 {
9032 AssertMsgFailed(("hmR0VmxExitRdtsc: EMInterpretRdtsc failed with %Rrc\n", rc));
9033 rc = VERR_EM_INTERPRETER;
9034 }
9035 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
9036 return rc;
9037}
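/* With VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING the guest observes (host TSC + VMCS TSC offset) on
   RDTSC/RDTSCP; when an exit slips through anyway, the offset (and the preemption timer value that is
   programmed together with it) must be recomputed before the next VM-entry, which is what
   fUpdateTscOffsettingAndPreemptTimer requests. */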
9038
9039
9040/**
9041 * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit.
9042 */
9043HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9044{
9045 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9046 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
9047 rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx); /* For MSR_K8_TSC_AUX */
9048 AssertRCReturn(rc, rc);
9049
9050 PVM pVM = pVCpu->CTX_SUFF(pVM);
9051 rc = EMInterpretRdtscp(pVM, pVCpu, pMixedCtx);
9052 if (RT_LIKELY(rc == VINF_SUCCESS))
9053 {
9054 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9055 Assert(pVmxTransient->cbInstr == 3);
9056 /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */
9057 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TSC_OFFSETTING)
9058 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
9059 }
9060 else
9061 {
9062 AssertMsgFailed(("hmR0VmxExitRdtscp: EMInterpretRdtscp failed with %Rrc\n", rc));
9063 rc = VERR_EM_INTERPRETER;
9064 }
9065 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
9066 return rc;
9067}
9068
9069
9070/**
9071 * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit.
9072 */
9073HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9074{
9075 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9076 int rc = hmR0VmxSaveGuestCR4(pVCpu, pMixedCtx); /** @todo review if CR4 is really required by EM. */
9077 rc |= hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx); /** @todo review if CR0 is really required by EM. */
9078 AssertRCReturn(rc, rc);
9079
9080 PVM pVM = pVCpu->CTX_SUFF(pVM);
9081 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9082 if (RT_LIKELY(rc == VINF_SUCCESS))
9083 {
9084 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9085 Assert(pVmxTransient->cbInstr == 2);
9086 }
9087 else
9088 {
9089 AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc));
9090 rc = VERR_EM_INTERPRETER;
9091 }
9092 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
9093 return rc;
9094}
9095
9096
9097/**
9098 * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit.
9099 */
9100HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9101{
9102 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9103 PVM pVM = pVCpu->CTX_SUFF(pVM);
9104 Assert(!pVM->hm.s.fNestedPaging);
9105
9106 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9107 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
9108 AssertRCReturn(rc, rc);
9109
9110 VBOXSTRICTRC rc2 = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), pVmxTransient->uExitQualification);
9111 rc = VBOXSTRICTRC_VAL(rc2);
9112 if (RT_LIKELY(rc == VINF_SUCCESS))
9113 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9114 else
9115 {
9116 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitInvlpg: EMInterpretInvlpg %#RX64 failed with %Rrc\n",
9117 pVmxTransient->uExitQualification, rc));
9118 }
9119 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
9120 return rc;
9121}
9122
9123
9124/**
9125 * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit.
9126 */
9127HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9128{
9129 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9130 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9131 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9132 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9133 AssertRCReturn(rc, rc);
9134
9135 PVM pVM = pVCpu->CTX_SUFF(pVM);
9136 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9137 if (RT_LIKELY(rc == VINF_SUCCESS))
9138 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9139 else
9140 {
9141 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc));
9142 rc = VERR_EM_INTERPRETER;
9143 }
9144 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
9145 return rc;
9146}
9147
9148
9149/**
9150 * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit.
9151 */
9152HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9153{
9154 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9155 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9156 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9157 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9158 AssertRCReturn(rc, rc);
9159
9160 PVM pVM = pVCpu->CTX_SUFF(pVM);
9161 VBOXSTRICTRC rc2 = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9162 rc = VBOXSTRICTRC_VAL(rc2);
9163 if (RT_LIKELY( rc == VINF_SUCCESS
9164 || rc == VINF_EM_HALT))
9165 {
9166 int rc3 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9167 AssertRCReturn(rc3, rc3);
9168
9169 if ( rc == VINF_EM_HALT
9170 && EMMonitorWaitShouldContinue(pVCpu, pMixedCtx))
9171 {
9172 rc = VINF_SUCCESS;
9173 }
9174 }
9175 else
9176 {
9177 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMwait: EMInterpretMWait failed with %Rrc\n", rc));
9178 rc = VERR_EM_INTERPRETER;
9179 }
9180 AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER,
9181 ("hmR0VmxExitMwait: failed, invalid error code %Rrc\n", rc));
9182 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
9183 return rc;
9184}
9185
9186
9187/**
9188 * VM-exit handler for RSM (VMX_EXIT_RSM). Unconditional VM-exit.
9189 */
9190HMVMX_EXIT_DECL hmR0VmxExitRsm(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9191{
9192 /*
9193 * Execution of RSM outside of SMM mode causes #UD regardless of VMX root or VMX non-root mode. In theory, we should never
9194 * get this VM-exit. This can happen only if dual-monitor treatment of SMI and VMX is enabled, which can (only?) be done by
9195 * executing VMCALL in VMX root operation. If we get here, something funny is going on.
9196 * See Intel spec. "33.15.5 Enabling the Dual-Monitor Treatment".
9197 */
9198 AssertMsgFailed(("Unexpected RSM VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9199 HMVMX_RETURN_UNEXPECTED_EXIT();
9200}
9201
9202
9203/**
9204 * VM-exit handler for SMI (VMX_EXIT_SMI). Unconditional VM-exit.
9205 */
9206HMVMX_EXIT_DECL hmR0VmxExitSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9207{
9208 /*
9209 * This can only happen if we support dual-monitor treatment of SMI, which can be activated by executing VMCALL in VMX
9210 * root operation. Only an STM (SMM transfer monitor) would get this exit when we (the executive monitor) execute a VMCALL
9211 * in VMX root mode or receive an SMI. If we get here, something funny is going on.
9212     * See Intel spec. "33.15.6 Activating the Dual-Monitor Treatment" and Intel spec. 25.3 "Other Causes of VM-Exits".
9213 */
9214 AssertMsgFailed(("Unexpected SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9215 HMVMX_RETURN_UNEXPECTED_EXIT();
9216}
9217
9218
9219/**
9220 * VM-exit handler for IO SMI (VMX_EXIT_IO_SMI). Unconditional VM-exit.
9221 */
9222HMVMX_EXIT_DECL hmR0VmxExitIoSmi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9223{
9224 /* Same treatment as VMX_EXIT_SMI. See comment in hmR0VmxExitSmi(). */
9225 AssertMsgFailed(("Unexpected IO SMI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9226 HMVMX_RETURN_UNEXPECTED_EXIT();
9227}
9228
9229
9230/**
9231 * VM-exit handler for SIPI (VMX_EXIT_SIPI). Conditional VM-exit.
9232 */
9233HMVMX_EXIT_DECL hmR0VmxExitSipi(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9234{
9235 /*
9236 * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest activity state is used. We currently
9237 * don't make use of it (see hmR0VmxLoadGuestActivityState()) as our guests don't have direct access to the host LAPIC.
9238 * See Intel spec. 25.3 "Other Causes of VM-exits".
9239 */
9240 AssertMsgFailed(("Unexpected SIPI VM-exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9241 HMVMX_RETURN_UNEXPECTED_EXIT();
9242}
9243
9244
9245/**
9246 * VM-exit handler for INIT signal (VMX_EXIT_INIT_SIGNAL). Unconditional
9247 * VM-exit.
9248 */
9249HMVMX_EXIT_DECL hmR0VmxExitInitSignal(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9250{
9251 /*
9252 * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM.
9253     * See Intel spec. 33.14.1 "Default Treatment of SMI Delivery" and Intel spec. 29.3 "VMX Instructions" for "VMXON".
9254 *
9255 * It is -NOT- blocked in VMX non-root operation so we can, in theory, still get these VM-exits.
9256 * See Intel spec. "23.8 Restrictions on VMX operation".
9257 */
9258 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9259 return VINF_SUCCESS;
9260}
9261
9262
9263/**
9264 * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional
9265 * VM-exit.
9266 */
9267HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9268{
9269 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9270 return VINF_EM_RESET;
9271}
9272
9273
9274/**
9275 * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit.
9276 */
9277HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9278{
9279 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9280 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_HLT_EXIT);
9281 int rc = hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
9282 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9283 AssertRCReturn(rc, rc);
9284
9285 pMixedCtx->rip++;
9286 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
9287 if (EMShouldContinueAfterHalt(pVCpu, pMixedCtx)) /* Requires eflags. */
9288 rc = VINF_SUCCESS;
9289 else
9290 rc = VINF_EM_HALT;
9291
9292 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
9293 return rc;
9294}
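/* Note: HLT has a fixed 1-byte encoding (F4), which is why the handler above advances RIP by one
   directly instead of reading the instruction length from the VMCS. */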
9295
9296
9297/**
9298 * VM-exit handler for instructions that result in a #UD exception delivered to
9299 * the guest.
9300 */
9301HMVMX_EXIT_DECL hmR0VmxExitSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9302{
9303 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9304 hmR0VmxSetPendingXcptUD(pVCpu, pMixedCtx);
9305 return VINF_SUCCESS;
9306}
9307
9308
9309/**
9310 * VM-exit handler for expiry of the VMX preemption timer.
9311 */
9312HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9313{
9314 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9315
9316 /* If the preemption-timer has expired, reinitialize the preemption timer on next VM-entry. */
9317 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
9318
9319 /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */
9320 PVM pVM = pVCpu->CTX_SUFF(pVM);
9321 bool fTimersPending = TMTimerPollBool(pVM, pVCpu);
9322 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer);
9323 return fTimersPending ? VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS;
9324}
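/* The VMX preemption timer counts down while in VMX non-root operation at a rate proportional to the
   TSC (the TSC shifted right by IA32_VMX_MISC bits 4:0) and forces this VM-exit when it reaches zero;
   it is used here to bound how long the guest runs before host timers are polled again. */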
9325
9326
9327/**
9328 * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit.
9329 */
9330HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9331{
9332 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9333
9334    /* We expose XSETBV to the guest; fall back to the recompiler for emulation. */
9335 /** @todo check if XSETBV is supported by the recompiler. */
9336 return VERR_EM_INTERPRETER;
9337}
9338
9339
9340/**
9341 * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit.
9342 */
9343HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9344{
9345 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9346
9347    /* The guest should not invalidate the host CPU's TLBs; fall back to the recompiler. */
9348 /** @todo implement EMInterpretInvpcid() */
9349 return VERR_EM_INTERPRETER;
9350}
9351
9352
9353/**
9354 * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE).
9355 * Error VM-exit.
9356 */
9357HMVMX_EXIT_DECL hmR0VmxExitErrInvalidGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9358{
9359 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9360 AssertRCReturn(rc, rc);
9361
9362 uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx);
9363 NOREF(uInvalidReason);
9364
9365#ifdef VBOX_STRICT
9366 uint32_t uIntrState;
9367 HMVMXHCUINTREG uHCReg;
9368 uint64_t u64Val;
9369 uint32_t u32Val;
9370
9371 rc = hmR0VmxReadEntryIntInfoVmcs(pVmxTransient);
9372 rc |= hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient);
9373 rc |= hmR0VmxReadEntryInstrLenVmcs(pVCpu, pVmxTransient);
9374 rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &uIntrState);
9375 AssertRCReturn(rc, rc);
9376
9377 Log4(("uInvalidReason %u\n", uInvalidReason));
9378 Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntInfo));
9379 Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode));
9380 Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr));
9381 Log4(("VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE %#RX32\n", uIntrState));
9382
9383 rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val); AssertRC(rc);
9384 Log4(("VMX_VMCS_GUEST_CR0 %#RX32\n", u32Val));
9385 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc);
9386 Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg));
9387 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc);
9388    Log4(("VMX_VMCS_CTRL_CR0_READ_SHADOW %#RHr\n", uHCReg));
9389 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc);
9390 Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg));
9391 rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc);
9392 Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg));
9393 rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc);
9394 Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val));
9395#endif
9396
9397 PVM pVM = pVCpu->CTX_SUFF(pVM);
9398 HMDumpRegs(pVM, pVCpu, pMixedCtx);
9399
9400 return VERR_VMX_INVALID_GUEST_STATE;
9401}
9402
9403
9404/**
9405 * VM-exit handler for VM-entry failure due to an MSR-load
9406 * (VMX_EXIT_ERR_MSR_LOAD). Error VM-exit.
9407 */
9408HMVMX_EXIT_DECL hmR0VmxExitErrMsrLoad(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9409{
9410 AssertMsgFailed(("Unexpected MSR-load exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9411 HMVMX_RETURN_UNEXPECTED_EXIT();
9412}
9413
9414
9415/**
9416 * VM-exit handler for VM-entry failure due to a machine-check event
9417 * (VMX_EXIT_ERR_MACHINE_CHECK). Error VM-exit.
9418 */
9419HMVMX_EXIT_DECL hmR0VmxExitErrMachineCheck(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9420{
9421 AssertMsgFailed(("Unexpected machine-check event exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9422 HMVMX_RETURN_UNEXPECTED_EXIT();
9423}
9424
9425
9426/**
9427 * VM-exit handler for all undefined reasons. Should never ever happen... in
9428 * theory.
9429 */
9430HMVMX_EXIT_DECL hmR0VmxExitErrUndefined(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9431{
9432 AssertMsgFailed(("Huh!? Undefined VM-exit reason %d. pVCpu=%p pMixedCtx=%p\n", pVmxTransient->uExitReason, pVCpu, pMixedCtx));
9433 return VERR_VMX_UNDEFINED_EXIT_CODE;
9434}
9435
9436
9437/**
9438 * VM-exit handler for XDTR (LGDT, SGDT, LIDT, SIDT) accesses
9439 * (VMX_EXIT_XDTR_ACCESS) and LDT and TR access (LLDT, LTR, SLDT, STR).
9440 * Conditional VM-exit.
9441 */
9442HMVMX_EXIT_DECL hmR0VmxExitXdtrAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9443{
9444 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9445
9446 /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT. */
9447 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitXdtrAccess);
9448 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_DESCRIPTOR_TABLE_EXIT)
9449 return VERR_EM_INTERPRETER;
9450 AssertMsgFailed(("Unexpected XDTR access. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9451 HMVMX_RETURN_UNEXPECTED_EXIT();
9452}
9453
9454
9455/**
9456 * VM-exit handler for RDRAND (VMX_EXIT_RDRAND). Conditional VM-exit.
9457 */
9458HMVMX_EXIT_DECL hmR0VmxExitRdrand(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9459{
9460 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9461
9462 /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT. */
9463 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdrand);
9464 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDRAND_EXIT)
9465 return VERR_EM_INTERPRETER;
9466 AssertMsgFailed(("Unexpected RDRAND exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9467 HMVMX_RETURN_UNEXPECTED_EXIT();
9468}
9469
9470
9471/**
9472 * VM-exit handler for RDMSR (VMX_EXIT_RDMSR).
9473 */
9474HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9475{
9476 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9477
9478 /* EMInterpretRdmsr() requires CR0, Eflags and SS segment register. */
9479 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9480 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9481 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9482 AssertRCReturn(rc, rc);
9483 Log4(("CS:RIP=%04x:%#RX64 ECX=%X\n", pMixedCtx->cs.Sel, pMixedCtx->rip, pMixedCtx->ecx));
9484
9485 PVM pVM = pVCpu->CTX_SUFF(pVM);
9486 rc = EMInterpretRdmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9487 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER,
9488 ("hmR0VmxExitRdmsr: failed, invalid error code %Rrc\n", rc));
9489 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
9490
9491 if (RT_LIKELY(rc == VINF_SUCCESS))
9492 {
9493 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9494 Assert(pVmxTransient->cbInstr == 2);
9495 }
9496 return rc;
9497}
9498
9499
9500/**
9501 * VM-exit handler for WRMSR (VMX_EXIT_WRMSR).
9502 */
9503HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9504{
9505 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9506 PVM pVM = pVCpu->CTX_SUFF(pVM);
9507 int rc = VINF_SUCCESS;
9508
9509 /* EMInterpretWrmsr() requires CR0, EFLAGS and SS segment register. */
9510 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9511 rc |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx);
9512 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9513 rc |= hmR0VmxSaveGuestAutoLoadStoreMsrs(pVCpu, pMixedCtx);
9514 AssertRCReturn(rc, rc);
9515 Log4(("ecx=%#RX32\n", pMixedCtx->ecx));
9516
9517 rc = EMInterpretWrmsr(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
9518 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, ("hmR0VmxExitWrmsr: failed, invalid error code %Rrc\n", rc));
9519 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
9520
9521 if (RT_LIKELY(rc == VINF_SUCCESS))
9522 {
9523 rc = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9524
9525 /* If this is an X2APIC WRMSR access, update the APIC state as well. */
9526 if ( pMixedCtx->ecx >= MSR_IA32_X2APIC_START
9527 && pMixedCtx->ecx <= MSR_IA32_X2APIC_END)
9528 {
9529            /* We've already saved the APIC-related guest-state (TPR) in hmR0VmxPostRunGuest(). When full APIC register
9530             * virtualization is implemented, we'll have to make sure the APIC state is saved from the VMCS before
9531             * EMInterpretWrmsr() changes it. */
9532 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
9533 }
9534 else if (pMixedCtx->ecx == MSR_K6_EFER) /* EFER is the only MSR we auto-load but don't allow write-passthrough. */
9535 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS);
9536 else if (pMixedCtx->ecx == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */
9537 pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true;
9538
9539        /* Without MSR-bitmaps every WRMSR causes a VM-exit and EMInterpretWrmsr() only updates the CPUM context; mark
            * MSRs that are part of the VMCS as dirty so they are re-synced into the VMCS on the next VM-entry. */
9540 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_MSR_BITMAPS))
9541 {
9542 switch (pMixedCtx->ecx)
9543 {
9544 case MSR_IA32_SYSENTER_CS: VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break;
9545 case MSR_IA32_SYSENTER_EIP: VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break;
9546 case MSR_IA32_SYSENTER_ESP: VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break;
9547 case MSR_K8_FS_BASE: /* no break */
9548 case MSR_K8_GS_BASE: VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_SEGMENT_REGS); break;
9549 case MSR_K8_KERNEL_GS_BASE: VMCPU_HMCF_SET(pVCpu, HM_CHANGED_VMX_GUEST_AUTO_MSRS); break;
9550 }
9551 }
9552#ifdef VBOX_STRICT
9553 else
9554 {
9555 /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */
9556 switch (pMixedCtx->ecx)
9557 {
9558 case MSR_IA32_SYSENTER_CS:
9559 case MSR_IA32_SYSENTER_EIP:
9560 case MSR_IA32_SYSENTER_ESP:
9561 case MSR_K8_FS_BASE:
9562 case MSR_K8_GS_BASE:
9563 {
9564 AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", pMixedCtx->ecx));
9565 HMVMX_RETURN_UNEXPECTED_EXIT();
9566 }
9567
9568 case MSR_K8_LSTAR:
9569 case MSR_K6_STAR:
9570 case MSR_K8_SF_MASK:
9571 case MSR_K8_TSC_AUX:
9572 case MSR_K8_KERNEL_GS_BASE:
9573 {
9574 AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n",
9575 pMixedCtx->ecx));
9576 HMVMX_RETURN_UNEXPECTED_EXIT();
9577 }
9578 }
9579 }
9580#endif /* VBOX_STRICT */
9581 }
9582 return rc;
9583}
9584
9585
9586/**
9587 * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit.
9588 */
9589HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9590{
9591 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9592
9593 /* By default, we don't enable VMX_VMCS_CTRL_PROC_EXEC_PAUSE_EXIT. */
9594 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPause);
9595 if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_PAUSE_EXIT)
9596 return VERR_EM_INTERPRETER;
9597 AssertMsgFailed(("Unexpected PAUSE exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
9598 HMVMX_RETURN_UNEXPECTED_EXIT();
9599}
9600
9601
9602/**
9603 * VM-exit handler for when the TPR value is lowered below the specified
9604 * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit.
9605 */
9606HMVMX_EXIT_DECL hmR0VmxExitTprBelowThreshold(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9607{
9608 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9609 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW);
9610
9611 /*
9612     * The TPR has already been updated; see hmR0VmxPostRunGuest(). RIP is also updated as part of the VM-exit by VT-x. Update
9613 * the threshold in the VMCS, deliver the pending interrupt via hmR0VmxPreRunGuest()->hmR0VmxInjectEvent() and
9614 * resume guest execution.
9615 */
9616 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
9617 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold);
9618 return VINF_SUCCESS;
9619}
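/* With the TPR shadow active, the CPU compares the virtual TPR against the TPR-threshold VMCS field on
   every guest TPR write and raises this VM-exit only when the TPR drops below the threshold, so pending
   interrupts can be re-evaluated without intercepting every CR8/APIC TPR access. */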
9620
9621
9622/**
9623 * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional
9624 * VM-exit.
9625 *
9626 * @retval VINF_SUCCESS when guest execution can continue.
9627 * @retval VINF_PGM_CHANGE_MODE when shadow paging mode changed, back to ring-3.
9628 * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3.
9629 * @retval VERR_EM_INTERPRETER when something unexpected happened, fallback to
9630 * recompiler.
9631 */
9632HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9633{
9634 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9635 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2);
9636 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9637 AssertRCReturn(rc, rc);
9638
9639 const RTGCUINTPTR uExitQualification = pVmxTransient->uExitQualification;
9640 const uint32_t uAccessType = VMX_EXIT_QUALIFICATION_CRX_ACCESS(uExitQualification);
9641 PVM pVM = pVCpu->CTX_SUFF(pVM);
9642 switch (uAccessType)
9643 {
9644 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE: /* MOV to CRx */
9645 {
9646#if 0
9647 /* EMInterpretCRxWrite() references a lot of guest state (EFER, RFLAGS, Segment Registers, etc.) Sync entire state */
9648 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9649#else
9650 rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
9651 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
9652 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9653#endif
9654 AssertRCReturn(rc, rc);
9655
9656 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
9657 VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification),
9658 VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification));
9659 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
9660
9661 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification))
9662 {
9663 case 0: /* CR0 */
9664 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
9665 Log4(("CRX CR0 write rc=%d CR0=%#RX64\n", rc, pMixedCtx->cr0));
9666 break;
9667 case 2: /* CR2 */
9668                    /* Nothing to do here; CR2 is not part of the VMCS. */
9669 break;
9670 case 3: /* CR3 */
9671 Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestPagingEnabledEx(pMixedCtx));
9672 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR3);
9673 Log4(("CRX CR3 write rc=%d CR3=%#RX64\n", rc, pMixedCtx->cr3));
9674 break;
9675 case 4: /* CR4 */
9676 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR4);
9677 Log4(("CRX CR4 write rc=%d CR4=%#RX64\n", rc, pMixedCtx->cr4));
9678 break;
9679 case 8: /* CR8 */
9680 Assert(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW));
9681 /* CR8 contains the APIC TPR. Was updated by EMInterpretCRxWrite(). */
9682 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_VMX_GUEST_APIC_STATE);
9683 break;
9684 default:
9685 AssertMsgFailed(("Invalid CRx register %#x\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)));
9686 break;
9687 }
9688
9689 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]);
9690 break;
9691 }
9692
9693 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ: /* MOV from CRx */
9694 {
9695 /* EMInterpretCRxRead() requires EFER MSR, CS. */
9696 rc = hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
9697 AssertRCReturn(rc, rc);
9698 Assert( !pVM->hm.s.fNestedPaging
9699 || !CPUMIsGuestPagingEnabledEx(pMixedCtx)
9700 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 3);
9701
9702 /* CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */
9703 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification) != 8
9704 || !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW));
9705
9706 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
9707 VMX_EXIT_QUALIFICATION_CRX_GENREG(uExitQualification),
9708 VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification));
9709 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
9710 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification)]);
9711 Log4(("CRX CR%d Read access rc=%d\n", VMX_EXIT_QUALIFICATION_CRX_REGISTER(uExitQualification), rc));
9712 break;
9713 }
9714
9715 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS: /* CLTS (Clear Task-Switch Flag in CR0) */
9716 {
9717 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9718 AssertRCReturn(rc, rc);
9719 rc = EMInterpretCLTS(pVM, pVCpu);
9720 AssertRCReturn(rc, rc);
9721 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
9722 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
9723 Log4(("CRX CLTS write rc=%d\n", rc));
9724 break;
9725 }
9726
9727 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */
9728 {
9729 rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
9730 AssertRCReturn(rc, rc);
9731 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(uExitQualification));
9732 if (RT_LIKELY(rc == VINF_SUCCESS))
9733 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
9734 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw);
9735 Log4(("CRX LMSW write rc=%d\n", rc));
9736 break;
9737 }
9738
9739 default:
9740 {
9741 AssertMsgFailed(("Invalid access-type in Mov CRx exit qualification %#x\n", uAccessType));
9742 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
9743 }
9744 }
9745
9746 /* Validate possible error codes. */
9747 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_CHANGE_MODE || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_SYNC_CR3
9748 || rc == VERR_VMX_UNEXPECTED_EXCEPTION);
9749 if (RT_SUCCESS(rc))
9750 {
9751 int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
9752 AssertRCReturn(rc2, rc2);
9753 }
9754
9755 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2);
9756 return rc;
9757}
9758
9759
9760/**
9761 * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional
9762 * VM-exit.
9763 */
9764HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9765{
9766 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9767 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1);
9768
9769 int rc2 = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9770 rc2 |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
9771 rc2 |= hmR0VmxSaveGuestRip(pVCpu, pMixedCtx);
9772 rc2 |= hmR0VmxSaveGuestRflags(pVCpu, pMixedCtx); /* Eflag checks in EMInterpretDisasCurrent(). */
9773 rc2 |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx); /* CR0 checks & PGM* in EMInterpretDisasCurrent(). */
9774 rc2 |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx); /* SELM checks in EMInterpretDisasCurrent(). */
9775 /* EFER also required for longmode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */
9776 AssertRCReturn(rc2, rc2);
9777
9778    /* Refer to Intel spec. 27-5 "Exit Qualifications for I/O Instructions" for the format. */
9779 uint32_t uIOPort = VMX_EXIT_QUALIFICATION_IO_PORT(pVmxTransient->uExitQualification);
9780 uint8_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(pVmxTransient->uExitQualification);
9781 bool fIOWrite = ( VMX_EXIT_QUALIFICATION_IO_DIRECTION(pVmxTransient->uExitQualification)
9782 == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
9783 bool fIOString = VMX_EXIT_QUALIFICATION_IO_IS_STRING(pVmxTransient->uExitQualification);
9784 AssertReturn(uIOWidth <= 3 && uIOWidth != 2, VERR_HMVMX_IPE_1);
9785
9786 /* I/O operation lookup arrays. */
9787 static const uint32_t s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses. */
9788 static const uint32_t s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving the result (in AL/AX/EAX). */
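    /* Both arrays are indexed by the 2-bit I/O width from the exit qualification: 0 = byte, 1 = word,
       3 = dword; value 2 is undefined, hence the uIOWidth assertion above and the zero placeholders
       at index 2. */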
9789
9790 VBOXSTRICTRC rcStrict;
9791 const uint32_t cbValue = s_aIOSizes[uIOWidth];
9792 const uint32_t cbInstr = pVmxTransient->cbInstr;
9793 bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */
9794 PVM pVM = pVCpu->CTX_SUFF(pVM);
9795 if (fIOString)
9796 {
9797#if 0 /* Not yet ready. IEM gurus with debian 32-bit guest without NP (on ATA reads). See @bugref{5752#c158}*/
9798 /*
9799 * INS/OUTS - I/O String instruction.
9800 *
9801 * Use instruction-information if available, otherwise fall back on
9802 * interpreting the instruction.
9803 */
9804 Log4(("CS:RIP=%04x:%#RX64 %#06x/%u %c str\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
9805 AssertReturn(pMixedCtx->dx == uIOPort, VERR_HMVMX_IPE_2);
9806 if (MSR_IA32_VMX_BASIC_INFO_VMCS_INS_OUTS(pVM->hm.s.vmx.Msrs.u64BasicInfo))
9807 {
9808 rc2 = hmR0VmxReadExitInstrInfoVmcs(pVCpu, pVmxTransient);
9809 /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */
9810 rc2 |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9811 AssertRCReturn(rc2, rc2);
9812 AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_HMVMX_IPE_3);
9813 AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2);
9814 IEMMODE enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize;
9815 bool fRep = VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification);
9816 if (fIOWrite)
9817 {
9818 rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr,
9819 pVmxTransient->ExitInstrInfo.StrIo.iSegReg);
9820 }
9821 else
9822 {
9823 /*
9824 * The segment prefix for INS cannot be overridden and is always ES. We can safely assume X86_SREG_ES.
9825 * Hence "iSegReg" field is undefined in the instruction-information field in VT-x for INS.
9826 * See Intel Instruction spec. for "INS".
9827 * See Intel spec. Table 27-8 "Format of the VM-Exit Instruction-Information Field as Used for INS and OUTS".
9828 */
9829 rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr);
9830 }
9831 }
9832 else
9833 {
9834 /** @todo optimize this, IEM should request the additional state if it needs it (GP, PF, ++). */
9835 rc2 = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
9836 AssertRCReturn(rc2, rc2);
9837 rcStrict = IEMExecOne(pVCpu);
9838 }
9839 /** @todo IEM needs to be setting these flags somehow. */
9840 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
9841 fUpdateRipAlready = true;
9842#else
9843 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
9844 rcStrict = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
9845 if (RT_SUCCESS(rcStrict))
9846 {
9847 if (fIOWrite)
9848 {
9849 rcStrict = IOMInterpretOUTSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix,
9850 (DISCPUMODE)pDis->uAddrMode, cbValue);
9851 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
9852 }
9853 else
9854 {
9855 rcStrict = IOMInterpretINSEx(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx), uIOPort, pDis->fPrefix,
9856 (DISCPUMODE)pDis->uAddrMode, cbValue);
9857 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
9858 }
9859 }
9860 else
9861 {
9862 AssertMsg(rcStrict == VERR_EM_INTERPRETER, ("rcStrict=%Rrc RIP %#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pMixedCtx->rip));
9863 rcStrict = VINF_EM_RAW_EMULATE_INSTR;
9864 }
9865#endif
9866 }
9867 else
9868 {
9869 /*
9870 * IN/OUT - I/O instruction.
9871 */
9872 Log4(("CS:RIP=%04x:%#RX64 %#06x/%u %c\n", pMixedCtx->cs.Sel, pMixedCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r'));
9873 const uint32_t uAndVal = s_aIOOpAnd[uIOWidth];
9874 Assert(!VMX_EXIT_QUALIFICATION_IO_IS_REP(pVmxTransient->uExitQualification));
9875 if (fIOWrite)
9876 {
9877 rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pMixedCtx->eax & uAndVal, cbValue);
9878 if (rcStrict == VINF_IOM_R3_IOPORT_WRITE)
9879 HMR0SavePendingIOPortWrite(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue);
9880 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
9881 }
9882 else
9883 {
9884 uint32_t u32Result = 0;
9885 rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue);
9886 if (IOM_SUCCESS(rcStrict))
9887 {
9888 /* Save result of I/O IN instr. in AL/AX/EAX. */
9889 pMixedCtx->eax = (pMixedCtx->eax & ~uAndVal) | (u32Result & uAndVal);
9890 }
9891 else if (rcStrict == VINF_IOM_R3_IOPORT_READ)
9892 HMR0SavePendingIOPortRead(pVCpu, pMixedCtx->rip, pMixedCtx->rip + cbInstr, uIOPort, uAndVal, cbValue);
9893 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
9894 }
9895 }
9896
9897 if (IOM_SUCCESS(rcStrict))
9898 {
9899 if (!fUpdateRipAlready)
9900 {
9901 pMixedCtx->rip += cbInstr;
9902 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
9903 }
9904
9905 /*
9906 * If any I/O breakpoints are armed, we need to check if one triggered
9907 * and take appropriate action.
9908 * Note that the I/O breakpoint type is undefined if CR4.DE is 0.
9909 */
9910 rc2 = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
9911 AssertRCReturn(rc2, rc2);
9912
9913 /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the
9914 * execution engines about whether hyper BPs and such are pending. */
9915 uint32_t const uDr7 = pMixedCtx->dr[7];
9916 if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK)
9917 && X86_DR7_ANY_RW_IO(uDr7)
9918 && (pMixedCtx->cr4 & X86_CR4_DE))
9919 || DBGFBpIsHwIoArmed(pVM)))
9920 {
9921 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
9922
9923 /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
9924 VMMRZCallRing3Disable(pVCpu);
9925 HM_DISABLE_PREEMPT_IF_NEEDED();
9926
9927 bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /*fDr6*/);
9928
9929 VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pMixedCtx, uIOPort, cbValue);
9930 if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP)
9931 {
9932 /* Raise #DB. */
9933 if (fIsGuestDbgActive)
9934 ASMSetDR6(pMixedCtx->dr[6]);
9935 if (pMixedCtx->dr[7] != uDr7)
9936 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG);
9937
9938 hmR0VmxSetPendingXcptDB(pVCpu, pMixedCtx);
9939 }
9940 /* rcStrict is VINF_SUCCESS or in [VINF_EM_FIRST..VINF_EM_LAST]. */
9941 else if ( rcStrict2 != VINF_SUCCESS
9942 && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict))
9943 rcStrict = rcStrict2;
9944
9945 HM_RESTORE_PREEMPT_IF_NEEDED();
9946 VMMRZCallRing3Enable(pVCpu);
9947 }
9948 }
9949
9950#ifdef DEBUG
9951 if (rcStrict == VINF_IOM_R3_IOPORT_READ)
9952 Assert(!fIOWrite);
9953 else if (rcStrict == VINF_IOM_R3_IOPORT_WRITE)
9954 Assert(fIOWrite);
9955 else
9956 {
9957 /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST
9958 * statuses, that the VMM device and some others may return. See
9959 * IOM_SUCCESS() for guidance. */
9960 AssertMsg( RT_FAILURE(rcStrict)
9961 || rcStrict == VINF_SUCCESS
9962 || rcStrict == VINF_EM_RAW_EMULATE_INSTR
9963 || rcStrict == VINF_EM_DBG_BREAKPOINT
9964 || rcStrict == VINF_EM_RAW_GUEST_TRAP
9965 || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)));
9966 }
9967#endif
9968
9969 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1);
9970 return VBOXSTRICTRC_TODO(rcStrict);
9971}
9972
9973
9974/**
9975 * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional
9976 * VM-exit.
9977 */
9978HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
9979{
9980 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
9981
9982    /* Check if this task-switch occurred while delivering an event through the guest IDT. */
9983 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
9984 AssertRCReturn(rc, rc);
9985 if (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
9986 {
9987 rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient);
9988 AssertRCReturn(rc, rc);
9989 if (VMX_IDT_VECTORING_INFO_VALID(pVmxTransient->uIdtVectoringInfo))
9990 {
9991 uint32_t uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo);
9992
9993 /* Software interrupts and exceptions will be regenerated when the recompiler restarts the instruction. */
9994 if ( uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_INT
9995 && uIntType != VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT
9996 && uIntType != VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT)
9997 {
9998 uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo);
9999 bool fErrorCodeValid = !!VMX_IDT_VECTORING_INFO_ERROR_CODE_IS_VALID(pVmxTransient->uIdtVectoringInfo);
10000
10001 /* Save it as a pending event and it'll be converted to a TRPM event on the way out to ring-3. */
10002 Assert(!pVCpu->hm.s.Event.fPending);
10003 pVCpu->hm.s.Event.fPending = true;
10004 pVCpu->hm.s.Event.u64IntInfo = pVmxTransient->uIdtVectoringInfo;
10005 rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient);
10006 AssertRCReturn(rc, rc);
10007 if (fErrorCodeValid)
10008 pVCpu->hm.s.Event.u32ErrCode = pVmxTransient->uIdtVectoringErrorCode;
10009 else
10010 pVCpu->hm.s.Event.u32ErrCode = 0;
10011 if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT
10012 && uVector == X86_XCPT_PF)
10013 {
10014 pVCpu->hm.s.Event.GCPtrFaultAddress = pMixedCtx->cr2;
10015 }
10016
10017 Log4(("Pending event on TaskSwitch uIntType=%#x uVector=%#x\n", uIntType, uVector));
10018 }
10019 }
10020 }
10021
10022 /** @todo Emulate task switch someday, currently just going back to ring-3 for
10023 * emulation. */
10024 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
10025 return VERR_EM_INTERPRETER;
10026}
10027
10028
10029/**
10030 * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit.
10031 */
10032HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10033{
10034 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10035 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG);
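    /* The monitor trap flag causes a VM-exit after a single guest instruction; the step is done, so clear it again. */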
10036 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MONITOR_TRAP_FLAG;
10037 int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
10038 AssertRCReturn(rc, rc);
10039 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf);
10040 return VINF_EM_DBG_STEPPED;
10041}
10042
10043
10044/**
10045 * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit.
10046 */
10047HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10048{
10049 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10050
10051 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
10052 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
10053 if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT))
10054 return VINF_SUCCESS;
10055 else if (RT_UNLIKELY(rc == VINF_EM_RESET))
10056 return rc;
10057
10058#if 0
10059 /** @todo Investigate if IOMMMIOPhysHandler() requires a lot of state, for now
10060 * just sync the whole thing. */
10061 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10062#else
10063 /* Aggressive state sync. for now. */
10064 rc = hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
10065 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
10066 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10067#endif
10068 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10069 AssertRCReturn(rc, rc);
10070
10071    /* See Intel spec. Table 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Physical Addresses" */
10072 uint32_t uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(pVmxTransient->uExitQualification);
10073 switch (uAccessType)
10074 {
10075 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
10076 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
10077 {
10078 if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_USE_TPR_SHADOW)
10079 && VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification) == 0x80)
10080 {
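            /* Offset 0x80 into the APIC page is the TPR; with TPR shadowing active, such accesses should never cause an APIC-access VM-exit. */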
10081 AssertMsgFailed(("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n"));
10082 }
10083
10084 RTGCPHYS GCPhys = pMixedCtx->msrApicBase; /* Always up-to-date, msrApicBase is not part of the VMCS. */
10085 GCPhys &= PAGE_BASE_GC_MASK;
10086 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification);
10087 PVM pVM = pVCpu->CTX_SUFF(pVM);
10088            Log4(("ApicAccess uAccessType=%#x GCPhys=%#RGp Off=%#x\n", uAccessType, GCPhys,
10089 VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(pVmxTransient->uExitQualification)));
10090
10091 VBOXSTRICTRC rc2 = IOMMMIOPhysHandler(pVM, pVCpu,
10092 (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
10093 CPUMCTX2CORE(pMixedCtx), GCPhys);
10094 rc = VBOXSTRICTRC_VAL(rc2);
10095 Log4(("ApicAccess rc=%d\n", rc));
10096 if ( rc == VINF_SUCCESS
10097 || rc == VERR_PAGE_TABLE_NOT_PRESENT
10098 || rc == VERR_PAGE_NOT_PRESENT)
10099 {
10100 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP
10101 | HM_CHANGED_GUEST_RSP
10102 | HM_CHANGED_GUEST_RFLAGS
10103 | HM_CHANGED_VMX_GUEST_APIC_STATE);
10104 rc = VINF_SUCCESS;
10105 }
10106 break;
10107 }
10108
10109 default:
10110 Log4(("ApicAccess uAccessType=%#x\n", uAccessType));
10111 rc = VINF_EM_RAW_EMULATE_INSTR;
10112 break;
10113 }
10114
10115 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess);
10116 return rc;
10117}
10118
10119
10120/**
10121 * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional
10122 * VM-exit.
10123 */
10124HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10125{
10126 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10127
10128 /* We should -not- get this VM-exit if the guest's debug registers were active. */
10129 if (pVmxTransient->fWasGuestDebugStateActive)
10130 {
10131 AssertMsgFailed(("Unexpected MOV DRx exit. pVCpu=%p pMixedCtx=%p\n", pVCpu, pMixedCtx));
10132 HMVMX_RETURN_UNEXPECTED_EXIT();
10133 }
10134
10135 int rc = VERR_INTERNAL_ERROR_5;
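    /* Hand the debug registers to the guest only if neither the debugger nor the hypervisor debug state needs
       them; otherwise fall through and interpret the MOV DRx instruction below. */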
10136 if ( !DBGFIsStepping(pVCpu)
10137 && !pVCpu->hm.s.fSingleInstruction
10138 && !pVmxTransient->fWasHyperDebugStateActive)
10139 {
10140 /* Don't intercept MOV DRx and #DB any more. */
10141 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_MOV_DR_EXIT;
10142 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls);
10143 AssertRCReturn(rc, rc);
10144
10145 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
10146 {
10147#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
10148 pVCpu->hm.s.vmx.u32XcptBitmap &= ~RT_BIT(X86_XCPT_DB);
10149 rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap);
10150 AssertRCReturn(rc, rc);
10151#endif
10152 }
10153
10154 /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */
10155 VMMRZCallRing3Disable(pVCpu);
10156 HM_DISABLE_PREEMPT_IF_NEEDED();
10157
10158 /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
10159 PVM pVM = pVCpu->CTX_SUFF(pVM);
10160 CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */);
10161 Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32);
10162
10163 HM_RESTORE_PREEMPT_IF_NEEDED();
10164 VMMRZCallRing3Enable(pVCpu);
10165
10166#ifdef VBOX_WITH_STATISTICS
10167 rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10168 AssertRCReturn(rc, rc);
10169 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
10170 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
10171 else
10172 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
10173#endif
10174 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
10175 return VINF_SUCCESS;
10176 }
10177
10178 /*
10179 * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode() which requires EFER, CS. EFER is always up-to-date, see
10180 * hmR0VmxSaveGuestAutoLoadStoreMsrs(). Update only the segment registers from the CPU.
10181 */
10182 rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10183 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10184 AssertRCReturn(rc, rc);
10185 Log4(("CS:RIP=%04x:%#RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip));
10186
10187 PVM pVM = pVCpu->CTX_SUFF(pVM);
10188 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(pVmxTransient->uExitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
10189 {
10190 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
10191 VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification),
10192 VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification));
10193 if (RT_SUCCESS(rc))
10194 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_DEBUG);
10195 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
10196 }
10197 else
10198 {
10199 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx),
10200 VMX_EXIT_QUALIFICATION_DRX_GENREG(pVmxTransient->uExitQualification),
10201 VMX_EXIT_QUALIFICATION_DRX_REGISTER(pVmxTransient->uExitQualification));
10202 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
10203 }
10204
10205 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
10206 if (RT_SUCCESS(rc))
10207 {
10208 int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pMixedCtx, pVmxTransient);
10209 AssertRCReturn(rc2, rc2);
10210 }
10211 return rc;
10212}
10213
10214
10215/**
10216 * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG).
10217 * Conditional VM-exit.
10218 */
10219HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10220{
10221 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10222 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
10223
10224 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
10225 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
10226 if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT))
10227 return VINF_SUCCESS;
10228 else if (RT_UNLIKELY(rc == VINF_EM_RESET))
10229 return rc;
10230
10231 RTGCPHYS GCPhys = 0;
10232 rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
10233
10234#if 0
10235 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */
10236#else
10237 /* Aggressive state sync. for now. */
10238 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
10239 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
10240 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10241#endif
10242 AssertRCReturn(rc, rc);
10243
10244 /*
10245 * If we succeed, resume guest execution.
10246     * If we fail to interpret the instruction because we couldn't get the guest physical address
10247     * of the page containing the instruction via the guest's page tables (we would invalidate the guest page
10248     * in the host TLB), resume execution, which causes a guest page fault and lets the guest handle this
10249     * weird case. See @bugref{6043}.
10250 */
10251 PVM pVM = pVCpu->CTX_SUFF(pVM);
10252 VBOXSTRICTRC rc2 = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pMixedCtx), GCPhys, UINT32_MAX);
10253 rc = VBOXSTRICTRC_VAL(rc2);
10254    Log4(("EPT misconfig at %#RGp RIP=%#RX64 rc=%d\n", GCPhys, pMixedCtx->rip, rc));
10255 if ( rc == VINF_SUCCESS
10256 || rc == VERR_PAGE_TABLE_NOT_PRESENT
10257 || rc == VERR_PAGE_NOT_PRESENT)
10258 {
10259 /* Successfully handled MMIO operation. */
10260 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP
10261 | HM_CHANGED_GUEST_RSP
10262 | HM_CHANGED_GUEST_RFLAGS
10263 | HM_CHANGED_VMX_GUEST_APIC_STATE);
10264 rc = VINF_SUCCESS;
10265 }
10266 return rc;
10267}
10268
10269
10270/**
10271 * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional
10272 * VM-exit.
10273 */
10274HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10275{
10276 HMVMX_VALIDATE_EXIT_HANDLER_PARAMS();
10277 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
10278
10279 /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */
10280 int rc = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pMixedCtx, pVmxTransient);
10281 if (RT_UNLIKELY(rc == VINF_HM_DOUBLE_FAULT))
10282 return VINF_SUCCESS;
10283 else if (RT_UNLIKELY(rc == VINF_EM_RESET))
10284 return rc;
10285
10286 RTGCPHYS GCPhys = 0;
10287 rc = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
10288 rc |= hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10289#if 0
10290 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx); /** @todo Can we do better? */
10291#else
10292 /* Aggressive state sync. for now. */
10293 rc |= hmR0VmxSaveGuestRipRspRflags(pVCpu, pMixedCtx);
10294 rc |= hmR0VmxSaveGuestControlRegs(pVCpu, pMixedCtx);
10295 rc |= hmR0VmxSaveGuestSegmentRegs(pVCpu, pMixedCtx);
10296#endif
10297 AssertRCReturn(rc, rc);
10298
10299 /* Intel spec. Table 27-7 "Exit Qualifications for EPT violations". */
10300 AssertMsg(((pVmxTransient->uExitQualification >> 7) & 3) != 2, ("%#RX64", pVmxTransient->uExitQualification));
10301
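    /* Translate the EPT-violation exit qualification bits into a #PF-style error code for TRPM/PGM. */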
10302 RTGCUINT uErrorCode = 0;
10303 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
10304 uErrorCode |= X86_TRAP_PF_ID;
10305 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
10306 uErrorCode |= X86_TRAP_PF_RW;
10307 if (pVmxTransient->uExitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
10308 uErrorCode |= X86_TRAP_PF_P;
10309
10310 TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode);
10311
10312    Log4(("EPT violation %#RX64 at %#RGp ErrorCode %#x CS:EIP=%04x:%#RX64\n", pVmxTransient->uExitQualification, GCPhys,
10313 uErrorCode, pMixedCtx->cs.Sel, pMixedCtx->rip));
10314
10315 /* Handle the pagefault trap for the nested shadow table. */
10316 PVM pVM = pVCpu->CTX_SUFF(pVM);
10317 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pMixedCtx), GCPhys);
10318 TRPMResetTrap(pVCpu);
10319
10320 /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */
10321 if ( rc == VINF_SUCCESS
10322 || rc == VERR_PAGE_TABLE_NOT_PRESENT
10323 || rc == VERR_PAGE_NOT_PRESENT)
10324 {
10325 /* Successfully synced our nested page tables. */
10326 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
10327 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP
10328 | HM_CHANGED_GUEST_RSP
10329 | HM_CHANGED_GUEST_RFLAGS);
10330 return VINF_SUCCESS;
10331 }
10332
10333    Log4(("EPT return to ring-3 rc=%d\n", rc));
10334 return rc;
10335}
10336
10337/** @} */
10338
10339/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */
10340/* -=-=-=-=-=-=-=-=-=- VM-exit Exception Handlers -=-=-=-=-=-=-=-=-=-=- */
10341/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-= */
10342
10343/** @name VM-exit exception handlers.
10344 * @{
10345 */
10346
10347/**
10348 * VM-exit exception handler for #MF (Math Fault: floating point exception).
10349 */
10350static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10351{
10352 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10353 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
10354
10355 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
10356 AssertRCReturn(rc, rc);
10357
10358 if (!(pMixedCtx->cr0 & X86_CR0_NE))
10359 {
10360 /* Old-style FPU error reporting needs some extra work. */
10361 /** @todo don't fall back to the recompiler, but do it manually. */
10362 return VERR_EM_INTERPRETER;
10363 }
10364
10365 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
10366 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
10367 return rc;
10368}
10369
10370
10371/**
10372 * VM-exit exception handler for #BP (Breakpoint exception).
10373 */
10374static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10375{
10376 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10377 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
10378
10379 /** @todo Try optimize this by not saving the entire guest state unless
10380 * really needed. */
10381 int rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10382 AssertRCReturn(rc, rc);
10383
10384 PVM pVM = pVCpu->CTX_SUFF(pVM);
10385 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pMixedCtx));
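    /* DBGF returns VINF_EM_RAW_GUEST_TRAP when the breakpoint is not one of ours; in that case reflect #BP back into the guest. */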
10386 if (rc == VINF_EM_RAW_GUEST_TRAP)
10387 {
10388 rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient);
10389 rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
10390 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
10391 AssertRCReturn(rc, rc);
10392
10393 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
10394 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
10395 }
10396
10397 Assert(rc == VINF_SUCCESS || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_EM_DBG_BREAKPOINT);
10398 return rc;
10399}
10400
10401
10402/**
10403 * VM-exit exception handler for #DB (Debug exception).
10404 */
10405static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10406{
10407 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10408 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
10409 Log6(("XcptDB\n"));
10410
10411 /*
10412 * Get the DR6-like values from the exit qualification and pass it to DBGF
10413 * for processing.
10414 */
10415 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10416 AssertRCReturn(rc, rc);
10417
10418    /* See Intel spec. Table 27-1 "Exit Qualifications for debug exceptions" for the format. */
10419 uint64_t uDR6 = X86_DR6_INIT_VAL;
10420 uDR6 |= ( pVmxTransient->uExitQualification
10421 & (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3 | X86_DR6_BD | X86_DR6_BS));
10422
10423 rc = DBGFRZTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pMixedCtx), uDR6, pVCpu->hm.s.fSingleInstruction);
10424 if (rc == VINF_EM_RAW_GUEST_TRAP)
10425 {
10426 /*
10427 * The exception was for the guest. Update DR6, DR7.GD and
10428 * IA32_DEBUGCTL.LBR before forwarding it.
10429 * (See Intel spec. 27.1 "Architectural State before a VM-Exit".)
10430 */
10431 VMMRZCallRing3Disable(pVCpu);
10432 HM_DISABLE_PREEMPT_IF_NEEDED();
10433
10434 pMixedCtx->dr[6] &= ~X86_DR6_B_MASK;
10435 pMixedCtx->dr[6] |= uDR6;
10436 if (CPUMIsGuestDebugStateActive(pVCpu))
10437 ASMSetDR6(pMixedCtx->dr[6]);
10438
10439 HM_RESTORE_PREEMPT_IF_NEEDED();
10440 VMMRZCallRing3Enable(pVCpu);
10441
10442 rc = hmR0VmxSaveGuestDR7(pVCpu, pMixedCtx);
10443 AssertRCReturn(rc, rc);
10444
10445 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
10446 pMixedCtx->dr[7] &= ~X86_DR7_GD;
10447
10448 /* Paranoia. */
10449 pMixedCtx->dr[7] &= ~X86_DR7_RAZ_MASK;
10450 pMixedCtx->dr[7] |= X86_DR7_RA1_MASK;
10451
10452 rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)pMixedCtx->dr[7]);
10453 AssertRCReturn(rc, rc);
10454
10455 /*
10456 * Raise #DB in the guest.
10457 */
10458 rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient);
10459 rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
10460 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
10461 AssertRCReturn(rc, rc);
10462 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
10463 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
10464 return VINF_SUCCESS;
10465 }
10466
10467 /*
10468 * Not a guest trap, must be a hypervisor related debug event then.
10469 * Update DR6 in case someone is interested in it.
10470 */
10471 AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc));
10472 AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5);
10473 CPUMSetHyperDR6(pVCpu, uDR6);
10474
10475 return rc;
10476}
10477
10478
10479/**
10480 * VM-exit exception handler for #NM (Device-not-available exception: floating
10481 * point exception).
10482 */
10483static int hmR0VmxExitXcptNM(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10484{
10485 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10486
10487 /* We require CR0 and EFER. EFER is always up-to-date. */
10488 int rc = hmR0VmxSaveGuestCR0(pVCpu, pMixedCtx);
10489 AssertRCReturn(rc, rc);
10490
10491    /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */
10492 VMMRZCallRing3Disable(pVCpu);
10493 HM_DISABLE_PREEMPT_IF_NEEDED();
10494
10495 /* If the guest FPU was active at the time of the #NM exit, then it's a guest fault. */
10496 if (pVmxTransient->fWasGuestFPUStateActive)
10497 {
10498 rc = VINF_EM_RAW_GUEST_TRAP;
10499 Assert(CPUMIsGuestFPUStateActive(pVCpu) || VMCPU_HMCF_IS_PENDING(pVCpu, HM_CHANGED_GUEST_CR0));
10500 }
10501 else
10502 {
10503#ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS
10504 Assert(!pVmxTransient->fWasGuestFPUStateActive);
10505#endif
10506 rc = CPUMR0Trap07Handler(pVCpu->CTX_SUFF(pVM), pVCpu, pMixedCtx);
10507 Assert(rc == VINF_EM_RAW_GUEST_TRAP || (rc == VINF_SUCCESS && CPUMIsGuestFPUStateActive(pVCpu)));
10508 }
10509
10510 HM_RESTORE_PREEMPT_IF_NEEDED();
10511 VMMRZCallRing3Enable(pVCpu);
10512
10513 if (rc == VINF_SUCCESS)
10514 {
10515 /* Guest FPU state was activated, we'll want to change CR0 FPU intercepts before the next VM-reentry. */
10516 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_CR0);
10517 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
10518 }
10519 else
10520 {
10521 /* Forward #NM to the guest. */
10522 Assert(rc == VINF_EM_RAW_GUEST_TRAP);
10523 rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient);
10524 AssertRCReturn(rc, rc);
10525 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
10526 pVmxTransient->cbInstr, 0 /* error code */, 0 /* GCPtrFaultAddress */);
10527 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
10528 }
10529
10530 return VINF_SUCCESS;
10531}
10532
10533
10534/**
10535 * VM-exit exception handler for #GP (General-protection exception).
10536 *
10537 * @remarks Requires pVmxTransient->uExitIntInfo to be up-to-date.
10538 */
10539static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10540{
10541 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10542 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
10543
10544 int rc = VERR_INTERNAL_ERROR_5;
10545 if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active)
10546 {
10547#ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS
10548 /* If the guest is not in real-mode or we have unrestricted execution support, reflect #GP to the guest. */
10549 rc = hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient);
10550 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
10551 rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
10552 rc |= hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10553 AssertRCReturn(rc, rc);
10554 Log4(("#GP Gst: RIP %#RX64 ErrorCode=%#x CR0=%#RX64 CPL=%u\n", pMixedCtx->rip, pVmxTransient->uExitIntErrorCode,
10555 pMixedCtx->cr0, CPUMGetGuestCPL(pVCpu)));
10556 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
10557 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
10558 return rc;
10559#else
10560 /* We don't intercept #GP. */
10561 AssertMsgFailed(("Unexpected VM-exit caused by #GP exception\n"));
10562 return VERR_VMX_UNEXPECTED_EXCEPTION;
10563#endif
10564 }
10565
10566 Assert(CPUMIsGuestInRealModeEx(pMixedCtx));
10567 Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest);
10568
10569 /* EMInterpretDisasCurrent() requires a lot of the state, save the entire state. */
10570 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10571 AssertRCReturn(rc, rc);
10572
10573 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
10574 uint32_t cbOp = 0;
10575 PVM pVM = pVCpu->CTX_SUFF(pVM);
10576 rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
10577 if (RT_SUCCESS(rc))
10578 {
10579 rc = VINF_SUCCESS;
10580 Assert(cbOp == pDis->cbInstr);
10581 Log4(("#GP Disas OpCode=%u CS:EIP %04x:%#RX64\n", pDis->pCurInstr->uOpcode, pMixedCtx->cs.Sel, pMixedCtx->rip));
10582 switch (pDis->pCurInstr->uOpcode)
10583 {
10584 case OP_CLI:
10585 {
10586 pMixedCtx->eflags.Bits.u1IF = 0;
10587 pMixedCtx->rip += pDis->cbInstr;
10588 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
10589 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli);
10590 break;
10591 }
10592
10593 case OP_STI:
10594 {
10595 pMixedCtx->eflags.Bits.u1IF = 1;
10596 pMixedCtx->rip += pDis->cbInstr;
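                /* STI inhibits interrupts until after the following instruction; record this so the interrupt-window logic honours it. */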
10597 EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip);
10598 Assert(VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
10599 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS);
10600 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti);
10601 break;
10602 }
10603
10604 case OP_HLT:
10605 {
10606 rc = VINF_EM_HALT;
10607 pMixedCtx->rip += pDis->cbInstr;
10608 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP);
10609 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
10610 break;
10611 }
10612
10613 case OP_POPF:
10614 {
10615 Log4(("POPF CS:RIP %04x:%#RX64\n", pMixedCtx->cs.Sel, pMixedCtx->rip));
10616 uint32_t cbParm = 0;
10617 uint32_t uMask = 0;
10618 if (pDis->fPrefix & DISPREFIX_OPSIZE)
10619 {
10620 cbParm = 4;
10621 uMask = 0xffffffff;
10622 }
10623 else
10624 {
10625 cbParm = 2;
10626 uMask = 0xffff;
10627 }
10628
10629 /* Get the stack pointer & pop the contents of the stack onto Eflags. */
10630 RTGCPTR GCPtrStack = 0;
10631 X86EFLAGS Eflags;
10632 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0,
10633 &GCPtrStack);
10634 if (RT_SUCCESS(rc))
10635 {
10636 Assert(sizeof(Eflags.u32) >= cbParm);
10637 Eflags.u32 = 0;
10638 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u32, cbParm);
10639 }
10640 if (RT_FAILURE(rc))
10641 {
10642 rc = VERR_EM_INTERPRETER;
10643 break;
10644 }
10645 Log4(("POPF %#x -> %#RX64 mask=%#x RIP=%#RX64\n", Eflags.u, pMixedCtx->rsp, uMask, pMixedCtx->rip));
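                /* Merge only the POPF-modifiable flag bits within the operand size; all other EFLAGS bits are preserved. */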
10646 pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~(X86_EFL_POPF_BITS & uMask))
10647 | (Eflags.u32 & X86_EFL_POPF_BITS & uMask);
10648 pMixedCtx->eflags.Bits.u1RF = 0; /* The RF bit is always cleared by POPF; see Intel Instruction reference. */
10649 pMixedCtx->esp += cbParm;
10650 pMixedCtx->esp &= uMask;
10651 pMixedCtx->rip += pDis->cbInstr;
10652
10653 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP
10654 | HM_CHANGED_GUEST_RSP
10655 | HM_CHANGED_GUEST_RFLAGS);
10656 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf);
10657 break;
10658 }
10659
10660 case OP_PUSHF:
10661 {
10662 uint32_t cbParm = 0;
10663 uint32_t uMask = 0;
10664 if (pDis->fPrefix & DISPREFIX_OPSIZE)
10665 {
10666 cbParm = 4;
10667 uMask = 0xffffffff;
10668 }
10669 else
10670 {
10671 cbParm = 2;
10672 uMask = 0xffff;
10673 }
10674
10675 /* Get the stack pointer & push the contents of eflags onto the stack. */
10676 RTGCPTR GCPtrStack = 0;
10677 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), (pMixedCtx->esp - cbParm) & uMask,
10678 SELMTOFLAT_FLAGS_CPL0, &GCPtrStack);
10679 if (RT_FAILURE(rc))
10680 {
10681 rc = VERR_EM_INTERPRETER;
10682 break;
10683 }
10684 X86EFLAGS Eflags = pMixedCtx->eflags;
10685 /* The RF & VM bits are cleared on image stored on stack; see Intel Instruction reference for PUSHF. */
10686 Eflags.Bits.u1RF = 0;
10687 Eflags.Bits.u1VM = 0;
10688
10689 rc = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &Eflags.u, cbParm);
10690 if (RT_FAILURE(rc))
10691 {
10692 rc = VERR_EM_INTERPRETER;
10693 break;
10694 }
10695 Log4(("PUSHF %#x -> %#RGv\n", Eflags.u, GCPtrStack));
10696 pMixedCtx->esp -= cbParm;
10697 pMixedCtx->esp &= uMask;
10698 pMixedCtx->rip += pDis->cbInstr;
10699 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP);
10700 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf);
10701 break;
10702 }
10703
10704 case OP_IRET:
10705 {
10706 /** @todo Handle 32-bit operand sizes and check stack limits. See Intel
10707 * instruction reference. */
10708 RTGCPTR GCPtrStack = 0;
10709 uint32_t uMask = 0xffff;
10710 uint16_t aIretFrame[3];
10711 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
10712 {
10713 rc = VERR_EM_INTERPRETER;
10714 break;
10715 }
10716 rc = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pMixedCtx), pMixedCtx->esp & uMask, SELMTOFLAT_FLAGS_CPL0,
10717 &GCPtrStack);
10718 if (RT_SUCCESS(rc))
10719 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
10720 if (RT_FAILURE(rc))
10721 {
10722 rc = VERR_EM_INTERPRETER;
10723 break;
10724 }
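                /* Real-mode IRET frame layout: [0]=IP, [1]=CS, [2]=FLAGS. */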
10725 pMixedCtx->eip = 0;
10726 pMixedCtx->ip = aIretFrame[0];
10727 pMixedCtx->cs.Sel = aIretFrame[1];
10728 pMixedCtx->cs.ValidSel = aIretFrame[1];
10729 pMixedCtx->cs.u64Base = (uint64_t)pMixedCtx->cs.Sel << 4;
10730 pMixedCtx->eflags.u32 = (pMixedCtx->eflags.u32 & ~(X86_EFL_POPF_BITS & uMask))
10731 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
10732 pMixedCtx->sp += sizeof(aIretFrame);
10733 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP
10734 | HM_CHANGED_GUEST_SEGMENT_REGS
10735 | HM_CHANGED_GUEST_RSP
10736 | HM_CHANGED_GUEST_RFLAGS);
10737                Log4(("IRET %#RGv to %04x:%04x\n", GCPtrStack, pMixedCtx->cs.Sel, pMixedCtx->ip));
10738 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret);
10739 break;
10740 }
10741
10742 case OP_INT:
10743 {
10744 uint16_t uVector = pDis->Param1.uValue & 0xff;
10745 hmR0VmxSetPendingIntN(pVCpu, pMixedCtx, uVector, pDis->cbInstr);
10746 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
10747 break;
10748 }
10749
10750 case OP_INTO:
10751 {
10752 if (pMixedCtx->eflags.Bits.u1OF)
10753 {
10754 hmR0VmxSetPendingXcptOF(pVCpu, pMixedCtx, pDis->cbInstr);
10755 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
10756 }
10757 break;
10758 }
10759
10760 default:
10761 {
10762 VBOXSTRICTRC rc2 = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pMixedCtx), 0 /* pvFault */,
10763 EMCODETYPE_SUPERVISOR);
10764 rc = VBOXSTRICTRC_VAL(rc2);
10765 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_ALL_GUEST);
10766 Log4(("#GP rc=%Rrc\n", rc));
10767 break;
10768 }
10769 }
10770 }
10771 else
10772 rc = VERR_EM_INTERPRETER;
10773
10774 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
10775 ("#GP Unexpected rc=%Rrc\n", rc));
10776 return rc;
10777}
10778
10779
10780/**
10781 * VM-exit exception handler wrapper for generic exceptions. Simply re-injects
10782 * the exception reported in the VMX transient structure back into the VM.
10783 *
10784 * @remarks Requires uExitIntInfo in the VMX transient structure to be
10785 * up-to-date.
10786 */
10787static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10788{
10789 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10790
10791 /* Re-inject the exception into the guest. This cannot be a double-fault condition which would have been handled in
10792 hmR0VmxCheckExitDueToEventDelivery(). */
10793 int rc = hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
10794 rc |= hmR0VmxReadExitInstrLenVmcs(pVCpu, pVmxTransient);
10795 AssertRCReturn(rc, rc);
10796 Assert(pVmxTransient->fVmcsFieldsRead & HMVMX_UPDATED_TRANSIENT_EXIT_INTERRUPTION_INFO);
10797
10798 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
10799 pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */);
10800 return VINF_SUCCESS;
10801}
10802
10803
10804/**
10805 * VM-exit exception handler for #PF (Page-fault exception).
10806 */
10807static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PVMXTRANSIENT pVmxTransient)
10808{
10809 HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS();
10810 PVM pVM = pVCpu->CTX_SUFF(pVM);
10811 int rc = hmR0VmxReadExitQualificationVmcs(pVCpu, pVmxTransient);
10812 rc |= hmR0VmxReadExitIntInfoVmcs(pVCpu, pVmxTransient);
10813 rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVCpu, pVmxTransient);
10814 AssertRCReturn(rc, rc);
10815
10816#if defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) || defined(HMVMX_ALWAYS_TRAP_PF)
10817 if (pVM->hm.s.fNestedPaging)
10818 {
10819 pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */
10820 if (RT_LIKELY(!pVmxTransient->fVectoringPF))
10821 {
10822 pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */
10823 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
10824 0 /* cbInstr */, pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQualification);
10825 }
10826 else
10827 {
10828 /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
10829 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
10830 Log4(("Pending #DF due to vectoring #PF. NP\n"));
10831 }
10832 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
10833 return rc;
10834 }
10835#else
10836 Assert(!pVM->hm.s.fNestedPaging);
10837#endif
10838
10839 rc = hmR0VmxSaveGuestState(pVCpu, pMixedCtx);
10840 AssertRCReturn(rc, rc);
10841
10842 Log4(("#PF: cr2=%#RX64 cs:rip=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", pVmxTransient->uExitQualification,
10843 pMixedCtx->cs.Sel, pMixedCtx->rip, pVmxTransient->uExitIntErrorCode, pMixedCtx->cr3));
10844
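    /* Assert the #PF in TRPM so the error code and fault address are available, then let PGM sync the shadow page
       tables or determine that it is a genuine guest fault. */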
10845 TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQualification, (RTGCUINT)pVmxTransient->uExitIntErrorCode);
10846 rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntErrorCode, CPUMCTX2CORE(pMixedCtx),
10847 (RTGCPTR)pVmxTransient->uExitQualification);
10848
10849 Log4(("#PF: rc=%Rrc\n", rc));
10850 if (rc == VINF_SUCCESS)
10851 {
10852 /* Successfully synced shadow pages tables or emulated an MMIO instruction. */
10853 /** @todo this isn't quite right, what if guest does lgdt with some MMIO
10854 * memory? We don't update the whole state here... */
10855 VMCPU_HMCF_SET(pVCpu, HM_CHANGED_GUEST_RIP
10856 | HM_CHANGED_GUEST_RSP
10857 | HM_CHANGED_GUEST_RFLAGS
10858 | HM_CHANGED_VMX_GUEST_APIC_STATE);
10859 TRPMResetTrap(pVCpu);
10860 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
10861 return rc;
10862 }
10863 else if (rc == VINF_EM_RAW_GUEST_TRAP)
10864 {
10865 if (!pVmxTransient->fVectoringPF)
10866 {
10867 /* It's a guest page fault and needs to be reflected to the guest. */
10868 uint32_t uGstErrorCode = TRPMGetErrorCode(pVCpu);
10869 TRPMResetTrap(pVCpu);
10870 pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */
10871 pMixedCtx->cr2 = pVmxTransient->uExitQualification; /* Update here in case we go back to ring-3 before injection. */
10872 hmR0VmxSetPendingEvent(pVCpu, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo),
10873 0 /* cbInstr */, uGstErrorCode, pVmxTransient->uExitQualification);
10874 }
10875 else
10876 {
10877 /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */
10878 TRPMResetTrap(pVCpu);
10879 pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */
10880 hmR0VmxSetPendingXcptDF(pVCpu, pMixedCtx);
10881 Log4(("#PF: Pending #DF due to vectoring #PF\n"));
10882 }
10883
10884 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
10885 return VINF_SUCCESS;
10886 }
10887
10888 TRPMResetTrap(pVCpu);
10889 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
10890 return rc;
10891}
10892
10893/** @} */
10894