VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@ 43771

Last change on this file since 43771 was 43771, checked in by vboxsync, 12 years ago

VMM/VMMR0: HM bits.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 221.1 KB
Line 
1/* $Id: HWVMXR0.cpp 43771 2012-10-29 18:00:54Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
/*
 * VMX_IS_64BIT_HOST_MODE() - evaluates to true when the host is in 64-bit mode:
 *   - always true for AMD64 builds,
 *   - decided at run time via g_fVMXIs64bitHost for hybrid 32-bit kernel builds,
 *   - always false otherwise.
 */
#ifdef RT_ARCH_AMD64
# define VMX_IS_64BIT_HOST_MODE()   (true)
#else
# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
#  define VMX_IS_64BIT_HOST_MODE()  (g_fVMXIs64bitHost != 0)
# else
#  define VMX_IS_64BIT_HOST_MODE()  (false)
# endif
#endif
62
63
64/*******************************************************************************
65* Global Variables *
66*******************************************************************************/
/*
 * I/O operation lookup tables, four entries each and indexed in parallel.
 * NOTE(review): presumably g_aIOSize is the access width in bytes and g_aIOOpAnd
 * the matching operand mask; entry 2 is zero in both tables - confirm against
 * the users of these arrays.
 */
static const uint32_t g_aIOSize[4]  = { 1, 2, 0, 4 };
static const uint32_t g_aIOOpAnd[4] = { 0x000000ff, 0x0000ffff, 0x00000000, 0xffffffff };
70
71#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
72/** See HMR0A.asm. */
73extern "C" uint32_t g_fVMXIs64bitHost;
74#endif
75
76
77/*******************************************************************************
78* Local Functions *
79*******************************************************************************/
80static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
81static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
82static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
83static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
84static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
85static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
86static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
87static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
88static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
89
90
91/**
92 * Updates error from VMCS to HMCPU's lasterror record.
93 *
94 * @param pVM Pointer to the VM.
95 * @param pVCpu Pointer to the VMCPU.
96 * @param rc The error code.
97 */
98static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
99{
100 if (rc == VERR_VMX_GENERIC)
101 {
102 RTCCUINTREG instrError;
103
104 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
105 pVCpu->hm.s.vmx.lasterror.ulInstrError = instrError;
106 }
107 pVM->hm.s.lLastError = rc;
108}
109
110
111/**
112 * Sets up and activates VT-x on the current CPU.
113 *
114 * @returns VBox status code.
115 * @param pCpu Pointer to the CPU info struct.
116 * @param pVM Pointer to the VM. (can be NULL after a resume!!)
117 * @param pvCpuPage Pointer to the global CPU page.
118 * @param HCPhysCpuPage Physical address of the global CPU page.
119 * @param fEnabledByHost Set if SUPR0EnableVTx or similar was used to enable
120 * VT-x/AMD-V on the host.
121 */
122VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost)
123{
124 if (!fEnabledByHost)
125 {
126 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
127 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
128
129 if (pVM)
130 {
131 /* Set revision dword at the beginning of the VMXON structure. */
132 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.msr.vmx_basic_info);
133 }
134
135 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
136 * (which can have very bad consequences!!!)
137 */
138
139 /** @todo r=bird: Why is this code different than the probing code earlier
140 * on? It just sets VMXE if needed and doesn't check that it isn't
141 * set. Mac OS X host_vmxoff may leave this set and we'll fail here
142 * and debug-assert in the calling code. This is what caused the
143 * "regression" after backing out the SUPR0EnableVTx code hours before
144 * 4.2.0GA (reboot fixed the issue). I've changed here to do the same
145 * as the init code. */
146 uint64_t uCr4 = ASMGetCR4();
147 if (!(uCr4 & X86_CR4_VMXE))
148 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */
149
150 /*
151 * Enter VM root mode.
152 */
153 int rc = VMXEnable(HCPhysCpuPage);
154 if (RT_FAILURE(rc))
155 {
156 ASMSetCR4(uCr4);
157 return VERR_VMX_VMXON_FAILED;
158 }
159 }
160
161 /*
162 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
163 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
164 * each time while reusing a VPID after hitting the MaxASID limit once.
165 */
166 if ( pVM
167 && pVM->hm.s.vmx.fVpid
168 && (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS))
169 {
170 hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
171 pCpu->fFlushAsidBeforeUse = false;
172 }
173 else
174 pCpu->fFlushAsidBeforeUse = true;
175
176 /*
177 * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}.
178 */
179 ++pCpu->cTlbFlushes;
180
181 return VINF_SUCCESS;
182}
183
184
185/**
186 * Deactivates VT-x on the current CPU.
187 *
188 * @returns VBox status code.
189 * @param pCpu Pointer to the CPU info struct.
190 * @param pvCpuPage Pointer to the global CPU page.
191 * @param HCPhysCpuPage Physical address of the global CPU page.
192 */
193VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
194{
195 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
196 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
197 NOREF(pCpu);
198
199 /* If we're somehow not in VMX root mode, then we shouldn't dare leaving it. */
200 if (!(ASMGetCR4() & X86_CR4_VMXE))
201 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
202
203 /* Leave VMX Root Mode. */
204 VMXDisable();
205
206 /* And clear the X86_CR4_VMXE bit. */
207 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
208 return VINF_SUCCESS;
209}
210
211
212/**
213 * Does Ring-0 per VM VT-x initialization.
214 *
215 * @returns VBox status code.
216 * @param pVM Pointer to the VM.
217 */
218VMMR0DECL(int) VMXR0InitVM(PVM pVM)
219{
220 int rc;
221
222#ifdef LOG_ENABLED
223 SUPR0Printf("VMXR0InitVM %p\n", pVM);
224#endif
225
226 pVM->hm.s.vmx.hMemObjApicAccess = NIL_RTR0MEMOBJ;
227
228 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
229 {
230 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
231 rc = RTR0MemObjAllocCont(&pVM->hm.s.vmx.hMemObjApicAccess, PAGE_SIZE, false /* fExecutable */);
232 AssertRC(rc);
233 if (RT_FAILURE(rc))
234 return rc;
235
236 pVM->hm.s.vmx.pbApicAccess = (uint8_t *)RTR0MemObjAddress(pVM->hm.s.vmx.hMemObjApicAccess);
237 pVM->hm.s.vmx.HCPhysApicAccess = RTR0MemObjGetPagePhysAddr(pVM->hm.s.vmx.hMemObjApicAccess, 0);
238 ASMMemZero32(pVM->hm.s.vmx.pbApicAccess, PAGE_SIZE);
239 }
240 else
241 {
242 pVM->hm.s.vmx.hMemObjApicAccess = 0;
243 pVM->hm.s.vmx.pbApicAccess = 0;
244 pVM->hm.s.vmx.HCPhysApicAccess = 0;
245 }
246
247#ifdef VBOX_WITH_CRASHDUMP_MAGIC
248 {
249 rc = RTR0MemObjAllocCont(&pVM->hm.s.vmx.hMemObjScratch, PAGE_SIZE, false /* fExecutable */);
250 AssertRC(rc);
251 if (RT_FAILURE(rc))
252 return rc;
253
254 pVM->hm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hm.s.vmx.hMemObjScratch);
255 pVM->hm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hm.s.vmx.hMemObjScratch, 0);
256
257 ASMMemZero32(pVM->hm.s.vmx.pbScratch, PAGE_SIZE);
258 strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
259 *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
260 }
261#endif
262
263 /* Allocate VMCSs for all guest CPUs. */
264 for (VMCPUID i = 0; i < pVM->cCpus; i++)
265 {
266 PVMCPU pVCpu = &pVM->aCpus[i];
267
268 pVCpu->hm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
269
270 /* Allocate one page for the VM control structure (VMCS). */
271 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjVMCS, PAGE_SIZE, false /* fExecutable */);
272 AssertRC(rc);
273 if (RT_FAILURE(rc))
274 return rc;
275
276 pVCpu->hm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjVMCS);
277 pVCpu->hm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjVMCS, 0);
278 ASMMemZeroPage(pVCpu->hm.s.vmx.pvVMCS);
279
280 pVCpu->hm.s.vmx.cr0_mask = 0;
281 pVCpu->hm.s.vmx.cr4_mask = 0;
282
283 /* Allocate one page for the virtual APIC page for TPR caching. */
284 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjVAPIC, PAGE_SIZE, false /* fExecutable */);
285 AssertRC(rc);
286 if (RT_FAILURE(rc))
287 return rc;
288
289 pVCpu->hm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjVAPIC);
290 pVCpu->hm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjVAPIC, 0);
291 ASMMemZeroPage(pVCpu->hm.s.vmx.pbVAPIC);
292
293 /* Allocate the MSR bitmap if this feature is supported. */
294 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
295 {
296 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, PAGE_SIZE, false /* fExecutable */);
297 AssertRC(rc);
298 if (RT_FAILURE(rc))
299 return rc;
300
301 pVCpu->hm.s.vmx.pvMsrBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjMsrBitmap);
302 pVCpu->hm.s.vmx.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjMsrBitmap, 0);
303 memset(pVCpu->hm.s.vmx.pvMsrBitmap, 0xff, PAGE_SIZE);
304 }
305
306#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
307 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
308 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjGuestMsr, PAGE_SIZE, false /* fExecutable */);
309 AssertRC(rc);
310 if (RT_FAILURE(rc))
311 return rc;
312
313 pVCpu->hm.s.vmx.pvGuestMsr = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjGuestMsr);
314 pVCpu->hm.s.vmx.HCPhysGuestMsr = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjGuestMsr, 0);
315 Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf));
316 memset(pVCpu->hm.s.vmx.pvGuestMsr, 0, PAGE_SIZE);
317
318 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
319 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjHostMsr, PAGE_SIZE, false /* fExecutable */);
320 AssertRC(rc);
321 if (RT_FAILURE(rc))
322 return rc;
323
324 pVCpu->hm.s.vmx.pvHostMsr = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjHostMsr);
325 pVCpu->hm.s.vmx.HCPhysHostMsr = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjHostMsr, 0);
326 Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf));
327 memset(pVCpu->hm.s.vmx.pvHostMsr, 0, PAGE_SIZE);
328#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
329
330 /* Current guest paging mode. */
331 pVCpu->hm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
332
333#ifdef LOG_ENABLED
334 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hm.s.vmx.pvVMCS, (uint32_t)pVCpu->hm.s.vmx.HCPhysVMCS);
335#endif
336 }
337
338 return VINF_SUCCESS;
339}
340
341
342/**
343 * Does Ring-0 per VM VT-x termination.
344 *
345 * @returns VBox status code.
346 * @param pVM Pointer to the VM.
347 */
348VMMR0DECL(int) VMXR0TermVM(PVM pVM)
349{
350 for (VMCPUID i = 0; i < pVM->cCpus; i++)
351 {
352 PVMCPU pVCpu = &pVM->aCpus[i];
353
354 if (pVCpu->hm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ)
355 {
356 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjVMCS, false);
357 pVCpu->hm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
358 pVCpu->hm.s.vmx.pvVMCS = 0;
359 pVCpu->hm.s.vmx.HCPhysVMCS = 0;
360 }
361 if (pVCpu->hm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ)
362 {
363 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjVAPIC, false);
364 pVCpu->hm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ;
365 pVCpu->hm.s.vmx.pbVAPIC = 0;
366 pVCpu->hm.s.vmx.HCPhysVAPIC = 0;
367 }
368 if (pVCpu->hm.s.vmx.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
369 {
370 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjMsrBitmap, false);
371 pVCpu->hm.s.vmx.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
372 pVCpu->hm.s.vmx.pvMsrBitmap = 0;
373 pVCpu->hm.s.vmx.HCPhysMsrBitmap = 0;
374 }
375#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
376 if (pVCpu->hm.s.vmx.hMemObjHostMsr != NIL_RTR0MEMOBJ)
377 {
378 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjHostMsr, false);
379 pVCpu->hm.s.vmx.hMemObjHostMsr = NIL_RTR0MEMOBJ;
380 pVCpu->hm.s.vmx.pvHostMsr = 0;
381 pVCpu->hm.s.vmx.HCPhysHostMsr = 0;
382 }
383 if (pVCpu->hm.s.vmx.hMemObjGuestMsr != NIL_RTR0MEMOBJ)
384 {
385 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjGuestMsr, false);
386 pVCpu->hm.s.vmx.hMemObjGuestMsr = NIL_RTR0MEMOBJ;
387 pVCpu->hm.s.vmx.pvGuestMsr = 0;
388 pVCpu->hm.s.vmx.HCPhysGuestMsr = 0;
389 }
390#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
391 }
392 if (pVM->hm.s.vmx.hMemObjApicAccess != NIL_RTR0MEMOBJ)
393 {
394 RTR0MemObjFree(pVM->hm.s.vmx.hMemObjApicAccess, false);
395 pVM->hm.s.vmx.hMemObjApicAccess = NIL_RTR0MEMOBJ;
396 pVM->hm.s.vmx.pbApicAccess = 0;
397 pVM->hm.s.vmx.HCPhysApicAccess = 0;
398 }
399#ifdef VBOX_WITH_CRASHDUMP_MAGIC
400 if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
401 {
402 ASMMemZero32(pVM->hm.s.vmx.pScratch, PAGE_SIZE);
403 RTR0MemObjFree(pVM->hm.s.vmx.hMemObjScratch, false);
404 pVM->hm.s.vmx.hMemObjScratch = NIL_RTR0MEMOBJ;
405 pVM->hm.s.vmx.pScratch = 0;
406 pVM->hm.s.vmx.pScratchPhys = 0;
407 }
408#endif
409 return VINF_SUCCESS;
410}
411
412
413/**
414 * Sets up VT-x for the specified VM.
415 *
416 * @returns VBox status code.
417 * @param pVM Pointer to the VM.
418 */
419VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
420{
421 int rc = VINF_SUCCESS;
422 uint32_t val;
423
424 AssertReturn(pVM, VERR_INVALID_PARAMETER);
425
426 /* Initialize these always, see hmR3InitFinalizeR0().*/
427 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NONE;
428 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NONE;
429
430 /* Determine optimal flush type for EPT. */
431 if (pVM->hm.s.fNestedPaging)
432 {
433 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT)
434 {
435 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_SINGLE_CONTEXT)
436 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_SINGLE_CONTEXT;
437 else if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_ALL_CONTEXTS)
438 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_ALL_CONTEXTS;
439 else
440 {
441 /*
442 * Should never really happen. EPT is supported but no suitable flush types supported.
443 * We cannot ignore EPT at this point as we've already setup Unrestricted Guest execution.
444 */
445 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
446 return VERR_VMX_GENERIC;
447 }
448 }
449 else
450 {
451 /*
452 * Should never really happen. EPT is supported but INVEPT instruction is not supported.
453 */
454 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
455 return VERR_VMX_GENERIC;
456 }
457 }
458
459 /* Determine optimal flush type for VPID. */
460 if (pVM->hm.s.vmx.fVpid)
461 {
462 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID)
463 {
464 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
465 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_SINGLE_CONTEXT;
466 else if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS)
467 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_ALL_CONTEXTS;
468 else
469 {
470 /*
471 * Neither SINGLE nor ALL context flush types for VPID supported by the CPU.
472 * We do not handle other flush type combinations, ignore VPID capabilities.
473 */
474 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
475 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
476 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT_RETAIN_GLOBALS)
477 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
478 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
479 pVM->hm.s.vmx.fVpid = false;
480 }
481 }
482 else
483 {
484 /*
485 * Should not really happen. EPT is supported but INVEPT is not supported.
486 * Ignore VPID capabilities as our code relies on using INVEPT for selective flushing.
487 */
488 Log(("VMXR0SetupVM: VPID supported without INVEPT support. Ignoring VPID.\n"));
489 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
490 pVM->hm.s.vmx.fVpid = false;
491 }
492 }
493
494 for (VMCPUID i = 0; i < pVM->cCpus; i++)
495 {
496 PVMCPU pVCpu = &pVM->aCpus[i];
497
498 AssertPtr(pVCpu->hm.s.vmx.pvVMCS);
499
500 /* Set revision dword at the beginning of the VMCS structure. */
501 *(uint32_t *)pVCpu->hm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.msr.vmx_basic_info);
502
503 /*
504 * Clear and activate the VMCS.
505 */
506 Log(("HCPhysVMCS = %RHp\n", pVCpu->hm.s.vmx.HCPhysVMCS));
507 rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
508 if (RT_FAILURE(rc))
509 goto vmx_end;
510
511 rc = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
512 if (RT_FAILURE(rc))
513 goto vmx_end;
514
515 /*
516 * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
517 * Set required bits to one and zero according to the MSR capabilities.
518 */
519 val = pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
520 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */
521 | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */
522
523 /*
524 * Enable the VMX preemption timer.
525 */
526 if (pVM->hm.s.vmx.fUsePreemptTimer)
527 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
528 val &= pVM->hm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
529
530 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, val);
531 AssertRC(rc);
532
533 /*
534 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
535 * Set required bits to one and zero according to the MSR capabilities.
536 */
537 val = pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
538 /* Program which event cause VM-exits and which features we want to use. */
539 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
540 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
541 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
542 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
543 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
544 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
545 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside
546 the guest (host thinks the cpu load is high) */
547
548 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
549 if (!pVM->hm.s.fNestedPaging)
550 {
551 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
552 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
553 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
554 }
555
556 /*
557 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch
558 * failure with an invalid control fields error. (combined with some other exit reasons)
559 */
560 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
561 {
562 /* CR8 reads from the APIC shadow page; writes cause an exit is they lower the TPR below the threshold */
563 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
564 Assert(pVM->hm.s.vmx.pbApicAccess);
565 }
566 else
567 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
568 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
569
570 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
571 {
572 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
573 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
574 }
575
576 /* We will use the secondary control if it's present. */
577 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
578
579 /* Mask away the bits that the CPU doesn't support */
580 /** @todo make sure they don't conflict with the above requirements. */
581 val &= pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
582 pVCpu->hm.s.vmx.proc_ctls = val;
583
584 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, val);
585 AssertRC(rc);
586
587 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
588 {
589 /*
590 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
591 * Set required bits to one and zero according to the MSR capabilities.
592 */
593 val = pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
594 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
595
596 if (pVM->hm.s.fNestedPaging)
597 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
598
599 if (pVM->hm.s.vmx.fVpid)
600 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
601
602 if (pVM->hm.s.fHasIoApic)
603 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
604
605 if (pVM->hm.s.vmx.fUnrestrictedGuest)
606 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
607
608 if (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
609 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP;
610
611 /* Mask away the bits that the CPU doesn't support */
612 /** @todo make sure they don't conflict with the above requirements. */
613 val &= pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
614 pVCpu->hm.s.vmx.proc_ctls2 = val;
615 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS2, val);
616 AssertRC(rc);
617 }
618
619 /*
620 * VMX_VMCS_CTRL_CR3_TARGET_COUNT
621 * Set required bits to one and zero according to the MSR capabilities.
622 */
623 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0);
624 AssertRC(rc);
625
626 /*
627 * Forward all exception except #NM & #PF to the guest.
628 * We always need to check pagefaults since our shadow page table can be out of sync.
629 * And we always lazily sync the FPU & XMM state. .
630 */
631
632 /** @todo Possible optimization:
633 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
634 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
635 * registers ourselves of course.
636 *
637 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
638 */
639
640 /*
641 * Don't filter page faults, all of them should cause a world switch.
642 */
643 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0);
644 AssertRC(rc);
645 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0);
646 AssertRC(rc);
647
648 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0);
649 AssertRC(rc);
650 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0);
651 AssertRC(rc);
652 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0);
653 AssertRC(rc);
654
655 /*
656 * Set the MSR bitmap address.
657 */
658 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
659 {
660 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
661
662 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap);
663 AssertRC(rc);
664
665 /*
666 * Allow the guest to directly modify these MSRs; they are loaded/stored automatically
667 * using MSR-load/store areas in the VMCS.
668 */
669 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
670 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
671 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
672 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
673 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
674 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
675 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
676 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
677 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
678 if (pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
679 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_TSC_AUX, true, true);
680 }
681
682#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
683 /*
684 * Set the guest & host MSR load/store physical addresses.
685 */
686 Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr);
687 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
688 AssertRC(rc);
689 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
690 AssertRC(rc);
691 Assert(pVCpu->hm.s.vmx.HCPhysHostMsr);
692 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr);
693 AssertRC(rc);
694#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
695
696 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
697 AssertRC(rc);
698 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0);
699 AssertRC(rc);
700 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0);
701 AssertRC(rc);
702
703 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
704 {
705 Assert(pVM->hm.s.vmx.hMemObjApicAccess);
706 /* Optional */
707 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0);
708 rc |= VMXWriteVMCS64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVAPIC);
709
710 if (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
711 rc |= VMXWriteVMCS64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess);
712
713 AssertRC(rc);
714 }
715
716 /* Set link pointer to -1. Not currently used. */
717 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
718 AssertRC(rc);
719
720 /*
721 * Clear VMCS, marking it inactive. Clear implementation specific data and writing back
722 * VMCS data back to memory.
723 */
724 rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
725 AssertRC(rc);
726
727 /*
728 * Configure the VMCS read cache.
729 */
730 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
731
732 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RIP);
733 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RSP);
734 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
735 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
736 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
737 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_CR0);
738 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
739 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_CR4);
740 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_DR7);
741 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
742 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_SYSENTER_EIP);
743 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_SYSENTER_ESP);
744 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
745 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_GDTR_BASE);
746 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
747 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_IDTR_BASE);
748
749 VMX_SETUP_SELREG(ES, pCache);
750 VMX_SETUP_SELREG(SS, pCache);
751 VMX_SETUP_SELREG(CS, pCache);
752 VMX_SETUP_SELREG(DS, pCache);
753 VMX_SETUP_SELREG(FS, pCache);
754 VMX_SETUP_SELREG(GS, pCache);
755 VMX_SETUP_SELREG(LDTR, pCache);
756 VMX_SETUP_SELREG(TR, pCache);
757
758 /*
759 * Status code VMCS reads.
760 */
761 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
762 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
763 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
764 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
765 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
766 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
767 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
768 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
769 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
770
771 if (pVM->hm.s.fNestedPaging)
772 {
773 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_CR3);
774 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL);
775 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
776 }
777 else
778 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
779 } /* for each VMCPU */
780
781 /*
782 * Setup the right TLB function based on CPU capabilities.
783 */
784 if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
785 pVM->hm.s.vmx.pfnSetupTaggedTlb = hmR0VmxSetupTLBBoth;
786 else if (pVM->hm.s.fNestedPaging)
787 pVM->hm.s.vmx.pfnSetupTaggedTlb = hmR0VmxSetupTLBEPT;
788 else if (pVM->hm.s.vmx.fVpid)
789 pVM->hm.s.vmx.pfnSetupTaggedTlb = hmR0VmxSetupTLBVPID;
790 else
791 pVM->hm.s.vmx.pfnSetupTaggedTlb = hmR0VmxSetupTLBDummy;
792
793vmx_end:
794 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
795 return rc;
796}
797
798
799/**
800 * Sets the permission bits for the specified MSR.
801 *
802 * @param pVCpu Pointer to the VMCPU.
803 * @param ulMSR The MSR value.
804 * @param fRead Whether reading is allowed.
805 * @param fWrite Whether writing is allowed.
806 */
807static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
808{
809 unsigned ulBit;
810 uint8_t *pvMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
811
812 /*
813 * Layout:
814 * 0x000 - 0x3ff - Low MSR read bits
815 * 0x400 - 0x7ff - High MSR read bits
816 * 0x800 - 0xbff - Low MSR write bits
817 * 0xc00 - 0xfff - High MSR write bits
818 */
819 if (ulMSR <= 0x00001FFF)
820 {
821 /* Pentium-compatible MSRs */
822 ulBit = ulMSR;
823 }
824 else if ( ulMSR >= 0xC0000000
825 && ulMSR <= 0xC0001FFF)
826 {
827 /* AMD Sixth Generation x86 Processor MSRs */
828 ulBit = (ulMSR - 0xC0000000);
829 pvMsrBitmap += 0x400;
830 }
831 else
832 {
833 AssertFailed();
834 return;
835 }
836
837 Assert(ulBit <= 0x1fff);
838 if (fRead)
839 ASMBitClear(pvMsrBitmap, ulBit);
840 else
841 ASMBitSet(pvMsrBitmap, ulBit);
842
843 if (fWrite)
844 ASMBitClear(pvMsrBitmap + 0x800, ulBit);
845 else
846 ASMBitSet(pvMsrBitmap + 0x800, ulBit);
847}
848
849
850/**
851 * Injects an event (trap or external interrupt).
852 *
853 * @returns VBox status code. Note that it may return VINF_EM_RESET to
854 * indicate a triple fault when injecting X86_XCPT_DF.
855 *
856 * @param pVM Pointer to the VM.
857 * @param pVCpu Pointer to the VMCPU.
858 * @param pCtx Pointer to the guest CPU Context.
859 * @param intInfo VMX interrupt info.
860 * @param cbInstr Opcode length of faulting instruction.
861 * @param errCode Error code (optional).
862 */
863static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
864{
865 int rc;
866 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
867
868#ifdef VBOX_WITH_STATISTICS
869 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
870#endif
871
872#ifdef VBOX_STRICT
873 if (iGate == 0xE)
874 {
875 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate,
876 (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
877 }
878 else if (iGate < 0x20)
879 {
880 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip,
881 errCode));
882 }
883 else
884 {
885 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
886 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
887 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
888 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
889 || pCtx->eflags.u32 & X86_EFL_IF);
890 }
891#endif
892
893 if ( CPUMIsGuestInRealModeEx(pCtx)
894 && pVM->hm.s.vmx.pRealModeTSS)
895 {
896 RTGCPHYS GCPhysHandler;
897 uint16_t offset, ip;
898 RTSEL sel;
899
900 /*
901 * Injecting events doesn't work right with real mode emulation.
902 * (#GP if we try to inject external hardware interrupts)
903 * Inject the interrupt or trap directly instead.
904 *
905 * ASSUMES no access handlers for the bits we read or write below (should be safe).
906 */
907 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
908
909 /*
910 * Check if the interrupt handler is present.
911 */
912 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
913 {
914 Log(("IDT cbIdt violation\n"));
915 if (iGate != X86_XCPT_DF)
916 {
917 uint32_t intInfo2;
918
919 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
920 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
921 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
922 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
923
924 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
925 }
926 Log(("Triple fault -> reset the VM!\n"));
927 return VINF_EM_RESET;
928 }
929 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
930 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
931 || iGate == 4)
932 {
933 ip = pCtx->ip + cbInstr;
934 }
935 else
936 ip = pCtx->ip;
937
938 /*
939 * Read the selector:offset pair of the interrupt handler.
940 */
941 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
942 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
943 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
944
945 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
946
947 /*
948 * Construct the stack frame.
949 */
950 /** @todo Check stack limit. */
951 pCtx->sp -= 2;
952 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->eflags.u));
953 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
954 pCtx->sp -= 2;
955 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->cs.Sel));
956 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
957 pCtx->sp -= 2;
958 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss.Sel, pCtx->sp, ip));
959 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
960
961 /*
962 * Update the CPU state for executing the handler.
963 */
964 pCtx->rip = offset;
965 pCtx->cs.Sel = sel;
966 pCtx->cs.u64Base = sel << 4;
967 pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
968
969 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SEGMENT_REGS;
970 return VINF_SUCCESS;
971 }
972
973 /*
974 * Set event injection state.
975 */
976 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
977 rc |= VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
978 rc |= VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
979
980 AssertRC(rc);
981 return rc;
982}
983
984
985/**
986 * Checks for pending guest interrupts and injects them.
987 *
988 * @returns VBox status code.
989 * @param pVM Pointer to the VM.
990 * @param pVCpu Pointer to the VMCPU.
991 * @param pCtx Pointer to the guest CPU context.
992 */
993static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
994{
995 int rc;
996
997 /*
998 * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely).
999 */
1000 if (pVCpu->hm.s.Event.fPending)
1001 {
1002 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hm.s.Event.intInfo,
1003 pVCpu->hm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
1004 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntReinject);
1005 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hm.s.Event.intInfo, 0, pVCpu->hm.s.Event.errCode);
1006 AssertRC(rc);
1007
1008 pVCpu->hm.s.Event.fPending = false;
1009 return VINF_SUCCESS;
1010 }
1011
1012 /*
1013 * If an active trap is already pending, we must forward it first!
1014 */
1015 if (!TRPMHasTrap(pVCpu))
1016 {
1017 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
1018 {
1019 RTGCUINTPTR intInfo;
1020
1021 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
1022
1023 intInfo = X86_XCPT_NMI;
1024 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1025 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1026
1027 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
1028 AssertRC(rc);
1029
1030 return VINF_SUCCESS;
1031 }
1032
1033 /** @todo SMI interrupts. */
1034
1035 /*
1036 * When external interrupts are pending, we should exit the VM when IF is set.
1037 */
1038 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
1039 {
1040 if (!(pCtx->eflags.u32 & X86_EFL_IF))
1041 {
1042 if (!(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
1043 {
1044 LogFlow(("Enable irq window exit!\n"));
1045 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
1046 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
1047 AssertRC(rc);
1048 }
1049 /* else nothing to do but wait */
1050 }
1051 else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1052 {
1053 uint8_t u8Interrupt;
1054
1055 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
1056 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu,
1057 u8Interrupt, u8Interrupt, rc, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
1058 if (RT_SUCCESS(rc))
1059 {
1060 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
1061 AssertRC(rc);
1062 }
1063 else
1064 {
1065 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
1066 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
1067 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
1068 /* Just continue */
1069 }
1070 }
1071 else
1072 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
1073 }
1074 }
1075
1076#ifdef VBOX_STRICT
1077 if (TRPMHasTrap(pVCpu))
1078 {
1079 uint8_t u8Vector;
1080 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
1081 AssertRC(rc);
1082 }
1083#endif
1084
1085 if ( (pCtx->eflags.u32 & X86_EFL_IF)
1086 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1087 && TRPMHasTrap(pVCpu)
1088 )
1089 {
1090 uint8_t u8Vector;
1091 TRPMEVENT enmType;
1092 RTGCUINTPTR intInfo;
1093 RTGCUINT errCode;
1094
1095 /*
1096 * If a new event is pending, dispatch it now.
1097 */
1098 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
1099 AssertRC(rc);
1100 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
1101 Assert(enmType != TRPM_SOFTWARE_INT);
1102
1103 /*
1104 * Clear the pending trap.
1105 */
1106 rc = TRPMResetTrap(pVCpu);
1107 AssertRC(rc);
1108
1109 intInfo = u8Vector;
1110 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1111
1112 if (enmType == TRPM_TRAP)
1113 {
1114 switch (u8Vector)
1115 {
1116 case X86_XCPT_DF:
1117 case X86_XCPT_TS:
1118 case X86_XCPT_NP:
1119 case X86_XCPT_SS:
1120 case X86_XCPT_GP:
1121 case X86_XCPT_PF:
1122 case X86_XCPT_AC:
1123 {
1124 /* Valid error codes. */
1125 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1126 break;
1127 }
1128
1129 default:
1130 break;
1131 }
1132
1133 if ( u8Vector == X86_XCPT_BP
1134 || u8Vector == X86_XCPT_OF)
1135 {
1136 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1137 }
1138 else
1139 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1140 }
1141 else
1142 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1143
1144 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntInject);
1145 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1146 AssertRC(rc);
1147 } /* if (interrupts can be dispatched) */
1148
1149 return VINF_SUCCESS;
1150}
1151
1152/**
1153 * Checks for pending VMX events and converts them to TRPM. Before we execute any instruction
1154 * outside of VMX, any pending VMX event must be converted so that it can be delivered properly.
1155 *
1156 * @returns VBox status code.
1157 * @param pVCpu Pointer to the VMCPU.
1158 */
1159static int hmR0VmxCheckPendingEvent(PVMCPU pVCpu)
1160{
1161 if (pVCpu->hm.s.Event.fPending)
1162 {
1163 TRPMEVENT enmTrapType;
1164
1165 /* If a trap was already pending, we did something wrong! */
1166 Assert((TRPMQueryTrap(pVCpu, NULL, NULL) == VERR_TRPM_NO_ACTIVE_TRAP));
1167
1168 /*
1169 * Clear the pending event and move it over to TRPM for the rest
1170 * of the world to see.
1171 */
1172 pVCpu->hm.s.Event.fPending = false;
1173 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.intInfo))
1174 {
1175 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT:
1176 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI:
1177 enmTrapType = TRPM_HARDWARE_INT;
1178 break;
1179 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW:
1180 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT:
1181 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT:
1182 enmTrapType = TRPM_SOFTWARE_INT;
1183 break;
1184 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT:
1185 enmTrapType = TRPM_TRAP;
1186 break;
1187 default:
1188 enmTrapType = TRPM_32BIT_HACK; /* Can't get here. */
1189 AssertFailed();
1190 }
1191 TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.intInfo), enmTrapType);
1192 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.intInfo))
1193 TRPMSetErrorCode(pVCpu, pVCpu->hm.s.Event.errCode);
1194 //@todo: Is there any situation where we need to call TRPMSetFaultAddress()?
1195 }
1196 return VINF_SUCCESS;
1197}
1198
1199/**
1200 * Save the host state into the VMCS.
1201 *
1202 * @returns VBox status code.
1203 * @param pVM Pointer to the VM.
1204 * @param pVCpu Pointer to the VMCPU.
1205 */
1206VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1207{
1208 int rc = VINF_SUCCESS;
1209 NOREF(pVM);
1210
1211 /*
1212 * Host CPU Context.
1213 */
1214 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
1215 {
1216 RTIDTR idtr;
1217 RTGDTR gdtr;
1218 RTSEL SelTR;
1219 PCX86DESCHC pDesc;
1220 uintptr_t trBase;
1221 RTSEL cs;
1222 RTSEL ss;
1223 uint64_t cr3;
1224
1225 /*
1226 * Control registers.
1227 */
1228 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1229 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1230#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1231 if (VMX_IS_64BIT_HOST_MODE())
1232 {
1233 cr3 = hmR0Get64bitCR3();
1234 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1235 }
1236 else
1237#endif
1238 {
1239 cr3 = ASMGetCR3();
1240 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1241 }
1242 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1243 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1244 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1245 AssertRC(rc);
1246
1247 /*
1248 * Selector registers.
1249 */
1250#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1251 if (VMX_IS_64BIT_HOST_MODE())
1252 {
1253 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1254 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1255 }
1256 else
1257 {
1258 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1259 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1260 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1261 }
1262#else
1263 cs = ASMGetCS();
1264 ss = ASMGetSS();
1265#endif
1266 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1267 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1268 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1269 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1270 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1271 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1272#if HC_ARCH_BITS == 32
1273 if (!VMX_IS_64BIT_HOST_MODE())
1274 {
1275 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1276 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1277 }
1278#endif
1279 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1280 SelTR = ASMGetTR();
1281 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1282 AssertRC(rc);
1283 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1284 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1285 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1286 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1287 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1288 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1289 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1290
1291 /*
1292 * GDTR & IDTR.
1293 */
1294#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1295 if (VMX_IS_64BIT_HOST_MODE())
1296 {
1297 X86XDTR64 gdtr64, idtr64;
1298 hmR0Get64bitGdtrAndIdtr(&gdtr64, &idtr64);
1299 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1300 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1301 AssertRC(rc);
1302 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1303 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1304 gdtr.cbGdt = gdtr64.cb;
1305 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1306 }
1307 else
1308#endif
1309 {
1310 ASMGetGDTR(&gdtr);
1311 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1312 ASMGetIDTR(&idtr);
1313 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1314 AssertRC(rc);
1315 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1316 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1317 }
1318
1319 /*
1320 * Save the base address of the TR selector.
1321 */
1322 if (SelTR > gdtr.cbGdt)
1323 {
1324 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1325 return VERR_VMX_INVALID_HOST_STATE;
1326 }
1327
1328 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1329#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1330 if (VMX_IS_64BIT_HOST_MODE())
1331 {
1332 uint64_t trBase64 = X86DESC64_BASE((PX86DESC64)pDesc);
1333 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1334 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1335 AssertRC(rc);
1336 }
1337 else
1338#endif
1339 {
1340#if HC_ARCH_BITS == 64
1341 trBase = X86DESC64_BASE(pDesc);
1342#else
1343 trBase = X86DESC_BASE(pDesc);
1344#endif
1345 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1346 AssertRC(rc);
1347 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1348 }
1349
1350 /*
1351 * FS base and GS base.
1352 */
1353#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1354 if (VMX_IS_64BIT_HOST_MODE())
1355 {
1356 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1357 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1358 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1359 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1360 }
1361#endif
1362 AssertRC(rc);
1363
1364 /*
1365 * Sysenter MSRs.
1366 */
1367 /** @todo expensive!! */
1368 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1369 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1370#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1371 if (VMX_IS_64BIT_HOST_MODE())
1372 {
1373 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1374 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1375 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1376 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1377 }
1378 else
1379 {
1380 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1381 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1382 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1383 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1384 }
1385#elif HC_ARCH_BITS == 32
1386 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1387 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1388 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1389 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1390#else
1391 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1392 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1393 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1394 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1395#endif
1396 AssertRC(rc);
1397
1398
1399#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1400 /*
1401 * Store all host MSRs in the VM-Exit load area, so they will be reloaded after
1402 * the world switch back to the host.
1403 */
1404 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvHostMsr;
1405 unsigned idxMsr = 0;
1406
1407 uint32_t u32HostExtFeatures = ASMCpuId_EDX(0x80000001);
1408 if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
1409 {
1410 pMsr->u32IndexMSR = MSR_K6_EFER;
1411 pMsr->u32Reserved = 0;
1412# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1413 if (CPUMIsGuestInLongMode(pVCpu))
1414 {
1415 /* Must match the EFER value in our 64 bits switcher. */
1416 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1417 }
1418 else
1419# endif
1420 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1421 pMsr++; idxMsr++;
1422 }
1423
1424# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1425 if (VMX_IS_64BIT_HOST_MODE())
1426 {
1427 pMsr->u32IndexMSR = MSR_K6_STAR;
1428 pMsr->u32Reserved = 0;
1429 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1430 pMsr++; idxMsr++;
1431 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1432 pMsr->u32Reserved = 0;
1433 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1434 pMsr++; idxMsr++;
1435 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1436 pMsr->u32Reserved = 0;
1437 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1438 pMsr++; idxMsr++;
1439
1440 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
1441#if 0
1442 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1443 pMsr->u32Reserved = 0;
1444 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1445 pMsr++; idxMsr++;
1446#endif
1447 }
1448# endif
1449
1450 if (pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
1451 {
1452 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
1453 pMsr->u32Reserved = 0;
1454 pMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX);
1455 pMsr++; idxMsr++;
1456 }
1457
1458 /** @todo r=ramshankar: check IA32_VMX_MISC bits 27:25 for valid idxMsr
1459 * range. */
1460 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1461 AssertRC(rc);
1462#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1463
1464 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_HOST_CONTEXT;
1465 }
1466 return rc;
1467}
1468
1469
1470/**
1471 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1472 * guest operates in PAE mode.
1473 *
1474 * @returns VBox status code.
1475 * @param pVCpu Pointer to the VMCPU.
1476 * @param pCtx Pointer to the guest CPU context.
1477 */
1478static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1479{
1480 if (CPUMIsGuestInPAEModeEx(pCtx))
1481 {
1482 X86PDPE aPdpes[4];
1483 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1484 AssertRCReturn(rc, rc);
1485
1486 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_PDPTE0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1487 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_PDPTE1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1488 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_PDPTE2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1489 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_PDPTE3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1490 }
1491 return VINF_SUCCESS;
1492}
1493
1494
1495/**
1496 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1497 * guest operates in PAE mode.
1498 *
1499 * @returns VBox status code.
1500 * @param pVCpu Pointer to the VM CPU.
1501 * @param pCtx Pointer to the guest CPU context.
1502 *
1503 * @remarks Tell PGM about CR3 changes before calling this helper.
1504 */
1505static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1506{
1507 if (CPUMIsGuestInPAEModeEx(pCtx))
1508 {
1509 int rc;
1510 X86PDPE aPdpes[4];
1511 rc = VMXReadVMCS64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1512 rc = VMXReadVMCS64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1513 rc = VMXReadVMCS64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1514 rc = VMXReadVMCS64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1515
1516 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1517 AssertRCReturn(rc, rc);
1518 }
1519 return VINF_SUCCESS;
1520}
1521
1522
1523/**
1524 * Update the exception bitmap according to the current CPU state.
1525 *
1526 * @param pVM Pointer to the VM.
1527 * @param pVCpu Pointer to the VMCPU.
1528 * @param pCtx Pointer to the guest CPU context.
1529 */
1530static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1531{
1532 uint32_t u32TrapMask;
1533 Assert(pCtx);
1534
1535 /*
1536 * Set up a mask for intercepting traps.
1537 */
1538 /** @todo Do we really need to always intercept #DB? */
1539 u32TrapMask = RT_BIT(X86_XCPT_DB)
1540 | RT_BIT(X86_XCPT_NM)
1541#ifdef VBOX_ALWAYS_TRAP_PF
1542 | RT_BIT(X86_XCPT_PF)
1543#endif
1544#ifdef VBOX_STRICT
1545 | RT_BIT(X86_XCPT_BP)
1546 | RT_BIT(X86_XCPT_DB)
1547 | RT_BIT(X86_XCPT_DE)
1548 | RT_BIT(X86_XCPT_NM)
1549 | RT_BIT(X86_XCPT_UD)
1550 | RT_BIT(X86_XCPT_NP)
1551 | RT_BIT(X86_XCPT_SS)
1552 | RT_BIT(X86_XCPT_GP)
1553 | RT_BIT(X86_XCPT_MF)
1554#endif
1555 ;
1556
1557 /*
1558 * Without nested paging, #PF must be intercepted to implement shadow paging.
1559 */
1560 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1561 if (!pVM->hm.s.fNestedPaging)
1562 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1563
1564 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
1565 if (!(pCtx->cr0 & X86_CR0_NE))
1566 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1567
1568#ifdef VBOX_STRICT
1569 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1570#endif
1571
1572 /*
1573 * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise).
1574 */
1575 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1576 if ( CPUMIsGuestInRealModeEx(pCtx)
1577 && pVM->hm.s.vmx.pRealModeTSS)
1578 {
1579 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1580 | RT_BIT(X86_XCPT_DB)
1581 | RT_BIT(X86_XCPT_NMI)
1582 | RT_BIT(X86_XCPT_BP)
1583 | RT_BIT(X86_XCPT_OF)
1584 | RT_BIT(X86_XCPT_BR)
1585 | RT_BIT(X86_XCPT_UD)
1586 | RT_BIT(X86_XCPT_DF)
1587 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1588 | RT_BIT(X86_XCPT_TS)
1589 | RT_BIT(X86_XCPT_NP)
1590 | RT_BIT(X86_XCPT_SS)
1591 | RT_BIT(X86_XCPT_GP)
1592 | RT_BIT(X86_XCPT_MF)
1593 | RT_BIT(X86_XCPT_AC)
1594 | RT_BIT(X86_XCPT_MC)
1595 | RT_BIT(X86_XCPT_XF)
1596 ;
1597 }
1598
1599 int rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1600 AssertRC(rc);
1601}
1602
1603
1604/**
1605 * Loads a minimal guest state.
1606 *
1607 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1608 *
1609 * @param pVM Pointer to the VM.
1610 * @param pVCpu Pointer to the VMCPU.
1611 * @param pCtx Pointer to the guest CPU context.
1612 */
1613VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1614{
1615 int rc;
1616 X86EFLAGS eflags;
1617
1618 Assert(!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_ALL_GUEST));
1619
1620 /*
1621 * Load EIP, ESP and EFLAGS.
1622 */
1623 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_RIP, pCtx->rip);
1624 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_RSP, pCtx->rsp);
1625 AssertRC(rc);
1626
1627 /*
1628 * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1.
1629 */
1630 eflags = pCtx->eflags;
1631 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1632 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1633
1634 /*
1635 * Check if real mode emulation using v86 mode.
1636 */
1637 if ( CPUMIsGuestInRealModeEx(pCtx)
1638 && pVM->hm.s.vmx.pRealModeTSS)
1639 {
1640 pVCpu->hm.s.vmx.RealMode.eflags = eflags;
1641
1642 eflags.Bits.u1VM = 1;
1643 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1644 }
1645 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1646 AssertRC(rc);
1647}
1648
1649
1650/**
1651 * Loads the guest state.
1652 *
1653 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1654 *
1655 * @returns VBox status code.
1656 * @param pVM Pointer to the VM.
1657 * @param pVCpu Pointer to the VMCPU.
1658 * @param pCtx Pointer to the guest CPU context.
1659 */
1660VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1661{
1662 int rc = VINF_SUCCESS;
1663 RTGCUINTPTR val;
1664
1665 /*
1666 * VMX_VMCS_CTRL_ENTRY_CONTROLS
1667 * Set required bits to one and zero according to the MSR capabilities.
1668 */
1669 val = pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0;
1670
1671 /*
1672 * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR).
1673 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs
1674 */
1675 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1676
1677 if (CPUMIsGuestInLongModeEx(pCtx))
1678 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA32E_MODE_GUEST;
1679 /* else Must be zero when AMD64 is not available. */
1680
1681 /*
1682 * Mask away the bits that the CPU doesn't support.
1683 */
1684 val &= pVM->hm.s.vmx.msr.vmx_entry.n.allowed1;
1685 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_CONTROLS, val);
1686 AssertRC(rc);
1687
1688 /*
1689 * VMX_VMCS_CTRL_EXIT_CONTROLS
1690 * Set required bits to one and zero according to the MSR capabilities.
1691 */
1692 val = pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0;
1693
1694 /*
1695 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR)
1696 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs
1697 */
1698 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1699
1700#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1701 if (VMX_IS_64BIT_HOST_MODE())
1702 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE;
1703 /* else Must be zero when AMD64 is not available. */
1704#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1705 if (CPUMIsGuestInLongModeEx(pCtx))
1706 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE; /* our switcher goes to long mode */
1707 else
1708 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE));
1709#endif
1710 val &= pVM->hm.s.vmx.msr.vmx_exit.n.allowed1;
1711
1712 /*
1713 * Don't acknowledge external interrupts on VM-exit.
1714 */
1715 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_CONTROLS, val);
1716 AssertRC(rc);
1717
1718 /*
1719 * Guest CPU context: ES, CS, SS, DS, FS, GS.
1720 */
1721 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SEGMENT_REGS)
1722 {
1723 if (pVM->hm.s.vmx.pRealModeTSS)
1724 {
1725 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1726 if (pVCpu->hm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1727 {
1728 /*
1729 * Correct weird requirements for switching to protected mode.
1730 */
1731 if ( pVCpu->hm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1732 && enmGuestMode >= PGMMODE_PROTECTED)
1733 {
1734#ifdef VBOX_WITH_REM
1735 /*
1736 * Flush the recompiler code cache as it's not unlikely the guest will rewrite code
1737 * it will later execute in real mode (OpenBSD 4.0 is one such example)
1738 */
1739 REMFlushTBs(pVM);
1740#endif
1741
1742 /*
1743 * DPL of all hidden selector registers must match the current CPL (0).
1744 */
1745 pCtx->cs.Attr.n.u2Dpl = 0;
1746 pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1747
1748 pCtx->ds.Attr.n.u2Dpl = 0;
1749 pCtx->es.Attr.n.u2Dpl = 0;
1750 pCtx->fs.Attr.n.u2Dpl = 0;
1751 pCtx->gs.Attr.n.u2Dpl = 0;
1752 pCtx->ss.Attr.n.u2Dpl = 0;
1753 }
1754 pVCpu->hm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1755 }
1756 }
1757
1758 VMX_WRITE_SELREG(ES, es);
1759 AssertRC(rc);
1760
1761 VMX_WRITE_SELREG(CS, cs);
1762 AssertRC(rc);
1763
1764 VMX_WRITE_SELREG(SS, ss);
1765 AssertRC(rc);
1766
1767 VMX_WRITE_SELREG(DS, ds);
1768 AssertRC(rc);
1769
1770 VMX_WRITE_SELREG(FS, fs);
1771 AssertRC(rc);
1772
1773 VMX_WRITE_SELREG(GS, gs);
1774 AssertRC(rc);
1775 }
1776
1777 /*
1778 * Guest CPU context: LDTR.
1779 */
1780 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_LDTR)
1781 {
1782 if (pCtx->ldtr.Sel == 0)
1783 {
1784 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1785 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1786 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_LDTR_BASE, 0); /* @todo removing "64" in the function should be the same. */
1787 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1788 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1789 }
1790 else
1791 {
1792 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr.Sel);
1793 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit);
1794 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtr.u64Base); /* @todo removing "64" and it should be the same */
1795 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtr.Attr.u);
1796 }
1797 AssertRC(rc);
1798 }
1799
1800 /*
1801 * Guest CPU context: TR.
1802 */
1803 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_TR)
1804 {
1805 /*
1806 * Real mode emulation using v86 mode with CR4.VME (interrupt redirection
1807 * using the int bitmap in the TSS).
1808 */
1809 if ( CPUMIsGuestInRealModeEx(pCtx)
1810 && pVM->hm.s.vmx.pRealModeTSS)
1811 {
1812 RTGCPHYS GCPhys;
1813
1814 /* We convert it here every time as PCI regions could be reconfigured. */
1815 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
1816 AssertRC(rc);
1817
1818 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1819 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HM_VTX_TSS_SIZE);
1820 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1821
1822 X86DESCATTR attr;
1823
1824 attr.u = 0;
1825 attr.n.u1Present = 1;
1826 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1827 val = attr.u;
1828 }
1829 else
1830 {
1831 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr.Sel);
1832 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->tr.u32Limit);
1833 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_TR_BASE, pCtx->tr.u64Base);
1834
1835 val = pCtx->tr.Attr.u;
1836
1837 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
1838 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1839 {
1840 if (val & 0xf)
1841 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1842 else
1843 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1844 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1845 }
1846 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY,
1847 ("%#x\n", val));
1848 }
1849 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1850 AssertRC(rc);
1851 }
1852
1853 /*
1854 * Guest CPU context: GDTR.
1855 */
1856 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_GDTR)
1857 {
1858 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1859 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1860 AssertRC(rc);
1861 }
1862
1863 /*
1864 * Guest CPU context: IDTR.
1865 */
1866 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_IDTR)
1867 {
1868 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1869 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1870 AssertRC(rc);
1871 }
1872
1873 /*
1874 * Sysenter MSRs.
1875 */
1876 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_MSR)
1877 {
1878 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1879 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1880 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1881 AssertRC(rc);
1882 }
1883
1884 /*
1885 * Guest CPU context: Control registers.
1886 */
1887 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0)
1888 {
1889 val = pCtx->cr0;
1890 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1891 Log2(("Guest CR0-shadow %08x\n", val));
1892 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1893 {
1894 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1895 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1896 }
1897 else
1898 {
1899 /** @todo check if we support the old style mess correctly. */
1900 if (!(val & X86_CR0_NE))
1901 Log(("Forcing X86_CR0_NE!!!\n"));
1902
1903 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1904 }
1905 /* Protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1906 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
1907 val |= X86_CR0_PE | X86_CR0_PG;
1908
1909 if (pVM->hm.s.fNestedPaging)
1910 {
1911 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1912 {
1913 /* Disable CR3 read/write monitoring as we don't need it for EPT. */
1914 pVCpu->hm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1915 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1916 }
1917 else
1918 {
1919 /* Reenable CR3 read/write monitoring as our identity mapped page table is active. */
1920 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1921 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1922 }
1923 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
1924 AssertRC(rc);
1925 }
1926 else
1927 {
1928 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1929 val |= X86_CR0_WP;
1930 }
1931
1932 /* Always enable caching. */
1933 val &= ~(X86_CR0_CD|X86_CR0_NW);
1934
1935 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_CR0, val);
1936 Log2(("Guest CR0 %08x\n", val));
1937
1938 /*
1939 * CR0 flags owned by the host; if the guests attempts to change them, then the VM will exit.
1940 */
1941 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1942 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1943 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1944 | X86_CR0_CD /* Bit not restored during VM-exit! */
1945 | X86_CR0_NW /* Bit not restored during VM-exit! */
1946 | X86_CR0_NE;
1947
1948 /*
1949 * When the guest's FPU state is active, then we no longer care about the FPU related bits.
1950 */
1951 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1952 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1953
1954 pVCpu->hm.s.vmx.cr0_mask = val;
1955
1956 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1957 Log2(("Guest CR0-mask %08x\n", val));
1958 AssertRC(rc);
1959 }
1960
1961 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR4)
1962 {
1963 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1964 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1965 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1966 val = pCtx->cr4 | (uint32_t)pVM->hm.s.vmx.msr.vmx_cr4_fixed0;
1967
1968 if (!pVM->hm.s.fNestedPaging)
1969 {
1970 switch (pVCpu->hm.s.enmShadowMode)
1971 {
1972 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1973 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1974 case PGMMODE_32_BIT: /* 32-bit paging. */
1975 val &= ~X86_CR4_PAE;
1976 break;
1977
1978 case PGMMODE_PAE: /* PAE paging. */
1979 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1980 /** Must use PAE paging as we could use physical memory > 4 GB */
1981 val |= X86_CR4_PAE;
1982 break;
1983
1984 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1985 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1986#ifdef VBOX_ENABLE_64_BITS_GUESTS
1987 break;
1988#else
1989 AssertFailed();
1990 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1991#endif
1992 default: /* shut up gcc */
1993 AssertFailed();
1994 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1995 }
1996 }
1997 else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1998 && !pVM->hm.s.vmx.fUnrestrictedGuest)
1999 {
2000 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
2001 val |= X86_CR4_PSE;
2002 /* Our identity mapping is a 32 bits page directory. */
2003 val &= ~X86_CR4_PAE;
2004 }
2005
2006 /*
2007 * Turn off VME if we're in emulated real mode.
2008 */
2009 if ( CPUMIsGuestInRealModeEx(pCtx)
2010 && pVM->hm.s.vmx.pRealModeTSS)
2011 {
2012 val &= ~X86_CR4_VME;
2013 }
2014
2015 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_CR4, val);
2016 Log2(("Guest CR4 %08x\n", val));
2017
2018 /*
2019 * CR4 flags owned by the host; if the guests attempts to change them, then the VM will exit.
2020 */
2021 val = 0
2022 | X86_CR4_VME
2023 | X86_CR4_PAE
2024 | X86_CR4_PGE
2025 | X86_CR4_PSE
2026 | X86_CR4_VMXE;
2027 pVCpu->hm.s.vmx.cr4_mask = val;
2028
2029 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
2030 Log2(("Guest CR4-mask %08x\n", val));
2031 AssertRC(rc);
2032 }
2033
2034#if 0
2035 /* Enable single stepping if requested and CPU supports it. */
2036 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
2037 if (DBGFIsStepping(pVCpu))
2038 {
2039 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
2040 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2041 AssertRC(rc);
2042 }
2043#endif
2044
2045 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR3)
2046 {
2047 if (pVM->hm.s.fNestedPaging)
2048 {
2049 Assert(PGMGetHyperCR3(pVCpu));
2050 pVCpu->hm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
2051
2052 Assert(!(pVCpu->hm.s.vmx.GCPhysEPTP & 0xfff));
2053 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
2054 pVCpu->hm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
2055 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
2056
2057 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.GCPhysEPTP);
2058 AssertRC(rc);
2059
2060 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
2061 && !pVM->hm.s.vmx.fUnrestrictedGuest)
2062 {
2063 RTGCPHYS GCPhys;
2064
2065 /* We convert it here every time as PCI regions could be reconfigured. */
2066 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
2067 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hm.s.vmx.pNonPagingModeEPTPageTable));
2068
2069 /*
2070 * We use our identity mapping page table here as we need to map guest virtual to
2071 * guest physical addresses; EPT will take care of the translation to host physical addresses.
2072 */
2073 val = GCPhys;
2074 }
2075 else
2076 {
2077 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
2078 val = pCtx->cr3;
2079 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
2080 AssertRCReturn(rc, rc);
2081 }
2082 }
2083 else
2084 {
2085 val = PGMGetHyperCR3(pVCpu);
2086 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
2087 }
2088
2089 /* Save our shadow CR3 register. */
2090 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_CR3, val);
2091 AssertRC(rc);
2092 }
2093
2094 /*
2095 * Guest CPU context: Debug registers.
2096 */
2097 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_DEBUG)
2098 {
2099 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
2100 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
2101
2102 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2103 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2104 pCtx->dr[7] |= 0x400; /* must be one */
2105
2106 /* Resync DR7 */
2107 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
2108 AssertRC(rc);
2109
2110#ifdef DEBUG
2111 /* Sync the hypervisor debug state now if any breakpoint is armed. */
2112 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
2113 && !CPUMIsHyperDebugStateActive(pVCpu)
2114 && !DBGFIsStepping(pVCpu))
2115 {
2116 /* Save the host and load the hypervisor debug state. */
2117 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2118 AssertRC(rc);
2119
2120 /* DRx intercepts remain enabled. */
2121
2122 /* Override dr7 with the hypervisor value. */
2123 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
2124 AssertRC(rc);
2125 }
2126 else
2127#endif
2128 /* Sync the debug state now if any breakpoint is armed. */
2129 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
2130 && !CPUMIsGuestDebugStateActive(pVCpu)
2131 && !DBGFIsStepping(pVCpu))
2132 {
2133 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
2134
2135 /* Disable DRx move intercepts. */
2136 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2137 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2138 AssertRC(rc);
2139
2140 /* Save the host and load the guest debug state. */
2141 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2142 AssertRC(rc);
2143 }
2144
2145 /* IA32_DEBUGCTL MSR. */
2146 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0);
2147 AssertRC(rc);
2148
2149 /** @todo do we really ever need this? */
2150 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
2151 AssertRC(rc);
2152 }
2153
2154 /*
2155 * 64-bit guest mode.
2156 */
2157 if (CPUMIsGuestInLongModeEx(pCtx))
2158 {
2159#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
2160 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2161#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2162 pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
2163#else
2164# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2165 if (!pVM->hm.s.fAllow64BitGuests)
2166 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2167# endif
2168 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64;
2169#endif
2170 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_MSR)
2171 {
2172 /* Update these as wrmsr might have changed them. */
2173 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_FS_BASE, pCtx->fs.u64Base);
2174 AssertRC(rc);
2175 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_GS_BASE, pCtx->gs.u64Base);
2176 AssertRC(rc);
2177 }
2178 }
2179 else
2180 {
2181 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
2182 }
2183
2184 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
2185
2186#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2187 /*
2188 * Store all guest MSRs in the VM-entry load area, so they will be loaded
2189 * during VM-entry and restored into the VM-exit store area during VM-exit.
2190 */
2191 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvGuestMsr;
2192 unsigned idxMsr = 0;
2193
2194 uint32_t u32GstExtFeatures;
2195 uint32_t u32Temp;
2196 CPUMGetGuestCpuId(pVCpu, 0x80000001, &u32Temp, &u32Temp, &u32Temp, &u32GstExtFeatures);
2197
2198 if (u32GstExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2199 {
2200 pMsr->u32IndexMSR = MSR_K6_EFER;
2201 pMsr->u32Reserved = 0;
2202 pMsr->u64Value = pCtx->msrEFER;
2203 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
2204 if (!CPUMIsGuestInLongModeEx(pCtx))
2205 pMsr->u64Value &= ~(MSR_K6_EFER_LMA | MSR_K6_EFER_LME);
2206 pMsr++; idxMsr++;
2207
2208 if (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)
2209 {
2210 pMsr->u32IndexMSR = MSR_K8_LSTAR;
2211 pMsr->u32Reserved = 0;
2212 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
2213 pMsr++; idxMsr++;
2214 pMsr->u32IndexMSR = MSR_K6_STAR;
2215 pMsr->u32Reserved = 0;
2216 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
2217 pMsr++; idxMsr++;
2218 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
2219 pMsr->u32Reserved = 0;
2220 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
2221 pMsr++; idxMsr++;
2222
2223 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2224#if 0
2225 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
2226 pMsr->u32Reserved = 0;
2227 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
2228 pMsr++; idxMsr++;
2229#endif
2230 }
2231 }
2232
2233 if ( pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP
2234 && (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP))
2235 {
2236 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
2237 pMsr->u32Reserved = 0;
2238 rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pMsr->u64Value);
2239 AssertRC(rc);
2240 pMsr++; idxMsr++;
2241 }
2242
2243 pVCpu->hm.s.vmx.cGuestMsrs = idxMsr;
2244
2245 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
2246 AssertRC(rc);
2247
2248 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
2249 AssertRC(rc);
2250#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2251
2252 bool fOffsettedTsc;
2253 if (pVM->hm.s.vmx.fUsePreemptTimer)
2254 {
2255 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset);
2256
2257 /* Make sure the returned values have sane upper and lower boundaries. */
2258 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
2259
2260 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
2261 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
2262
2263 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
2264 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
2265 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
2266 AssertRC(rc);
2267 }
2268 else
2269 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset);
2270
2271 if (fOffsettedTsc)
2272 {
2273 uint64_t u64CurTSC = ASMReadTSC();
2274 if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
2275 {
2276 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
2277 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset);
2278 AssertRC(rc);
2279
2280 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2281 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2282 AssertRC(rc);
2283 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
2284 }
2285 else
2286 {
2287 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
2288 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
2289 pVCpu->hm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset,
2290 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hm.s.vmx.u64TSCOffset,
2291 TMCpuTickGet(pVCpu)));
2292 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2293 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2294 AssertRC(rc);
2295 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
2296 }
2297 }
2298 else
2299 {
2300 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2301 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2302 AssertRC(rc);
2303 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
2304 }
2305
2306 /* Done with the major changes */
2307 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_ALL_GUEST;
2308
2309 /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */
2310 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2311 return rc;
2312}
2313
2314
2315/**
2316 * Syncs back the guest state from VMCS.
2317 *
2318 * @returns VBox status code.
2319 * @param pVM Pointer to the VM.
2320 * @param pVCpu Pointer to the VMCPU.
2321 * @param pCtx Pointer to the guest CPU context.
2322 */
2323DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2324{
2325 RTGCUINTREG val, valShadow;
2326 RTGCUINTPTR uInterruptState;
2327 int rc;
2328
2329 /* First sync back EIP, ESP, and EFLAGS. */
2330 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RIP, &val);
2331 AssertRC(rc);
2332 pCtx->rip = val;
2333 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RSP, &val);
2334 AssertRC(rc);
2335 pCtx->rsp = val;
2336 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2337 AssertRC(rc);
2338 pCtx->eflags.u32 = val;
2339
2340 /* Take care of instruction fusing (sti, mov ss) */
2341 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2342 uInterruptState = val;
2343 if (uInterruptState != 0)
2344 {
2345 Assert(uInterruptState <= 2); /* only sti & mov ss */
2346 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2347 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2348 }
2349 else
2350 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2351
2352 /* Control registers. */
2353 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2354 VMXReadCachedVMCS(VMX_VMCS_GUEST_CR0, &val);
2355 val = (valShadow & pVCpu->hm.s.vmx.cr0_mask) | (val & ~pVCpu->hm.s.vmx.cr0_mask);
2356 CPUMSetGuestCR0(pVCpu, val);
2357
2358 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2359 VMXReadCachedVMCS(VMX_VMCS_GUEST_CR4, &val);
2360 val = (valShadow & pVCpu->hm.s.vmx.cr4_mask) | (val & ~pVCpu->hm.s.vmx.cr4_mask);
2361 CPUMSetGuestCR4(pVCpu, val);
2362
2363 /*
2364 * No reason to sync back the CRx registers. They can't be changed by the guest unless in
2365 * the nested paging case where CR3 & CR4 can be changed by the guest.
2366 */
2367 if ( pVM->hm.s.fNestedPaging
2368 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2369 {
2370 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
2371
2372 /* Can be updated behind our back in the nested paging case. */
2373 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2374
2375 VMXReadCachedVMCS(VMX_VMCS_GUEST_CR3, &val);
2376
2377 if (val != pCtx->cr3)
2378 {
2379 CPUMSetGuestCR3(pVCpu, val);
2380 PGMUpdateCR3(pVCpu, val);
2381 }
2382 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2383 AssertRCReturn(rc, rc);
2384 }
2385
2386 /* Sync back DR7. */
2387 VMXReadCachedVMCS(VMX_VMCS_GUEST_DR7, &val);
2388 pCtx->dr[7] = val;
2389
2390 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2391 VMX_READ_SELREG(ES, es);
2392 VMX_READ_SELREG(SS, ss);
2393 VMX_READ_SELREG(CS, cs);
2394 VMX_READ_SELREG(DS, ds);
2395 VMX_READ_SELREG(FS, fs);
2396 VMX_READ_SELREG(GS, gs);
2397
2398 /* System MSRs */
2399 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2400 pCtx->SysEnter.cs = val;
2401 VMXReadCachedVMCS(VMX_VMCS_GUEST_SYSENTER_EIP, &val);
2402 pCtx->SysEnter.eip = val;
2403 VMXReadCachedVMCS(VMX_VMCS_GUEST_SYSENTER_ESP, &val);
2404 pCtx->SysEnter.esp = val;
2405
2406 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2407 VMX_READ_SELREG(LDTR, ldtr);
2408
2409 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2410 pCtx->gdtr.cbGdt = val;
2411 VMXReadCachedVMCS(VMX_VMCS_GUEST_GDTR_BASE, &val);
2412 pCtx->gdtr.pGdt = val;
2413
2414 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2415 pCtx->idtr.cbIdt = val;
2416 VMXReadCachedVMCS(VMX_VMCS_GUEST_IDTR_BASE, &val);
2417 pCtx->idtr.pIdt = val;
2418
2419 /* Real mode emulation using v86 mode. */
2420 if ( CPUMIsGuestInRealModeEx(pCtx)
2421 && pVM->hm.s.vmx.pRealModeTSS)
2422 {
2423 /* Hide our emulation flags */
2424 pCtx->eflags.Bits.u1VM = 0;
2425
2426 /* Restore original IOPL setting as we always use 0. */
2427 pCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2428
2429 /* Force a TR resync every time in case we switch modes. */
2430 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_TR;
2431 }
2432 else
2433 {
2434 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2435 VMX_READ_SELREG(TR, tr);
2436 }
2437
2438#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2439 /*
2440 * Save the possibly changed MSRs that we automatically restore and save during a world switch.
2441 */
2442 for (unsigned i = 0; i < pVCpu->hm.s.vmx.cGuestMsrs; i++)
2443 {
2444 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvGuestMsr;
2445 pMsr += i;
2446
2447 switch (pMsr->u32IndexMSR)
2448 {
2449 case MSR_K8_LSTAR:
2450 pCtx->msrLSTAR = pMsr->u64Value;
2451 break;
2452 case MSR_K6_STAR:
2453 pCtx->msrSTAR = pMsr->u64Value;
2454 break;
2455 case MSR_K8_SF_MASK:
2456 pCtx->msrSFMASK = pMsr->u64Value;
2457 break;
2458 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2459#if 0
2460 case MSR_K8_KERNEL_GS_BASE:
2461 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2462 break;
2463#endif
2464 case MSR_K8_TSC_AUX:
2465 CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value);
2466 break;
2467
2468 case MSR_K6_EFER:
2469 /* EFER can't be changed without causing a VM-exit. */
2470 /* Assert(pCtx->msrEFER == pMsr->u64Value); */
2471 break;
2472
2473 default:
2474 AssertFailed();
2475 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2476 }
2477 }
2478#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2479 return VINF_SUCCESS;
2480}
2481
2482
2483/**
2484 * Dummy placeholder for TLB flush handling before VM-entry. Used in the case
2485 * where neither EPT nor VPID is supported by the CPU.
2486 *
2487 * @param pVM Pointer to the VM.
2488 * @param pVCpu Pointer to the VMCPU.
2489 */
2490static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2491{
2492 NOREF(pVM);
2493 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2494 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2495 pVCpu->hm.s.TlbShootdown.cPages = 0;
2496 return;
2497}
2498
2499
2500/**
2501 * Setup the tagged TLB for EPT+VPID.
2502 *
2503 * @param pVM Pointer to the VM.
2504 * @param pVCpu Pointer to the VMCPU.
2505 */
2506static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2507{
2508 PHMGLOBLCPUINFO pCpu;
2509
2510 Assert(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid);
2511
2512 pCpu = HMR0GetCurrentCpu();
2513
2514 /*
2515 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last
2516 * This can happen both for start & resume due to long jumps back to ring-3.
2517 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
2518 * or the host Cpu is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2519 */
2520 bool fNewAsid = false;
2521 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2522 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2523 {
2524 pVCpu->hm.s.fForceTLBFlush = true;
2525 fNewAsid = true;
2526 }
2527
2528 /*
2529 * Check for explicit TLB shootdowns.
2530 */
2531 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2532 pVCpu->hm.s.fForceTLBFlush = true;
2533
2534 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2535
2536 if (pVCpu->hm.s.fForceTLBFlush)
2537 {
2538 if (fNewAsid)
2539 {
2540 ++pCpu->uCurrentAsid;
2541 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2542 {
2543 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
2544 pCpu->cTlbFlushes++;
2545 pCpu->fFlushAsidBeforeUse = true;
2546 }
2547
2548 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2549 if (pCpu->fFlushAsidBeforeUse)
2550 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2551 }
2552 else
2553 {
2554 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
2555 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2556 else
2557 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2558 }
2559
2560 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2561 pVCpu->hm.s.fForceTLBFlush = false;
2562 }
2563 else
2564 {
2565 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
2566 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
2567 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
2568 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
2569
2570 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2571 * not be executed. See hmQueueInvlPage() where it is commented
2572 * out. Support individual entry flushing someday. */
2573 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2574 {
2575 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
2576
2577 /*
2578 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2579 * as supported by the CPU.
2580 */
2581 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2582 {
2583 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
2584 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
2585 }
2586 else
2587 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2588 }
2589 else
2590 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2591 }
2592
2593 pVCpu->hm.s.TlbShootdown.cPages = 0;
2594 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2595
2596 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2597 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2598 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2599 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
2600 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2601 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2602
2603 /* Update VMCS with the VPID. */
2604 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2605 AssertRC(rc);
2606}
2607
2608
2609/**
2610 * Setup the tagged TLB for EPT only.
2611 *
2612 * @returns VBox status code.
2613 * @param pVM Pointer to the VM.
2614 * @param pVCpu Pointer to the VMCPU.
2615 */
2616static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2617{
2618 PHMGLOBLCPUINFO pCpu;
2619
2620 Assert(pVM->hm.s.fNestedPaging);
2621 Assert(!pVM->hm.s.vmx.fVpid);
2622
2623 pCpu = HMR0GetCurrentCpu();
2624
2625 /*
2626 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last
2627 * This can happen both for start & resume due to long jumps back to ring-3.
2628 * A change in the TLB flush count implies the host Cpu is online after a suspend/resume.
2629 */
2630 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2631 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2632 {
2633 pVCpu->hm.s.fForceTLBFlush = true;
2634 }
2635
2636 /*
2637 * Check for explicit TLB shootdown flushes.
2638 */
2639 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2640 pVCpu->hm.s.fForceTLBFlush = true;
2641
2642 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2643 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2644
2645 if (pVCpu->hm.s.fForceTLBFlush)
2646 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2647 else
2648 {
2649 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2650 * not be executed. See hmQueueInvlPage() where it is commented
2651 * out. Support individual entry flushing someday. */
2652 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2653 {
2654 /*
2655 * We cannot flush individual entries without VPID support. Flush using EPT.
2656 */
2657 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
2658 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2659 }
2660 }
2661 pVCpu->hm.s.TlbShootdown.cPages= 0;
2662 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2663
2664#ifdef VBOX_WITH_STATISTICS
2665 /** @todo r=ramshankar: this is not accurate anymore with the VPID+EPT
2666 * handling. Should be fixed later. */
2667 if (pVCpu->hm.s.fForceTLBFlush)
2668 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2669 else
2670 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2671#endif
2672}
2673
2674
2675/**
2676 * Setup the tagged TLB for VPID.
2677 *
2678 * @returns VBox status code.
2679 * @param pVM Pointer to the VM.
2680 * @param pVCpu Pointer to the VMCPU.
2681 */
2682static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2683{
2684 PHMGLOBLCPUINFO pCpu;
2685
2686 Assert(pVM->hm.s.vmx.fVpid);
2687 Assert(!pVM->hm.s.fNestedPaging);
2688
2689 pCpu = HMR0GetCurrentCpu();
2690
2691 /*
2692 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last
2693 * This can happen both for start & resume due to long jumps back to ring-3.
2694 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
2695 * or the host Cpu is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2696 */
2697 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2698 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2699 {
2700 /* Force a TLB flush on VM entry. */
2701 pVCpu->hm.s.fForceTLBFlush = true;
2702 }
2703
2704 /*
2705 * Check for explicit TLB shootdown flushes.
2706 */
2707 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2708 pVCpu->hm.s.fForceTLBFlush = true;
2709
2710 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2711
2712 if (pVCpu->hm.s.fForceTLBFlush)
2713 {
2714 ++pCpu->uCurrentAsid;
2715 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2716 {
2717 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
2718 pCpu->cTlbFlushes++;
2719 pCpu->fFlushAsidBeforeUse = true;
2720 }
2721
2722 pVCpu->hm.s.fForceTLBFlush = false;
2723 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2724 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2725 if (pCpu->fFlushAsidBeforeUse)
2726 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2727 }
2728 else
2729 {
2730 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
2731 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
2732 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
2733 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
2734
2735 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2736 * not be executed. See hmQueueInvlPage() where it is commented
2737 * out. Support individual entry flushing someday. */
2738 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2739 {
2740 /*
2741 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2742 * as supported by the CPU.
2743 */
2744 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2745 {
2746 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
2747 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
2748 }
2749 else
2750 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2751 }
2752 }
2753 pVCpu->hm.s.TlbShootdown.cPages = 0;
2754 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2755
2756 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2757 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2758 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2759 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
2760 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2761 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2762
2763 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2764 AssertRC(rc);
2765
2766# ifdef VBOX_WITH_STATISTICS
2767 /** @todo r=ramshankar: this is not accurate anymore with EPT+VPID handling.
2768 * Should be fixed later. */
2769 if (pVCpu->hm.s.fForceTLBFlush)
2770 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2771 else
2772 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2773# endif
2774}
2775
2776
2777/**
2778 * Runs guest code in a VT-x VM.
2779 *
2780 * @returns VBox status code.
2781 * @param pVM Pointer to the VM.
2782 * @param pVCpu Pointer to the VMCPU.
2783 * @param pCtx Pointer to the guest CPU context.
2784 */
2785VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2786{
2787 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
2788 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1);
2789 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2);
2790
2791 VBOXSTRICTRC rc = VINF_SUCCESS;
2792 int rc2;
2793 RTGCUINTREG val;
2794 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2795 RTGCUINTREG instrError, cbInstr;
2796 RTGCUINTPTR exitQualification = 0;
2797 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2798 RTGCUINTPTR errCode, instrInfo;
2799 bool fSetupTPRCaching = false;
2800 uint64_t u64OldLSTAR = 0;
2801 uint8_t u8LastTPR = 0;
2802 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2803 unsigned cResume = 0;
2804#ifdef VBOX_STRICT
2805 RTCPUID idCpuCheck;
2806 bool fWasInLongMode = false;
2807#endif
2808#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2809 uint64_t u64LastTime = RTTimeMilliTS();
2810#endif
2811
2812 Assert(!(pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2813 || (pVCpu->hm.s.vmx.pbVAPIC && pVM->hm.s.vmx.pbApicAccess));
2814
2815 /*
2816 * Check if we need to use TPR shadowing.
2817 */
2818 if ( CPUMIsGuestInLongModeEx(pCtx)
2819 || ( (( pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2820 || pVM->hm.s.fTRPPatchingAllowed)
2821 && pVM->hm.s.fHasIoApic)
2822 )
2823 {
2824 fSetupTPRCaching = true;
2825 }
2826
2827 Log2(("\nE"));
2828
2829 /* This is not ideal, but if we don't clear the event injection in the VMCS right here,
2830 * we may end up injecting some stale event into a VM, including injecting an event that
2831 * originated before a VM reset *after* the VM has been reset. See @bugref{6220}.
2832 */
2833 VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_IRQ_INFO, 0);
2834
2835#ifdef VBOX_STRICT
2836 {
2837 RTCCUINTREG val2;
2838
2839 rc2 = VMXReadVMCS(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, &val2);
2840 AssertRC(rc2);
2841 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2842
2843 /* allowed zero */
2844 if ((val2 & pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2845 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2846
2847 /* allowed one */
2848 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2849 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2850
2851 rc2 = VMXReadVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, &val2);
2852 AssertRC(rc2);
2853 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2854
2855 /*
2856 * Must be set according to the MSR, but can be cleared if nested paging is used.
2857 */
2858 if (pVM->hm.s.fNestedPaging)
2859 {
2860 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2861 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2862 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2863 }
2864
2865 /* allowed zero */
2866 if ((val2 & pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2867 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2868
2869 /* allowed one */
2870 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2871 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2872
2873 rc2 = VMXReadVMCS(VMX_VMCS32_CTRL_ENTRY_CONTROLS, &val2);
2874 AssertRC(rc2);
2875 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2876
2877 /* allowed zero */
2878 if ((val2 & pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0)
2879 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2880
2881 /* allowed one */
2882 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2883 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2884
2885 rc2 = VMXReadVMCS(VMX_VMCS32_CTRL_EXIT_CONTROLS, &val2);
2886 AssertRC(rc2);
2887 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2888
2889 /* allowed zero */
2890 if ((val2 & pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0)
2891 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2892
2893 /* allowed one */
2894 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2895 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2896 }
2897 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2898#endif /* VBOX_STRICT */
2899
2900#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2901 pVCpu->hm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2902#endif
2903
2904 /*
2905 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
2906 */
2907ResumeExecution:
2908 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hm.s.StatEntry))
2909 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit2, &pVCpu->hm.s.StatEntry, x);
2910 AssertMsg(pVCpu->hm.s.idEnteredCpu == RTMpCpuId(),
2911 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2912 (int)pVCpu->hm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2913 Assert(!HMR0SuspendPending());
2914 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2915 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2916
2917 /*
2918 * Safety precaution; looping for too long here can have a very bad effect on the host.
2919 */
2920 if (RT_UNLIKELY(++cResume > pVM->hm.s.cMaxResumeLoops))
2921 {
2922 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
2923 rc = VINF_EM_RAW_INTERRUPT;
2924 goto end;
2925 }
2926
2927 /*
2928 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
2929 */
2930 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2931 {
2932 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2933 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2934 {
2935 /*
2936 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2937 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2938 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
2939 * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
2940 */
2941 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2942 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2943 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2944 AssertRC(rc2);
2945 }
2946 }
2947 else
2948 {
2949 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2950 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2951 AssertRC(rc2);
2952 }
2953
2954#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2955 if (RT_UNLIKELY((cResume & 0xf) == 0))
2956 {
2957 uint64_t u64CurTime = RTTimeMilliTS();
2958
2959 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2960 {
2961 u64LastTime = u64CurTime;
2962 TMTimerPollVoid(pVM, pVCpu);
2963 }
2964 }
2965#endif
2966
2967 /*
2968 * Check for pending actions that force us to go back to ring-3.
2969 */
2970 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2971 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2972 {
2973 /* Check if a sync operation is pending. */
2974 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2975 {
2976 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2977 if (rc != VINF_SUCCESS)
2978 {
2979 AssertRC(VBOXSTRICTRC_VAL(rc));
2980 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2981 goto end;
2982 }
2983 }
2984
2985#ifdef DEBUG
2986 /* Intercept X86_XCPT_DB if stepping is enabled */
2987 if (!DBGFIsStepping(pVCpu))
2988#endif
2989 {
2990 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK)
2991 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
2992 {
2993 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchToR3);
2994 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2995 goto end;
2996 }
2997 }
2998
2999 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
3000 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
3001 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
3002 {
3003 rc = VINF_EM_PENDING_REQUEST;
3004 goto end;
3005 }
3006
3007 /* Check if a pgm pool flush is in progress. */
3008 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
3009 {
3010 rc = VINF_PGM_POOL_FLUSH_PENDING;
3011 goto end;
3012 }
3013
3014 /* Check if DMA work is pending (2nd+ run). */
3015 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
3016 {
3017 rc = VINF_EM_RAW_TO_R3;
3018 goto end;
3019 }
3020 }
3021
3022#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3023 /*
3024 * Exit to ring-3 if preemption/work is pending.
3025 *
3026 * Interrupts are disabled before the call to make sure we don't miss any interrupt
3027 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
3028 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
3029 *
3030 * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
3031 * shootdowns rely on this.
3032 */
3033 uOldEFlags = ASMIntDisableFlags();
3034 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
3035 {
3036 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptPending);
3037 rc = VINF_EM_RAW_INTERRUPT;
3038 goto end;
3039 }
3040 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3041#endif
3042
3043 /*
3044 * When external interrupts are pending, we should exit the VM when IF is set.
3045 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!
3046 */
3047 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
3048 if (RT_FAILURE(rc))
3049 goto end;
3050
3051 /** @todo check timers?? */
3052
3053 /*
3054 * TPR caching using CR8 is only available in 64-bit mode.
3055 * Note: There is a 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), but this appears to be missing in Intel CPUs.
3056 * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (no longer true) .
3057 */
3058 /** @todo query and update the TPR only when it could have been changed (mmio
3059 * access & wrmsr (x2apic)) */
3060 if (fSetupTPRCaching)
3061 {
3062 /* TPR caching in CR8 */
3063 bool fPending;
3064
3065 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
3066 AssertRC(rc2);
3067 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
3068 pVCpu->hm.s.vmx.pbVAPIC[0x80] = u8LastTPR;
3069
3070 /*
3071 * Two options here:
3072 * - external interrupt pending, but masked by the TPR value.
3073 * -> a CR8 update that lowers the current TPR value should cause an exit
3074 * - no pending interrupts
3075 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
3076 */
3077
3078 /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
3079 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0);
3080 AssertRC(VBOXSTRICTRC_VAL(rc));
3081
3082 if (pVM->hm.s.fTPRPatchingActive)
3083 {
3084 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3085 /* Our patch code uses LSTAR for TPR caching. */
3086 pCtx->msrLSTAR = u8LastTPR;
3087
3088 /** @todo r=ramshankar: we should check for MSR-bitmap support here. */
3089 if (fPending)
3090 {
3091 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
3092 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
3093 }
3094 else
3095 {
3096 /*
3097 * No interrupts are pending, so we don't need to be explicitly notified.
3098 * There are enough world switches for detecting pending interrupts.
3099 */
3100 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
3101 }
3102 }
3103 }
3104
3105#ifdef LOG_ENABLED
3106 if ( pVM->hm.s.fNestedPaging
3107 || pVM->hm.s.vmx.fVpid)
3108 {
3109 PHMGLOBLCPUINFO pCpu = HMR0GetCurrentCpu();
3110 if (pVCpu->hm.s.idLastCpu != pCpu->idCpu)
3111 {
3112 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hm.s.idLastCpu,
3113 pCpu->idCpu));
3114 }
3115 else if (pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
3116 {
3117 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hm.s.cTlbFlushes,
3118 pCpu->cTlbFlushes));
3119 }
3120 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
3121 LogFlow(("Manual TLB flush\n"));
3122 }
3123#endif
3124#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3125 PGMRZDynMapFlushAutoSet(pVCpu);
3126#endif
3127
3128 /*
3129 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3!
3130 * (until the actual world switch)
3131 */
3132#ifdef VBOX_STRICT
3133 idCpuCheck = RTMpCpuId();
3134#endif
3135#ifdef LOG_ENABLED
3136 VMMR0LogFlushDisable(pVCpu);
3137#endif
3138
3139 /*
3140 * Save the host state first.
3141 */
3142 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
3143 {
3144 rc = VMXR0SaveHostState(pVM, pVCpu);
3145 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3146 {
3147 VMMR0LogFlushEnable(pVCpu);
3148 goto end;
3149 }
3150 }
3151
3152 /*
3153 * Load the guest state.
3154 */
3155 if (!pVCpu->hm.s.fContextUseFlags)
3156 {
3157 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
3158 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal);
3159 }
3160 else
3161 {
3162 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
3163 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3164 {
3165 VMMR0LogFlushEnable(pVCpu);
3166 goto end;
3167 }
3168 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
3169 }
3170
3171#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3172 /*
3173 * Disable interrupts to make sure a poke will interrupt execution.
3174 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
3175 */
3176 uOldEFlags = ASMIntDisableFlags();
3177 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3178#endif
3179
3180 /* Non-register state Guest Context */
3181 /** @todo change me according to cpu state */
3182 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
3183 AssertRC(rc2);
3184
3185 /* Set TLB flush state as checked until we return from the world switch. */
3186 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);
3187 /* Deal with tagged TLB setup and invalidation. */
3188 pVM->hm.s.vmx.pfnSetupTaggedTlb(pVM, pVCpu);
3189
3190 /*
3191 * Manual save and restore:
3192 * - General purpose registers except RIP, RSP
3193 *
3194 * Trashed:
3195 * - CR2 (we don't care)
3196 * - LDTR (reset to 0)
3197 * - DRx (presumably not changed at all)
3198 * - DR7 (reset to 0x400)
3199 * - EFLAGS (reset to RT_BIT(1); not relevant)
3200 */
3201
3202 /* All done! Let's start VM execution. */
3203 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
3204 Assert(idCpuCheck == RTMpCpuId());
3205
3206#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3207 pVCpu->hm.s.vmx.VMCSCache.cResume = cResume;
3208 pVCpu->hm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
3209#endif
3210
3211 /*
3212 * Save the current TPR value in the LSTAR MSR so our patches can access it.
3213 */
3214 if (pVM->hm.s.fTPRPatchingActive)
3215 {
3216 Assert(pVM->hm.s.fTPRPatchingActive);
3217 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3218 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
3219 }
3220
3221 TMNotifyStartOfExecution(pVCpu);
3222
3223#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3224 /*
3225 * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that
3226 * RDTSCPs (that don't cause exits) reads the guest MSR. See @bugref{3324}.
3227 */
3228 if ( (pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3229 && !(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3230 {
3231 pVCpu->hm.s.u64HostTSCAux = ASMRdMsr(MSR_K8_TSC_AUX);
3232 uint64_t u64GuestTSCAux = 0;
3233 rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTSCAux);
3234 AssertRC(rc2);
3235 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTSCAux);
3236 }
3237#endif
3238
3239#ifdef VBOX_WITH_KERNEL_USING_XMM
3240 rc = hmR0VMXStartVMWrapXMM(pVCpu->hm.s.fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM);
3241#else
3242 rc = pVCpu->hm.s.vmx.pfnStartVM(pVCpu->hm.s.fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu);
3243#endif
3244 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false);
3245 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits);
3246
3247 /* Possibly the last TSC value seen by the guest (too high) (only when we're in TSC offset mode). */
3248 if (!(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3249 {
3250#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3251 /* Restore host's TSC_AUX. */
3252 if (pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3253 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTSCAux);
3254#endif
3255
3256 TMCpuTickSetLastSeen(pVCpu,
3257 ASMReadTSC() + pVCpu->hm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
3258 }
3259
3260 TMNotifyEndOfExecution(pVCpu);
3261 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
3262 Assert(!(ASMGetFlags() & X86_EFL_IF));
3263
3264 /*
3265 * Restore the host LSTAR MSR if the guest could have changed it.
3266 */
3267 if (pVM->hm.s.fTPRPatchingActive)
3268 {
3269 Assert(pVM->hm.s.fTPRPatchingActive);
3270 pVCpu->hm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3271 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
3272 }
3273
3274 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
3275 ASMSetFlags(uOldEFlags);
3276#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3277 uOldEFlags = ~(RTCCUINTREG)0;
3278#endif
3279
3280 AssertMsg(!pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries=%d\n",
3281 pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries));
3282
3283 /* In case we execute a goto ResumeExecution later on. */
3284 pVCpu->hm.s.fResumeVM = true;
3285 pVCpu->hm.s.fForceTLBFlush = false;
3286
3287 /*
3288 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3289 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
3290 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3291 */
3292
3293 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3294 {
3295 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
3296 VMMR0LogFlushEnable(pVCpu);
3297 goto end;
3298 }
3299
3300 /* Success. Query the guest state and figure out what has happened. */
3301
3302 /* Investigate why there was a VM-exit. */
3303 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3304 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
3305
3306 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
3307 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3308 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
3309 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
3310 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
3311 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
3312 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
3313 rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
3314 AssertRC(rc2);
3315
3316 /*
3317 * Sync back the guest state.
3318 */
3319 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
3320 AssertRC(rc2);
3321
3322 /* Note! NOW IT'S SAFE FOR LOGGING! */
3323 VMMR0LogFlushEnable(pVCpu);
3324 Log2(("Raw exit reason %08x\n", exitReason));
3325#if ARCH_BITS == 64 /* for the time being */
3326 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
3327#endif
3328
3329 /*
3330 * Check if an injected event was interrupted prematurely.
3331 */
3332 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
3333 AssertRC(rc2);
3334 pVCpu->hm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
3335 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hm.s.Event.intInfo)
3336 /* Ignore 'int xx' as they'll be restarted anyway. */
3337 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
3338 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3339 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3340 {
3341 Assert(!pVCpu->hm.s.Event.fPending);
3342 pVCpu->hm.s.Event.fPending = true;
3343 /* Error code present? */
3344 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.intInfo))
3345 {
3346 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
3347 AssertRC(rc2);
3348 pVCpu->hm.s.Event.errCode = val;
3349 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n",
3350 pVCpu->hm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
3351 }
3352 else
3353 {
3354 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hm.s.Event.intInfo,
3355 (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3356 pVCpu->hm.s.Event.errCode = 0;
3357 }
3358 }
3359#ifdef VBOX_STRICT
3360 else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hm.s.Event.intInfo)
3361 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3362 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3363 {
3364 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n",
3365 pVCpu->hm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3366 }
3367
3368 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
3369 HMDumpRegs(pVM, pVCpu, pCtx);
3370#endif
3371
3372 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3373 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3374 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3375 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3376 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3377
3378 /*
3379 * Sync back the TPR if it was changed.
3380 */
3381 if ( fSetupTPRCaching
3382 && u8LastTPR != pVCpu->hm.s.vmx.pbVAPIC[0x80])
3383 {
3384 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVAPIC[0x80]);
3385 AssertRC(rc2);
3386 }
3387
3388#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3389 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3390 exitReason, (uint64_t)exitQualification, pCtx->cs.Sel, pCtx->rip, (uint64_t)intInfo);
3391#endif
3392 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
3393
3394 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3395 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3396 switch (exitReason)
3397 {
3398 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3399 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3400 {
3401 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3402
3403 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3404 {
3405 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
3406 /* External interrupt; leave to allow it to be dispatched again. */
3407 rc = VINF_EM_RAW_INTERRUPT;
3408 break;
3409 }
3410 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub3, y3);
3411 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3412 {
3413 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3414 /* External interrupt; leave to allow it to be dispatched again. */
3415 rc = VINF_EM_RAW_INTERRUPT;
3416 break;
3417
3418 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
3419 AssertFailed(); /* can't come here; fails the first check. */
3420 break;
3421
3422 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
3423 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
3424 Assert(vector == 1 || vector == 3 || vector == 4);
3425 /* no break */
3426 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
3427 Log2(("Hardware/software interrupt %d\n", vector));
3428 switch (vector)
3429 {
3430 case X86_XCPT_NM:
3431 {
3432 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3433
3434 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3435 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3436 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3437 if (rc == VINF_SUCCESS)
3438 {
3439 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3440
3441 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
3442
3443 /* Continue execution. */
3444 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
3445
3446 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3447 goto ResumeExecution;
3448 }
3449
3450 Log(("Forward #NM fault to the guest\n"));
3451 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
3452 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3453 cbInstr, 0);
3454 AssertRC(rc2);
3455 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3456 goto ResumeExecution;
3457 }
3458
3459 case X86_XCPT_PF: /* Page fault */
3460 {
3461#ifdef VBOX_ALWAYS_TRAP_PF
3462 if (pVM->hm.s.fNestedPaging)
3463 {
3464 /*
3465 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3466 */
3467 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification,
3468 errCode, (RTGCPTR)pCtx->rsp));
3469
3470 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3471
3472 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3473
3474 /* Now we must update CR2. */
3475 pCtx->cr2 = exitQualification;
3476 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3477 cbInstr, errCode);
3478 AssertRC(rc2);
3479
3480 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3481 goto ResumeExecution;
3482 }
3483#else
3484 Assert(!pVM->hm.s.fNestedPaging);
3485#endif
3486
3487#ifdef VBOX_HM_WITH_GUEST_PATCHING
3488 /* Shortcut for APIC TPR reads and writes; 32 bits guests only */
3489 if ( pVM->hm.s.fTRPPatchingAllowed
3490 && pVM->hm.s.pGuestPatchMem
3491 && (exitQualification & 0xfff) == 0x080
3492 && !(errCode & X86_TRAP_PF_P) /* not present */
3493 && CPUMGetGuestCPL(pVCpu) == 0
3494 && !CPUMIsGuestInLongModeEx(pCtx)
3495 && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
3496 {
3497 RTGCPHYS GCPhysApicBase, GCPhys;
3498 GCPhysApicBase = pCtx->msrApicBase;
3499 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3500
3501 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3502 if ( rc == VINF_SUCCESS
3503 && GCPhys == GCPhysApicBase)
3504 {
3505 /* Only attempt to patch the instruction once. */
3506 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3507 if (!pPatch)
3508 {
3509 rc = VINF_EM_HM_PATCH_TPR_INSTR;
3510 break;
3511 }
3512 }
3513 }
3514#endif
3515
3516 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3517 /* Exit qualification contains the linear address of the page fault. */
3518 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3519 TRPMSetErrorCode(pVCpu, errCode);
3520 TRPMSetFaultAddress(pVCpu, exitQualification);
3521
3522 /* Shortcut for APIC TPR reads and writes. */
3523 if ( (exitQualification & 0xfff) == 0x080
3524 && !(errCode & X86_TRAP_PF_P) /* not present */
3525 && fSetupTPRCaching
3526 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3527 {
3528 RTGCPHYS GCPhysApicBase, GCPhys;
3529 GCPhysApicBase = pCtx->msrApicBase;
3530 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3531
3532 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3533 if ( rc == VINF_SUCCESS
3534 && GCPhys == GCPhysApicBase)
3535 {
3536 Log(("Enable VT-x virtual APIC access filtering\n"));
3537 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
3538 AssertRC(rc2);
3539 }
3540 }
3541
3542 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3543 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3544 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3545
3546 if (rc == VINF_SUCCESS)
3547 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3548 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification ,errCode));
3549 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
3550
3551 TRPMResetTrap(pVCpu);
3552 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3553 goto ResumeExecution;
3554 }
3555 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3556 {
3557 /*
3558 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3559 */
3560 Log2(("Forward page fault to the guest\n"));
3561
3562 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3563 /* The error code might have been changed. */
3564 errCode = TRPMGetErrorCode(pVCpu);
3565
3566 TRPMResetTrap(pVCpu);
3567
3568 /* Now we must update CR2. */
3569 pCtx->cr2 = exitQualification;
3570 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3571 cbInstr, errCode);
3572 AssertRC(rc2);
3573
3574 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3575 goto ResumeExecution;
3576 }
3577#ifdef VBOX_STRICT
3578 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3579 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3580#endif
3581 /* Need to go back to the recompiler to emulate the instruction. */
3582 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
3583 TRPMResetTrap(pVCpu);
3584
3585 /* If event delivery caused the #PF (shadow or not), tell TRPM. */
3586 hmR0VmxCheckPendingEvent(pVCpu);
3587 break;
3588 }
3589
3590 case X86_XCPT_MF: /* Floating point exception. */
3591 {
3592 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
3593 if (!(pCtx->cr0 & X86_CR0_NE))
3594 {
3595 /* old style FPU error reporting needs some extra work. */
3596 /** @todo don't fall back to the recompiler, but do it manually. */
3597 rc = VINF_EM_RAW_EMULATE_INSTR;
3598 break;
3599 }
3600 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3601 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3602 cbInstr, errCode);
3603 AssertRC(rc2);
3604
3605 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3606 goto ResumeExecution;
3607 }
3608
3609 case X86_XCPT_DB: /* Debug exception. */
3610 {
3611 uint64_t uDR6;
3612
3613 /*
3614 * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3615 *
3616 * Exit qualification bits:
3617 * 3:0 B0-B3 which breakpoint condition was met
3618 * 12:4 Reserved (0)
3619 * 13 BD - debug register access detected
3620 * 14 BS - single step execution or branch taken
3621 * 63:15 Reserved (0)
3622 */
3623 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
3624
3625 /* Note that we don't support guest and host-initiated debugging at the same time. */
3626
3627 uDR6 = X86_DR6_INIT_VAL;
3628 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3629 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3630 if (rc == VINF_EM_RAW_GUEST_TRAP)
3631 {
3632 /* Update DR6 here. */
3633 pCtx->dr[6] = uDR6;
3634
3635 /* Resync DR6 if the debug state is active. */
3636 if (CPUMIsGuestDebugStateActive(pVCpu))
3637 ASMSetDR6(pCtx->dr[6]);
3638
3639 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
3640 pCtx->dr[7] &= ~X86_DR7_GD;
3641
3642 /* Paranoia. */
3643 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3644 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3645 pCtx->dr[7] |= 0x400; /* must be one */
3646
3647 /* Resync DR7 */
3648 rc2 = VMXWriteVMCS64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
3649 AssertRC(rc2);
3650
3651 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip,
3652 exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3653 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3654 cbInstr, errCode);
3655 AssertRC(rc2);
3656
3657 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3658 goto ResumeExecution;
3659 }
3660 /* Return to ring 3 to deal with the debug exit code. */
3661 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3662 break;
3663 }
3664
3665 case X86_XCPT_BP: /* Breakpoint. */
3666 {
3667 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
3668 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3669 if (rc == VINF_EM_RAW_GUEST_TRAP)
3670 {
3671 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs.Sel, pCtx->rip));
3672 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3673 cbInstr, errCode);
3674 AssertRC(rc2);
3675 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3676 goto ResumeExecution;
3677 }
3678 if (rc == VINF_SUCCESS)
3679 {
3680 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3681 goto ResumeExecution;
3682 }
3683 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3684 break;
3685 }
3686
3687 case X86_XCPT_GP: /* General protection failure exception. */
3688 {
3689 uint32_t cbOp;
3690 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
3691
3692 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
3693#ifdef VBOX_STRICT
3694 if ( !CPUMIsGuestInRealModeEx(pCtx)
3695 || !pVM->hm.s.vmx.pRealModeTSS)
3696 {
3697 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, errCode));
3698 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3699 cbInstr, errCode);
3700 AssertRC(rc2);
3701 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3702 goto ResumeExecution;
3703 }
3704#endif
3705 Assert(CPUMIsGuestInRealModeEx(pCtx));
3706
3707 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3708
3709 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
3710 if (RT_SUCCESS(rc2))
3711 {
3712 bool fUpdateRIP = true;
3713
3714 rc = VINF_SUCCESS;
3715 Assert(cbOp == pDis->cbInstr);
3716 switch (pDis->pCurInstr->uOpcode)
3717 {
3718 case OP_CLI:
3719 pCtx->eflags.Bits.u1IF = 0;
3720 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli);
3721 break;
3722
3723 case OP_STI:
3724 pCtx->eflags.Bits.u1IF = 1;
3725 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->cbInstr);
3726 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3727 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE,
3728 VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3729 AssertRC(rc2);
3730 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti);
3731 break;
3732
3733 case OP_HLT:
3734 fUpdateRIP = false;
3735 rc = VINF_EM_HALT;
3736 pCtx->rip += pDis->cbInstr;
3737 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
3738 break;
3739
3740 case OP_POPF:
3741 {
3742 RTGCPTR GCPtrStack;
3743 uint32_t cbParm;
3744 uint32_t uMask;
3745 X86EFLAGS eflags;
3746
3747 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3748 {
3749 cbParm = 4;
3750 uMask = 0xffffffff;
3751 }
3752 else
3753 {
3754 cbParm = 2;
3755 uMask = 0xffff;
3756 }
3757
3758 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3759 if (RT_FAILURE(rc2))
3760 {
3761 rc = VERR_EM_INTERPRETER;
3762 break;
3763 }
3764 eflags.u = 0;
3765 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3766 if (RT_FAILURE(rc2))
3767 {
3768 rc = VERR_EM_INTERPRETER;
3769 break;
3770 }
3771 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3772 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3773 | (eflags.u & X86_EFL_POPF_BITS & uMask);
3774 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3775 pCtx->eflags.Bits.u1RF = 0;
3776 pCtx->esp += cbParm;
3777 pCtx->esp &= uMask;
3778
3779 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf);
3780 break;
3781 }
3782
3783 case OP_PUSHF:
3784 {
3785 RTGCPTR GCPtrStack;
3786 uint32_t cbParm;
3787 uint32_t uMask;
3788 X86EFLAGS eflags;
3789
3790 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3791 {
3792 cbParm = 4;
3793 uMask = 0xffffffff;
3794 }
3795 else
3796 {
3797 cbParm = 2;
3798 uMask = 0xffff;
3799 }
3800
3801 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0,
3802 &GCPtrStack);
3803 if (RT_FAILURE(rc2))
3804 {
3805 rc = VERR_EM_INTERPRETER;
3806 break;
3807 }
3808 eflags = pCtx->eflags;
3809 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3810 eflags.Bits.u1RF = 0;
3811 eflags.Bits.u1VM = 0;
3812
3813 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3814 if (RT_FAILURE(rc2))
3815 {
3816 rc = VERR_EM_INTERPRETER;
3817 break;
3818 }
3819 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3820 pCtx->esp -= cbParm;
3821 pCtx->esp &= uMask;
3822 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf);
3823 break;
3824 }
3825
3826 case OP_IRET:
3827 {
3828 RTGCPTR GCPtrStack;
3829 uint32_t uMask = 0xffff;
3830 uint16_t aIretFrame[3];
3831
3832 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
3833 {
3834 rc = VERR_EM_INTERPRETER;
3835 break;
3836 }
3837
3838 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3839 if (RT_FAILURE(rc2))
3840 {
3841 rc = VERR_EM_INTERPRETER;
3842 break;
3843 }
3844 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3845 if (RT_FAILURE(rc2))
3846 {
3847 rc = VERR_EM_INTERPRETER;
3848 break;
3849 }
3850 pCtx->ip = aIretFrame[0];
3851 pCtx->cs.Sel = aIretFrame[1];
3852 pCtx->cs.ValidSel = aIretFrame[1];
3853 pCtx->cs.u64Base = (uint32_t)pCtx->cs.Sel << 4;
3854 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3855 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3856 pCtx->sp += sizeof(aIretFrame);
3857
3858 LogFlow(("iret to %04x:%x\n", pCtx->cs.Sel, pCtx->ip));
3859 fUpdateRIP = false;
3860 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret);
3861 break;
3862 }
3863
3864 case OP_INT:
3865 {
3866 uint32_t intInfo2;
3867
3868 LogFlow(("Realmode: INT %x\n", pDis->Param1.uValue & 0xff));
3869 intInfo2 = pDis->Param1.uValue & 0xff;
3870 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3871 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3872
3873 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3874 AssertRC(VBOXSTRICTRC_VAL(rc));
3875 fUpdateRIP = false;
3876 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3877 break;
3878 }
3879
3880 case OP_INTO:
3881 {
3882 if (pCtx->eflags.Bits.u1OF)
3883 {
3884 uint32_t intInfo2;
3885
3886 LogFlow(("Realmode: INTO\n"));
3887 intInfo2 = X86_XCPT_OF;
3888 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3889 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3890
3891 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3892 AssertRC(VBOXSTRICTRC_VAL(rc));
3893 fUpdateRIP = false;
3894 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3895 }
3896 break;
3897 }
3898
3899 case OP_INT3:
3900 {
3901 uint32_t intInfo2;
3902
3903 LogFlow(("Realmode: INT 3\n"));
3904 intInfo2 = 3;
3905 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3906 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3907
3908 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3909 AssertRC(VBOXSTRICTRC_VAL(rc));
3910 fUpdateRIP = false;
3911 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3912 break;
3913 }
3914
3915 default:
3916 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
3917 fUpdateRIP = false;
3918 break;
3919 }
3920
3921 if (rc == VINF_SUCCESS)
3922 {
3923 if (fUpdateRIP)
3924 pCtx->rip += cbOp; /* Move on to the next instruction. */
3925
3926 /*
3927 * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the
3928 * whole context to be done with it.
3929 */
3930 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
3931
3932 /* Only resume if successful. */
3933 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3934 goto ResumeExecution;
3935 }
3936 }
3937 else
3938 rc = VERR_EM_INTERPRETER;
3939
3940 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
3941 ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
3942 break;
3943 }
3944
3945#ifdef VBOX_STRICT
3946 case X86_XCPT_XF: /* SIMD exception. */
3947 case X86_XCPT_DE: /* Divide error. */
3948 case X86_XCPT_UD: /* Unknown opcode exception. */
3949 case X86_XCPT_SS: /* Stack segment exception. */
3950 case X86_XCPT_NP: /* Segment not present exception. */
3951 {
3952 switch (vector)
3953 {
3954 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); break;
3955 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); break;
3956 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); break;
3957 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); break;
3958 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); break;
3959 }
3960
3961 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3962 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3963 cbInstr, errCode);
3964 AssertRC(rc2);
3965
3966 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3967 goto ResumeExecution;
3968 }
3969#endif
3970 default:
3971 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
3972 if ( CPUMIsGuestInRealModeEx(pCtx)
3973 && pVM->hm.s.vmx.pRealModeTSS)
3974 {
3975 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs.Sel, pCtx->eip, errCode));
3976 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3977 cbInstr, errCode);
3978 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
3979
3980 /* Go back to ring-3 in case of a triple fault. */
3981 if ( vector == X86_XCPT_DF
3982 && rc == VINF_EM_RESET)
3983 {
3984 break;
3985 }
3986
3987 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3988 goto ResumeExecution;
3989 }
3990 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3991 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3992 break;
3993 } /* switch (vector) */
3994
3995 break;
3996
3997 default:
3998 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3999 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
4000 break;
4001 }
4002
4003 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4004 break;
4005 }
4006
4007 /*
4008 * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed
4009 * by the configuration of the EPT paging structures.
4010 */
4011 case VMX_EXIT_EPT_VIOLATION:
4012 {
4013 RTGCPHYS GCPhys;
4014
4015 Assert(pVM->hm.s.fNestedPaging);
4016
4017 rc2 = VMXReadVMCS64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
4018 AssertRC(rc2);
4019 Assert(((exitQualification >> 7) & 3) != 2);
4020
4021 /* Determine the kind of violation. */
4022 errCode = 0;
4023 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
4024 errCode |= X86_TRAP_PF_ID;
4025
4026 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
4027 errCode |= X86_TRAP_PF_RW;
4028
4029 /* If the page is present, then it's a page level protection fault. */
4030 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
4031 errCode |= X86_TRAP_PF_P;
4032 else
4033 {
4034 /* Shortcut for APIC TPR reads and writes. */
4035 if ( (GCPhys & 0xfff) == 0x080
4036 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4037 && fSetupTPRCaching
4038 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4039 {
4040 RTGCPHYS GCPhysApicBase;
4041 GCPhysApicBase = pCtx->msrApicBase;
4042 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4043 if (GCPhys == GCPhysApicBase + 0x80)
4044 {
4045 Log(("Enable VT-x virtual APIC access filtering\n"));
4046 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
4047 AssertRC(rc2);
4048 }
4049 }
4050 }
4051 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
4052
4053 /* GCPhys contains the guest physical address of the page fault. */
4054 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
4055 TRPMSetErrorCode(pVCpu, errCode);
4056 TRPMSetFaultAddress(pVCpu, GCPhys);
4057
4058 /* Handle the pagefault trap for the nested shadow table. */
4059 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
4060
4061 /*
4062 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
4063 */
4064 if ( rc == VINF_SUCCESS
4065 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4066 || rc == VERR_PAGE_NOT_PRESENT)
4067 {
4068 /* We've successfully synced our shadow pages, so let's just continue execution. */
4069 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification , errCode));
4070 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
4071
4072 TRPMResetTrap(pVCpu);
4073 goto ResumeExecution;
4074 }
4075
4076#ifdef VBOX_STRICT
4077 if (rc != VINF_EM_RAW_EMULATE_INSTR)
4078 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4079#endif
4080 /* Need to go back to the recompiler to emulate the instruction. */
4081 TRPMResetTrap(pVCpu);
4082 break;
4083 }
4084
4085 case VMX_EXIT_EPT_MISCONFIG:
4086 {
4087 RTGCPHYS GCPhys;
4088
4089 Assert(pVM->hm.s.fNestedPaging);
4090
4091 rc2 = VMXReadVMCS64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
4092 AssertRC(rc2);
4093 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
4094
4095 /* Shortcut for APIC TPR reads and writes. */
4096 if ( (GCPhys & 0xfff) == 0x080
4097 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4098 && fSetupTPRCaching
4099 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4100 {
4101 RTGCPHYS GCPhysApicBase = pCtx->msrApicBase;
4102 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4103 if (GCPhys == GCPhysApicBase + 0x80)
4104 {
4105 Log(("Enable VT-x virtual APIC access filtering\n"));
4106 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
4107 AssertRC(rc2);
4108 }
4109 }
4110
4111 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
4112
4113 /*
4114 * If we succeed, resume execution.
4115 * Or, if we fail to interpret the instruction because we couldn't get the guest physical address
4116 * of the page containing the instruction via the guest's page tables (we would invalidate the guest page
4117 * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this
4118 * weird case. See @bugref{6043}.
4119 */
4120 if ( rc == VINF_SUCCESS
4121 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4122 || rc == VERR_PAGE_NOT_PRESENT)
4123 {
4124 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
4125 goto ResumeExecution;
4126 }
4127
4128 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4129 break;
4130 }
4131
4132 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4133 /* Clear VM-exit on IF=1 change. */
4134 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip,
4135 VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
4136 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
4137 rc2 = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
4138 AssertRC(rc2);
4139 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIrqWindow);
4140 goto ResumeExecution; /* we check for pending guest interrupts there */
4141
4142 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
4143 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
4144 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
4145 /* Skip instruction and continue directly. */
4146 pCtx->rip += cbInstr;
4147 /* Continue execution.*/
4148 goto ResumeExecution;
4149
4150 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4151 {
4152 Log2(("VMX: Cpuid %x\n", pCtx->eax));
4153 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
4154 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4155 if (rc == VINF_SUCCESS)
4156 {
4157 /* Update EIP and continue execution. */
4158 Assert(cbInstr == 2);
4159 pCtx->rip += cbInstr;
4160 goto ResumeExecution;
4161 }
4162 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4163 rc = VINF_EM_RAW_EMULATE_INSTR;
4164 break;
4165 }
4166
4167 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4168 {
4169 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
4170 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
4171 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4172 if (rc == VINF_SUCCESS)
4173 {
4174 /* Update EIP and continue execution. */
4175 Assert(cbInstr == 2);
4176 pCtx->rip += cbInstr;
4177 goto ResumeExecution;
4178 }
4179 rc = VINF_EM_RAW_EMULATE_INSTR;
4180 break;
4181 }
4182
4183 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4184 {
4185 Log2(("VMX: Rdtsc\n"));
4186 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
4187 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4188 if (rc == VINF_SUCCESS)
4189 {
4190 /* Update EIP and continue execution. */
4191 Assert(cbInstr == 2);
4192 pCtx->rip += cbInstr;
4193 goto ResumeExecution;
4194 }
4195 rc = VINF_EM_RAW_EMULATE_INSTR;
4196 break;
4197 }
4198
4199 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4200 {
4201 Log2(("VMX: Rdtscp\n"));
4202 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtscp);
4203 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
4204 if (rc == VINF_SUCCESS)
4205 {
4206 /* Update EIP and continue execution. */
4207 Assert(cbInstr == 3);
4208 pCtx->rip += cbInstr;
4209 goto ResumeExecution;
4210 }
4211 rc = VINF_EM_RAW_EMULATE_INSTR;
4212 break;
4213 }
4214
4215 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4216 {
4217 Log2(("VMX: invlpg\n"));
4218 Assert(!pVM->hm.s.fNestedPaging);
4219
4220 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
4221 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
4222 if (rc == VINF_SUCCESS)
4223 {
4224 /* Update EIP and continue execution. */
4225 pCtx->rip += cbInstr;
4226 goto ResumeExecution;
4227 }
4228 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
4229 break;
4230 }
4231
4232 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4233 {
4234 Log2(("VMX: monitor\n"));
4235
4236 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
4237 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4238 if (rc == VINF_SUCCESS)
4239 {
4240 /* Update EIP and continue execution. */
4241 pCtx->rip += cbInstr;
4242 goto ResumeExecution;
4243 }
4244 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4245 break;
4246 }
4247
4248 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4249 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
4250 if ( pVM->hm.s.fTPRPatchingActive
4251 && pCtx->ecx == MSR_K8_LSTAR)
4252 {
4253 Assert(!CPUMIsGuestInLongModeEx(pCtx));
4254 if ((pCtx->eax & 0xff) != u8LastTPR)
4255 {
4256 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
4257
4258 /* Our patch code uses LSTAR for TPR caching. */
4259 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
4260 AssertRC(rc2);
4261 }
4262
4263 /* Skip the instruction and continue. */
4264 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
4265
4266 /* Only resume if successful. */
4267 goto ResumeExecution;
4268 }
4269 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_MSR;
4270 /* no break */
4271 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4272 {
4273 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hm.s.StatExitRdmsr : &pVCpu->hm.s.StatExitWrmsr);
4274
4275 /*
4276 * Note: The Intel spec. claims there's an REX version of RDMSR that's slightly different,
4277 * so we play safe by completely disassembling the instruction.
4278 */
4279 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
4280 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
4281 if (rc == VINF_SUCCESS)
4282 {
4283 /* EIP has been updated already. */
4284 /* Only resume if successful. */
4285 goto ResumeExecution;
4286 }
4287 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n",
4288 (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
4289 break;
4290 }
4291
4292 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4293 {
4294 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub2, y2);
4295
4296 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
4297 {
4298 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
4299 {
4300 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
4301 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4302 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4303 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
4304 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
4305 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
4306 {
4307 case 0:
4308 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0 | HM_CHANGED_GUEST_CR3;
4309 break;
4310 case 2:
4311 break;
4312 case 3:
4313 Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
4314 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR3;
4315 break;
4316 case 4:
4317 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR4;
4318 break;
4319 case 8:
4320 /* CR8 contains the APIC TPR */
4321 Assert(!(pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1
4322 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4323 break;
4324
4325 default:
4326 AssertFailed();
4327 break;
4328 }
4329 break;
4330 }
4331
4332 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
4333 {
4334 Log2(("VMX: mov x, crx\n"));
4335 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4336
4337 Assert( !pVM->hm.s.fNestedPaging
4338 || !CPUMIsGuestInPagedProtectedModeEx(pCtx)
4339 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3);
4340
4341 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
4342 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8
4343 || !(pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4344
4345 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4346 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
4347 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
4348 break;
4349 }
4350
4351 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
4352 {
4353 Log2(("VMX: clts\n"));
4354 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
4355 rc = EMInterpretCLTS(pVM, pVCpu);
4356 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
4357 break;
4358 }
4359
4360 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
4361 {
4362 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
4363 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLMSW);
4364 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
4365 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
4366 break;
4367 }
4368 }
4369
4370 /* Update EIP if no error occurred. */
4371 if (RT_SUCCESS(rc))
4372 pCtx->rip += cbInstr;
4373
4374 if (rc == VINF_SUCCESS)
4375 {
4376 /* Only resume if successful. */
4377 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub2, y2);
4378 goto ResumeExecution;
4379 }
4380 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
4381 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub2, y2);
4382 break;
4383 }
4384
4385 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4386 {
4387 if ( !DBGFIsStepping(pVCpu)
4388 && !CPUMIsHyperDebugStateActive(pVCpu))
4389 {
4390 /* Disable DRx move intercepts. */
4391 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4392 rc2 = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
4393 AssertRC(rc2);
4394
4395 /* Save the host and load the guest debug state. */
4396 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
4397 AssertRC(rc2);
4398
4399#ifdef LOG_ENABLED
4400 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4401 {
4402 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4403 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4404 }
4405 else
4406 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
4407#endif
4408
4409#ifdef VBOX_WITH_STATISTICS
4410 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
4411 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4412 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
4413 else
4414 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
4415#endif
4416
4417 goto ResumeExecution;
4418 }
4419
4420 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first
4421 * time and restore DRx registers afterwards */
4422 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4423 {
4424 Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4425 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4426 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
4427 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4428 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4429 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4430 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
4431 Log2(("DR7=%08x\n", pCtx->dr[7]));
4432 }
4433 else
4434 {
4435 Log2(("VMX: mov x, DRx\n"));
4436 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
4437 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4438 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4439 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4440 }
4441 /* Update EIP if no error occurred. */
4442 if (RT_SUCCESS(rc))
4443 pCtx->rip += cbInstr;
4444
4445 if (rc == VINF_SUCCESS)
4446 {
4447 /* Only resume if successful. */
4448 goto ResumeExecution;
4449 }
4450 Assert(rc == VERR_EM_INTERPRETER);
4451 break;
4452 }
4453
4454 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
4455 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4456 {
4457 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub1, y1);
4458 uint32_t uPort;
4459 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4460 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4461
4462 /** @todo necessary to make the distinction? */
4463 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4464 uPort = pCtx->edx & 0xffff;
4465 else
4466 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
4467
4468 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */
4469 {
4470 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4471 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4472 break;
4473 }
4474
4475 uint32_t cbSize = g_aIOSize[uIOWidth];
4476 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4477 {
4478 /* ins/outs */
4479 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
4480
4481 /* Disassemble manually to deal with segment prefixes. */
4482 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
4483 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4484 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
4485 if (RT_SUCCESS(rc))
4486 {
4487 if (fIOWrite)
4488 {
4489 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4490 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
4491 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4492 }
4493 else
4494 {
4495 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4496 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
4497 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4498 }
4499 }
4500 else
4501 rc = VINF_EM_RAW_EMULATE_INSTR;
4502 }
4503 else
4504 {
4505 /* Normal in/out */
4506 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
4507
4508 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4509
4510 if (fIOWrite)
4511 {
4512 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
4513 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4514 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4515 HMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4516 }
4517 else
4518 {
4519 uint32_t u32Val = 0;
4520
4521 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
4522 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4523 if (IOM_SUCCESS(rc))
4524 {
4525 /* Write back to the EAX register. */
4526 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4527 }
4528 else
4529 if (rc == VINF_IOM_R3_IOPORT_READ)
4530 HMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4531 }
4532 }
4533
4534 /*
4535 * Handled the I/O return codes.
4536 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4537 */
4538 if (IOM_SUCCESS(rc))
4539 {
4540 /* Update EIP and continue execution. */
4541 pCtx->rip += cbInstr;
4542 if (RT_LIKELY(rc == VINF_SUCCESS))
4543 {
4544 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4545 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4546 {
4547 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
4548 for (unsigned i = 0; i < 4; i++)
4549 {
4550 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
4551
4552 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4553 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4554 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4555 {
4556 uint64_t uDR6;
4557
4558 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4559
4560 uDR6 = ASMGetDR6();
4561
4562 /* Clear all breakpoint status flags and set the one we just hit. */
4563 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4564 uDR6 |= (uint64_t)RT_BIT(i);
4565
4566 /*
4567 * Note: AMD64 Architecture Programmer's Manual 13.1:
4568 * Bits 15:13 of the DR6 register is never cleared by the processor and must
4569 * be cleared by software after the contents have been read.
4570 */
4571 ASMSetDR6(uDR6);
4572
4573 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
4574 pCtx->dr[7] &= ~X86_DR7_GD;
4575
4576 /* Paranoia. */
4577 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4578 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4579 pCtx->dr[7] |= 0x400; /* must be one */
4580
4581 /* Resync DR7 */
4582 rc2 = VMXWriteVMCS64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
4583 AssertRC(rc2);
4584
4585 /* Construct inject info. */
4586 intInfo = X86_XCPT_DB;
4587 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4588 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4589
4590 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4591 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4592 0 /* cbInstr */, 0 /* errCode */);
4593 AssertRC(rc2);
4594
4595 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4596 goto ResumeExecution;
4597 }
4598 }
4599 }
4600 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4601 goto ResumeExecution;
4602 }
4603 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4604 break;
4605 }
4606
4607#ifdef VBOX_STRICT
4608 if (rc == VINF_IOM_R3_IOPORT_READ)
4609 Assert(!fIOWrite);
4610 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4611 Assert(fIOWrite);
4612 else
4613 {
4614 AssertMsg( RT_FAILURE(rc)
4615 || rc == VINF_EM_RAW_EMULATE_INSTR
4616 || rc == VINF_EM_RAW_GUEST_TRAP
4617 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4618 }
4619#endif
4620 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4621 break;
4622 }
4623
4624 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4625 LogFlow(("VMX_EXIT_TPR\n"));
4626 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4627 goto ResumeExecution;
4628
4629 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4630 on the APIC-access page. */
4631 {
4632 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4633 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4634
4635 switch (uAccessType)
4636 {
4637 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4638 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4639 {
4640 RTGCPHYS GCPhys = pCtx->msrApicBase;
4641 GCPhys &= PAGE_BASE_GC_MASK;
4642 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4643
4644 LogFlow(("Apic access at %RGp\n", GCPhys));
4645 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
4646 CPUMCTX2CORE(pCtx), GCPhys);
4647 if (rc == VINF_SUCCESS)
4648 goto ResumeExecution; /* rip already updated */
4649 break;
4650 }
4651
4652 default:
4653 rc = VINF_EM_RAW_EMULATE_INSTR;
4654 break;
4655 }
4656 break;
4657 }
4658
4659 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4660 if (!TMTimerPollBool(pVM, pVCpu))
4661 goto ResumeExecution;
4662 rc = VINF_EM_RAW_TIMER_PENDING;
4663 break;
4664
4665 default:
4666 /* The rest is handled after syncing the entire CPU state. */
4667 break;
4668 }
4669
4670
4671 /*
4672 * Note: The guest state is not entirely synced back at this stage!
4673 */
4674
4675 /* Investigate why there was a VM-exit. (part 2) */
4676 switch (exitReason)
4677 {
4678 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
4679 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
4680 case VMX_EXIT_EPT_VIOLATION:
4681 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4682 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4683 /* Already handled above. */
4684 break;
4685
4686 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4687 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4688 break;
4689
4690 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4691 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4692 rc = VINF_EM_RAW_INTERRUPT;
4693 AssertFailed(); /* Can't happen. Yet. */
4694 break;
4695
4696 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
4697 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
4698 rc = VINF_EM_RAW_INTERRUPT;
4699 AssertFailed(); /* Can't happen afaik. */
4700 break;
4701
4702 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4703 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4704 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4705 && pVCpu->hm.s.Event.fPending)
4706 {
4707 /* Caused by an injected interrupt. */
4708 pVCpu->hm.s.Event.fPending = false;
4709
4710 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.intInfo)));
4711 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.intInfo));
4712 //@todo: Why do we assume this had to be a hardware interrupt? What about software interrupts or exceptions?
4713 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.intInfo), TRPM_HARDWARE_INT);
4714 AssertRC(rc2);
4715 }
4716 /* else Exceptions and software interrupts can just be restarted. */
4717 rc = VERR_EM_INTERPRETER;
4718 break;
4719
4720 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4721 /* Check if external interrupts are pending; if so, don't switch back. */
4722 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
4723 pCtx->rip++; /* skip hlt */
4724 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4725 goto ResumeExecution;
4726
4727 rc = VINF_EM_HALT;
4728 break;
4729
4730 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4731 Log2(("VMX: mwait\n"));
4732 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
4733 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4734 if ( rc == VINF_EM_HALT
4735 || rc == VINF_SUCCESS)
4736 {
4737 /* Update EIP and continue execution. */
4738 pCtx->rip += cbInstr;
4739
4740 /* Check if external interrupts are pending; if so, don't switch back. */
4741 if ( rc == VINF_SUCCESS
4742 || ( rc == VINF_EM_HALT
4743 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4744 )
4745 goto ResumeExecution;
4746 }
4747 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4748 break;
4749
4750 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4751 AssertFailed(); /* can't happen. */
4752 rc = VERR_EM_INTERPRETER;
4753 break;
4754
4755 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4756 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4757 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4758 rc2 = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
4759 AssertRC(rc2);
4760 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf);
4761#if 0
4762 DBGFDoneStepping(pVCpu);
4763#endif
4764 rc = VINF_EM_DBG_STOP;
4765 break;
4766
4767 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4768 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4769 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4770 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4771 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4772 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4773 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4774 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4775 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4776 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4777 /** @todo inject #UD immediately */
4778 rc = VERR_EM_INTERPRETER;
4779 break;
4780
4781 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4782 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4783 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4784 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4785 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4786 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4787 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4788 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4789 /* already handled above */
4790 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4791 || rc == VINF_EM_RAW_INTERRUPT
4792 || rc == VERR_EM_INTERPRETER
4793 || rc == VINF_EM_RAW_EMULATE_INSTR
4794 || rc == VINF_PGM_SYNC_CR3
4795 || rc == VINF_IOM_R3_IOPORT_READ
4796 || rc == VINF_IOM_R3_IOPORT_WRITE
4797 || rc == VINF_EM_RAW_GUEST_TRAP
4798 || rc == VINF_TRPM_XCPT_DISPATCHED
4799 || rc == VINF_EM_RESCHEDULE_REM,
4800 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4801 break;
4802
4803 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4804 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4805 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4806 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4807 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4808 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4809 on the APIC-access page. */
4810 {
4811 /*
4812 * If we decided to emulate them here, then we must sync the MSRs that could have been changed (sysenter, FS/GS base)
4813 */
4814 rc = VERR_EM_INTERPRETER;
4815 break;
4816 }
4817
4818 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4819 Assert(rc == VINF_EM_RAW_INTERRUPT);
4820 break;
4821
4822 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4823 {
4824#ifdef VBOX_STRICT
4825 RTCCUINTREG val2 = 0;
4826
4827 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4828
4829 VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val2);
4830 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4831
4832 VMXReadVMCS(VMX_VMCS_GUEST_CR0, &val2);
4833 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4834
4835 VMXReadVMCS(VMX_VMCS_GUEST_CR3, &val2);
4836 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4837
4838 VMXReadVMCS(VMX_VMCS_GUEST_CR4, &val2);
4839 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4840
4841 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4842 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4843
4844 VMX_LOG_SELREG(CS, "CS", val2);
4845 VMX_LOG_SELREG(DS, "DS", val2);
4846 VMX_LOG_SELREG(ES, "ES", val2);
4847 VMX_LOG_SELREG(FS, "FS", val2);
4848 VMX_LOG_SELREG(GS, "GS", val2);
4849 VMX_LOG_SELREG(SS, "SS", val2);
4850 VMX_LOG_SELREG(TR, "TR", val2);
4851 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4852
4853 VMXReadVMCS(VMX_VMCS_GUEST_GDTR_BASE, &val2);
4854 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4855 VMXReadVMCS(VMX_VMCS_GUEST_IDTR_BASE, &val2);
4856 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4857#endif /* VBOX_STRICT */
4858 rc = VERR_VMX_INVALID_GUEST_STATE;
4859 break;
4860 }
4861
4862 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4863 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4864 default:
4865 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4866 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4867 break;
4868
4869 }
4870
4871end:
4872 /* We now going back to ring-3, so clear the action flag. */
4873 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4874
4875 /*
4876 * Signal changes for the recompiler.
4877 */
4878 CPUMSetChangedFlags(pVCpu,
4879 CPUM_CHANGED_SYSENTER_MSR
4880 | CPUM_CHANGED_LDTR
4881 | CPUM_CHANGED_GDTR
4882 | CPUM_CHANGED_IDTR
4883 | CPUM_CHANGED_TR
4884 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4885
4886 /*
4887 * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time.
4888 */
4889 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4890 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4891 {
4892 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
4893 /* On the next entry we'll only sync the host context. */
4894 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT;
4895 }
4896 else
4897 {
4898 /* On the next entry we'll sync everything. */
4899 /** @todo we can do better than this */
4900 /* Not in the VINF_PGM_CHANGE_MODE though! */
4901 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
4902 }
4903
4904 /* Translate into a less severe return code */
4905 if (rc == VERR_EM_INTERPRETER)
4906 rc = VINF_EM_RAW_EMULATE_INSTR;
4907 else if (rc == VERR_VMX_INVALID_VMCS_PTR)
4908 {
4909 /* Try to extract more information about what might have gone wrong here. */
4910 VMXGetActivateVMCS(&pVCpu->hm.s.vmx.lasterror.u64VMCSPhys);
4911 pVCpu->hm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVMCS;
4912 pVCpu->hm.s.vmx.lasterror.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
4913 pVCpu->hm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4914 }
4915
4916 /* Just set the correct state here instead of trying to catch every goto above. */
4917 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4918
4919#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4920 /* Restore interrupts if we exited after disabling them. */
4921 if (uOldEFlags != ~(RTCCUINTREG)0)
4922 ASMSetFlags(uOldEFlags);
4923#endif
4924
4925 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
4926 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
4927 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
4928 Log2(("X"));
4929 return VBOXSTRICTRC_TODO(rc);
4930}
4931
4932
4933/**
4934 * Enters the VT-x session.
4935 *
4936 * @returns VBox status code.
4937 * @param pVM Pointer to the VM.
4938 * @param pVCpu Pointer to the VMCPU.
4939 * @param pCpu Pointer to the CPU info struct.
4940 */
4941VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
4942{
4943 Assert(pVM->hm.s.vmx.fSupported);
4944 NOREF(pCpu);
4945
4946 unsigned cr4 = ASMGetCR4();
4947 if (!(cr4 & X86_CR4_VMXE))
4948 {
4949 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4950 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4951 }
4952
4953 /* Activate the VMCS. */
4954 int rc = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
4955 if (RT_FAILURE(rc))
4956 return rc;
4957
4958 pVCpu->hm.s.fResumeVM = false;
4959 return VINF_SUCCESS;
4960}
4961
4962
4963/**
4964 * Leaves the VT-x session.
4965 *
4966 * @returns VBox status code.
4967 * @param pVM Pointer to the VM.
4968 * @param pVCpu Pointer to the VMCPU.
4969 * @param pCtx Pointer to the guests CPU context.
4970 */
4971VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4972{
4973 Assert(pVM->hm.s.vmx.fSupported);
4974
4975#ifdef DEBUG
4976 if (CPUMIsHyperDebugStateActive(pVCpu))
4977 {
4978 CPUMR0LoadHostDebugState(pVM, pVCpu);
4979 Assert(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4980 }
4981 else
4982#endif
4983
4984 /*
4985 * Save the guest debug state if necessary.
4986 */
4987 if (CPUMIsGuestDebugStateActive(pVCpu))
4988 {
4989 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4990
4991 /* Enable DRx move intercepts again. */
4992 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4993 int rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
4994 AssertRC(rc);
4995
4996 /* Resync the debug registers the next time. */
4997 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
4998 }
4999 else
5000 Assert(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
5001
5002 /*
5003 * Clear VMCS, marking it inactive, clearing implementation-specific data and writing
5004 * VMCS data back to memory.
5005 */
5006 int rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
5007 AssertRC(rc);
5008
5009 return VINF_SUCCESS;
5010}
5011
5012
5013/**
5014 * Flush the TLB using EPT.
5015 *
5016 * @returns VBox status code.
5017 * @param pVM Pointer to the VM.
5018 * @param pVCpu Pointer to the VMCPU.
5019 * @param enmFlush Type of flush.
5020 */
5021static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
5022{
5023 uint64_t descriptor[2];
5024
5025 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
5026 Assert(pVM->hm.s.fNestedPaging);
5027 descriptor[0] = pVCpu->hm.s.vmx.GCPhysEPTP;
5028 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
5029 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
5030 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hm.s.vmx.GCPhysEPTP, rc));
5031#ifdef VBOX_WITH_STATISTICS
5032 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
5033#endif
5034}
5035
5036
5037/**
5038 * Flush the TLB using VPID.
5039 *
5040 * @returns VBox status code.
5041 * @param pVM Pointer to the VM.
5042 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
5043 * enmFlush).
5044 * @param enmFlush Type of flush.
5045 * @param GCPtr Virtual address of the page to flush (can be 0 depending
5046 * on @a enmFlush).
5047 */
5048static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
5049{
5050 uint64_t descriptor[2];
5051
5052 Assert(pVM->hm.s.vmx.fVpid);
5053 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
5054 {
5055 descriptor[0] = 0;
5056 descriptor[1] = 0;
5057 }
5058 else
5059 {
5060 AssertPtr(pVCpu);
5061 AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
5062 AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
5063 descriptor[0] = pVCpu->hm.s.uCurrentAsid;
5064 descriptor[1] = GCPtr;
5065 }
5066 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
5067 AssertMsg(rc == VINF_SUCCESS,
5068 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
5069#ifdef VBOX_WITH_STATISTICS
5070 if (pVCpu)
5071 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
5072#endif
5073}
5074
5075
5076/**
5077 * Invalidates a guest page by guest virtual address. Only relevant for
5078 * EPT/VPID, otherwise there is nothing really to invalidate.
5079 *
5080 * @returns VBox status code.
5081 * @param pVM Pointer to the VM.
5082 * @param pVCpu Pointer to the VMCPU.
5083 * @param GCVirt Guest virtual address of the page to invalidate.
5084 */
5085VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
5086{
5087 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
5088
5089 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
5090
5091 if (!fFlushPending)
5092 {
5093 /*
5094 * We must invalidate the guest TLB entry in either case, we cannot ignore it even for the EPT case
5095 * See @bugref{6043} and @bugref{6177}
5096 *
5097 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*() as this
5098 * function maybe called in a loop with individual addresses.
5099 */
5100 if (pVM->hm.s.vmx.fVpid)
5101 {
5102 /* If we can flush just this page do it, otherwise flush as little as possible. */
5103 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
5104 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
5105 else
5106 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5107 }
5108 else if (pVM->hm.s.fNestedPaging)
5109 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5110 }
5111
5112 return VINF_SUCCESS;
5113}
5114
5115
5116/**
5117 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
5118 * otherwise there is nothing really to invalidate.
5119 *
5120 * NOTE: Assumes the current instruction references this physical page though a virtual address!!
5121 *
5122 * @returns VBox status code.
5123 * @param pVM Pointer to the VM.
5124 * @param pVCpu Pointer to the VMCPU.
5125 * @param GCPhys Guest physical address of the page to invalidate.
5126 */
5127VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
5128{
5129 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
5130
5131 /*
5132 * We cannot flush a page by guest-physical address. invvpid takes only a linear address
5133 * while invept only flushes by EPT not individual addresses. We update the force flag here
5134 * and flush before VMENTRY in hmR0VmxSetupTLB*(). This function might be called in a loop.
5135 */
5136 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5137 return VINF_SUCCESS;
5138}
5139
5140
5141/**
5142 * Report world switch error and dump some useful debug info.
5143 *
5144 * @param pVM Pointer to the VM.
5145 * @param pVCpu Pointer to the VMCPU.
5146 * @param rc Return code.
5147 * @param pCtx Pointer to the current guest CPU context (not updated).
5148 */
5149static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
5150{
5151 NOREF(pVM);
5152
5153 switch (VBOXSTRICTRC_VAL(rc))
5154 {
5155 case VERR_VMX_INVALID_VMXON_PTR:
5156 AssertFailed();
5157 break;
5158
5159 case VERR_VMX_UNABLE_TO_START_VM:
5160 case VERR_VMX_UNABLE_TO_RESUME_VM:
5161 {
5162 int rc2;
5163 RTCCUINTREG exitReason, instrError;
5164
5165 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
5166 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
5167 AssertRC(rc2);
5168 if (rc2 == VINF_SUCCESS)
5169 {
5170 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason,
5171 (uint32_t)instrError));
5172 Log(("Current stack %08x\n", &rc2));
5173
5174 pVCpu->hm.s.vmx.lasterror.ulInstrError = instrError;
5175 pVCpu->hm.s.vmx.lasterror.ulExitReason = exitReason;
5176
5177#ifdef VBOX_STRICT
5178 RTGDTR gdtr;
5179 PCX86DESCHC pDesc;
5180 RTCCUINTREG val;
5181
5182 ASMGetGDTR(&gdtr);
5183
5184 VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val);
5185 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
5186 VMXReadVMCS(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, &val);
5187 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
5188 VMXReadVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, &val);
5189 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
5190 VMXReadVMCS(VMX_VMCS32_CTRL_ENTRY_CONTROLS, &val);
5191 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
5192 VMXReadVMCS(VMX_VMCS32_CTRL_EXIT_CONTROLS, &val);
5193 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
5194
5195 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
5196 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
5197 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
5198 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
5199 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
5200 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
5201
5202 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
5203 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
5204 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
5205 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
5206
5207 if (val < gdtr.cbGdt)
5208 {
5209 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5210 HMR0DumpDescriptor(pDesc, val, "CS: ");
5211 }
5212
5213 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
5214 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
5215 if (val < gdtr.cbGdt)
5216 {
5217 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5218 HMR0DumpDescriptor(pDesc, val, "DS: ");
5219 }
5220
5221 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
5222 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
5223 if (val < gdtr.cbGdt)
5224 {
5225 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5226 HMR0DumpDescriptor(pDesc, val, "ES: ");
5227 }
5228
5229 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
5230 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
5231 if (val < gdtr.cbGdt)
5232 {
5233 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5234 HMR0DumpDescriptor(pDesc, val, "FS: ");
5235 }
5236
5237 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
5238 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
5239 if (val < gdtr.cbGdt)
5240 {
5241 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5242 HMR0DumpDescriptor(pDesc, val, "GS: ");
5243 }
5244
5245 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
5246 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
5247 if (val < gdtr.cbGdt)
5248 {
5249 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5250 HMR0DumpDescriptor(pDesc, val, "SS: ");
5251 }
5252
5253 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
5254 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
5255 if (val < gdtr.cbGdt)
5256 {
5257 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5258 HMR0DumpDescriptor(pDesc, val, "TR: ");
5259 }
5260
5261 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
5262 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
5263 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
5264 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
5265 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
5266 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
5267 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
5268 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
5269 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
5270 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
5271 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
5272 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
5273 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
5274 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
5275 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
5276 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
5277# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5278 if (VMX_IS_64BIT_HOST_MODE())
5279 {
5280 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
5281 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
5282 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5283 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5284 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5285 Log(("MSR_K8_KERNEL_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
5286 }
5287# endif
5288#endif /* VBOX_STRICT */
5289 }
5290 break;
5291 }
5292
5293 default:
5294 /* impossible */
5295 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
5296 break;
5297 }
5298}
5299
5300
5301#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5302/**
5303 * Prepares for and executes VMLAUNCH (64 bits guest mode).
5304 *
5305 * @returns VBox status code.
5306 * @param fResume Whether to vmlauch/vmresume.
5307 * @param pCtx Pointer to the guest CPU context.
5308 * @param pCache Pointer to the VMCS cache.
5309 * @param pVM Pointer to the VM.
5310 * @param pVCpu Pointer to the VMCPU.
5311 */
5312DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5313{
5314 uint32_t aParam[6];
5315 PHMGLOBLCPUINFO pCpu;
5316 RTHCPHYS HCPhysCpuPage;
5317 int rc;
5318
5319 pCpu = HMR0GetCurrentCpu();
5320 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5321
5322#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5323 pCache->uPos = 1;
5324 pCache->interPD = PGMGetInterPaeCR3(pVM);
5325 pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0;
5326#endif
5327
5328#ifdef DEBUG
5329 pCache->TestIn.HCPhysCpuPage= 0;
5330 pCache->TestIn.HCPhysVMCS = 0;
5331 pCache->TestIn.pCache = 0;
5332 pCache->TestOut.HCPhysVMCS = 0;
5333 pCache->TestOut.pCache = 0;
5334 pCache->TestOut.pCtx = 0;
5335 pCache->TestOut.eflags = 0;
5336#endif
5337
5338 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5339 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5340 aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */
5341 aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. */
5342 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache);
5343 aParam[5] = 0;
5344
5345#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5346 pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8;
5347 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1;
5348#endif
5349 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
5350
5351#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5352 Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5);
5353 Assert(pCtx->dr[4] == 10);
5354 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff;
5355#endif
5356
5357#ifdef DEBUG
5358 AssertMsg(pCache->TestIn.HCPhysCpuPage== HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5359 AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5360 pVCpu->hm.s.vmx.HCPhysVMCS));
5361 AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5362 pCache->TestOut.HCPhysVMCS));
5363 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5364 pCache->TestOut.pCache));
5365 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache),
5366 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache)));
5367 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5368 pCache->TestOut.pCtx));
5369 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5370#endif
5371 return rc;
5372}
5373
5374
5375# ifdef VBOX_STRICT
5376static bool hmR0VmxIsValidReadField(uint32_t idxField)
5377{
5378 switch (idxField)
5379 {
5380 case VMX_VMCS_GUEST_RIP:
5381 case VMX_VMCS_GUEST_RSP:
5382 case VMX_VMCS_GUEST_RFLAGS:
5383 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
5384 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
5385 case VMX_VMCS_GUEST_CR0:
5386 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
5387 case VMX_VMCS_GUEST_CR4:
5388 case VMX_VMCS_GUEST_DR7:
5389 case VMX_VMCS32_GUEST_SYSENTER_CS:
5390 case VMX_VMCS_GUEST_SYSENTER_EIP:
5391 case VMX_VMCS_GUEST_SYSENTER_ESP:
5392 case VMX_VMCS32_GUEST_GDTR_LIMIT:
5393 case VMX_VMCS_GUEST_GDTR_BASE:
5394 case VMX_VMCS32_GUEST_IDTR_LIMIT:
5395 case VMX_VMCS_GUEST_IDTR_BASE:
5396 case VMX_VMCS16_GUEST_FIELD_CS:
5397 case VMX_VMCS32_GUEST_CS_LIMIT:
5398 case VMX_VMCS_GUEST_CS_BASE:
5399 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
5400 case VMX_VMCS16_GUEST_FIELD_DS:
5401 case VMX_VMCS32_GUEST_DS_LIMIT:
5402 case VMX_VMCS_GUEST_DS_BASE:
5403 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
5404 case VMX_VMCS16_GUEST_FIELD_ES:
5405 case VMX_VMCS32_GUEST_ES_LIMIT:
5406 case VMX_VMCS_GUEST_ES_BASE:
5407 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
5408 case VMX_VMCS16_GUEST_FIELD_FS:
5409 case VMX_VMCS32_GUEST_FS_LIMIT:
5410 case VMX_VMCS_GUEST_FS_BASE:
5411 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
5412 case VMX_VMCS16_GUEST_FIELD_GS:
5413 case VMX_VMCS32_GUEST_GS_LIMIT:
5414 case VMX_VMCS_GUEST_GS_BASE:
5415 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
5416 case VMX_VMCS16_GUEST_FIELD_SS:
5417 case VMX_VMCS32_GUEST_SS_LIMIT:
5418 case VMX_VMCS_GUEST_SS_BASE:
5419 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
5420 case VMX_VMCS16_GUEST_FIELD_LDTR:
5421 case VMX_VMCS32_GUEST_LDTR_LIMIT:
5422 case VMX_VMCS_GUEST_LDTR_BASE:
5423 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
5424 case VMX_VMCS16_GUEST_FIELD_TR:
5425 case VMX_VMCS32_GUEST_TR_LIMIT:
5426 case VMX_VMCS_GUEST_TR_BASE:
5427 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
5428 case VMX_VMCS32_RO_EXIT_REASON:
5429 case VMX_VMCS32_RO_VM_INSTR_ERROR:
5430 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
5431 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
5432 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
5433 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
5434 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5435 case VMX_VMCS32_RO_IDT_INFO:
5436 case VMX_VMCS32_RO_IDT_ERRCODE:
5437 case VMX_VMCS_GUEST_CR3:
5438 case VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL:
5439 return true;
5440 }
5441 return false;
5442}
5443
5444
5445static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5446{
5447 switch (idxField)
5448 {
5449 case VMX_VMCS_GUEST_LDTR_BASE:
5450 case VMX_VMCS_GUEST_TR_BASE:
5451 case VMX_VMCS_GUEST_GDTR_BASE:
5452 case VMX_VMCS_GUEST_IDTR_BASE:
5453 case VMX_VMCS_GUEST_SYSENTER_EIP:
5454 case VMX_VMCS_GUEST_SYSENTER_ESP:
5455 case VMX_VMCS_GUEST_CR0:
5456 case VMX_VMCS_GUEST_CR4:
5457 case VMX_VMCS_GUEST_CR3:
5458 case VMX_VMCS_GUEST_DR7:
5459 case VMX_VMCS_GUEST_RIP:
5460 case VMX_VMCS_GUEST_RSP:
5461 case VMX_VMCS_GUEST_CS_BASE:
5462 case VMX_VMCS_GUEST_DS_BASE:
5463 case VMX_VMCS_GUEST_ES_BASE:
5464 case VMX_VMCS_GUEST_FS_BASE:
5465 case VMX_VMCS_GUEST_GS_BASE:
5466 case VMX_VMCS_GUEST_SS_BASE:
5467 return true;
5468 }
5469 return false;
5470}
5471# endif /* VBOX_STRICT */
5472
5473
5474/**
5475 * Executes the specified handler in 64-bit mode.
5476 *
5477 * @returns VBox status code.
5478 * @param pVM Pointer to the VM.
5479 * @param pVCpu Pointer to the VMCPU.
5480 * @param pCtx Pointer to the guest CPU context.
5481 * @param pfnHandler Pointer to the RC handler function.
5482 * @param cbParam Number of parameters.
5483 * @param paParam Array of 32-bit parameters.
5484 */
5485VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
5486 uint32_t *paParam)
5487{
5488 int rc, rc2;
5489 PHMGLOBLCPUINFO pCpu;
5490 RTHCPHYS HCPhysCpuPage;
5491 RTHCUINTREG uOldEFlags;
5492
5493 AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5494 Assert(pfnHandler);
5495 Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField));
5496 Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField));
5497
5498#ifdef VBOX_STRICT
5499 for (unsigned i=0;i<pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries;i++)
5500 Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i]));
5501
5502 for (unsigned i=0;i<pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries;i++)
5503 Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i]));
5504#endif
5505
5506 /* Disable interrupts. */
5507 uOldEFlags = ASMIntDisableFlags();
5508
5509#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5510 RTCPUID idHostCpu = RTMpCpuId();
5511 CPUMR0SetLApic(pVM, idHostCpu);
5512#endif
5513
5514 pCpu = HMR0GetCurrentCpu();
5515 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5516
5517 /* Clear VMCS. Marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */
5518 VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
5519
5520 /* Leave VMX Root Mode. */
5521 VMXDisable();
5522
5523 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5524
5525 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5526 CPUMSetHyperEIP(pVCpu, pfnHandler);
5527 for (int i=(int)cbParam-1;i>=0;i--)
5528 CPUMPushHyper(pVCpu, paParam[i]);
5529
5530 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
5531
5532 /* Call switcher. */
5533 rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5534 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
5535
5536 /* Make sure the VMX instructions don't cause #UD faults. */
5537 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5538
5539 /* Enter VMX Root Mode */
5540 rc2 = VMXEnable(HCPhysCpuPage);
5541 if (RT_FAILURE(rc2))
5542 {
5543 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5544 ASMSetFlags(uOldEFlags);
5545 return VERR_VMX_VMXON_FAILED;
5546 }
5547
5548 rc2 = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
5549 AssertRC(rc2);
5550 Assert(!(ASMGetFlags() & X86_EFL_IF));
5551 ASMSetFlags(uOldEFlags);
5552 return rc;
5553}
5554#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5555
5556
5557#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5558/**
5559 * Executes VMWRITE.
5560 *
5561 * @returns VBox status code
5562 * @param pVCpu Pointer to the VMCPU.
5563 * @param idxField VMCS field index.
5564 * @param u64Val 16, 32 or 64 bits value.
5565 */
5566VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5567{
5568 int rc;
5569 switch (idxField)
5570 {
5571 case VMX_VMCS64_CTRL_TSC_OFFSET_FULL:
5572 case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL:
5573 case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL:
5574 case VMX_VMCS64_CTRL_MSR_BITMAP_FULL:
5575 case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL:
5576 case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL:
5577 case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL:
5578 case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL:
5579 case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL:
5580 case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL:
5581 case VMX_VMCS64_GUEST_PDPTE0_FULL:
5582 case VMX_VMCS64_GUEST_PDPTE1_FULL:
5583 case VMX_VMCS64_GUEST_PDPTE2_FULL:
5584 case VMX_VMCS64_GUEST_PDPTE3_FULL:
5585 case VMX_VMCS64_GUEST_DEBUGCTL_FULL:
5586 case VMX_VMCS64_GUEST_EFER_FULL:
5587 case VMX_VMCS64_CTRL_EPTP_FULL:
5588 /* These fields consist of two parts, which are both writable in 32 bits mode. */
5589 rc = VMXWriteVMCS32(idxField, u64Val);
5590 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5591 AssertRC(rc);
5592 return rc;
5593
5594 case VMX_VMCS_GUEST_LDTR_BASE:
5595 case VMX_VMCS_GUEST_TR_BASE:
5596 case VMX_VMCS_GUEST_GDTR_BASE:
5597 case VMX_VMCS_GUEST_IDTR_BASE:
5598 case VMX_VMCS_GUEST_SYSENTER_EIP:
5599 case VMX_VMCS_GUEST_SYSENTER_ESP:
5600 case VMX_VMCS_GUEST_CR0:
5601 case VMX_VMCS_GUEST_CR4:
5602 case VMX_VMCS_GUEST_CR3:
5603 case VMX_VMCS_GUEST_DR7:
5604 case VMX_VMCS_GUEST_RIP:
5605 case VMX_VMCS_GUEST_RSP:
5606 case VMX_VMCS_GUEST_CS_BASE:
5607 case VMX_VMCS_GUEST_DS_BASE:
5608 case VMX_VMCS_GUEST_ES_BASE:
5609 case VMX_VMCS_GUEST_FS_BASE:
5610 case VMX_VMCS_GUEST_GS_BASE:
5611 case VMX_VMCS_GUEST_SS_BASE:
5612 /* Queue a 64 bits value as we can't set it in 32 bits host mode. */
5613 if (u64Val >> 32ULL)
5614 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
5615 else
5616 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
5617
5618 return rc;
5619
5620 default:
5621 AssertMsgFailed(("Unexpected field %x\n", idxField));
5622 return VERR_INVALID_PARAMETER;
5623 }
5624}
5625
5626
5627/**
5628 * Cache VMCS writes for running 64 bits guests on 32 bits hosts.
5629 *
5630 * @param pVCpu Pointer to the VMCPU.
5631 * @param idxField VMCS field index.
5632 * @param u64Val 16, 32 or 64 bits value.
5633 */
5634VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5635{
5636 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
5637
5638 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5639 ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5640
5641 /* Make sure there are no duplicates. */
5642 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
5643 {
5644 if (pCache->Write.aField[i] == idxField)
5645 {
5646 pCache->Write.aFieldVal[i] = u64Val;
5647 return VINF_SUCCESS;
5648 }
5649 }
5650
5651 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5652 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5653 pCache->Write.cValidEntries++;
5654 return VINF_SUCCESS;
5655}
5656
5657#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_HYBRID_32BIT_KERNEL */
5658
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette