VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp @ 44968

Last change on this file since 44968 was 44968, checked in by vboxsync, 12 years ago

VMM/VMMR0: build fix

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 224.5 KB
1/* $Id: HWVMXR0.cpp 44968 2013-03-11 09:08:47Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
55#if defined(RT_ARCH_AMD64)
56# define VMX_IS_64BIT_HOST_MODE() (true)
57#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
58# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
59#else
60# define VMX_IS_64BIT_HOST_MODE() (false)
61#endif
62
63
64/*******************************************************************************
65* Global Variables *
66*******************************************************************************/
67/* IO operation lookup arrays. */
68static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
69static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
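/* Note: these tables are indexed by the I/O access-size field of the VM-exit
 * instruction qualification (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; encoding 2
 * is unused, hence the zero entries). The AND masks trim EAX/AX/AL to the
 * actual access width. */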
70
71#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
72/** See HMR0A.asm. */
73extern "C" uint32_t g_fVMXIs64bitHost;
74#endif
75
76
77/*******************************************************************************
78* Local Functions *
79*******************************************************************************/
80static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
81static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
82static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
83static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
84static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
85static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
86static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
87static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
88static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
89
90
91/**
92 * Updates the HMCPU's last-error record with the error information from the VMCS.
93 *
94 * @param pVM Pointer to the VM.
95 * @param pVCpu Pointer to the VMCPU.
96 * @param rc The error code.
97 */
98static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
99{
100 if (rc == VERR_VMX_GENERIC)
101 {
102 RTCCUINTREG instrError;
103
104 VMXReadVmcs(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
105 pVCpu->hm.s.vmx.lasterror.u32InstrError = instrError;
106 }
107 pVM->hm.s.lLastError = rc;
108}
109
110
111/**
112 * Sets up and activates VT-x on the current CPU.
113 *
114 * @returns VBox status code.
115 * @param pCpu Pointer to the CPU info struct.
116 * @param pVM Pointer to the VM. (can be NULL after a resume!!)
117 * @param pvCpuPage Pointer to the global CPU page.
118 * @param HCPhysCpuPage Physical address of the global CPU page.
119 * @param fEnabledByHost Set if SUPR0EnableVTx or similar was used to enable
120 * VT-x/AMD-V on the host.
121 */
122VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost)
123{
124 if (!fEnabledByHost)
125 {
126 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
127 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
128
129 if (pVM)
130 {
131 /* Set revision dword at the beginning of the VMXON structure. */
132 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.msr.vmx_basic_info);
133 }
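/* (VMXON requires the VMCS revision identifier, as reported by the
 *  IA32_VMX_BASIC MSR, in the first 32 bits of the VMXON region.) */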
134
135 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
136 * (which can have very bad consequences!!!)
137 */
138
139 /** @todo r=bird: Why is this code different than the probing code earlier
140 * on? It just sets VMXE if needed and doesn't check that it isn't
141 * set. Mac OS X host_vmxoff may leave this set and we'll fail here
142 * and debug-assert in the calling code. This is what caused the
143 * "regression" after backing out the SUPR0EnableVTx code hours before
144 * 4.2.0GA (reboot fixed the issue). I've changed here to do the same
145 * as the init code. */
146 uint64_t uCr4 = ASMGetCR4();
147 if (!(uCr4 & X86_CR4_VMXE))
148 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */
149
150 /*
151 * Enter VM root mode.
152 */
153 int rc = VMXEnable(HCPhysCpuPage);
154 if (RT_FAILURE(rc))
155 {
156 ASMSetCR4(uCr4);
157 return VERR_VMX_VMXON_FAILED;
158 }
159 }
160
161 /*
162 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
163 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
164 * each time while reusing a VPID after hitting the MaxASID limit once.
165 */
166 if ( pVM
167 && pVM->hm.s.vmx.fVpid
168 && (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS))
169 {
170 hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
171 pCpu->fFlushAsidBeforeUse = false;
172 }
173 else
174 pCpu->fFlushAsidBeforeUse = true;
175
176 /*
177 * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}.
178 */
179 ++pCpu->cTlbFlushes;
180
181 return VINF_SUCCESS;
182}
183
184
185/**
186 * Deactivates VT-x on the current CPU.
187 *
188 * @returns VBox status code.
189 * @param pCpu Pointer to the CPU info struct.
190 * @param pvCpuPage Pointer to the global CPU page.
191 * @param HCPhysCpuPage Physical address of the global CPU page.
192 */
193VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
194{
195 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
196 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
197 NOREF(pCpu);
198
199 /* If we're somehow not in VMX root mode, then we shouldn't dare leaving it. */
200 if (!(ASMGetCR4() & X86_CR4_VMXE))
201 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
202
203 /* Leave VMX Root Mode. */
204 VMXDisable();
205
206 /* And clear the X86_CR4_VMXE bit. */
207 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
208 return VINF_SUCCESS;
209}
210
211
212/**
213 * Does Ring-0 per VM VT-x initialization.
214 *
215 * @returns VBox status code.
216 * @param pVM Pointer to the VM.
217 */
218VMMR0DECL(int) VMXR0InitVM(PVM pVM)
219{
220 int rc;
221
222#ifdef LOG_ENABLED
223 SUPR0Printf("VMXR0InitVM %p\n", pVM);
224#endif
225
226 pVM->hm.s.vmx.hMemObjApicAccess = NIL_RTR0MEMOBJ;
227
228 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
229 {
230 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
231 rc = RTR0MemObjAllocCont(&pVM->hm.s.vmx.hMemObjApicAccess, PAGE_SIZE, false /* fExecutable */);
232 AssertRC(rc);
233 if (RT_FAILURE(rc))
234 return rc;
235
236 pVM->hm.s.vmx.pbApicAccess = (uint8_t *)RTR0MemObjAddress(pVM->hm.s.vmx.hMemObjApicAccess);
237 pVM->hm.s.vmx.HCPhysApicAccess = RTR0MemObjGetPagePhysAddr(pVM->hm.s.vmx.hMemObjApicAccess, 0);
238 ASMMemZero32(pVM->hm.s.vmx.pbApicAccess, PAGE_SIZE);
239 }
240 else
241 {
242 pVM->hm.s.vmx.hMemObjApicAccess = 0;
243 pVM->hm.s.vmx.pbApicAccess = 0;
244 pVM->hm.s.vmx.HCPhysApicAccess = 0;
245 }
246
247#ifdef VBOX_WITH_CRASHDUMP_MAGIC
248 {
249 rc = RTR0MemObjAllocCont(&pVM->hm.s.vmx.hMemObjScratch, PAGE_SIZE, false /* fExecutable */);
250 AssertRC(rc);
251 if (RT_FAILURE(rc))
252 return rc;
253
254 pVM->hm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hm.s.vmx.hMemObjScratch);
255 pVM->hm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hm.s.vmx.hMemObjScratch, 0);
256
257 ASMMemZero32(pVM->hm.s.vmx.pbScratch, PAGE_SIZE);
258 strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
259 *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
260 }
261#endif
262
263 /* Allocate VMCSs for all guest CPUs. */
264 for (VMCPUID i = 0; i < pVM->cCpus; i++)
265 {
266 PVMCPU pVCpu = &pVM->aCpus[i];
267
268 pVCpu->hm.s.vmx.hMemObjVmcs = NIL_RTR0MEMOBJ;
269
270 /* Allocate one page for the VM control structure (VMCS). */
271 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjVmcs, PAGE_SIZE, false /* fExecutable */);
272 AssertRC(rc);
273 if (RT_FAILURE(rc))
274 return rc;
275
276 pVCpu->hm.s.vmx.pvVmcs = RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjVmcs);
277 pVCpu->hm.s.vmx.HCPhysVmcs = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjVmcs, 0);
278 ASMMemZeroPage(pVCpu->hm.s.vmx.pvVmcs);
279
280 pVCpu->hm.s.vmx.cr0_mask = 0;
281 pVCpu->hm.s.vmx.cr4_mask = 0;
282
283 /* Allocate one page for the virtual APIC page for TPR caching. */
284 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjVirtApic, PAGE_SIZE, false /* fExecutable */);
285 AssertRC(rc);
286 if (RT_FAILURE(rc))
287 return rc;
288
289 pVCpu->hm.s.vmx.pbVirtApic = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjVirtApic);
290 pVCpu->hm.s.vmx.HCPhysVirtApic = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjVirtApic, 0);
291 ASMMemZeroPage(pVCpu->hm.s.vmx.pbVirtApic);
292
293 /* Allocate the MSR bitmap if this feature is supported. */
294 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
295 {
296 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, PAGE_SIZE, false /* fExecutable */);
297 AssertRC(rc);
298 if (RT_FAILURE(rc))
299 return rc;
300
301 pVCpu->hm.s.vmx.pvMsrBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjMsrBitmap);
302 pVCpu->hm.s.vmx.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjMsrBitmap, 0);
303 memset(pVCpu->hm.s.vmx.pvMsrBitmap, 0xff, PAGE_SIZE);
304 }
305
306#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
307 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
308 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjGuestMsr, PAGE_SIZE, false /* fExecutable */);
309 AssertRC(rc);
310 if (RT_FAILURE(rc))
311 return rc;
312
313 pVCpu->hm.s.vmx.pvGuestMsr = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjGuestMsr);
314 pVCpu->hm.s.vmx.HCPhysGuestMsr = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjGuestMsr, 0);
315 Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf));
316 memset(pVCpu->hm.s.vmx.pvGuestMsr, 0, PAGE_SIZE);
317
318 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
319 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjHostMsr, PAGE_SIZE, false /* fExecutable */);
320 AssertRC(rc);
321 if (RT_FAILURE(rc))
322 return rc;
323
324 pVCpu->hm.s.vmx.pvHostMsr = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjHostMsr);
325 pVCpu->hm.s.vmx.HCPhysHostMsr = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjHostMsr, 0);
326 Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf));
327 memset(pVCpu->hm.s.vmx.pvHostMsr, 0, PAGE_SIZE);
328#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
329
330 /* Current guest paging mode. */
331 pVCpu->hm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
332
333#ifdef LOG_ENABLED
334 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hm.s.vmx.pvVmcs, (uint32_t)pVCpu->hm.s.vmx.HCPhysVmcs);
335#endif
336 }
337
338 return VINF_SUCCESS;
339}
340
341
342/**
343 * Does Ring-0 per VM VT-x termination.
344 *
345 * @returns VBox status code.
346 * @param pVM Pointer to the VM.
347 */
348VMMR0DECL(int) VMXR0TermVM(PVM pVM)
349{
350 for (VMCPUID i = 0; i < pVM->cCpus; i++)
351 {
352 PVMCPU pVCpu = &pVM->aCpus[i];
353
354 if (pVCpu->hm.s.vmx.hMemObjVmcs != NIL_RTR0MEMOBJ)
355 {
356 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjVmcs, false);
357 pVCpu->hm.s.vmx.hMemObjVmcs = NIL_RTR0MEMOBJ;
358 pVCpu->hm.s.vmx.pvVmcs = 0;
359 pVCpu->hm.s.vmx.HCPhysVmcs = 0;
360 }
361 if (pVCpu->hm.s.vmx.hMemObjVirtApic != NIL_RTR0MEMOBJ)
362 {
363 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjVirtApic, false);
364 pVCpu->hm.s.vmx.hMemObjVirtApic = NIL_RTR0MEMOBJ;
365 pVCpu->hm.s.vmx.pbVirtApic = 0;
366 pVCpu->hm.s.vmx.HCPhysVirtApic = 0;
367 }
368 if (pVCpu->hm.s.vmx.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
369 {
370 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjMsrBitmap, false);
371 pVCpu->hm.s.vmx.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
372 pVCpu->hm.s.vmx.pvMsrBitmap = 0;
373 pVCpu->hm.s.vmx.HCPhysMsrBitmap = 0;
374 }
375#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
376 if (pVCpu->hm.s.vmx.hMemObjHostMsr != NIL_RTR0MEMOBJ)
377 {
378 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjHostMsr, false);
379 pVCpu->hm.s.vmx.hMemObjHostMsr = NIL_RTR0MEMOBJ;
380 pVCpu->hm.s.vmx.pvHostMsr = 0;
381 pVCpu->hm.s.vmx.HCPhysHostMsr = 0;
382 }
383 if (pVCpu->hm.s.vmx.hMemObjGuestMsr != NIL_RTR0MEMOBJ)
384 {
385 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjGuestMsr, false);
386 pVCpu->hm.s.vmx.hMemObjGuestMsr = NIL_RTR0MEMOBJ;
387 pVCpu->hm.s.vmx.pvGuestMsr = 0;
388 pVCpu->hm.s.vmx.HCPhysGuestMsr = 0;
389 }
390#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
391 }
392 if (pVM->hm.s.vmx.hMemObjApicAccess != NIL_RTR0MEMOBJ)
393 {
394 RTR0MemObjFree(pVM->hm.s.vmx.hMemObjApicAccess, false);
395 pVM->hm.s.vmx.hMemObjApicAccess = NIL_RTR0MEMOBJ;
396 pVM->hm.s.vmx.pbApicAccess = 0;
397 pVM->hm.s.vmx.HCPhysApicAccess = 0;
398 }
399#ifdef VBOX_WITH_CRASHDUMP_MAGIC
400 if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
401 {
402 ASMMemZero32(pVM->hm.s.vmx.pScratch, PAGE_SIZE);
403 RTR0MemObjFree(pVM->hm.s.vmx.hMemObjScratch, false);
404 pVM->hm.s.vmx.hMemObjScratch = NIL_RTR0MEMOBJ;
405 pVM->hm.s.vmx.pScratch = 0;
406 pVM->hm.s.vmx.pScratchPhys = 0;
407 }
408#endif
409 return VINF_SUCCESS;
410}
411
412
413/**
414 * Sets up VT-x for the specified VM.
415 *
416 * @returns VBox status code.
417 * @param pVM Pointer to the VM.
418 */
419VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
420{
421 int rc = VINF_SUCCESS;
422 uint32_t val;
423
424 AssertReturn(pVM, VERR_INVALID_PARAMETER);
425
426 /* Initialize these always, see hmR3InitFinalizeR0(). */
427 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NONE;
428 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NONE;
429
430 /* Determine optimal flush type for EPT. */
431 if (pVM->hm.s.fNestedPaging)
432 {
433 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
434 {
435 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
436 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_SINGLE_CONTEXT;
437 else if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
438 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_ALL_CONTEXTS;
439 else
440 {
441 /*
442 * Should never really happen. EPT is supported but no suitable flush types are supported.
443 * We cannot ignore EPT at this point as we've already set up Unrestricted Guest execution.
444 */
445 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
446 return VERR_VMX_GENERIC;
447 }
448 }
449 else
450 {
451 /*
452 * Should never really happen. EPT is supported but the INVEPT instruction is not supported.
453 */
454 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
455 return VERR_VMX_GENERIC;
456 }
457 }
458
459 /* Determine optimal flush type for VPID. */
460 if (pVM->hm.s.vmx.fVpid)
461 {
462 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
463 {
464 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
465 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_SINGLE_CONTEXT;
466 else if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
467 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_ALL_CONTEXTS;
468 else
469 {
470 /*
471 * Neither SINGLE nor ALL context flush types for VPID are supported by the CPU.
472 * We do not handle other flush type combinations, so ignore the VPID capabilities.
473 */
474 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
475 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
476 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
477 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
478 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
479 pVM->hm.s.vmx.fVpid = false;
480 }
481 }
482 else
483 {
484 /*
485 * Should not really happen. VPID is supported but the INVVPID instruction is not supported.
486 * Ignore VPID capabilities as our code relies on using INVVPID for selective flushing.
487 */
488 Log(("VMXR0SetupVM: VPID supported without INVVPID support. Ignoring VPID.\n"));
489 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
490 pVM->hm.s.vmx.fVpid = false;
491 }
492 }
493
494 for (VMCPUID i = 0; i < pVM->cCpus; i++)
495 {
496 PVMCPU pVCpu = &pVM->aCpus[i];
497
498 AssertPtr(pVCpu->hm.s.vmx.pvVmcs);
499
500 /* Set revision dword at the beginning of the VMCS structure. */
501 *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.msr.vmx_basic_info);
502
503 /*
504 * Clear and activate the VMCS.
505 */
506 Log(("HCPhysVmcs = %RHp\n", pVCpu->hm.s.vmx.HCPhysVmcs));
507 rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
508 if (RT_FAILURE(rc))
509 goto vmx_end;
510
511 rc = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
512 if (RT_FAILURE(rc))
513 goto vmx_end;
514
515 /*
516 * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
517 * Set required bits to one and zero according to the MSR capabilities.
518 */
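/* Pattern used for all VMX control fields below: start from the bits the CPU
 * requires to be 1 (disallowed0), OR in the features we want, and finally mask
 * with allowed1 so we never set a bit the CPU does not support. */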
519 val = pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
520 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */
521 | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */
522
523 /*
524 * Enable the VMX preemption timer.
525 */
526 if (pVM->hm.s.vmx.fUsePreemptTimer)
527 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
528 val &= pVM->hm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
529
530 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, val);
531 AssertRC(rc);
532
533 /*
534 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
535 * Set required bits to one and zero according to the MSR capabilities.
536 */
537 val = pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
538 /* Program which events cause VM-exits and which features we want to use. */
539 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
540 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
541 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
542 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
543 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
544 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
545 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside
546 the guest (host thinks the cpu load is high) */
547
548 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
549 if (!pVM->hm.s.fNestedPaging)
550 {
551 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
552 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
553 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
554 }
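/* (Without EPT, every guest CR3 load/store and INVLPG has to trap so that the
 *  shadow page tables can be kept in sync with the guest's page tables.) */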
555
556 /*
557 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch
558 * failure with an invalid control fields error. (combined with some other exit reasons)
559 */
560 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
561 {
562 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
563 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
564 Assert(pVM->hm.s.vmx.pbApicAccess);
565 }
566 else
567 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
568 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
569
570 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
571 {
572 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
573 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
574 }
575
576 /* We will use the secondary control if it's present. */
577 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
578
579 /* Mask away the bits that the CPU doesn't support */
580 /** @todo make sure they don't conflict with the above requirements. */
581 val &= pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
582 pVCpu->hm.s.vmx.u32ProcCtls = val;
583
584 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, val);
585 AssertRC(rc);
586
587 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
588 {
589 /*
590 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
591 * Set required bits to one and zero according to the MSR capabilities.
592 */
593 val = pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
594 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
595
596 if (pVM->hm.s.fNestedPaging)
597 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
598
599 if (pVM->hm.s.vmx.fVpid)
600 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
601
602 if (pVM->hm.s.fHasIoApic)
603 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
604
605 if (pVM->hm.s.vmx.fUnrestrictedGuest)
606 val |= VMX_VMCS_CTRL_PROC_EXEC2_UNRESTRICTED_GUEST;
607
608 if (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
609 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP;
610
611 /* Mask away the bits that the CPU doesn't support */
612 /** @todo make sure they don't conflict with the above requirements. */
613 val &= pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
614 pVCpu->hm.s.vmx.u32ProcCtls2 = val;
615 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS2, val);
616 AssertRC(rc);
617 }
618
619 /*
620 * VMX_VMCS_CTRL_CR3_TARGET_COUNT
621 * Set required bits to one and zero according to the MSR capabilities.
622 */
623 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0);
624 AssertRC(rc);
625
626 /*
627 * Forward all exceptions except #NM & #PF to the guest.
628 * We always need to check page faults since our shadow page table can be out of sync.
629 * And we always lazily sync the FPU & XMM state.
630 */
631
632 /** @todo Possible optimization:
633 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
634 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
635 * registers ourselves of course.
636 *
637 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
638 */
639
640 /*
641 * Don't filter page faults, all of them should cause a world switch.
642 */
643 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0);
644 AssertRC(rc);
645 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0);
646 AssertRC(rc);
647
648 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0);
649 AssertRC(rc);
650 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0);
651 AssertRC(rc);
652 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0);
653 AssertRC(rc);
654
655 /*
656 * Set the MSR bitmap address.
657 */
658 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
659 {
660 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
661
662 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap);
663 AssertRC(rc);
664
665 /*
666 * Allow the guest to directly modify these MSRs; they are loaded/stored automatically
667 * using MSR-load/store areas in the VMCS.
668 */
669 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
670 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
671 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
672 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
673 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
674 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
675 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
676 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
677 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
678 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
679 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_TSC_AUX, true, true);
680 }
681
682#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
683 /*
684 * Set the guest & host MSR load/store physical addresses.
685 */
686 Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr);
687 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
688 AssertRC(rc);
689 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
690 AssertRC(rc);
691 Assert(pVCpu->hm.s.vmx.HCPhysHostMsr);
692 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr);
693 AssertRC(rc);
694#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
695
696 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
697 AssertRC(rc);
698 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0);
699 AssertRC(rc);
700 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0);
701 AssertRC(rc);
702
703 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
704 {
705 Assert(pVM->hm.s.vmx.hMemObjApicAccess);
706 /* Optional */
707 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0);
708 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic);
709
710 if (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
711 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess);
712
713 AssertRC(rc);
714 }
715
716 /* Set link pointer to -1. Not currently used. */
717 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
718 AssertRC(rc);
719
720 /*
721 * Clear the VMCS, marking it inactive. This clears implementation-specific data and writes
722 * the VMCS data back to memory.
723 */
724 rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
725 AssertRC(rc);
726
727 /*
728 * Configure the VMCS read cache.
729 */
730 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
731
732 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_RIP);
733 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_RSP);
734 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_RFLAGS);
735 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
736 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
737 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_CR0);
738 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
739 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_CR4);
740 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_DR7);
741 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
742 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_SYSENTER_EIP);
743 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_SYSENTER_ESP);
744 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
745 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_GDTR_BASE);
746 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
747 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_IDTR_BASE);
748
749 VMX_SETUP_SELREG(ES, pCache);
750 VMX_SETUP_SELREG(SS, pCache);
751 VMX_SETUP_SELREG(CS, pCache);
752 VMX_SETUP_SELREG(DS, pCache);
753 VMX_SETUP_SELREG(FS, pCache);
754 VMX_SETUP_SELREG(GS, pCache);
755 VMX_SETUP_SELREG(LDTR, pCache);
756 VMX_SETUP_SELREG(TR, pCache);
757
758 /*
759 * Status code VMCS reads.
760 */
761 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_REASON);
762 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
763 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
764 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE);
765 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
766 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
767 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
768 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_IDT_INFO);
769 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_IDT_ERROR_CODE);
770
771 if (pVM->hm.s.fNestedPaging)
772 {
773 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_CR3);
774 VMXSetupCachedReadVmcs(pCache, VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL);
775 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
776 }
777 else
778 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
779 } /* for each VMCPU */
780
781 /*
782 * Setup the right TLB function based on CPU capabilities.
783 */
784 if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
785 pVM->hm.s.vmx.pfnFlushTaggedTlb = hmR0VmxSetupTLBBoth;
786 else if (pVM->hm.s.fNestedPaging)
787 pVM->hm.s.vmx.pfnFlushTaggedTlb = hmR0VmxSetupTLBEPT;
788 else if (pVM->hm.s.vmx.fVpid)
789 pVM->hm.s.vmx.pfnFlushTaggedTlb = hmR0VmxSetupTLBVPID;
790 else
791 pVM->hm.s.vmx.pfnFlushTaggedTlb = hmR0VmxSetupTLBDummy;
792
793vmx_end:
794 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
795 return rc;
796}
797
798
799/**
800 * Sets the permission bits for the specified MSR.
801 *
802 * @param pVCpu Pointer to the VMCPU.
803 * @param ulMSR The MSR value.
804 * @param fRead Whether reading is allowed.
805 * @param fWrite Whether writing is allowed.
806 */
807static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
808{
809 unsigned ulBit;
810 uint8_t *pvMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
811
812 /*
813 * Layout:
814 * 0x000 - 0x3ff - Low MSR read bits
815 * 0x400 - 0x7ff - High MSR read bits
816 * 0x800 - 0xbff - Low MSR write bits
817 * 0xc00 - 0xfff - High MSR write bits
818 */
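/* Example: MSR_K8_LSTAR (0xC0000082) falls in the "high" range, so ulBit
 * becomes 0x82 and pvMsrBitmap is advanced by 0x400 for the read bits; the
 * corresponding write bit lives at the same offset plus 0x800 (0xC00 from the
 * start of the page). A clear bit means the access does not cause a VM-exit. */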
819 if (ulMSR <= 0x00001FFF)
820 {
821 /* Pentium-compatible MSRs */
822 ulBit = ulMSR;
823 }
824 else if ( ulMSR >= 0xC0000000
825 && ulMSR <= 0xC0001FFF)
826 {
827 /* AMD Sixth Generation x86 Processor MSRs */
828 ulBit = (ulMSR - 0xC0000000);
829 pvMsrBitmap += 0x400;
830 }
831 else
832 {
833 AssertFailed();
834 return;
835 }
836
837 Assert(ulBit <= 0x1fff);
838 if (fRead)
839 ASMBitClear(pvMsrBitmap, ulBit);
840 else
841 ASMBitSet(pvMsrBitmap, ulBit);
842
843 if (fWrite)
844 ASMBitClear(pvMsrBitmap + 0x800, ulBit);
845 else
846 ASMBitSet(pvMsrBitmap + 0x800, ulBit);
847}
848
849
850/**
851 * Injects an event (trap or external interrupt).
852 *
853 * @returns VBox status code. Note that it may return VINF_EM_RESET to
854 * indicate a triple fault when injecting X86_XCPT_DF.
855 *
856 * @param pVM Pointer to the VM.
857 * @param pVCpu Pointer to the VMCPU.
858 * @param pCtx Pointer to the guest CPU Context.
859 * @param intInfo VMX interrupt info.
860 * @param cbInstr Opcode length of faulting instruction.
861 * @param errCode Error code (optional).
862 */
863static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
864{
865 int rc;
866 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
867
868#ifdef VBOX_WITH_STATISTICS
869 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
870#endif
871
872#ifdef VBOX_STRICT
873 if (iGate == 0xE)
874 {
875 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate,
876 (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
877 }
878 else if (iGate < 0x20)
879 {
880 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip,
881 errCode));
882 }
883 else
884 {
885 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
886 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT
887 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
888 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT
889 || pCtx->eflags.u32 & X86_EFL_IF);
890 }
891#endif
892
893 if ( CPUMIsGuestInRealModeEx(pCtx)
894 && pVM->hm.s.vmx.pRealModeTSS)
895 {
896 RTGCPHYS GCPhysHandler;
897 uint16_t offset, ip;
898 RTSEL sel;
899
900 /*
901 * Injecting events doesn't work right with real mode emulation.
902 * (#GP if we try to inject external hardware interrupts)
903 * Inject the interrupt or trap directly instead.
904 *
905 * ASSUMES no access handlers for the bits we read or write below (should be safe).
906 */
907 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
908
909 /*
910 * Check if the interrupt handler is present.
911 */
912 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
913 {
914 Log(("IDT cbIdt violation\n"));
915 if (iGate != X86_XCPT_DF)
916 {
917 uint32_t intInfo2;
918
919 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : (uint32_t)X86_XCPT_GP;
920 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
921 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
922 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
923
924 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
925 }
926 Log(("Triple fault -> reset the VM!\n"));
927 return VINF_EM_RESET;
928 }
929 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT
930 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
931 || iGate == 4)
932 {
933 ip = pCtx->ip + cbInstr;
934 }
935 else
936 ip = pCtx->ip;
937
938 /*
939 * Read the selector:offset pair of the interrupt handler.
940 */
941 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
942 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
943 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
944
945 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
946
947 /*
948 * Construct the stack frame.
949 */
950 /** @todo Check stack limit. */
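/* This mimics what a real-mode INT does: FLAGS, CS and IP are pushed (in that
 * order) onto the guest stack so that the handler's IRET returns to 'ip'. */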
951 pCtx->sp -= 2;
952 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->eflags.u));
953 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
954 pCtx->sp -= 2;
955 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->cs.Sel));
956 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
957 pCtx->sp -= 2;
958 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss.Sel, pCtx->sp, ip));
959 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
960
961 /*
962 * Update the CPU state for executing the handler.
963 */
964 pCtx->rip = offset;
965 pCtx->cs.Sel = sel;
966 pCtx->cs.u64Base = sel << 4;
967 pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
968
969 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SEGMENT_REGS;
970 return VINF_SUCCESS;
971 }
972
973 /*
974 * Set event injection state.
975 */
976 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
977 rc |= VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
978 rc |= VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
979
980 AssertRC(rc);
981 return rc;
982}
983
984
985/**
986 * Checks for pending guest interrupts and injects them.
987 *
988 * @returns VBox status code.
989 * @param pVM Pointer to the VM.
990 * @param pVCpu Pointer to the VMCPU.
991 * @param pCtx Pointer to the guest CPU context.
992 */
993static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
994{
995 int rc;
996
997 /*
998 * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely).
999 */
1000 if (pVCpu->hm.s.Event.fPending)
1001 {
1002 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntrInfo,
1003 pVCpu->hm.s.Event.u32ErrCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
1004 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntReinject);
1005 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hm.s.Event.u64IntrInfo, 0, pVCpu->hm.s.Event.u32ErrCode);
1006 AssertRC(rc);
1007
1008 pVCpu->hm.s.Event.fPending = false;
1009 return VINF_SUCCESS;
1010 }
1011
1012 /*
1013 * If an active trap is already pending, we must forward it first!
1014 */
1015 if (!TRPMHasTrap(pVCpu))
1016 {
1017 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
1018 {
1019 RTGCUINTPTR intInfo;
1020
1021 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
1022
1023 intInfo = X86_XCPT_NMI;
1024 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1025 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1026
1027 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
1028 AssertRC(rc);
1029
1030 return VINF_SUCCESS;
1031 }
1032
1033 /** @todo SMI interrupts. */
1034
1035 /*
1036 * When external interrupts are pending, we should exit the VM when IF is set.
1037 */
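/* If the guest currently has interrupts disabled we arm interrupt-window
 * exiting below; the CPU then forces a VM-exit as soon as the guest can accept
 * interrupts again, at which point the pending interrupt is injected. */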
1038 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
1039 {
1040 if (!(pCtx->eflags.u32 & X86_EFL_IF))
1041 {
1042 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INT_WINDOW_EXIT))
1043 {
1044 LogFlow(("Enable irq window exit!\n"));
1045 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INT_WINDOW_EXIT;
1046 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
1047 AssertRC(rc);
1048 }
1049 /* else nothing to do but wait */
1050 }
1051 else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1052 {
1053 uint8_t u8Interrupt;
1054
1055 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
1056 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu,
1057 u8Interrupt, u8Interrupt, rc, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
1058 if (RT_SUCCESS(rc))
1059 {
1060 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
1061 AssertRC(rc);
1062 }
1063 else
1064 {
1065 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
1066 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
1067 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
1068 /* Just continue */
1069 }
1070 }
1071 else
1072 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
1073 }
1074 }
1075
1076#ifdef VBOX_STRICT
1077 if (TRPMHasTrap(pVCpu))
1078 {
1079 uint8_t u8Vector;
1080 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
1081 AssertRC(rc);
1082 }
1083#endif
1084
1085 if ( (pCtx->eflags.u32 & X86_EFL_IF)
1086 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1087 && TRPMHasTrap(pVCpu)
1088 )
1089 {
1090 uint8_t u8Vector;
1091 TRPMEVENT enmType;
1092 RTGCUINTPTR intInfo;
1093 RTGCUINT errCode;
1094
1095 /*
1096 * If a new event is pending, dispatch it now.
1097 */
1098 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
1099 AssertRC(rc);
1100 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
1101 Assert(enmType != TRPM_SOFTWARE_INT);
1102
1103 /*
1104 * Clear the pending trap.
1105 */
1106 rc = TRPMResetTrap(pVCpu);
1107 AssertRC(rc);
1108
1109 intInfo = u8Vector;
1110 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1111
1112 if (enmType == TRPM_TRAP)
1113 {
1114 switch (u8Vector)
1115 {
1116 case X86_XCPT_DF:
1117 case X86_XCPT_TS:
1118 case X86_XCPT_NP:
1119 case X86_XCPT_SS:
1120 case X86_XCPT_GP:
1121 case X86_XCPT_PF:
1122 case X86_XCPT_AC:
1123 {
1124 /** @todo r=ramshankar: setting this bit would blow up for real-mode guests with
1125 * unrestricted guest execution. */
1126 /* Valid error codes. */
1127 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1128 break;
1129 }
1130
1131 default:
1132 break;
1133 }
1134
1135 if ( u8Vector == X86_XCPT_BP
1136 || u8Vector == X86_XCPT_OF)
1137 {
1138 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1139 }
1140 else
1141 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1142 }
1143 else
1144 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1145
1146 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntInject);
1147 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1148 AssertRC(rc);
1149 } /* if (interrupts can be dispatched) */
1150
1151 return VINF_SUCCESS;
1152}
1153
1154/**
1155 * Checks for pending VMX events and converts them to TRPM. Before we execute any instruction
1156 * outside of VMX, any pending VMX event must be converted so that it can be delivered properly.
1157 *
1158 * @returns VBox status code.
1159 * @param pVCpu Pointer to the VMCPU.
1160 */
1161static int hmR0VmxCheckPendingEvent(PVMCPU pVCpu)
1162{
1163 if (pVCpu->hm.s.Event.fPending)
1164 {
1165 TRPMEVENT enmTrapType;
1166
1167 /* If a trap was already pending, we did something wrong! */
1168 Assert((TRPMQueryTrap(pVCpu, NULL, NULL) == VERR_TRPM_NO_ACTIVE_TRAP));
1169
1170 /*
1171 * Clear the pending event and move it over to TRPM for the rest
1172 * of the world to see.
1173 */
1174 pVCpu->hm.s.Event.fPending = false;
1175 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo))
1176 {
1177 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT:
1178 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI:
1179 enmTrapType = TRPM_HARDWARE_INT;
1180 break;
1181 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT:
1182 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /** @todo Is classifying #BP, #OF as TRPM_SOFTWARE_INT correct? */
1183 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DB_XCPT:
1184 enmTrapType = TRPM_SOFTWARE_INT;
1185 break;
1186 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT:
1187 enmTrapType = TRPM_TRAP;
1188 break;
1189 default:
1190 enmTrapType = TRPM_32BIT_HACK; /* Can't get here. */
1191 AssertFailed();
1192 }
1193 TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.u64IntrInfo), enmTrapType);
1194 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntrInfo))
1195 TRPMSetErrorCode(pVCpu, pVCpu->hm.s.Event.u32ErrCode);
1196 //@todo: Is there any situation where we need to call TRPMSetFaultAddress()?
1197 }
1198 return VINF_SUCCESS;
1199}
1200
1201/**
1202 * Save the host state into the VMCS.
1203 *
1204 * @returns VBox status code.
1205 * @param pVM Pointer to the VM.
1206 * @param pVCpu Pointer to the VMCPU.
1207 */
1208VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1209{
1210 int rc = VINF_SUCCESS;
1211 NOREF(pVM);
1212
1213 /*
1214 * Host CPU Context.
1215 */
1216 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
1217 {
1218 RTIDTR idtr;
1219 RTGDTR gdtr;
1220 RTSEL SelTR;
1221 PCX86DESCHC pDesc;
1222 uintptr_t trBase;
1223 RTSEL cs;
1224 RTSEL ss;
1225 uint64_t cr3;
1226
1227 /*
1228 * Control registers.
1229 */
1230 rc = VMXWriteVmcs(VMX_VMCS_HOST_CR0, ASMGetCR0());
1231 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1232#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1233 if (VMX_IS_64BIT_HOST_MODE())
1234 {
1235 cr3 = hmR0Get64bitCR3();
1236 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_CR3, cr3);
1237 }
1238 else
1239#endif
1240 {
1241 cr3 = ASMGetCR3();
1242 rc |= VMXWriteVmcs(VMX_VMCS_HOST_CR3, cr3);
1243 }
1244 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1245 rc |= VMXWriteVmcs(VMX_VMCS_HOST_CR4, ASMGetCR4());
1246 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1247 AssertRC(rc);
1248
1249 /*
1250 * Selector registers.
1251 */
1252#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1253 if (VMX_IS_64BIT_HOST_MODE())
1254 {
1255 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1256 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1257 }
1258 else
1259 {
1260 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1261 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1262 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1263 }
1264#else
1265 cs = ASMGetCS();
1266 ss = ASMGetSS();
1267#endif
1268 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1269 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1270 rc = VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_CS, cs);
1271 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1272 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_DS, 0);
1273 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_ES, 0);
1274#if HC_ARCH_BITS == 32
1275 if (!VMX_IS_64BIT_HOST_MODE())
1276 {
1277 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_FS, 0);
1278 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_GS, 0);
1279 }
1280#endif
1281 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_SS, ss);
1282 SelTR = ASMGetTR();
1283 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1284 AssertRC(rc);
1285 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1286 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1287 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1288 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1289 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1290 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1291 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1292
1293 /*
1294 * GDTR & IDTR.
1295 */
1296#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1297 if (VMX_IS_64BIT_HOST_MODE())
1298 {
1299 X86XDTR64 gdtr64, idtr64;
1300 hmR0Get64bitGdtrAndIdtr(&gdtr64, &idtr64);
1301 rc = VMXWriteVmcs64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1302 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1303 AssertRC(rc);
1304 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1305 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1306 gdtr.cbGdt = gdtr64.cb;
1307 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1308 }
1309 else
1310#endif
1311 {
1312 ASMGetGDTR(&gdtr);
1313 rc = VMXWriteVmcs(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1314 ASMGetIDTR(&idtr);
1315 rc |= VMXWriteVmcs(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1316 AssertRC(rc);
1317 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1318 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1319 }
1320
1321 /*
1322 * Save the base address of the TR selector.
1323 */
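/* There is no instruction that reads the TR base directly, so it has to be
 * fetched from the TSS descriptor in the GDT that the TR selector refers to. */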
1324 if (SelTR > gdtr.cbGdt)
1325 {
1326 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1327 return VERR_VMX_INVALID_HOST_STATE;
1328 }
1329
1330 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1331#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1332 if (VMX_IS_64BIT_HOST_MODE())
1333 {
1334 uint64_t trBase64 = X86DESC64_BASE((PX86DESC64)pDesc);
1335 rc = VMXWriteVmcs64(VMX_VMCS_HOST_TR_BASE, trBase64);
1336 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1337 AssertRC(rc);
1338 }
1339 else
1340#endif
1341 {
1342#if HC_ARCH_BITS == 64
1343 trBase = X86DESC64_BASE(pDesc);
1344#else
1345 trBase = X86DESC_BASE(pDesc);
1346#endif
1347 rc = VMXWriteVmcs(VMX_VMCS_HOST_TR_BASE, trBase);
1348 AssertRC(rc);
1349 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1350 }
1351
1352 /*
1353 * FS base and GS base.
1354 */
1355#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1356 if (VMX_IS_64BIT_HOST_MODE())
1357 {
1358 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1359 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1360 rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1361 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1362 }
1363#endif
1364 AssertRC(rc);
1365
1366 /*
1367 * Sysenter MSRs.
1368 */
1369 /** @todo expensive!! */
1370 rc = VMXWriteVmcs(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1371 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1372#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1373 if (VMX_IS_64BIT_HOST_MODE())
1374 {
1375 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1376 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1377 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1378 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1379 }
1380 else
1381 {
1382 rc |= VMXWriteVmcs(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1383 rc |= VMXWriteVmcs(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1384 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1385 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1386 }
1387#elif HC_ARCH_BITS == 32
1388 rc |= VMXWriteVmcs(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1389 rc |= VMXWriteVmcs(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1390 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1391 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1392#else
1393 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1394 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1395 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1396 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1397#endif
1398 AssertRC(rc);
1399
1400
1401#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1402 /*
1403 * Store all host MSRs in the VM-Exit load area, so they will be reloaded after
1404 * the world switch back to the host.
1405 */
1406 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvHostMsr;
1407 unsigned idxMsr = 0;
1408
1409 uint32_t u32HostExtFeatures = ASMCpuId_EDX(0x80000001);
1410 if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
1411 {
1412 pMsr->u32IndexMSR = MSR_K6_EFER;
1413 pMsr->u32Reserved = 0;
1414# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1415 if (CPUMIsGuestInLongMode(pVCpu))
1416 {
1417 /* Must match the EFER value in our 64 bits switcher. */
1418 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1419 }
1420 else
1421# endif
1422 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1423 pMsr++; idxMsr++;
1424 }
1425
1426# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1427 if (VMX_IS_64BIT_HOST_MODE())
1428 {
1429 pMsr->u32IndexMSR = MSR_K6_STAR;
1430 pMsr->u32Reserved = 0;
1431 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1432 pMsr++; idxMsr++;
1433 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1434 pMsr->u32Reserved = 0;
1435 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1436 pMsr++; idxMsr++;
1437 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1438 pMsr->u32Reserved = 0;
1439 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1440 pMsr++; idxMsr++;
1441
1442 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
1443#if 0
1444 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1445 pMsr->u32Reserved = 0;
1446 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1447 pMsr++; idxMsr++;
1448#endif
1449 }
1450# endif
1451
1452 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
1453 {
1454 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
1455 pMsr->u32Reserved = 0;
1456 pMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX);
1457 pMsr++; idxMsr++;
1458 }
1459
1460 /** @todo r=ramshankar: check IA32_VMX_MISC bits 27:25 for valid idxMsr
1461 * range. */
1462 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1463 AssertRC(rc);
1464#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1465
1466 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_HOST_CONTEXT;
1467 }
1468 return rc;
1469}
1470
1471
1472/**
1473 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1474 * guest operates in PAE mode.
1475 *
1476 * @returns VBox status code.
1477 * @param pVCpu Pointer to the VMCPU.
1478 * @param pCtx Pointer to the guest CPU context.
1479 */
1480static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1481{
1482 if (CPUMIsGuestInPAEModeEx(pCtx))
1483 {
1484 X86PDPE aPdpes[4];
1485 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1486 AssertRCReturn(rc, rc);
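/* With EPT and PAE guest paging the PDPTEs are loaded from these VMCS fields
 * on VM-entry rather than being re-read through guest CR3, so they have to be
 * kept in sync here. */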
1487
1488 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1489 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1490 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1491 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1492 }
1493 return VINF_SUCCESS;
1494}
1495
1496
1497/**
1498 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1499 * guest operates in PAE mode.
1500 *
1501 * @returns VBox status code.
1502 * @param pVCpu Pointer to the VM CPU.
1503 * @param pCtx Pointer to the guest CPU context.
1504 *
1505 * @remarks Tell PGM about CR3 changes before calling this helper.
1506 */
1507static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1508{
1509 if (CPUMIsGuestInPAEModeEx(pCtx))
1510 {
1511 int rc;
1512 X86PDPE aPdpes[4];
1513 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1514 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1515 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1516 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1517
1518 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1519 AssertRCReturn(rc, rc);
1520 }
1521 return VINF_SUCCESS;
1522}
1523
1524
1525/**
1526 * Update the exception bitmap according to the current CPU state.
1527 *
1528 * @param pVM Pointer to the VM.
1529 * @param pVCpu Pointer to the VMCPU.
1530 * @param pCtx Pointer to the guest CPU context.
1531 */
1532static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1533{
1534 uint32_t u32TrapMask;
1535 Assert(pCtx);
1536
1537 /*
1538 * Set up a mask for intercepting traps.
1539 */
1540 /** @todo Do we really need to always intercept #DB? */
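/* Bit n of the exception bitmap corresponds to exception vector n: a set bit
 * makes the guest exception cause a VM-exit, a clear bit lets it be delivered
 * through the guest IDT as usual. */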
1541 u32TrapMask = RT_BIT(X86_XCPT_DB)
1542 | RT_BIT(X86_XCPT_NM)
1543#ifdef VBOX_ALWAYS_TRAP_PF
1544 | RT_BIT(X86_XCPT_PF)
1545#endif
1546#ifdef VBOX_STRICT
1547 | RT_BIT(X86_XCPT_BP)
1548 | RT_BIT(X86_XCPT_DB)
1549 | RT_BIT(X86_XCPT_DE)
1550 | RT_BIT(X86_XCPT_NM)
1551 | RT_BIT(X86_XCPT_UD)
1552 | RT_BIT(X86_XCPT_NP)
1553 | RT_BIT(X86_XCPT_SS)
1554 | RT_BIT(X86_XCPT_GP)
1555 | RT_BIT(X86_XCPT_MF)
1556#endif
1557 ;
1558
1559 /*
1560 * Without nested paging, #PF must be intercepted to implement shadow paging.
1561 */
1562 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1563 if (!pVM->hm.s.fNestedPaging)
1564 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1565
1566 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
1567 if (!(pCtx->cr0 & X86_CR0_NE))
1568 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1569
1570#ifdef VBOX_STRICT
1571 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1572#endif
1573
1574 /*
1575 * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise).
1576 */
1577 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1578 if ( CPUMIsGuestInRealModeEx(pCtx)
1579 && pVM->hm.s.vmx.pRealModeTSS)
1580 {
1581 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1582 | RT_BIT(X86_XCPT_DB)
1583 | RT_BIT(X86_XCPT_NMI)
1584 | RT_BIT(X86_XCPT_BP)
1585 | RT_BIT(X86_XCPT_OF)
1586 | RT_BIT(X86_XCPT_BR)
1587 | RT_BIT(X86_XCPT_UD)
1588 | RT_BIT(X86_XCPT_DF)
1589 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1590 | RT_BIT(X86_XCPT_TS)
1591 | RT_BIT(X86_XCPT_NP)
1592 | RT_BIT(X86_XCPT_SS)
1593 | RT_BIT(X86_XCPT_GP)
1594 | RT_BIT(X86_XCPT_MF)
1595 | RT_BIT(X86_XCPT_AC)
1596 | RT_BIT(X86_XCPT_MC)
1597 | RT_BIT(X86_XCPT_XF)
1598 ;
1599 }
1600
1601 int rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1602 AssertRC(rc);
1603}
1604
1605
1606/**
1607 * Loads a minimal guest state.
1608 *
1609 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1610 *
1611 * @param pVM Pointer to the VM.
1612 * @param pVCpu Pointer to the VMCPU.
1613 * @param pCtx Pointer to the guest CPU context.
1614 */
1615VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1616{
1617 int rc;
1618 X86EFLAGS eflags;
1619
1620 Assert(!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_ALL_GUEST));
1621
1622 /*
1623 * Load EIP, ESP and EFLAGS.
1624 */
1625 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_RIP, pCtx->rip);
1626 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_RSP, pCtx->rsp);
1627 AssertRC(rc);
1628
1629 /*
1630 * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1.
1631 */
1632 eflags = pCtx->eflags;
1633 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1634 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1635
1636 /*
1637     * Check if we are emulating real mode using v86 mode.
1638 */
1639 if ( CPUMIsGuestInRealModeEx(pCtx)
1640 && pVM->hm.s.vmx.pRealModeTSS)
1641 {
1642 pVCpu->hm.s.vmx.RealMode.eflags = eflags;
1643
1644 eflags.Bits.u1VM = 1;
1645 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1646 }
1647 rc = VMXWriteVmcs(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1648 AssertRC(rc);
1649}
1650
1651
1652/**
1653 * Loads the guest state.
1654 *
1655 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1656 *
1657 * @returns VBox status code.
1658 * @param pVM Pointer to the VM.
1659 * @param pVCpu Pointer to the VMCPU.
1660 * @param pCtx Pointer to the guest CPU context.
1661 */
1662VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1663{
1664 int rc = VINF_SUCCESS;
1665 RTGCUINTPTR val;
1666
1667 /*
1668 * VMX_VMCS_CTRL_ENTRY_CONTROLS
1669 * Set required bits to one and zero according to the MSR capabilities.
1670 */
1671 val = pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0;
1672
1673 /*
1674 * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR).
1675 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs
1676 */
1677 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1678
1679 if (CPUMIsGuestInLongModeEx(pCtx))
1680 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA32E_MODE_GUEST;
1681 /* else Must be zero when AMD64 is not available. */
1682
1683 /*
1684 * Mask away the bits that the CPU doesn't support.
1685 */
1686 val &= pVM->hm.s.vmx.msr.vmx_entry.n.allowed1;
1687 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_CONTROLS, val);
1688 AssertRC(rc);
1689
1690 /*
1691 * VMX_VMCS_CTRL_EXIT_CONTROLS
1692 * Set required bits to one and zero according to the MSR capabilities.
1693 */
1694 val = pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0;
1695
1696 /*
1697 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR)
1698 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs
1699 */
1700 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1701
1702#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1703 if (VMX_IS_64BIT_HOST_MODE())
1704 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE;
1705 /* else Must be zero when AMD64 is not available. */
1706#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1707 if (CPUMIsGuestInLongModeEx(pCtx))
1708 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE; /* our switcher goes to long mode */
1709 else
1710 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE));
1711#endif
1712 val &= pVM->hm.s.vmx.msr.vmx_exit.n.allowed1;
1713
1714 /*
1715 * Don't acknowledge external interrupts on VM-exit.
1716 */
1717 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_CONTROLS, val);
1718 AssertRC(rc);
1719
1720 /*
1721 * Guest CPU context: ES, CS, SS, DS, FS, GS.
1722 */
1723 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SEGMENT_REGS)
1724 {
1725 if (pVM->hm.s.vmx.pRealModeTSS)
1726 {
1727 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1728 if (pVCpu->hm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1729 {
1730 /*
1731 * Correct weird requirements for switching to protected mode.
1732 */
1733 if ( pVCpu->hm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1734 && enmGuestMode >= PGMMODE_PROTECTED)
1735 {
1736#ifdef VBOX_WITH_REM
1737 /*
1738                     * Flush the recompiler code cache as it's not unlikely that the guest will rewrite code
1739                     * it will later execute in real mode (OpenBSD 4.0 is one such example).
1740 */
1741 REMFlushTBs(pVM);
1742#endif
1743
1744 /*
1745 * DPL of all hidden selector registers must match the current CPL (0).
1746 */
1747 pCtx->cs.Attr.n.u2Dpl = 0;
1748 pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1749
1750 pCtx->ds.Attr.n.u2Dpl = 0;
1751 pCtx->es.Attr.n.u2Dpl = 0;
1752 pCtx->fs.Attr.n.u2Dpl = 0;
1753 pCtx->gs.Attr.n.u2Dpl = 0;
1754 pCtx->ss.Attr.n.u2Dpl = 0;
1755 }
1756 pVCpu->hm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1757 }
1758 }
1759
1760 VMX_WRITE_SELREG(ES, es);
1761 AssertRC(rc);
1762
1763 VMX_WRITE_SELREG(CS, cs);
1764 AssertRC(rc);
1765
1766 VMX_WRITE_SELREG(SS, ss);
1767 AssertRC(rc);
1768
1769 VMX_WRITE_SELREG(DS, ds);
1770 AssertRC(rc);
1771
1772 VMX_WRITE_SELREG(FS, fs);
1773 AssertRC(rc);
1774
1775 VMX_WRITE_SELREG(GS, gs);
1776 AssertRC(rc);
1777 }
1778
1779 /*
1780 * Guest CPU context: LDTR.
1781 */
1782 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_LDTR)
1783 {
1784 if (pCtx->ldtr.Sel == 0)
1785 {
1786 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1787 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1788            rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_LDTR_BASE, 0);                  /** @todo Using the non-64 VMXWriteVmcs here should work the same. */
1789 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1790 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1791 }
1792 else
1793 {
1794 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr.Sel);
1795 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit);
1796            rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtr.u64Base); /** @todo Using the non-64 VMXWriteVmcs here should work the same. */
1797 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtr.Attr.u);
1798 }
1799 AssertRC(rc);
1800 }
1801
1802 /*
1803 * Guest CPU context: TR.
1804 */
1805 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_TR)
1806 {
1807 /*
1808 * Real mode emulation using v86 mode with CR4.VME (interrupt redirection
1809 * using the int bitmap in the TSS).
1810 */
1811 if ( CPUMIsGuestInRealModeEx(pCtx)
1812 && pVM->hm.s.vmx.pRealModeTSS)
1813 {
1814 RTGCPHYS GCPhys;
1815
1816 /* We convert it here every time as PCI regions could be reconfigured. */
1817 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
1818 AssertRC(rc);
1819
1820 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_TR, 0);
1821 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_TR_LIMIT, HM_VTX_TSS_SIZE);
1822 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1823
1824 X86DESCATTR attr;
1825
1826 attr.u = 0;
1827 attr.n.u1Present = 1;
1828 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1829 val = attr.u;
1830 }
1831 else
1832 {
1833 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr.Sel);
1834 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->tr.u32Limit);
1835 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_TR_BASE, pCtx->tr.u64Base);
1836
1837 val = pCtx->tr.Attr.u;
1838
1839 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
1840 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1841 {
1842 if (val & 0xf)
1843 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1844 else
1845 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1846 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1847 }
1848 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY,
1849 ("%#x\n", val));
1850 }
1851 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1852 AssertRC(rc);
1853 }
1854
1855 /*
1856 * Guest CPU context: GDTR.
1857 */
1858 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_GDTR)
1859 {
1860 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1861 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1862 AssertRC(rc);
1863 }
1864
1865 /*
1866 * Guest CPU context: IDTR.
1867 */
1868 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_IDTR)
1869 {
1870 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1871 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1872 AssertRC(rc);
1873 }
1874
1875 /*
1876 * Sysenter MSRs.
1877 */
1878 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_MSR)
1879 {
1880 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1881 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1882 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1883 AssertRC(rc);
1884 }
1885
1886 /*
1887 * Guest CPU context: Control registers.
1888 */
1889 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0)
1890 {
1891 val = pCtx->cr0;
1892 rc = VMXWriteVmcs(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1893 Log2(("Guest CR0-shadow %08x\n", val));
1894 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1895 {
1896 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1897 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1898 }
1899 else
1900 {
1901 /** @todo check if we support the old style mess correctly. */
1902 if (!(val & X86_CR0_NE))
1903 Log(("Forcing X86_CR0_NE!!!\n"));
1904
1905 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1906 }
1907 /* Protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1908 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
1909 val |= X86_CR0_PE | X86_CR0_PG;
1910
1911 if (pVM->hm.s.fNestedPaging)
1912 {
1913 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1914 {
1915 /* Disable CR3 read/write monitoring as we don't need it for EPT. */
1916 pVCpu->hm.s.vmx.u32ProcCtls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1917 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1918 }
1919 else
1920 {
1921 /* Reenable CR3 read/write monitoring as our identity mapped page table is active. */
1922 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1923 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1924 }
1925 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
1926 AssertRC(rc);
1927 }
1928 else
1929 {
1930 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1931 val |= X86_CR0_WP;
1932 }
1933
1934 /* Always enable caching. */
1935 val &= ~(X86_CR0_CD|X86_CR0_NW);
1936
1937 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_CR0, val);
1938 Log2(("Guest CR0 %08x\n", val));
1939
1940 /*
1941         * CR0 flags owned by the host; if the guest attempts to change them, then the VM will exit.
1942 */
1943 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1944 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1945 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1946 | X86_CR0_CD /* Bit not restored during VM-exit! */
1947 | X86_CR0_NW /* Bit not restored during VM-exit! */
1948 | X86_CR0_NE;
1949
1950 /*
1951 * When the guest's FPU state is active, then we no longer care about the FPU related bits.
1952 */
1953 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1954 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1955
1956 pVCpu->hm.s.vmx.cr0_mask = val;
1957
1958 rc |= VMXWriteVmcs(VMX_VMCS_CTRL_CR0_MASK, val);
1959 Log2(("Guest CR0-mask %08x\n", val));
1960 AssertRC(rc);
1961 }
1962
1963 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR4)
1964 {
1965 rc = VMXWriteVmcs(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1966 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1967 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1968 val = pCtx->cr4 | (uint32_t)pVM->hm.s.vmx.msr.vmx_cr4_fixed0;
1969
1970 if (!pVM->hm.s.fNestedPaging)
1971 {
1972 switch (pVCpu->hm.s.enmShadowMode)
1973 {
1974 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1975 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1976 case PGMMODE_32_BIT: /* 32-bit paging. */
1977 val &= ~X86_CR4_PAE;
1978 break;
1979
1980 case PGMMODE_PAE: /* PAE paging. */
1981 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1982                    /* Must use PAE paging as we could be using physical memory > 4 GB. */
1983 val |= X86_CR4_PAE;
1984 break;
1985
1986 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1987 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1988#ifdef VBOX_ENABLE_64_BITS_GUESTS
1989 break;
1990#else
1991 AssertFailed();
1992 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1993#endif
1994 default: /* shut up gcc */
1995 AssertFailed();
1996 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1997 }
1998 }
1999 else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
2000 && !pVM->hm.s.vmx.fUnrestrictedGuest)
2001 {
2002 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
2003 val |= X86_CR4_PSE;
2004            /* Our identity mapping is a 32-bit page directory. */
2005 val &= ~X86_CR4_PAE;
2006 }
2007
2008 /*
2009 * Turn off VME if we're in emulated real mode.
2010 */
2011 if ( CPUMIsGuestInRealModeEx(pCtx)
2012 && pVM->hm.s.vmx.pRealModeTSS)
2013 {
2014 val &= ~X86_CR4_VME;
2015 }
2016
2017 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_CR4, val);
2018 Log2(("Guest CR4 %08x\n", val));
2019
2020 /*
2021         * CR4 flags owned by the host; if the guest attempts to change them, then the VM will exit.
2022 */
2023 val = 0
2024 | X86_CR4_VME
2025 | X86_CR4_PAE
2026 | X86_CR4_PGE
2027 | X86_CR4_PSE
2028 | X86_CR4_VMXE;
2029 pVCpu->hm.s.vmx.cr4_mask = val;
2030
2031 rc |= VMXWriteVmcs(VMX_VMCS_CTRL_CR4_MASK, val);
2032 Log2(("Guest CR4-mask %08x\n", val));
2033 AssertRC(rc);
2034 }
2035
2036#if 0
2037 /* Enable single stepping if requested and CPU supports it. */
2038 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
2039 if (DBGFIsStepping(pVCpu))
2040 {
2041 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
2042 rc = VMXWriteVmcs(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2043 AssertRC(rc);
2044 }
2045#endif
2046
2047 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR3)
2048 {
2049 if (pVM->hm.s.fNestedPaging)
2050 {
2051 Assert(PGMGetHyperCR3(pVCpu));
2052 pVCpu->hm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
2053
2054 Assert(!(pVCpu->hm.s.vmx.GCPhysEPTP & 0xfff));
2055 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
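            /* EPTP layout: bits 2:0 hold the EPT memory type (6 = write-back), bits 5:3 the
               page-walk length minus one, and the high bits the 4K-aligned physical address
               of the EPT PML4 table. */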
2056 pVCpu->hm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
2057 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
2058
2059 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.GCPhysEPTP);
2060 AssertRC(rc);
2061
2062 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
2063 && !pVM->hm.s.vmx.fUnrestrictedGuest)
2064 {
2065 RTGCPHYS GCPhys;
2066
2067 /* We convert it here every time as PCI regions could be reconfigured. */
2068 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
2069 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hm.s.vmx.pNonPagingModeEPTPageTable));
2070
2071 /*
2072 * We use our identity mapping page table here as we need to map guest virtual to
2073 * guest physical addresses; EPT will take care of the translation to host physical addresses.
2074 */
2075 val = GCPhys;
2076 }
2077 else
2078 {
2079 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
2080 val = pCtx->cr3;
2081 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
2082 AssertRCReturn(rc, rc);
2083 }
2084 }
2085 else
2086 {
2087 val = PGMGetHyperCR3(pVCpu);
2088 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
2089 }
2090
2091 /* Save our shadow CR3 register. */
2092 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_CR3, val);
2093 AssertRC(rc);
2094 }
2095
2096 /*
2097 * Guest CPU context: Debug registers.
2098 */
2099 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_DEBUG)
2100 {
2101 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
2102 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
2103
2104 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2105 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2106 pCtx->dr[7] |= 0x400; /* must be one */
2107
2108 /* Resync DR7 */
2109 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
2110 AssertRC(rc);
2111
2112#ifdef DEBUG
2113 /* Sync the hypervisor debug state now if any breakpoint is armed. */
2114 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
2115 && !CPUMIsHyperDebugStateActive(pVCpu)
2116 && !DBGFIsStepping(pVCpu))
2117 {
2118 /* Save the host and load the hypervisor debug state. */
2119 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2120 AssertRC(rc);
2121
2122 /* DRx intercepts remain enabled. */
2123
2124 /* Override dr7 with the hypervisor value. */
2125 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
2126 AssertRC(rc);
2127 }
2128 else
2129#endif
2130 /* Sync the debug state now if any breakpoint is armed. */
2131 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
2132 && !CPUMIsGuestDebugStateActive(pVCpu)
2133 && !DBGFIsStepping(pVCpu))
2134 {
2135 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
2136
2137 /* Disable DRx move intercepts. */
2138 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2139 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2140 AssertRC(rc);
2141
2142 /* Save the host and load the guest debug state. */
2143 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2144 AssertRC(rc);
2145 }
2146
2147 /* IA32_DEBUGCTL MSR. */
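        /* A zero value is loaded on VM-entry (the load-debug-controls entry control is set);
           presumably LBR/BTF debug features aren't exposed to the guest here. */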
2148 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0);
2149 AssertRC(rc);
2150
2151 /** @todo do we really ever need this? */
2152 rc |= VMXWriteVmcs(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0);
2153 AssertRC(rc);
2154 }
2155
2156 /*
2157 * 64-bit guest mode.
2158 */
2159 if (CPUMIsGuestInLongModeEx(pCtx))
2160 {
2161#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
2162 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2163#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2164 pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
2165#else
2166# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2167 if (!pVM->hm.s.fAllow64BitGuests)
2168 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2169# endif
2170 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64;
2171#endif
2172 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_MSR)
2173 {
2174 /* Update these as wrmsr might have changed them. */
2175 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_FS_BASE, pCtx->fs.u64Base);
2176 AssertRC(rc);
2177 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_GS_BASE, pCtx->gs.u64Base);
2178 AssertRC(rc);
2179 }
2180 }
2181 else
2182 {
2183 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
2184 }
2185
2186 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
2187
2188#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2189 /*
2190 * Store all guest MSRs in the VM-entry load area, so they will be loaded
2191 * during VM-entry and restored into the VM-exit store area during VM-exit.
2192 */
2193 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvGuestMsr;
2194 unsigned idxMsr = 0;
2195
2196 uint32_t u32GstExtFeatures;
2197 uint32_t u32Temp;
2198 CPUMGetGuestCpuId(pVCpu, 0x80000001, &u32Temp, &u32Temp, &u32Temp, &u32GstExtFeatures);
2199
2200 if (u32GstExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2201 {
2202 pMsr->u32IndexMSR = MSR_K6_EFER;
2203 pMsr->u32Reserved = 0;
2204 pMsr->u64Value = pCtx->msrEFER;
2205 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
2206 if (!CPUMIsGuestInLongModeEx(pCtx))
2207 pMsr->u64Value &= ~(MSR_K6_EFER_LMA | MSR_K6_EFER_LME);
2208 pMsr++; idxMsr++;
2209
2210 if (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)
2211 {
2212 pMsr->u32IndexMSR = MSR_K8_LSTAR;
2213 pMsr->u32Reserved = 0;
2214 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
2215 pMsr++; idxMsr++;
2216 pMsr->u32IndexMSR = MSR_K6_STAR;
2217 pMsr->u32Reserved = 0;
2218 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
2219 pMsr++; idxMsr++;
2220 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
2221 pMsr->u32Reserved = 0;
2222 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
2223 pMsr++; idxMsr++;
2224
2225 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2226#if 0
2227 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
2228 pMsr->u32Reserved = 0;
2229 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
2230 pMsr++; idxMsr++;
2231#endif
2232 }
2233 }
2234
2235 if ( pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP
2236 && (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP))
2237 {
2238 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
2239 pMsr->u32Reserved = 0;
2240 rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pMsr->u64Value);
2241 AssertRC(rc);
2242 pMsr++; idxMsr++;
2243 }
2244
2245 pVCpu->hm.s.vmx.cGuestMsrs = idxMsr;
2246
2247 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
2248 AssertRC(rc);
2249
2250 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
2251 AssertRC(rc);
2252#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2253
2254#if 0 /* Temp move for testing. */
2255 bool fOffsettedTsc;
2256 if (pVM->hm.s.vmx.fUsePreemptTimer)
2257 {
2258 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset);
2259
2260 /* Make sure the returned values have sane upper and lower boundaries. */
2261 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
2262
2263 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
2264 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
2265
2266 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
2267 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
2268 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
2269 AssertRC(rc);
2270 }
2271 else
2272 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset);
2273
2274 if (fOffsettedTsc)
2275 {
2276 uint64_t u64CurTSC = ASMReadTSC();
2277 if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset > TMCpuTickGetLastSeen(pVCpu))
2278 {
2279 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
2280 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset);
2281 AssertRC(rc);
2282
2283 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2284 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2285 AssertRC(rc);
2286 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
2287 }
2288 else
2289 {
2290 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
2291 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
2292 pVCpu->hm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset,
2293 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hm.s.vmx.u64TSCOffset,
2294 TMCpuTickGet(pVCpu)));
2295 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2296 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2297 AssertRC(rc);
2298 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
2299 }
2300 }
2301 else
2302 {
2303 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2304 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2305 AssertRC(rc);
2306 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
2307 }
2308#endif
2309
2310 /* Done with the major changes */
2311 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_ALL_GUEST;
2312
2313 /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */
2314 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2315 return rc;
2316}
2317
2318
2319/**
2320 * Syncs back the guest state from VMCS.
2321 *
2322 * @returns VBox status code.
2323 * @param pVM Pointer to the VM.
2324 * @param pVCpu Pointer to the VMCPU.
2325 * @param pCtx Pointer to the guest CPU context.
2326 */
2327DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2328{
2329 RTGCUINTREG val, valShadow;
2330 RTGCUINTPTR uInterruptState;
2331 int rc;
2332
2333 /* First sync back EIP, ESP, and EFLAGS. */
2334 rc = VMXReadCachedVmcs(VMX_VMCS_GUEST_RIP, &val);
2335 AssertRC(rc);
2336 pCtx->rip = val;
2337 rc = VMXReadCachedVmcs(VMX_VMCS_GUEST_RSP, &val);
2338 AssertRC(rc);
2339 pCtx->rsp = val;
2340 rc = VMXReadCachedVmcs(VMX_VMCS_GUEST_RFLAGS, &val);
2341 AssertRC(rc);
2342 pCtx->eflags.u32 = val;
2343
2344    /* Take care of interrupt inhibition due to instruction fusing (sti, mov ss). */
2345 rc |= VMXReadCachedVmcs(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2346 uInterruptState = val;
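    /* Interruptibility-state encoding: bit 0 = blocking by STI, bit 1 = blocking by MOV SS/POP SS;
       the assertion below expects at most one of them to be set here. */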
2347 if (uInterruptState != 0)
2348 {
2349 Assert(uInterruptState <= 2); /* only sti & mov ss */
2350 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2351 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2352 }
2353 else
2354 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2355
2356 /* Control registers. */
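    /* Reconstruct the guest-visible CR0/CR4: bits set in our cr0/cr4 mask are host-owned and are
       taken from the read shadows, the remaining bits come from the VMCS guest fields. */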
2357 VMXReadCachedVmcs(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2358 VMXReadCachedVmcs(VMX_VMCS_GUEST_CR0, &val);
2359 val = (valShadow & pVCpu->hm.s.vmx.cr0_mask) | (val & ~pVCpu->hm.s.vmx.cr0_mask);
2360 CPUMSetGuestCR0(pVCpu, val);
2361
2362 VMXReadCachedVmcs(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2363 VMXReadCachedVmcs(VMX_VMCS_GUEST_CR4, &val);
2364 val = (valShadow & pVCpu->hm.s.vmx.cr4_mask) | (val & ~pVCpu->hm.s.vmx.cr4_mask);
2365 CPUMSetGuestCR4(pVCpu, val);
2366
2367 /*
2368 * No reason to sync back the CRx registers. They can't be changed by the guest unless in
2369 * the nested paging case where CR3 & CR4 can be changed by the guest.
2370 */
2371 if ( pVM->hm.s.fNestedPaging
2372 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2373 {
2374 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
2375
2376 /* Can be updated behind our back in the nested paging case. */
2377 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2378
2379 VMXReadCachedVmcs(VMX_VMCS_GUEST_CR3, &val);
2380
2381 if (val != pCtx->cr3)
2382 {
2383 CPUMSetGuestCR3(pVCpu, val);
2384 PGMUpdateCR3(pVCpu, val);
2385 }
2386 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2387 AssertRCReturn(rc, rc);
2388 }
2389
2390 /* Sync back DR7. */
2391 VMXReadCachedVmcs(VMX_VMCS_GUEST_DR7, &val);
2392 pCtx->dr[7] = val;
2393
2394 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2395 VMX_READ_SELREG(ES, es);
2396 VMX_READ_SELREG(SS, ss);
2397 VMX_READ_SELREG(CS, cs);
2398 VMX_READ_SELREG(DS, ds);
2399 VMX_READ_SELREG(FS, fs);
2400 VMX_READ_SELREG(GS, gs);
2401
2402 /* System MSRs */
2403 VMXReadCachedVmcs(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2404 pCtx->SysEnter.cs = val;
2405 VMXReadCachedVmcs(VMX_VMCS_GUEST_SYSENTER_EIP, &val);
2406 pCtx->SysEnter.eip = val;
2407 VMXReadCachedVmcs(VMX_VMCS_GUEST_SYSENTER_ESP, &val);
2408 pCtx->SysEnter.esp = val;
2409
2410 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2411 VMX_READ_SELREG(LDTR, ldtr);
2412
2413 VMXReadCachedVmcs(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2414 pCtx->gdtr.cbGdt = val;
2415 VMXReadCachedVmcs(VMX_VMCS_GUEST_GDTR_BASE, &val);
2416 pCtx->gdtr.pGdt = val;
2417
2418 VMXReadCachedVmcs(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2419 pCtx->idtr.cbIdt = val;
2420 VMXReadCachedVmcs(VMX_VMCS_GUEST_IDTR_BASE, &val);
2421 pCtx->idtr.pIdt = val;
2422
2423 /* Real mode emulation using v86 mode. */
2424 if ( CPUMIsGuestInRealModeEx(pCtx)
2425 && pVM->hm.s.vmx.pRealModeTSS)
2426 {
2427 /* Hide our emulation flags */
2428 pCtx->eflags.Bits.u1VM = 0;
2429
2430 /* Restore original IOPL setting as we always use 0. */
2431 pCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2432
2433 /* Force a TR resync every time in case we switch modes. */
2434 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_TR;
2435 }
2436 else
2437 {
2438 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2439 VMX_READ_SELREG(TR, tr);
2440 }
2441
2442#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2443 /*
2444 * Save the possibly changed MSRs that we automatically restore and save during a world switch.
2445 */
2446 for (unsigned i = 0; i < pVCpu->hm.s.vmx.cGuestMsrs; i++)
2447 {
2448 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvGuestMsr;
2449 pMsr += i;
2450
2451 switch (pMsr->u32IndexMSR)
2452 {
2453 case MSR_K8_LSTAR:
2454 pCtx->msrLSTAR = pMsr->u64Value;
2455 break;
2456 case MSR_K6_STAR:
2457 pCtx->msrSTAR = pMsr->u64Value;
2458 break;
2459 case MSR_K8_SF_MASK:
2460 pCtx->msrSFMASK = pMsr->u64Value;
2461 break;
2462 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2463#if 0
2464 case MSR_K8_KERNEL_GS_BASE:
2465 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2466 break;
2467#endif
2468 case MSR_K8_TSC_AUX:
2469 CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value);
2470 break;
2471
2472 case MSR_K6_EFER:
2473 /* EFER can't be changed without causing a VM-exit. */
2474 /* Assert(pCtx->msrEFER == pMsr->u64Value); */
2475 break;
2476
2477 default:
2478 AssertFailed();
2479 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2480 }
2481 }
2482#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2483 return VINF_SUCCESS;
2484}
2485
2486
2487/**
2488 * Dummy placeholder for TLB flush handling before VM-entry. Used in the case
2489 * where neither EPT nor VPID is supported by the CPU.
2490 *
2491 * @param pVM Pointer to the VM.
2492 * @param pVCpu Pointer to the VMCPU.
2493 */
2494static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2495{
2496 NOREF(pVM);
2497 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2498 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2499 pVCpu->hm.s.TlbShootdown.cPages = 0;
2500 return;
2501}
2502
2503
2504/**
2505 * Sets up the tagged TLB for EPT+VPID.
2506 *
2507 * @param pVM Pointer to the VM.
2508 * @param pVCpu Pointer to the VMCPU.
2509 */
2510static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2511{
2512 PHMGLOBLCPUINFO pCpu;
2513
2514 Assert(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid);
2515
2516 pCpu = HMR0GetCurrentCpu();
2517
2518 /*
2519     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2520     * This can happen both for start & resume due to long jumps back to ring-3.
2521     * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
2522     * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2523 */
2524 bool fNewAsid = false;
2525 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2526 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2527 {
2528 pVCpu->hm.s.fForceTLBFlush = true;
2529 fNewAsid = true;
2530 }
2531
2532 /*
2533 * Check for explicit TLB shootdowns.
2534 */
2535 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2536 pVCpu->hm.s.fForceTLBFlush = true;
2537
2538 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2539
2540 if (pVCpu->hm.s.fForceTLBFlush)
2541 {
2542 if (fNewAsid)
2543 {
2544 ++pCpu->uCurrentAsid;
2545 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2546 {
2547 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
2548 pCpu->cTlbFlushes++;
2549 pCpu->fFlushAsidBeforeUse = true;
2550 }
2551
2552 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2553 if (pCpu->fFlushAsidBeforeUse)
2554 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2555 }
2556 else
2557 {
2558 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2559 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2560 else
2561 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2562 }
2563
2564 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2565 pVCpu->hm.s.fForceTLBFlush = false;
2566 }
2567 else
2568 {
2569 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
2570 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
2571 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
2572 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
2573
2574 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2575 * not be executed. See hmQueueInvlPage() where it is commented
2576 * out. Support individual entry flushing someday. */
2577 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2578 {
2579 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
2580
2581 /*
2582 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2583 * as supported by the CPU.
2584 */
2585 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2586 {
2587 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
2588 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
2589 }
2590 else
2591 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2592 }
2593 else
2594 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2595 }
2596
2597 pVCpu->hm.s.TlbShootdown.cPages = 0;
2598 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2599
2600 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2601 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2602 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2603 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
2604 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2605 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2606
2607 /* Update VMCS with the VPID. */
2608 int rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2609 AssertRC(rc);
2610}
2611
2612
2613/**
2614 * Sets up the tagged TLB for EPT only.
2615 *
2617 * @param pVM Pointer to the VM.
2618 * @param pVCpu Pointer to the VMCPU.
2619 */
2620static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2621{
2622 PHMGLOBLCPUINFO pCpu;
2623
2624 Assert(pVM->hm.s.fNestedPaging);
2625 Assert(!pVM->hm.s.vmx.fVpid);
2626
2627 pCpu = HMR0GetCurrentCpu();
2628
2629 /*
2630     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2631     * This can happen both for start & resume due to long jumps back to ring-3.
2632     * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
2633 */
2634 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2635 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2636 {
2637 pVCpu->hm.s.fForceTLBFlush = true;
2638 }
2639
2640 /*
2641 * Check for explicit TLB shootdown flushes.
2642 */
2643 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2644 pVCpu->hm.s.fForceTLBFlush = true;
2645
2646 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2647 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2648
2649 if (pVCpu->hm.s.fForceTLBFlush)
2650 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2651 else
2652 {
2653 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2654 * not be executed. See hmQueueInvlPage() where it is commented
2655 * out. Support individual entry flushing someday. */
2656 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2657 {
2658 /*
2659 * We cannot flush individual entries without VPID support. Flush using EPT.
2660 */
2661 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
2662 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2663 }
2664 }
2665    pVCpu->hm.s.TlbShootdown.cPages = 0;
2666 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2667
2668#ifdef VBOX_WITH_STATISTICS
2669 /** @todo r=ramshankar: this is not accurate anymore with the VPID+EPT
2670 * handling. Should be fixed later. */
2671 if (pVCpu->hm.s.fForceTLBFlush)
2672 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2673 else
2674 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2675#endif
2676}
2677
2678
2679/**
2680 * Sets up the tagged TLB for VPID.
2681 *
2683 * @param pVM Pointer to the VM.
2684 * @param pVCpu Pointer to the VMCPU.
2685 */
2686static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2687{
2688 PHMGLOBLCPUINFO pCpu;
2689
2690 Assert(pVM->hm.s.vmx.fVpid);
2691 Assert(!pVM->hm.s.fNestedPaging);
2692
2693 pCpu = HMR0GetCurrentCpu();
2694
2695 /*
2696     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2697     * This can happen both for start & resume due to long jumps back to ring-3.
2698     * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
2699     * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2700 */
2701 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2702 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2703 {
2704 /* Force a TLB flush on VM entry. */
2705 pVCpu->hm.s.fForceTLBFlush = true;
2706 }
2707
2708 /*
2709 * Check for explicit TLB shootdown flushes.
2710 */
2711 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2712 pVCpu->hm.s.fForceTLBFlush = true;
2713
2714 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2715
2716 if (pVCpu->hm.s.fForceTLBFlush)
2717 {
2718 ++pCpu->uCurrentAsid;
2719 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2720 {
2721 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
2722 pCpu->cTlbFlushes++;
2723 pCpu->fFlushAsidBeforeUse = true;
2724 }
2725
2726 pVCpu->hm.s.fForceTLBFlush = false;
2727 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2728 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2729 if (pCpu->fFlushAsidBeforeUse)
2730 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2731 }
2732 else
2733 {
2734 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
2735 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
2736 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
2737 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
2738
2739 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2740 * not be executed. See hmQueueInvlPage() where it is commented
2741 * out. Support individual entry flushing someday. */
2742 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2743 {
2744 /*
2745 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2746 * as supported by the CPU.
2747 */
2748 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2749 {
2750 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
2751 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
2752 }
2753 else
2754 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2755 }
2756 }
2757 pVCpu->hm.s.TlbShootdown.cPages = 0;
2758 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2759
2760 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2761 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2762 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2763 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
2764 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2765 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2766
2767 int rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2768 AssertRC(rc);
2769
2770# ifdef VBOX_WITH_STATISTICS
2771 /** @todo r=ramshankar: this is not accurate anymore with EPT+VPID handling.
2772 * Should be fixed later. */
2773 if (pVCpu->hm.s.fForceTLBFlush)
2774 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2775 else
2776 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2777# endif
2778}
2779
2780
2781/**
2782 * Runs guest code in a VT-x VM.
2783 *
2784 * @returns VBox status code.
2785 * @param pVM Pointer to the VM.
2786 * @param pVCpu Pointer to the VMCPU.
2787 * @param pCtx Pointer to the guest CPU context.
2788 */
2789VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2790{
2791 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
2792 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1);
2793 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2);
2794
2795 VBOXSTRICTRC rc = VINF_SUCCESS;
2796 int rc2;
2797 RTGCUINTREG val;
2798 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2799 RTGCUINTREG instrError, cbInstr;
2800 RTGCUINTPTR exitQualification = 0;
2801 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2802 RTGCUINTPTR errCode, instrInfo;
2803 bool fSetupTPRCaching = false;
2804 uint64_t u64OldLSTAR = 0;
2805 uint8_t u8LastTPR = 0;
2806 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2807 unsigned cResume = 0;
2808#ifdef VBOX_STRICT
2809 RTCPUID idCpuCheck;
2810 bool fWasInLongMode = false;
2811#endif
2812#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2813 uint64_t u64LastTime = RTTimeMilliTS();
2814#endif
2815
2816 Assert(!(pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2817 || (pVCpu->hm.s.vmx.pbVirtApic && pVM->hm.s.vmx.pbApicAccess));
2818
2819 /*
2820 * Check if we need to use TPR shadowing.
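     * (64-bit guests need it for CR8 accesses; otherwise it is only worth setting up when the
     * virtual-APIC feature or TPR patching can be used and the guest has an I/O APIC configured.)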
2821 */
2822 if ( CPUMIsGuestInLongModeEx(pCtx)
2823 || ( (( pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2824 || pVM->hm.s.fTRPPatchingAllowed)
2825 && pVM->hm.s.fHasIoApic)
2826 )
2827 {
2828 fSetupTPRCaching = true;
2829 }
2830
2831 Log2(("\nE"));
2832
2833 /* This is not ideal, but if we don't clear the event injection in the VMCS right here,
2834 * we may end up injecting some stale event into a VM, including injecting an event that
2835 * originated before a VM reset *after* the VM has been reset. See @bugref{6220}.
2836 */
2837 VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0);
2838
2839#ifdef VBOX_STRICT
2840 {
2841 RTCCUINTREG val2;
2842
2843 rc2 = VMXReadVmcs(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, &val2);
2844 AssertRC(rc2);
2845 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2846
2847 /* allowed zero */
2848 if ((val2 & pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2849 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2850
2851 /* allowed one */
2852 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2853 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2854
2855 rc2 = VMXReadVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, &val2);
2856 AssertRC(rc2);
2857 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2858
2859 /*
2860 * Must be set according to the MSR, but can be cleared if nested paging is used.
2861 */
2862 if (pVM->hm.s.fNestedPaging)
2863 {
2864 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2865 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2866 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2867 }
2868
2869 /* allowed zero */
2870 if ((val2 & pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2871 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2872
2873 /* allowed one */
2874 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2875 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2876
2877 rc2 = VMXReadVmcs(VMX_VMCS32_CTRL_ENTRY_CONTROLS, &val2);
2878 AssertRC(rc2);
2879 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2880
2881 /* allowed zero */
2882 if ((val2 & pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0)
2883 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2884
2885 /* allowed one */
2886 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2887 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2888
2889 rc2 = VMXReadVmcs(VMX_VMCS32_CTRL_EXIT_CONTROLS, &val2);
2890 AssertRC(rc2);
2891 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2892
2893 /* allowed zero */
2894 if ((val2 & pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0)
2895 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2896
2897 /* allowed one */
2898 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2899 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2900 }
2901 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2902#endif /* VBOX_STRICT */
2903
2904#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2905 pVCpu->hm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2906#endif
2907
2908 /*
2909 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
2910 */
2911ResumeExecution:
2912 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hm.s.StatEntry))
2913 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit2, &pVCpu->hm.s.StatEntry, x);
2914 AssertMsg(pVCpu->hm.s.idEnteredCpu == RTMpCpuId(),
2915 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2916 (int)pVCpu->hm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2917 Assert(!HMR0SuspendPending());
2918 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2919 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2920
2921 /*
2922 * Safety precaution; looping for too long here can have a very bad effect on the host.
2923 */
2924 if (RT_UNLIKELY(++cResume > pVM->hm.s.cMaxResumeLoops))
2925 {
2926 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
2927 rc = VINF_EM_RAW_INTERRUPT;
2928 goto end;
2929 }
2930
2931 /*
2932 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
2933 */
2934 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2935 {
2936 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2937 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2938 {
2939 /*
2940 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2941             * Before we are able to execute this instruction in raw mode (iret to guest code), an external interrupt might
2942             * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process. This could
2943             * break the guest. It sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
2944 */
2945 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2946 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2947 rc2 = VMXWriteVmcs(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2948 AssertRC(rc2);
2949 }
2950 }
2951 else
2952 {
2953 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2954 rc2 = VMXWriteVmcs(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2955 AssertRC(rc2);
2956 }
2957
2958#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2959 if (RT_UNLIKELY((cResume & 0xf) == 0))
2960 {
2961 uint64_t u64CurTime = RTTimeMilliTS();
2962
2963 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2964 {
2965 u64LastTime = u64CurTime;
2966 TMTimerPollVoid(pVM, pVCpu);
2967 }
2968 }
2969#endif
2970
2971 /*
2972 * Check for pending actions that force us to go back to ring-3.
2973 */
2974 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2975 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2976 {
2977 /* Check if a sync operation is pending. */
2978 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2979 {
2980 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2981 if (rc != VINF_SUCCESS)
2982 {
2983 AssertRC(VBOXSTRICTRC_VAL(rc));
2984 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2985 goto end;
2986 }
2987 }
2988
2989#ifdef DEBUG
2990 /* Intercept X86_XCPT_DB if stepping is enabled */
2991 if (!DBGFIsStepping(pVCpu))
2992#endif
2993 {
2994 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK)
2995 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
2996 {
2997 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchToR3);
2998 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2999 goto end;
3000 }
3001 }
3002
3003 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
3004 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
3005 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
3006 {
3007 rc = VINF_EM_PENDING_REQUEST;
3008 goto end;
3009 }
3010
3011 /* Check if a pgm pool flush is in progress. */
3012 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
3013 {
3014 rc = VINF_PGM_POOL_FLUSH_PENDING;
3015 goto end;
3016 }
3017
3018 /* Check if DMA work is pending (2nd+ run). */
3019 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
3020 {
3021 rc = VINF_EM_RAW_TO_R3;
3022 goto end;
3023 }
3024 }
3025
3026#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3027 /*
3028 * Exit to ring-3 preemption/work is pending.
3029 *
3030 * Interrupts are disabled before the call to make sure we don't miss any interrupt
3031 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
3032 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
3033 *
3034     * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
3035 * shootdowns rely on this.
3036 */
3037 uOldEFlags = ASMIntDisableFlags();
3038 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
3039 {
3040 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptPending);
3041 rc = VINF_EM_RAW_INTERRUPT;
3042 goto end;
3043 }
3044 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3045#endif
3046
3047 /*
3048 * When external interrupts are pending, we should exit the VM when IF is set.
3049 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!
3050 */
3051 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
3052 if (RT_FAILURE(rc))
3053 goto end;
3054
3055 /** @todo check timers?? */
3056
3057 /*
3058 * TPR caching using CR8 is only available in 64-bit mode.
3059     * Note: AMD has a 32-bit exception (X86_CPUID_AMD_FEATURE_ECX_CR8L), but this appears to be missing on Intel CPUs.
3060     * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (No longer true.)
3061 */
3062    /** @todo Query and update the TPR only when it could have been changed (MMIO
3063     *        access & wrmsr (x2apic)). */
3064 if (fSetupTPRCaching)
3065 {
3066 /* TPR caching in CR8 */
3067 bool fPending;
3068
3069 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
3070 AssertRC(rc2);
3071 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
3072 pVCpu->hm.s.vmx.pbVirtApic[0x80] = u8LastTPR;
3073
3074 /*
3075 * Two options here:
3076 * - external interrupt pending, but masked by the TPR value.
3077         * -> a CR8 update that lowers the current TPR value should cause an exit
3078 * - no pending interrupts
3079         * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
3080 */
3081
3082 /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
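        /* A guest CR8 write that drops the TPR below this threshold causes a TPR-below-threshold
           VM-exit; with no interrupt pending we use 0 so that no such exits are taken. */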
3083 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0);
3084 AssertRC(VBOXSTRICTRC_VAL(rc));
3085
3086 if (pVM->hm.s.fTPRPatchingActive)
3087 {
3088 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3089 /* Our patch code uses LSTAR for TPR caching. */
3090 pCtx->msrLSTAR = u8LastTPR;
3091
3092 /** @todo r=ramshankar: we should check for MSR-bitmap support here. */
3093 if (fPending)
3094 {
3095 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
3096 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
3097 }
3098 else
3099 {
3100 /*
3101                 * No interrupts are pending, so we don't need to be explicitly notified.
3102 * There are enough world switches for detecting pending interrupts.
3103 */
3104 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
3105 }
3106 }
3107 }
3108
3109#ifdef LOG_ENABLED
3110 if ( pVM->hm.s.fNestedPaging
3111 || pVM->hm.s.vmx.fVpid)
3112 {
3113 PHMGLOBLCPUINFO pCpu = HMR0GetCurrentCpu();
3114 if (pVCpu->hm.s.idLastCpu != pCpu->idCpu)
3115 {
3116 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hm.s.idLastCpu,
3117 pCpu->idCpu));
3118 }
3119 else if (pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
3120 {
3121 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hm.s.cTlbFlushes,
3122 pCpu->cTlbFlushes));
3123 }
3124 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
3125 LogFlow(("Manual TLB flush\n"));
3126 }
3127#endif
3128#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3129 PGMRZDynMapFlushAutoSet(pVCpu);
3130#endif
3131
3132 /*
3133 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3!
3134 * (until the actual world switch)
3135 */
3136#ifdef VBOX_STRICT
3137 idCpuCheck = RTMpCpuId();
3138#endif
3139#ifdef LOG_ENABLED
3140 VMMR0LogFlushDisable(pVCpu);
3141#endif
3142
3143 /*
3144 * Save the host state first.
3145 */
3146 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
3147 {
3148 rc = VMXR0SaveHostState(pVM, pVCpu);
3149 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3150 {
3151 VMMR0LogFlushEnable(pVCpu);
3152 goto end;
3153 }
3154 }
3155
3156 /*
3157 * Load the guest state.
3158 */
3159 if (!pVCpu->hm.s.fContextUseFlags)
3160 {
3161 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
3162 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal);
3163 }
3164 else
3165 {
3166 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
3167 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3168 {
3169 VMMR0LogFlushEnable(pVCpu);
3170 goto end;
3171 }
3172 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
3173 }
3174
3175#if 1 /* Moved for testing. */
3176 bool fOffsettedTsc;
3177 if (pVM->hm.s.vmx.fUsePreemptTimer)
3178 {
3179 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset);
3180
3181 /* Make sure the returned values have sane upper and lower boundaries. */
3182 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
3183
3184 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
3185 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
3186
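        /* The VMX preemption timer counts down at the TSC rate divided by 2^cPreemptTimerShift
           (the shift presumably comes from MSR_IA32_VMX_MISC[4:0]), hence the scaling below. */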
3187 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
3188 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
3189 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
3190 AssertRC(VBOXSTRICTRC_VAL(rc));
3191 }
3192 else
3193 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset);
3194
3195 if (fOffsettedTsc)
3196 {
3197 uint64_t u64CurTSC = ASMReadTSC();
3198 if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset > TMCpuTickGetLastSeen(pVCpu))
3199 {
3200 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
3201 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset);
3202 AssertRC(VBOXSTRICTRC_VAL(rc));
3203
3204 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
3205 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
3206 AssertRC(VBOXSTRICTRC_VAL(rc));
3207 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
3208 }
3209 else
3210 {
3211 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
3212 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
3213 pVCpu->hm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset,
3214 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hm.s.vmx.u64TSCOffset,
3215 TMCpuTickGet(pVCpu)));
3216 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
3217 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
3218 AssertRC(VBOXSTRICTRC_VAL(rc));
3219 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
3220 }
3221 }
3222 else
3223 {
3224 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
3225 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
3226 AssertRC(VBOXSTRICTRC_VAL(rc));
3227 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
3228 }
3229#endif
3230
3231#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3232 /*
3233 * Disable interrupts to make sure a poke will interrupt execution.
3234 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
3235 */
3236 uOldEFlags = ASMIntDisableFlags();
3237 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3238#endif
3239
3240    /* Non-register guest-context state. */
3241 /** @todo change me according to cpu state */
3242 rc2 = VMXWriteVmcs(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_VMCS_GUEST_ACTIVITY_ACTIVE);
3243 AssertRC(rc2);
3244
3245 /* Set TLB flush state as checked until we return from the world switch. */
3246 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);
3247 /* Deal with tagged TLB setup and invalidation. */
3248 pVM->hm.s.vmx.pfnFlushTaggedTlb(pVM, pVCpu);
3249
3250 /*
3251 * Manual save and restore:
3252 * - General purpose registers except RIP, RSP
3253 *
3254 * Trashed:
3255 * - CR2 (we don't care)
3256 * - LDTR (reset to 0)
3257 * - DRx (presumably not changed at all)
3258 * - DR7 (reset to 0x400)
3259 * - EFLAGS (reset to RT_BIT(1); not relevant)
3260 */
3261
3262 /* All done! Let's start VM execution. */
3263 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
3264 Assert(idCpuCheck == RTMpCpuId());
3265
3266#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3267 pVCpu->hm.s.vmx.VMCSCache.cResume = cResume;
3268 pVCpu->hm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
3269#endif
3270
3271 /*
3272 * Save the current TPR value in the LSTAR MSR so our patches can access it.
3273 */
3274 if (pVM->hm.s.fTPRPatchingActive)
3275 {
3276 Assert(pVM->hm.s.fTPRPatchingActive);
3277 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3278 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
3279 }
3280
3281 TMNotifyStartOfExecution(pVCpu);
3282
3283#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3284 /*
3285 * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that
3286     * RDTSCP instructions (that don't cause exits) read the guest MSR. See @bugref{3324}.
3287 */
3288 if ( (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3289 && !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3290 {
3291 pVCpu->hm.s.u64HostTSCAux = ASMRdMsr(MSR_K8_TSC_AUX);
3292 uint64_t u64GuestTSCAux = 0;
3293 rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTSCAux);
3294 AssertRC(rc2);
3295 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTSCAux);
3296 }
3297#endif
3298
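    /* The actual world switch into the guest is done by the assembly helper (pfnStartVM). With
       VBOX_WITH_KERNEL_USING_XMM the call goes through a wrapper which, presumably, saves and restores the
       host XMM state around the switch since the kernel may be using those registers. */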
3299#ifdef VBOX_WITH_KERNEL_USING_XMM
3300 rc = hmR0VMXStartVMWrapXMM(pVCpu->hm.s.fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM);
3301#else
3302 rc = pVCpu->hm.s.vmx.pfnStartVM(pVCpu->hm.s.fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu);
3303#endif
3304 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false);
3305 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits);
3306
3307    /* Possibly the last TSC value seen by the guest (it may be too high); only relevant when we're in TSC-offsetting mode. */
3308 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3309 {
3310#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3311 /* Restore host's TSC_AUX. */
3312 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3313 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTSCAux);
3314#endif
3315
3316 TMCpuTickSetLastSeen(pVCpu,
3317                         ASMReadTSC() + pVCpu->hm.s.vmx.u64TSCOffset - 0x400 /* guesstimate of world switch overhead in clock ticks */);
3318 }
3319
3320 TMNotifyEndOfExecution(pVCpu);
3321 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
3322 Assert(!(ASMGetFlags() & X86_EFL_IF));
3323
3324 /*
3325 * Restore the host LSTAR MSR if the guest could have changed it.
3326 */
3327 if (pVM->hm.s.fTPRPatchingActive)
3328 {
3329 Assert(pVM->hm.s.fTPRPatchingActive);
3330 pVCpu->hm.s.vmx.pbVirtApic[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3331 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
3332 }
3333
3334 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
3335 ASMSetFlags(uOldEFlags);
3336#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3337 uOldEFlags = ~(RTCCUINTREG)0;
3338#endif
3339
3340 AssertMsg(!pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries=%d\n",
3341 pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries));
3342
3343 /* In case we execute a goto ResumeExecution later on. */
3344 pVCpu->hm.s.fResumeVM = true;
3345 pVCpu->hm.s.fForceTLBFlush = false;
3346
3347 /*
3348 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3349 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
3350 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3351 */
3352
3353 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3354 {
3355 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
3356 VMMR0LogFlushEnable(pVCpu);
3357 goto end;
3358 }
3359
3360 /* Success. Query the guest state and figure out what has happened. */
3361
3362 /* Investigate why there was a VM-exit. */
3363 rc2 = VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3364 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
3365
3366 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
3367 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3368 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
3369 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
3370 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
3371 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &errCode);
3372 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
3373 rc2 |= VMXReadCachedVmcs(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
3374 AssertRC(rc2);
3375
3376 /*
3377 * Sync back the guest state.
3378 */
3379 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
3380 AssertRC(rc2);
3381
3382 /* Note! NOW IT'S SAFE FOR LOGGING! */
3383 VMMR0LogFlushEnable(pVCpu);
3384 Log2(("Raw exit reason %08x\n", exitReason));
3385#if ARCH_BITS == 64 /* for the time being */
3386 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
3387#endif
3388
3389 /*
3390 * Check if an injected event was interrupted prematurely.
3391 */
3392 rc2 = VMXReadCachedVmcs(VMX_VMCS32_RO_IDT_INFO, &val);
3393 AssertRC(rc2);
3394 pVCpu->hm.s.Event.u64IntrInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
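    /* The IDT-vectoring information field tells us whether this exit interrupted the delivery of an event through
       the IDT; the conversion above rewrites it into the VM-entry interruption-information format so the event can
       be re-injected on the next VM-entry. */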
3395 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hm.s.Event.u64IntrInfo)
3396 /* Ignore 'int xx' as they'll be restarted anyway. */
3397 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT
3398 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3399 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT)
3400 {
3401 Assert(!pVCpu->hm.s.Event.fPending);
3402 pVCpu->hm.s.Event.fPending = true;
3403 /* Error code present? */
3404 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntrInfo))
3405 {
3406 rc2 = VMXReadCachedVmcs(VMX_VMCS32_RO_IDT_ERROR_CODE, &val);
3407 AssertRC(rc2);
3408 pVCpu->hm.s.Event.u32ErrCode = val;
3409 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n",
3410 pVCpu->hm.s.Event.u64IntrInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
3411 }
3412 else
3413 {
3414 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hm.s.Event.u64IntrInfo,
3415 (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3416 pVCpu->hm.s.Event.u32ErrCode = 0;
3417 }
3418 }
3419#ifdef VBOX_STRICT
3420 else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hm.s.Event.u64IntrInfo)
3421             /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3422 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT)
3423 {
3424 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n",
3425 pVCpu->hm.s.Event.u64IntrInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3426 }
3427
3428 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
3429 HMDumpRegs(pVM, pVCpu, pCtx);
3430#endif
3431
3432 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3433 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3434 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3435 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3436 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3437
3438 /*
3439 * Sync back the TPR if it was changed.
3440 */
3441 if ( fSetupTPRCaching
3442 && u8LastTPR != pVCpu->hm.s.vmx.pbVirtApic[0x80])
3443 {
3444 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[0x80]);
3445 AssertRC(rc2);
3446 }
3447
3448#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3449 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3450 exitReason, (uint64_t)exitQualification, pCtx->cs.Sel, pCtx->rip, (uint64_t)intInfo);
3451#endif
3452 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
3453
3454 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3455 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3456 switch (exitReason)
3457 {
3458 case VMX_EXIT_XCPT_NMI: /* 0 Exception or non-maskable interrupt (NMI). */
3459 case VMX_EXIT_EXT_INT: /* 1 External interrupt. */
3460 {
3461 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3462
3463 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3464 {
3465 Assert(exitReason == VMX_EXIT_EXT_INT);
3466 /* External interrupt; leave to allow it to be dispatched again. */
3467 rc = VINF_EM_RAW_INTERRUPT;
3468 break;
3469 }
3470 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub3, y3);
3471 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3472 {
3473 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3474                /* Non-maskable interrupt; leave to allow it to be dispatched again. */
3475 rc = VINF_EM_RAW_INTERRUPT;
3476 break;
3477
3478 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT: /* External hardware interrupt. */
3479 AssertFailed(); /* can't come here; fails the first check. */
3480 break;
3481
3482 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DB_XCPT: /* Unknown why we get this type for #DB */
3483 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /* Software exception. (#BP or #OF) */
3484 Assert(vector == 1 || vector == 3 || vector == 4);
3485 /* no break */
3486 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT: /* Hardware exception. */
3487 Log2(("Hardware/software interrupt %d\n", vector));
3488 switch (vector)
3489 {
3490 case X86_XCPT_NM:
3491 {
3492 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3493
3494 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3495 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3496 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3497 if (rc == VINF_SUCCESS)
3498 {
3499 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3500
3501 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
3502
3503 /* Continue execution. */
3504 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
3505
3506 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3507 goto ResumeExecution;
3508 }
3509
3510 Log(("Forward #NM fault to the guest\n"));
3511 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
3512 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3513 cbInstr, 0);
3514 AssertRC(rc2);
3515 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3516 goto ResumeExecution;
3517 }
3518
3519 case X86_XCPT_PF: /* Page fault */
3520 {
3521#ifdef VBOX_ALWAYS_TRAP_PF
3522 if (pVM->hm.s.fNestedPaging)
3523 {
3524 /*
3525                 * A genuine page fault. Forward the trap to the guest by injecting the exception and resuming execution.
3526 */
3527 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification,
3528 errCode, (RTGCPTR)pCtx->rsp));
3529
3530 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3531
3532 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3533
3534 /* Now we must update CR2. */
3535 pCtx->cr2 = exitQualification;
3536 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3537 cbInstr, errCode);
3538 AssertRC(rc2);
3539
3540 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3541 goto ResumeExecution;
3542 }
3543#else
3544 Assert(!pVM->hm.s.fNestedPaging);
3545#endif
3546
3547#ifdef VBOX_HM_WITH_GUEST_PATCHING
3548            /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
3549 if ( pVM->hm.s.fTRPPatchingAllowed
3550 && pVM->hm.s.pGuestPatchMem
3551 && (exitQualification & 0xfff) == 0x080
3552 && !(errCode & X86_TRAP_PF_P) /* not present */
3553 && CPUMGetGuestCPL(pVCpu) == 0
3554 && !CPUMIsGuestInLongModeEx(pCtx)
3555 && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
3556 {
3557 RTGCPHYS GCPhysApicBase, GCPhys;
3558 GCPhysApicBase = pCtx->msrApicBase;
3559 GCPhysApicBase &= PAGE_BASE_GC_MASK;
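                    /* The low bits of the IA32_APIC_BASE MSR hold flags (BSP, APIC enable); masking with
                       PAGE_BASE_GC_MASK leaves the physical base of the APIC MMIO page (0xfee00000 by default). */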
3560
3561 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3562 if ( rc == VINF_SUCCESS
3563 && GCPhys == GCPhysApicBase)
3564 {
3565 /* Only attempt to patch the instruction once. */
3566 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3567 if (!pPatch)
3568 {
3569 rc = VINF_EM_HM_PATCH_TPR_INSTR;
3570 break;
3571 }
3572 }
3573 }
3574#endif
3575
3576 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3577 /* Exit qualification contains the linear address of the page fault. */
3578 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3579 TRPMSetErrorCode(pVCpu, errCode);
3580 TRPMSetFaultAddress(pVCpu, exitQualification);
3581
3582 /* Shortcut for APIC TPR reads and writes. */
3583 if ( (exitQualification & 0xfff) == 0x080
3584 && !(errCode & X86_TRAP_PF_P) /* not present */
3585 && fSetupTPRCaching
3586 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3587 {
3588 RTGCPHYS GCPhysApicBase, GCPhys;
3589 GCPhysApicBase = pCtx->msrApicBase;
3590 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3591
3592 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3593 if ( rc == VINF_SUCCESS
3594 && GCPhys == GCPhysApicBase)
3595 {
3596 Log(("Enable VT-x virtual APIC access filtering\n"));
3597 rc2 = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess,
3598 X86_PTE_RW | X86_PTE_P);
3599 AssertRC(rc2);
3600 }
3601 }
3602
3603 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3604 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3605 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3606
3607 if (rc == VINF_SUCCESS)
3608 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3609                Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3610 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
3611
3612 TRPMResetTrap(pVCpu);
3613 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3614 goto ResumeExecution;
3615 }
3616 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3617 {
3618 /*
3619                 * A genuine page fault. Forward the trap to the guest by injecting the exception and resuming execution.
3620 */
3621 Log2(("Forward page fault to the guest\n"));
3622
3623 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3624 /* The error code might have been changed. */
3625 errCode = TRPMGetErrorCode(pVCpu);
3626
3627 TRPMResetTrap(pVCpu);
3628
3629 /* Now we must update CR2. */
3630 pCtx->cr2 = exitQualification;
3631 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3632 cbInstr, errCode);
3633 AssertRC(rc2);
3634
3635 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3636 goto ResumeExecution;
3637 }
3638#ifdef VBOX_STRICT
3639 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3640 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3641#endif
3642 /* Need to go back to the recompiler to emulate the instruction. */
3643 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
3644 TRPMResetTrap(pVCpu);
3645
3646 /* If event delivery caused the #PF (shadow or not), tell TRPM. */
3647 hmR0VmxCheckPendingEvent(pVCpu);
3648 break;
3649 }
3650
3651            case X86_XCPT_MF: /* x87 floating-point error (math fault). */
3652 {
3653 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
3654 if (!(pCtx->cr0 & X86_CR0_NE))
3655 {
3656 /* old style FPU error reporting needs some extra work. */
3657 /** @todo don't fall back to the recompiler, but do it manually. */
3658 rc = VINF_EM_RAW_EMULATE_INSTR;
3659 break;
3660 }
3661 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3662 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3663 cbInstr, errCode);
3664 AssertRC(rc2);
3665
3666 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3667 goto ResumeExecution;
3668 }
3669
3670 case X86_XCPT_DB: /* Debug exception. */
3671 {
3672 uint64_t uDR6;
3673
3674 /*
3675 * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3676 *
3677 * Exit qualification bits:
3678 * 3:0 B0-B3 which breakpoint condition was met
3679 * 12:4 Reserved (0)
3680 * 13 BD - debug register access detected
3681 * 14 BS - single step execution or branch taken
3682 * 63:15 Reserved (0)
3683 */
3684 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
3685
3686 /* Note that we don't support guest and host-initiated debugging at the same time. */
3687
3688 uDR6 = X86_DR6_INIT_VAL;
3689 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3690 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3691 if (rc == VINF_EM_RAW_GUEST_TRAP)
3692 {
3693 /* Update DR6 here. */
3694 pCtx->dr[6] = uDR6;
3695
3696 /* Resync DR6 if the debug state is active. */
3697 if (CPUMIsGuestDebugStateActive(pVCpu))
3698 ASMSetDR6(pCtx->dr[6]);
3699
3700 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
3701 pCtx->dr[7] &= ~X86_DR7_GD;
3702
3703 /* Paranoia. */
3704 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3705 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3706 pCtx->dr[7] |= 0x400; /* must be one */
3707
3708 /* Resync DR7 */
3709 rc2 = VMXWriteVmcs64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
3710 AssertRC(rc2);
3711
3712 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip,
3713 exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3714 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3715 cbInstr, errCode);
3716 AssertRC(rc2);
3717
3718 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3719 goto ResumeExecution;
3720 }
3721 /* Return to ring 3 to deal with the debug exit code. */
3722 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3723 break;
3724 }
3725
3726 case X86_XCPT_BP: /* Breakpoint. */
3727 {
3728 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
3729 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3730 if (rc == VINF_EM_RAW_GUEST_TRAP)
3731 {
3732 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs.Sel, pCtx->rip));
3733 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3734 cbInstr, errCode);
3735 AssertRC(rc2);
3736 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3737 goto ResumeExecution;
3738 }
3739 if (rc == VINF_SUCCESS)
3740 {
3741 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3742 goto ResumeExecution;
3743 }
3744 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3745 break;
3746 }
3747
3748 case X86_XCPT_GP: /* General protection failure exception. */
3749 {
3750 uint32_t cbOp;
3751 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
3752
3753 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
3754#ifdef VBOX_STRICT
3755 if ( !CPUMIsGuestInRealModeEx(pCtx)
3756 || !pVM->hm.s.vmx.pRealModeTSS)
3757 {
3758 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, errCode));
3759 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3760 cbInstr, errCode);
3761 AssertRC(rc2);
3762 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3763 goto ResumeExecution;
3764 }
3765#endif
3766 Assert(CPUMIsGuestInRealModeEx(pCtx));
3767
3768 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3769
3770 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
3771 if (RT_SUCCESS(rc2))
3772 {
3773 bool fUpdateRIP = true;
3774
3775 rc = VINF_SUCCESS;
3776 Assert(cbOp == pDis->cbInstr);
3777 switch (pDis->pCurInstr->uOpcode)
3778 {
3779 case OP_CLI:
3780 pCtx->eflags.Bits.u1IF = 0;
3781 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli);
3782 break;
3783
3784 case OP_STI:
3785 pCtx->eflags.Bits.u1IF = 1;
3786 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->cbInstr);
3787 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3788 rc2 = VMXWriteVmcs(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE,
3789 VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3790 AssertRC(rc2);
3791 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti);
3792 break;
3793
3794 case OP_HLT:
3795 fUpdateRIP = false;
3796 rc = VINF_EM_HALT;
3797 pCtx->rip += pDis->cbInstr;
3798 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
3799 break;
3800
3801 case OP_POPF:
3802 {
3803 RTGCPTR GCPtrStack;
3804 uint32_t cbParm;
3805 uint32_t uMask;
3806 X86EFLAGS eflags;
3807
3808 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3809 {
3810 cbParm = 4;
3811 uMask = 0xffffffff;
3812 }
3813 else
3814 {
3815 cbParm = 2;
3816 uMask = 0xffff;
3817 }
3818
3819 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3820 if (RT_FAILURE(rc2))
3821 {
3822 rc = VERR_EM_INTERPRETER;
3823 break;
3824 }
3825 eflags.u = 0;
3826 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3827 if (RT_FAILURE(rc2))
3828 {
3829 rc = VERR_EM_INTERPRETER;
3830 break;
3831 }
3832 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3833 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3834 | (eflags.u & X86_EFL_POPF_BITS & uMask);
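                            /* X86_EFL_POPF_BITS is the set of flags POPF is allowed to modify; everything outside
                               that mask (and outside the operand-size mask) keeps its current value. */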
3835 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3836 pCtx->eflags.Bits.u1RF = 0;
3837 pCtx->esp += cbParm;
3838 pCtx->esp &= uMask;
3839
3840 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf);
3841 break;
3842 }
3843
3844 case OP_PUSHF:
3845 {
3846 RTGCPTR GCPtrStack;
3847 uint32_t cbParm;
3848 uint32_t uMask;
3849 X86EFLAGS eflags;
3850
3851 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3852 {
3853 cbParm = 4;
3854 uMask = 0xffffffff;
3855 }
3856 else
3857 {
3858 cbParm = 2;
3859 uMask = 0xffff;
3860 }
3861
3862 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0,
3863 &GCPtrStack);
3864 if (RT_FAILURE(rc2))
3865 {
3866 rc = VERR_EM_INTERPRETER;
3867 break;
3868 }
3869 eflags = pCtx->eflags;
3870 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3871 eflags.Bits.u1RF = 0;
3872 eflags.Bits.u1VM = 0;
3873
3874 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3875 if (RT_FAILURE(rc2))
3876 {
3877 rc = VERR_EM_INTERPRETER;
3878 break;
3879 }
3880 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3881 pCtx->esp -= cbParm;
3882 pCtx->esp &= uMask;
3883 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf);
3884 break;
3885 }
3886
3887 case OP_IRET:
3888 {
3889 RTGCPTR GCPtrStack;
3890 uint32_t uMask = 0xffff;
3891 uint16_t aIretFrame[3];
3892
3893 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
3894 {
3895 rc = VERR_EM_INTERPRETER;
3896 break;
3897 }
3898
3899 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3900 if (RT_FAILURE(rc2))
3901 {
3902 rc = VERR_EM_INTERPRETER;
3903 break;
3904 }
3905 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3906 if (RT_FAILURE(rc2))
3907 {
3908 rc = VERR_EM_INTERPRETER;
3909 break;
3910 }
3911 pCtx->ip = aIretFrame[0];
3912 pCtx->cs.Sel = aIretFrame[1];
3913 pCtx->cs.ValidSel = aIretFrame[1];
3914 pCtx->cs.u64Base = (uint32_t)pCtx->cs.Sel << 4;
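                            /* Real-mode segmentation: the segment base is simply the selector shifted left by 4 (selector * 16). */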
3915 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3916 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3917 pCtx->sp += sizeof(aIretFrame);
3918
3919 LogFlow(("iret to %04x:%x\n", pCtx->cs.Sel, pCtx->ip));
3920 fUpdateRIP = false;
3921 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret);
3922 break;
3923 }
3924
3925 case OP_INT:
3926 {
3927 uint32_t intInfo2;
3928
3929 LogFlow(("Realmode: INT %x\n", pDis->Param1.uValue & 0xff));
3930 intInfo2 = pDis->Param1.uValue & 0xff;
3931 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3932 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
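                            /* Assemble the injection info: the vector in the low bits, the event type (software
                               interrupt here) and the valid flag via the corresponding shifts. */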
3933
3934 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3935 AssertRC(VBOXSTRICTRC_VAL(rc));
3936 fUpdateRIP = false;
3937 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3938 break;
3939 }
3940
3941 case OP_INTO:
3942 {
3943 if (pCtx->eflags.Bits.u1OF)
3944 {
3945 uint32_t intInfo2;
3946
3947 LogFlow(("Realmode: INTO\n"));
3948 intInfo2 = X86_XCPT_OF;
3949 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3950 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3951
3952 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3953 AssertRC(VBOXSTRICTRC_VAL(rc));
3954 fUpdateRIP = false;
3955 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3956 }
3957 break;
3958 }
3959
3960 case OP_INT3:
3961 {
3962 uint32_t intInfo2;
3963
3964 LogFlow(("Realmode: INT 3\n"));
3965 intInfo2 = 3;
3966 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3967 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3968
3969 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3970 AssertRC(VBOXSTRICTRC_VAL(rc));
3971 fUpdateRIP = false;
3972 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3973 break;
3974 }
3975
3976 default:
3977 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
3978 fUpdateRIP = false;
3979 break;
3980 }
3981
3982 if (rc == VINF_SUCCESS)
3983 {
3984 if (fUpdateRIP)
3985 pCtx->rip += cbOp; /* Move on to the next instruction. */
3986
3987 /*
3988 * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the
3989 * whole context to be done with it.
3990 */
3991 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
3992
3993 /* Only resume if successful. */
3994 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3995 goto ResumeExecution;
3996 }
3997 }
3998 else
3999 rc = VERR_EM_INTERPRETER;
4000
4001 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
4002 ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4003 break;
4004 }
4005
4006#ifdef VBOX_STRICT
4007 case X86_XCPT_XF: /* SIMD exception. */
4008 case X86_XCPT_DE: /* Divide error. */
4009 case X86_XCPT_UD: /* Unknown opcode exception. */
4010 case X86_XCPT_SS: /* Stack segment exception. */
4011 case X86_XCPT_NP: /* Segment not present exception. */
4012 {
4013 switch (vector)
4014 {
4015 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); break;
4016 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); break;
4017 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); break;
4018 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); break;
4019 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); break;
4020 }
4021
4022 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
4023 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4024 cbInstr, errCode);
4025 AssertRC(rc2);
4026
4027 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4028 goto ResumeExecution;
4029 }
4030#endif
4031 default:
4032 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
4033 if ( CPUMIsGuestInRealModeEx(pCtx)
4034 && pVM->hm.s.vmx.pRealModeTSS)
4035 {
4036 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs.Sel, pCtx->eip, errCode));
4037 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4038 cbInstr, errCode);
4039 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
4040
4041 /* Go back to ring-3 in case of a triple fault. */
4042 if ( vector == X86_XCPT_DF
4043 && rc == VINF_EM_RESET)
4044 {
4045 break;
4046 }
4047
4048 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4049 goto ResumeExecution;
4050 }
4051 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
4052 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
4053 break;
4054 } /* switch (vector) */
4055
4056 break;
4057
4058 default:
4059 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
4060 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
4061 break;
4062 }
4063
4064 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4065 break;
4066 }
4067
4068 /*
4069 * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed
4070 * by the configuration of the EPT paging structures.
4071 */
4072 case VMX_EXIT_EPT_VIOLATION:
4073 {
4074 RTGCPHYS GCPhys;
4075
4076 Assert(pVM->hm.s.fNestedPaging);
4077
4078 rc2 = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
4079 AssertRC(rc2);
4080 Assert(((exitQualification >> 7) & 3) != 2);
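            /* Bits 8:7 of the exit qualification describe guest-linear-address validity; the combination
               'bit 7 clear but bit 8 set' is not supposed to be reported by the CPU, hence the assertion. */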
4081
4082 /* Determine the kind of violation. */
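            /* Synthesize a #PF-style error code from the exit qualification (instruction fetch, write access,
               translation present) so the nested-paging handler below can treat this like an ordinary page fault. */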
4083 errCode = 0;
4084 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
4085 errCode |= X86_TRAP_PF_ID;
4086
4087 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
4088 errCode |= X86_TRAP_PF_RW;
4089
4090 /* If the page is present, then it's a page level protection fault. */
4091 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
4092 errCode |= X86_TRAP_PF_P;
4093 else
4094 {
4095 /* Shortcut for APIC TPR reads and writes. */
4096 if ( (GCPhys & 0xfff) == 0x080
4097 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4098 && fSetupTPRCaching
4099 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4100 {
4101 RTGCPHYS GCPhysApicBase;
4102 GCPhysApicBase = pCtx->msrApicBase;
4103 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4104 if (GCPhys == GCPhysApicBase + 0x80)
4105 {
4106 Log(("Enable VT-x virtual APIC access filtering\n"));
4107 rc2 = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess,
4108 X86_PTE_RW | X86_PTE_P);
4109 AssertRC(rc2);
4110 }
4111 }
4112 }
4113 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
4114
4115 /* GCPhys contains the guest physical address of the page fault. */
4116 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
4117 TRPMSetErrorCode(pVCpu, errCode);
4118 TRPMSetFaultAddress(pVCpu, GCPhys);
4119
4120            /* Handle the page-fault trap for the nested shadow table. */
4121 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
4122
4123 /*
4124 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
4125 */
4126 if ( rc == VINF_SUCCESS
4127 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4128 || rc == VERR_PAGE_NOT_PRESENT)
4129 {
4130 /* We've successfully synced our shadow pages, so let's just continue execution. */
4131                Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
4132 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
4133
4134 TRPMResetTrap(pVCpu);
4135 goto ResumeExecution;
4136 }
4137
4138#ifdef VBOX_STRICT
4139 if (rc != VINF_EM_RAW_EMULATE_INSTR)
4140 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4141#endif
4142 /* Need to go back to the recompiler to emulate the instruction. */
4143 TRPMResetTrap(pVCpu);
4144 break;
4145 }
4146
4147 case VMX_EXIT_EPT_MISCONFIG:
4148 {
4149 RTGCPHYS GCPhys;
4150
4151 Assert(pVM->hm.s.fNestedPaging);
4152
4153 rc2 = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
4154 AssertRC(rc2);
4155 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
4156
4157 /* Shortcut for APIC TPR reads and writes. */
4158 if ( (GCPhys & 0xfff) == 0x080
4159 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4160 && fSetupTPRCaching
4161 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4162 {
4163 RTGCPHYS GCPhysApicBase = pCtx->msrApicBase;
4164 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4165 if (GCPhys == GCPhysApicBase + 0x80)
4166 {
4167 Log(("Enable VT-x virtual APIC access filtering\n"));
4168 rc2 = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess,
4169 X86_PTE_RW | X86_PTE_P);
4170 AssertRC(rc2);
4171 }
4172 }
4173
4174 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
4175
4176 /*
4177 * If we succeed, resume execution.
4178         * Or, if we fail to interpret the instruction because we couldn't get the guest physical address
4179         * of the page containing the instruction via the guest's page tables (we would invalidate the guest
4180         * page in the host TLB), resume execution; this will cause a guest page fault and let the guest
4181         * handle this weird case. See @bugref{6043}.
4182 */
4183 if ( rc == VINF_SUCCESS
4184 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4185 || rc == VERR_PAGE_NOT_PRESENT)
4186 {
4187 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
4188 goto ResumeExecution;
4189 }
4190
4191 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4192 break;
4193 }
4194
4195 case VMX_EXIT_INT_WINDOW: /* 7 Interrupt window exiting. */
4196            /* The interrupt window opened (the guest can accept interrupts again); clear the interrupt-window exiting control. */
4197 LogFlow(("VMX_EXIT_INT_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip,
4198 VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
4199 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INT_WINDOW_EXIT;
4200 rc2 = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
4201 AssertRC(rc2);
4202 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
4203 goto ResumeExecution; /* we check for pending guest interrupts there */
4204
4205 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
4206 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
4207 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
4208 /* Skip instruction and continue directly. */
4209 pCtx->rip += cbInstr;
4210            /* Continue execution. */
4211 goto ResumeExecution;
4212
4213 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4214 {
4215 Log2(("VMX: Cpuid %x\n", pCtx->eax));
4216 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
4217 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4218 if (rc == VINF_SUCCESS)
4219 {
4220 /* Update EIP and continue execution. */
4221 Assert(cbInstr == 2);
4222 pCtx->rip += cbInstr;
4223 goto ResumeExecution;
4224 }
4225 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4226 rc = VINF_EM_RAW_EMULATE_INSTR;
4227 break;
4228 }
4229
4230 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4231 {
4232 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
4233 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
4234 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4235 if (rc == VINF_SUCCESS)
4236 {
4237 /* Update EIP and continue execution. */
4238 Assert(cbInstr == 2);
4239 pCtx->rip += cbInstr;
4240 goto ResumeExecution;
4241 }
4242 rc = VINF_EM_RAW_EMULATE_INSTR;
4243 break;
4244 }
4245
4246 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4247 {
4248 Log2(("VMX: Rdtsc\n"));
4249 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
4250 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4251 if (rc == VINF_SUCCESS)
4252 {
4253 /* Update EIP and continue execution. */
4254 Assert(cbInstr == 2);
4255 pCtx->rip += cbInstr;
4256 goto ResumeExecution;
4257 }
4258 rc = VINF_EM_RAW_EMULATE_INSTR;
4259 break;
4260 }
4261
4262 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4263 {
4264 Log2(("VMX: Rdtscp\n"));
4265 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtscp);
4266 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
4267 if (rc == VINF_SUCCESS)
4268 {
4269 /* Update EIP and continue execution. */
4270 Assert(cbInstr == 3);
4271 pCtx->rip += cbInstr;
4272 goto ResumeExecution;
4273 }
4274 rc = VINF_EM_RAW_EMULATE_INSTR;
4275 break;
4276 }
4277
4278 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4279 {
4280 Log2(("VMX: invlpg\n"));
4281 Assert(!pVM->hm.s.fNestedPaging);
4282
4283 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
4284 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
4285 if (rc == VINF_SUCCESS)
4286 {
4287 /* Update EIP and continue execution. */
4288 pCtx->rip += cbInstr;
4289 goto ResumeExecution;
4290 }
4291 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
4292 break;
4293 }
4294
4295 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4296 {
4297 Log2(("VMX: monitor\n"));
4298
4299 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
4300 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4301 if (rc == VINF_SUCCESS)
4302 {
4303 /* Update EIP and continue execution. */
4304 pCtx->rip += cbInstr;
4305 goto ResumeExecution;
4306 }
4307 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4308 break;
4309 }
4310
4311 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4312 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
4313 if ( pVM->hm.s.fTPRPatchingActive
4314 && pCtx->ecx == MSR_K8_LSTAR)
4315 {
4316 Assert(!CPUMIsGuestInLongModeEx(pCtx));
4317 if ((pCtx->eax & 0xff) != u8LastTPR)
4318 {
4319 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
4320
4321 /* Our patch code uses LSTAR for TPR caching. */
4322 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
4323 AssertRC(rc2);
4324 }
4325
4326 /* Skip the instruction and continue. */
4327 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
4328
4329 /* Only resume if successful. */
4330 goto ResumeExecution;
4331 }
4332 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_MSR;
4333 /* no break */
4334 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4335 {
4336 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hm.s.StatExitRdmsr : &pVCpu->hm.s.StatExitWrmsr);
4337
4338 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
4339 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
4340 if (rc == VINF_SUCCESS)
4341 {
4342 /* EIP has been updated already. */
4343 /* Only resume if successful. */
4344 goto ResumeExecution;
4345 }
4346 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n",
4347 (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
4348 break;
4349 }
4350
4351 case VMX_EXIT_MOV_CRX: /* 28 Control-register accesses. */
4352 {
4353 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub2, y2);
4354
4355 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
4356 {
4357 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
4358 {
4359 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
4360 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4361 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4362 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
4363 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
4364 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
4365 {
4366 case 0:
4367 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0 | HM_CHANGED_GUEST_CR3;
4368 break;
4369 case 2:
4370 break;
4371 case 3:
4372 Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
4373 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR3;
4374 break;
4375 case 4:
4376 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR4;
4377 break;
4378 case 8:
4379 /* CR8 contains the APIC TPR */
4380 Assert(!(pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1
4381 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4382 break;
4383
4384 default:
4385 AssertFailed();
4386 break;
4387 }
4388 break;
4389 }
4390
4391 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
4392 {
4393 Log2(("VMX: mov x, crx\n"));
4394 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4395
4396 Assert( !pVM->hm.s.fNestedPaging
4397 || !CPUMIsGuestInPagedProtectedModeEx(pCtx)
4398 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3);
4399
4400 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
4401 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8
4402 || !(pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4403
4404 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4405 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
4406 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
4407 break;
4408 }
4409
4410 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
4411 {
4412 Log2(("VMX: clts\n"));
4413 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
4414 rc = EMInterpretCLTS(pVM, pVCpu);
4415 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
4416 break;
4417 }
4418
4419 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
4420 {
4421 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
4422 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw);
4423 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
4424 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
4425 break;
4426 }
4427 }
4428
4429 /* Update EIP if no error occurred. */
4430 if (RT_SUCCESS(rc))
4431 pCtx->rip += cbInstr;
4432
4433 if (rc == VINF_SUCCESS)
4434 {
4435 /* Only resume if successful. */
4436 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub2, y2);
4437 goto ResumeExecution;
4438 }
4439 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
4440 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub2, y2);
4441 break;
4442 }
4443
4444 case VMX_EXIT_MOV_DRX: /* 29 Debug-register accesses. */
4445 {
4446 if ( !DBGFIsStepping(pVCpu)
4447 && !CPUMIsHyperDebugStateActive(pVCpu))
4448 {
4449 /* Disable DRx move intercepts. */
4450 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4451 rc2 = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
4452 AssertRC(rc2);
4453
4454 /* Save the host and load the guest debug state. */
4455 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
4456 AssertRC(rc2);
4457
4458#ifdef LOG_ENABLED
4459 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4460 {
4461 Log(("VMX_EXIT_MOV_DRX: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4462 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4463 }
4464 else
4465 Log(("VMX_EXIT_MOV_DRX: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
4466#endif
4467
4468#ifdef VBOX_WITH_STATISTICS
4469 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
4470 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4471 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
4472 else
4473 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
4474#endif
4475
4476 goto ResumeExecution;
4477 }
4478
4479 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first
4480 * time and restore DRx registers afterwards */
4481 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4482 {
4483 Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4484 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4485 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
4486 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4487 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4488 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4489 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
4490 Log2(("DR7=%08x\n", pCtx->dr[7]));
4491 }
4492 else
4493 {
4494 Log2(("VMX: mov x, DRx\n"));
4495 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
4496 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4497 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4498 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4499 }
4500 /* Update EIP if no error occurred. */
4501 if (RT_SUCCESS(rc))
4502 pCtx->rip += cbInstr;
4503
4504 if (rc == VINF_SUCCESS)
4505 {
4506 /* Only resume if successful. */
4507 goto ResumeExecution;
4508 }
4509 Assert(rc == VERR_EM_INTERPRETER);
4510 break;
4511 }
4512
4513 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
4514 case VMX_EXIT_IO_INSTR: /* 30 I/O instruction. */
4515 {
4516 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub1, y1);
4517 uint32_t uPort;
4518 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4519 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4520
4521 /** @todo necessary to make the distinction? */
4522 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4523 uPort = pCtx->edx & 0xffff;
4524 else
4525 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
4526
4527 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */
4528 {
4529 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4530 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4531 break;
4532 }
4533
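            /* Map the 2-bit width encoding to an access size (1, 2 or 4 bytes) and an operand mask via the
               g_aIOSize / g_aIOOpAnd lookup tables; encoding 2 is invalid and was rejected above. */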
4534 uint32_t cbSize = g_aIOSize[uIOWidth];
4535 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4536 {
4537 /* ins/outs */
4538 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
4539
4540 /* Disassemble manually to deal with segment prefixes. */
4541 /** @todo VMX_VMCS_RO_EXIT_GUEST_LINEAR_ADDR contains the flat pointer
4542 * operand of the instruction. */
4543 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4544 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
4545                if (RT_SUCCESS(rc2)) /* Check the disassembly status. */
4546 {
4547 if (fIOWrite)
4548 {
4549 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4550 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
4551 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4552 }
4553 else
4554 {
4555 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4556 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
4557 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4558 }
4559 }
4560 else
4561 rc = VINF_EM_RAW_EMULATE_INSTR;
4562 }
4563 else
4564 {
4565 /* Normal in/out */
4566 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
4567
4568 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4569
4570 if (fIOWrite)
4571 {
4572 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
4573 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4574 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4575 HMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4576 }
4577 else
4578 {
4579 uint32_t u32Val = 0;
4580
4581 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
4582 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4583 if (IOM_SUCCESS(rc))
4584 {
4585 /* Write back to the EAX register. */
4586 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4587 }
4588 else
4589 if (rc == VINF_IOM_R3_IOPORT_READ)
4590 HMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4591 }
4592 }
4593
4594 /*
4595             * Handle the I/O return codes.
4596 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4597 */
4598 if (IOM_SUCCESS(rc))
4599 {
4600 /* Update EIP and continue execution. */
4601 pCtx->rip += cbInstr;
4602 if (RT_LIKELY(rc == VINF_SUCCESS))
4603 {
4604 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4605 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4606 {
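                        /* Check each of the four debug registers for an armed I/O breakpoint (X86_DR7_RW_IO)
                           covering the accessed port and, if one matches, inject #DB into the guest. */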
4607 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
4608 for (unsigned i = 0; i < 4; i++)
4609 {
4610 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
4611
4612 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4613 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4614 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4615 {
4616 uint64_t uDR6;
4617
4618 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4619
4620 uDR6 = ASMGetDR6();
4621
4622 /* Clear all breakpoint status flags and set the one we just hit. */
4623 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4624 uDR6 |= (uint64_t)RT_BIT(i);
4625
4626 /*
4627 * Note: AMD64 Architecture Programmer's Manual 13.1:
4628                                 * Bits 15:13 of the DR6 register are never cleared by the processor and must
4629 * be cleared by software after the contents have been read.
4630 */
4631 ASMSetDR6(uDR6);
4632
4633 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
4634 pCtx->dr[7] &= ~X86_DR7_GD;
4635
4636 /* Paranoia. */
4637 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4638 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4639 pCtx->dr[7] |= 0x400; /* must be one */
4640
4641 /* Resync DR7 */
4642 rc2 = VMXWriteVmcs64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
4643 AssertRC(rc2);
4644
4645 /* Construct inject info. */
4646 intInfo = X86_XCPT_DB;
4647 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4648 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4649
4650 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4651 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4652 0 /* cbInstr */, 0 /* errCode */);
4653 AssertRC(rc2);
4654
4655 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4656 goto ResumeExecution;
4657 }
4658 }
4659 }
4660 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4661 goto ResumeExecution;
4662 }
4663 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4664 break;
4665 }
4666
4667#ifdef VBOX_STRICT
4668 if (rc == VINF_IOM_R3_IOPORT_READ)
4669 Assert(!fIOWrite);
4670 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4671 Assert(fIOWrite);
4672 else
4673 {
4674 AssertMsg( RT_FAILURE(rc)
4675 || rc == VINF_EM_RAW_EMULATE_INSTR
4676 || rc == VINF_EM_RAW_GUEST_TRAP
4677 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4678 }
4679#endif
4680 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4681 break;
4682 }
4683
4684 case VMX_EXIT_TPR_BELOW_THRESHOLD: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4685 LogFlow(("VMX_EXIT_TPR_BELOW_THRESHOLD\n"));
4686 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4687 goto ResumeExecution;
4688
4689 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4690 on the APIC-access page. */
4691 {
4692 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4693 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4694
4695 switch (uAccessType)
4696 {
4697 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4698 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4699 {
4700 RTGCPHYS GCPhys = pCtx->msrApicBase;
4701 GCPhys &= PAGE_BASE_GC_MASK;
4702 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4703
4704 LogFlow(("Apic access at %RGp\n", GCPhys));
4705 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
4706 CPUMCTX2CORE(pCtx), GCPhys);
4707 if (rc == VINF_SUCCESS)
4708 goto ResumeExecution; /* rip already updated */
4709 break;
4710 }
4711
4712 default:
4713 rc = VINF_EM_RAW_EMULATE_INSTR;
4714 break;
4715 }
4716 break;
4717 }
4718
4719 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4720 if (!TMTimerPollBool(pVM, pVCpu))
4721 goto ResumeExecution;
4722 rc = VINF_EM_RAW_TIMER_PENDING;
4723 break;
4724
4725 default:
4726 /* The rest is handled after syncing the entire CPU state. */
4727 break;
4728 }
4729
4730
4731 /*
4732 * Note: The guest state is not entirely synced back at this stage!
4733 */
4734
4735 /* Investigate why there was a VM-exit. (part 2) */
4736 switch (exitReason)
4737 {
4738 case VMX_EXIT_XCPT_NMI: /* 0 Exception or non-maskable interrupt (NMI). */
4739 case VMX_EXIT_EXT_INT: /* 1 External interrupt. */
4740 case VMX_EXIT_EPT_VIOLATION:
4741 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4742 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4743 /* Already handled above. */
4744 break;
4745
4746 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4747 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4748 break;
4749
4750 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4751 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4752 rc = VINF_EM_RAW_INTERRUPT;
4753 AssertFailed(); /* Can't happen. Yet. */
4754 break;
4755
4756 case VMX_EXIT_IO_SMI: /* 5 I/O system-management interrupt (SMI). */
4757 case VMX_EXIT_SMI: /* 6 Other SMI. */
4758 rc = VINF_EM_RAW_INTERRUPT;
4759 AssertFailed(); /* Can't happen afaik. */
4760 break;
4761
4762 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4763 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4764 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4765 && pVCpu->hm.s.Event.fPending)
4766 {
4767 /* Caused by an injected interrupt. */
4768 pVCpu->hm.s.Event.fPending = false;
4769
4770 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.u64IntrInfo)));
4771 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntrInfo));
4772            /** @todo Why do we assume this had to be a hardware interrupt? What about software interrupts or exceptions? */
4773 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.u64IntrInfo), TRPM_HARDWARE_INT);
4774 AssertRC(rc2);
4775 }
4776 /* else Exceptions and software interrupts can just be restarted. */
4777 rc = VERR_EM_INTERPRETER;
4778 break;
4779
4780 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4781 /* Check if external interrupts are pending; if so, don't switch back. */
4782 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
4783 pCtx->rip++; /* skip hlt */
4784 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4785 goto ResumeExecution;
4786
4787 rc = VINF_EM_HALT;
4788 break;
4789
4790 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4791 Log2(("VMX: mwait\n"));
4792 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
4793 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4794 if ( rc == VINF_EM_HALT
4795 || rc == VINF_SUCCESS)
4796 {
4797 /* Update EIP and continue execution. */
4798 pCtx->rip += cbInstr;
4799
4800 /* Check if external interrupts are pending; if so, don't switch back. */
4801 if ( rc == VINF_SUCCESS
4802 || ( rc == VINF_EM_HALT
4803 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4804 )
4805 goto ResumeExecution;
4806 }
4807 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4808 break;
4809
4810 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4811 AssertFailed(); /* can't happen. */
4812 rc = VERR_EM_INTERPRETER;
4813 break;
4814
4815 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4816 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4817 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4818 rc2 = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
4819 AssertRC(rc2);
4820 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf);
4821#if 0
4822 DBGFDoneStepping(pVCpu);
4823#endif
4824 rc = VINF_EM_DBG_STOP;
4825 break;
4826
4827 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4828 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4829 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4830 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4831 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4832 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4833 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4834 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4835 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4836 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4837 /** @todo inject #UD immediately */
4838 rc = VERR_EM_INTERPRETER;
4839 break;
4840
4841 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4842 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4843 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4844 case VMX_EXIT_MOV_CRX: /* 28 Control-register accesses. */
4845 case VMX_EXIT_MOV_DRX: /* 29 Debug-register accesses. */
4846 case VMX_EXIT_IO_INSTR: /* 30 I/O instruction. */
4847 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4848 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4849 /* already handled above */
4850 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4851 || rc == VINF_EM_RAW_INTERRUPT
4852 || rc == VERR_EM_INTERPRETER
4853 || rc == VINF_EM_RAW_EMULATE_INSTR
4854 || rc == VINF_PGM_SYNC_CR3
4855 || rc == VINF_IOM_R3_IOPORT_READ
4856 || rc == VINF_IOM_R3_IOPORT_WRITE
4857 || rc == VINF_EM_RAW_GUEST_TRAP
4858 || rc == VINF_TRPM_XCPT_DISPATCHED
4859 || rc == VINF_EM_RESCHEDULE_REM,
4860 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4861 break;
4862
4863 case VMX_EXIT_TPR_BELOW_THRESHOLD: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4864 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4865 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4866 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4867 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4868 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4869 on the APIC-access page. */
4870 {
4871 /*
4872 * If we decided to emulate them here, then we must sync the MSRs that could have been changed (sysenter, FS/GS base)
4873 */
4874 rc = VERR_EM_INTERPRETER;
4875 break;
4876 }
4877
4878 case VMX_EXIT_INT_WINDOW: /* 7 Interrupt window. */
4879 Assert(rc == VINF_EM_RAW_INTERRUPT);
4880 break;
4881
4882 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4883 {
4884#ifdef VBOX_STRICT
4885 RTCCUINTREG val2 = 0;
4886
4887 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4888
4889 VMXReadVmcs(VMX_VMCS_GUEST_RIP, &val2);
4890 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4891
4892 VMXReadVmcs(VMX_VMCS_GUEST_CR0, &val2);
4893 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4894
4895 VMXReadVmcs(VMX_VMCS_GUEST_CR3, &val2);
4896 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4897
4898 VMXReadVmcs(VMX_VMCS_GUEST_CR4, &val2);
4899 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4900
4901 VMXReadVmcs(VMX_VMCS_GUEST_RFLAGS, &val2);
4902 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4903
4904 VMX_LOG_SELREG(CS, "CS", val2);
4905 VMX_LOG_SELREG(DS, "DS", val2);
4906 VMX_LOG_SELREG(ES, "ES", val2);
4907 VMX_LOG_SELREG(FS, "FS", val2);
4908 VMX_LOG_SELREG(GS, "GS", val2);
4909 VMX_LOG_SELREG(SS, "SS", val2);
4910 VMX_LOG_SELREG(TR, "TR", val2);
4911 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4912
4913 VMXReadVmcs(VMX_VMCS_GUEST_GDTR_BASE, &val2);
4914 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4915 VMXReadVmcs(VMX_VMCS_GUEST_IDTR_BASE, &val2);
4916 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4917#endif /* VBOX_STRICT */
4918 rc = VERR_VMX_INVALID_GUEST_STATE;
4919 break;
4920 }
4921
4922 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4923 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4924 default:
4925 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4926 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4927 break;
4928
4929 }
4930
4931end:
4932    /* We are now going back to ring-3, so clear the action flag. */
4933 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4934
4935 /*
4936 * Signal changes for the recompiler.
4937 */
4938 CPUMSetChangedFlags(pVCpu,
4939 CPUM_CHANGED_SYSENTER_MSR
4940 | CPUM_CHANGED_LDTR
4941 | CPUM_CHANGED_GDTR
4942 | CPUM_CHANGED_IDTR
4943 | CPUM_CHANGED_TR
4944 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4945
4946 /*
4947 * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time.
4948 */
4949 if ( exitReason == VMX_EXIT_EXT_INT
4950 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4951 {
4952 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
4953 /* On the next entry we'll only sync the host context. */
4954 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT;
4955 }
4956 else
4957 {
4958 /* On the next entry we'll sync everything. */
4959 /** @todo we can do better than this */
4960 /* Not in the VINF_PGM_CHANGE_MODE though! */
4961 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
4962 }
4963
4964 /* Translate into a less severe return code */
4965 if (rc == VERR_EM_INTERPRETER)
4966 rc = VINF_EM_RAW_EMULATE_INSTR;
4967 else if (rc == VERR_VMX_INVALID_VMCS_PTR)
4968 {
4969 /* Try to extract more information about what might have gone wrong here. */
4970 VMXGetActivateVMCS(&pVCpu->hm.s.vmx.lasterror.u64VMCSPhys);
4971 pVCpu->hm.s.vmx.lasterror.u32VMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs;
4972 pVCpu->hm.s.vmx.lasterror.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
4973 pVCpu->hm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4974 }
4975
4976 /* Just set the correct state here instead of trying to catch every goto above. */
4977 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4978
4979#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4980 /* Restore interrupts if we exited after disabling them. */
4981 if (uOldEFlags != ~(RTCCUINTREG)0)
4982 ASMSetFlags(uOldEFlags);
4983#endif
4984
4985 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
4986 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
4987 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
4988 Log2(("X"));
4989 return VBOXSTRICTRC_TODO(rc);
4990}
4991
4992
4993/**
4994 * Enters the VT-x session.
4995 *
4996 * @returns VBox status code.
4997 * @param pVM Pointer to the VM.
4998 * @param pVCpu Pointer to the VMCPU.
4999 * @param pCpu Pointer to the CPU info struct.
5000 */
5001VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
5002{
5003 Assert(pVM->hm.s.vmx.fSupported);
5004 NOREF(pCpu);
5005
5006 unsigned cr4 = ASMGetCR4();
5007 if (!(cr4 & X86_CR4_VMXE))
5008 {
5009 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
5010 return VERR_VMX_X86_CR4_VMXE_CLEARED;
5011 }
5012
5013 /* Activate the VMCS. */
5014 int rc = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
5015 if (RT_FAILURE(rc))
5016 return rc;
5017
5018 pVCpu->hm.s.fResumeVM = false;
5019 return VINF_SUCCESS;
5020}
5021
5022
5023/**
5024 * Leaves the VT-x session.
5025 *
5026 * @returns VBox status code.
5027 * @param pVM Pointer to the VM.
5028 * @param pVCpu Pointer to the VMCPU.
5029 * @param   pCtx        Pointer to the guest CPU context.
5030 */
5031VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
5032{
5033 Assert(pVM->hm.s.vmx.fSupported);
5034
5035#ifdef DEBUG
5036 if (CPUMIsHyperDebugStateActive(pVCpu))
5037 {
5038 CPUMR0LoadHostDebugState(pVM, pVCpu);
5039 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
5040 }
5041 else
5042#endif
5043
5044 /*
5045 * Save the guest debug state if necessary.
5046 */
5047 if (CPUMIsGuestDebugStateActive(pVCpu))
5048 {
5049 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
5050
5051 /* Enable DRx move intercepts again. */
5052 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
5053 int rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
5054 AssertRC(rc);
5055
5056 /* Resync the debug registers the next time. */
5057 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
5058 }
5059 else
5060 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
5061
5062 /*
5063 * Clear VMCS, marking it inactive, clearing implementation-specific data and writing
5064 * VMCS data back to memory.
5065 */
5066 int rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
5067 AssertRC(rc);
5068
5069 return VINF_SUCCESS;
5070}
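
/*
 * Editor's note: the sketch below is an illustration added for this write-up and is not part of
 * the original source. VMXR0Enter() activates this VCPU's VMCS and VMXR0Leave() saves the debug
 * state and VMCLEARs it again, so the ring-0 caller is expected to bracket guest execution with
 * the two calls. The wrapper name below is hypothetical and the call sequence is simplified.
 */
#if 0 /* illustrative only */
static int hmR0IllustrateVmxSession(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PHMGLOBLCPUINFO pCpu)
{
    int rc = VMXR0Enter(pVM, pVCpu, pCpu);          /* Activate the per-VCPU VMCS (CR4.VMXE must already be set). */
    if (RT_SUCCESS(rc))
    {
        rc = VMXR0RunGuestCode(pVM, pVCpu, pCtx);   /* Inner execution/exit-handling loop defined earlier in this file. */
        int rc2 = VMXR0Leave(pVM, pVCpu, pCtx);     /* Save guest debug state and VMCLEAR the VMCS. */
        if (RT_SUCCESS(rc))
            rc = rc2;
    }
    return rc;
}
#endif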
5071
5072
5073/**
5074 * Flush the TLB using EPT.
5075 *
5076 * @returns VBox status code.
5077 * @param pVM Pointer to the VM.
5078 * @param pVCpu Pointer to the VMCPU.
5079 * @param enmFlush Type of flush.
5080 */
5081static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
5082{
5083 uint64_t descriptor[2];
5084
5085 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
5086 Assert(pVM->hm.s.fNestedPaging);
5087 descriptor[0] = pVCpu->hm.s.vmx.GCPhysEPTP;
5088 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
5089 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
5090 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hm.s.vmx.GCPhysEPTP, rc));
5091#ifdef VBOX_WITH_STATISTICS
5092 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
5093#endif
5094}
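
/*
 * Editor's note: illustrative only, not part of the original source. The two-element
 * descriptor array built in hmR0VmxFlushEPT() follows the INVEPT descriptor layout from
 * the Intel manuals: the first 64-bit word holds the EPT pointer and the second is
 * reserved and must be zero. A struct view of the same 16-byte layout:
 */
#if 0 /* illustrative only */
typedef struct INVEPTDESC
{
    uint64_t uEptp;      /* EPT pointer of the context to flush (ignored for all-context flushes). */
    uint64_t uReserved;  /* Must be zero. */
} INVEPTDESC;
AssertCompileSize(INVEPTDESC, 16);
#endif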
5095
5096
5097/**
5098 * Flush the TLB using VPID.
5099 *
5100 * @returns VBox status code.
5101 * @param pVM Pointer to the VM.
5102 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
5103 * enmFlush).
5104 * @param enmFlush Type of flush.
5105 * @param GCPtr Virtual address of the page to flush (can be 0 depending
5106 * on @a enmFlush).
5107 */
5108static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
5109{
5110 uint64_t descriptor[2];
5111
5112 Assert(pVM->hm.s.vmx.fVpid);
5113 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
5114 {
5115 descriptor[0] = 0;
5116 descriptor[1] = 0;
5117 }
5118 else
5119 {
5120 AssertPtr(pVCpu);
5121 AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
5122 AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
5123 descriptor[0] = pVCpu->hm.s.uCurrentAsid;
5124 descriptor[1] = GCPtr;
5125 }
5126 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
5127 AssertMsg(rc == VINF_SUCCESS,
5128 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
5129#ifdef VBOX_WITH_STATISTICS
5130 if (pVCpu)
5131 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
5132#endif
5133}
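
/*
 * Editor's note: illustrative only, not part of the original source. The INVVPID descriptor
 * built in hmR0VmxFlushVPID() packs the VPID into the low 16 bits of the first 64-bit word
 * (the remaining bits are reserved and zero) and puts the linear address in the second word;
 * the address is only consulted for individual-address flushes. A struct view of the layout:
 */
#if 0 /* illustrative only */
typedef struct INVVPIDDESC
{
    uint16_t uVpid;          /* VPID of the context to flush. */
    uint16_t auReserved[3];  /* Must be zero. */
    uint64_t GCPtrLinear;    /* Linear address, used by individual-address flushes only. */
} INVVPIDDESC;
AssertCompileSize(INVVPIDDESC, 16);
#endif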
5134
5135
5136/**
5137 * Invalidates a guest page by guest virtual address. Only relevant for
5138 * EPT/VPID, otherwise there is nothing really to invalidate.
5139 *
5140 * @returns VBox status code.
5141 * @param pVM Pointer to the VM.
5142 * @param pVCpu Pointer to the VMCPU.
5143 * @param GCVirt Guest virtual address of the page to invalidate.
5144 */
5145VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
5146{
5147 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
5148
5149 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
5150
5151 if (!fFlushPending)
5152 {
5153 /*
5154          * We must invalidate the guest TLB entry in either case; we cannot ignore it even in the EPT case.
5155          * See @bugref{6043} and @bugref{6177}.
5156          *
5157          * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*(), as this
5158          * function may be called in a loop with individual addresses.
5159 */
5160 if (pVM->hm.s.vmx.fVpid)
5161 {
5162 /* If we can flush just this page do it, otherwise flush as little as possible. */
5163 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
5164 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
5165 else
5166 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5167 }
5168 else if (pVM->hm.s.fNestedPaging)
5169 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5170 }
5171
5172 return VINF_SUCCESS;
5173}
5174
5175
5176/**
5177 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
5178 * otherwise there is nothing really to invalidate.
5179 *
5180 * NOTE: Assumes the current instruction references this physical page through a virtual address!
5181 *
5182 * @returns VBox status code.
5183 * @param pVM Pointer to the VM.
5184 * @param pVCpu Pointer to the VMCPU.
5185 * @param GCPhys Guest physical address of the page to invalidate.
5186 */
5187VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
5188{
5189 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
5190
5191 /*
5192     * We cannot flush a page by guest-physical address: invvpid takes only a linear address,
5193     * while invept flushes entire EPT contexts rather than individual addresses. We update the force flag here
5194     * and flush before VMENTRY in hmR0VmxSetupTLB*(), as this function might be called in a loop.
5195 */
5196 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5197 return VINF_SUCCESS;
5198}
5199
5200
5201/**
5202 * Report world switch error and dump some useful debug info.
5203 *
5204 * @param pVM Pointer to the VM.
5205 * @param pVCpu Pointer to the VMCPU.
5206 * @param rc Return code.
5207 * @param pCtx Pointer to the current guest CPU context (not updated).
5208 */
5209static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
5210{
5211 NOREF(pVM);
5212
5213 switch (VBOXSTRICTRC_VAL(rc))
5214 {
5215 case VERR_VMX_INVALID_VMXON_PTR:
5216 AssertFailed();
5217 break;
5218
5219 case VERR_VMX_UNABLE_TO_START_VM:
5220 case VERR_VMX_UNABLE_TO_RESUME_VM:
5221 {
5222 int rc2;
5223 RTCCUINTREG exitReason, instrError;
5224
5225 rc2 = VMXReadVmcs(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
5226 rc2 |= VMXReadVmcs(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
5227 AssertRC(rc2);
5228 if (rc2 == VINF_SUCCESS)
5229 {
5230 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason,
5231 (uint32_t)instrError));
5232 Log(("Current stack %08x\n", &rc2));
5233
5234 pVCpu->hm.s.vmx.lasterror.u32InstrError = instrError;
5235 pVCpu->hm.s.vmx.lasterror.u32ExitReason = exitReason;
5236
5237#ifdef VBOX_STRICT
5238 RTGDTR gdtr;
5239 PCX86DESCHC pDesc;
5240 RTCCUINTREG val;
5241
5242 ASMGetGDTR(&gdtr);
5243
5244 VMXReadVmcs(VMX_VMCS_GUEST_RIP, &val);
5245 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
5246 VMXReadVmcs(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, &val);
5247 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
5248 VMXReadVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, &val);
5249 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
5250 VMXReadVmcs(VMX_VMCS32_CTRL_ENTRY_CONTROLS, &val);
5251 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
5252 VMXReadVmcs(VMX_VMCS32_CTRL_EXIT_CONTROLS, &val);
5253 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
5254
5255 VMXReadVmcs(VMX_VMCS_HOST_CR0, &val);
5256 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
5257 VMXReadVmcs(VMX_VMCS_HOST_CR3, &val);
5258 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
5259 VMXReadVmcs(VMX_VMCS_HOST_CR4, &val);
5260 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
5261
5262 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_CS, &val);
5263 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
5264 VMXReadVmcs(VMX_VMCS_GUEST_RFLAGS, &val);
5265 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
5266
5267 if (val < gdtr.cbGdt)
5268 {
5269 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5270 HMR0DumpDescriptor(pDesc, val, "CS: ");
5271 }
5272
5273 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_DS, &val);
5274 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
5275 if (val < gdtr.cbGdt)
5276 {
5277 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5278 HMR0DumpDescriptor(pDesc, val, "DS: ");
5279 }
5280
5281 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_ES, &val);
5282 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
5283 if (val < gdtr.cbGdt)
5284 {
5285 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5286 HMR0DumpDescriptor(pDesc, val, "ES: ");
5287 }
5288
5289 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_FS, &val);
5290 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
5291 if (val < gdtr.cbGdt)
5292 {
5293 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5294 HMR0DumpDescriptor(pDesc, val, "FS: ");
5295 }
5296
5297 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_GS, &val);
5298 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
5299 if (val < gdtr.cbGdt)
5300 {
5301 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5302 HMR0DumpDescriptor(pDesc, val, "GS: ");
5303 }
5304
5305 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_SS, &val);
5306 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
5307 if (val < gdtr.cbGdt)
5308 {
5309 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5310 HMR0DumpDescriptor(pDesc, val, "SS: ");
5311 }
5312
5313 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_TR, &val);
5314 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
5315 if (val < gdtr.cbGdt)
5316 {
5317 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5318 HMR0DumpDescriptor(pDesc, val, "TR: ");
5319 }
5320
5321 VMXReadVmcs(VMX_VMCS_HOST_TR_BASE, &val);
5322 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
5323 VMXReadVmcs(VMX_VMCS_HOST_GDTR_BASE, &val);
5324 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
5325 VMXReadVmcs(VMX_VMCS_HOST_IDTR_BASE, &val);
5326 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
5327 VMXReadVmcs(VMX_VMCS32_HOST_SYSENTER_CS, &val);
5328 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
5329 VMXReadVmcs(VMX_VMCS_HOST_SYSENTER_EIP, &val);
5330 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
5331 VMXReadVmcs(VMX_VMCS_HOST_SYSENTER_ESP, &val);
5332 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
5333 VMXReadVmcs(VMX_VMCS_HOST_RSP, &val);
5334 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
5335 VMXReadVmcs(VMX_VMCS_HOST_RIP, &val);
5336 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
5337# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5338 if (VMX_IS_64BIT_HOST_MODE())
5339 {
5340 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
5341 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
5342 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5343 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5344 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5345 Log(("MSR_K8_KERNEL_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
5346 }
5347# endif
5348#endif /* VBOX_STRICT */
5349 }
5350 break;
5351 }
5352
5353 default:
5354 /* impossible */
5355 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
5356 break;
5357 }
5358}
5359
5360
5361#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5362/**
5363 * Prepares for and executes VMLAUNCH (64-bit guest mode).
5364 *
5365 * @returns VBox status code.
5366 * @param   fResume     Whether to vmlaunch/vmresume.
5367 * @param pCtx Pointer to the guest CPU context.
5368 * @param pCache Pointer to the VMCS cache.
5369 * @param pVM Pointer to the VM.
5370 * @param pVCpu Pointer to the VMCPU.
5371 */
5372DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5373{
5374 uint32_t aParam[6];
5375 PHMGLOBLCPUINFO pCpu;
5376 RTHCPHYS HCPhysCpuPage;
5377 int rc;
5378
5379 pCpu = HMR0GetCurrentCpu();
5380 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5381
5382#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5383 pCache->uPos = 1;
5384 pCache->interPD = PGMGetInterPaeCR3(pVM);
5385 pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0;
5386#endif
5387
5388#ifdef DEBUG
5389 pCache->TestIn.HCPhysCpuPage= 0;
5390 pCache->TestIn.HCPhysVmcs = 0;
5391 pCache->TestIn.pCache = 0;
5392 pCache->TestOut.HCPhysVmcs = 0;
5393 pCache->TestOut.pCache = 0;
5394 pCache->TestOut.pCtx = 0;
5395 pCache->TestOut.eflags = 0;
5396#endif
5397
5398 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5399 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5400 aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */
5401 aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs >> 32); /* Param 2: VMCS physical address - Hi. */
5402 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache);
5403 aParam[5] = 0;
5404
5405#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5406 pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8;
5407 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1;
5408#endif
5409 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
5410
5411#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5412 Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5);
5413 Assert(pCtx->dr[4] == 10);
5414 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff;
5415#endif
5416
5417#ifdef DEBUG
5418 AssertMsg(pCache->TestIn.HCPhysCpuPage== HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5419 AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
5420 pVCpu->hm.s.vmx.HCPhysVmcs));
5421 AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
5422 pCache->TestOut.HCPhysVmcs));
5423 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5424 pCache->TestOut.pCache));
5425 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache),
5426 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache)));
5427 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5428 pCache->TestOut.pCtx));
5429 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5430#endif
5431 return rc;
5432}
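
/*
 * Editor's note: illustrative only, not part of the original source. Since the 32-bit host
 * side can only pass 32-bit parameters to the 64-bit switcher, 64-bit physical addresses are
 * split into low/high halves in aParam[0..3] above and recombined on the 64-bit side. The
 * helper name below is hypothetical; it merely shows the recombination.
 */
#if 0 /* illustrative only */
static uint64_t hmR0IllustrateJoinU64(uint32_t uLo, uint32_t uHi)
{
    return (uint64_t)uLo | ((uint64_t)uHi << 32);   /* Reverse of the lo/hi split used for aParam[]. */
}
#endif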
5433
5434
5435# ifdef VBOX_STRICT
5436static bool hmR0VmxIsValidReadField(uint32_t idxField)
5437{
5438 switch (idxField)
5439 {
5440 case VMX_VMCS_GUEST_RIP:
5441 case VMX_VMCS_GUEST_RSP:
5442 case VMX_VMCS_GUEST_RFLAGS:
5443 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
5444 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
5445 case VMX_VMCS_GUEST_CR0:
5446 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
5447 case VMX_VMCS_GUEST_CR4:
5448 case VMX_VMCS_GUEST_DR7:
5449 case VMX_VMCS32_GUEST_SYSENTER_CS:
5450 case VMX_VMCS_GUEST_SYSENTER_EIP:
5451 case VMX_VMCS_GUEST_SYSENTER_ESP:
5452 case VMX_VMCS32_GUEST_GDTR_LIMIT:
5453 case VMX_VMCS_GUEST_GDTR_BASE:
5454 case VMX_VMCS32_GUEST_IDTR_LIMIT:
5455 case VMX_VMCS_GUEST_IDTR_BASE:
5456 case VMX_VMCS16_GUEST_FIELD_CS:
5457 case VMX_VMCS32_GUEST_CS_LIMIT:
5458 case VMX_VMCS_GUEST_CS_BASE:
5459 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
5460 case VMX_VMCS16_GUEST_FIELD_DS:
5461 case VMX_VMCS32_GUEST_DS_LIMIT:
5462 case VMX_VMCS_GUEST_DS_BASE:
5463 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
5464 case VMX_VMCS16_GUEST_FIELD_ES:
5465 case VMX_VMCS32_GUEST_ES_LIMIT:
5466 case VMX_VMCS_GUEST_ES_BASE:
5467 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
5468 case VMX_VMCS16_GUEST_FIELD_FS:
5469 case VMX_VMCS32_GUEST_FS_LIMIT:
5470 case VMX_VMCS_GUEST_FS_BASE:
5471 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
5472 case VMX_VMCS16_GUEST_FIELD_GS:
5473 case VMX_VMCS32_GUEST_GS_LIMIT:
5474 case VMX_VMCS_GUEST_GS_BASE:
5475 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
5476 case VMX_VMCS16_GUEST_FIELD_SS:
5477 case VMX_VMCS32_GUEST_SS_LIMIT:
5478 case VMX_VMCS_GUEST_SS_BASE:
5479 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
5480 case VMX_VMCS16_GUEST_FIELD_LDTR:
5481 case VMX_VMCS32_GUEST_LDTR_LIMIT:
5482 case VMX_VMCS_GUEST_LDTR_BASE:
5483 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
5484 case VMX_VMCS16_GUEST_FIELD_TR:
5485 case VMX_VMCS32_GUEST_TR_LIMIT:
5486 case VMX_VMCS_GUEST_TR_BASE:
5487 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
5488 case VMX_VMCS32_RO_EXIT_REASON:
5489 case VMX_VMCS32_RO_VM_INSTR_ERROR:
5490 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
5491 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE:
5492 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
5493 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
5494 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5495 case VMX_VMCS32_RO_IDT_INFO:
5496 case VMX_VMCS32_RO_IDT_ERROR_CODE:
5497 case VMX_VMCS_GUEST_CR3:
5498 case VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL:
5499 return true;
5500 }
5501 return false;
5502}
5503
5504
5505static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5506{
5507 switch (idxField)
5508 {
5509 case VMX_VMCS_GUEST_LDTR_BASE:
5510 case VMX_VMCS_GUEST_TR_BASE:
5511 case VMX_VMCS_GUEST_GDTR_BASE:
5512 case VMX_VMCS_GUEST_IDTR_BASE:
5513 case VMX_VMCS_GUEST_SYSENTER_EIP:
5514 case VMX_VMCS_GUEST_SYSENTER_ESP:
5515 case VMX_VMCS_GUEST_CR0:
5516 case VMX_VMCS_GUEST_CR4:
5517 case VMX_VMCS_GUEST_CR3:
5518 case VMX_VMCS_GUEST_DR7:
5519 case VMX_VMCS_GUEST_RIP:
5520 case VMX_VMCS_GUEST_RSP:
5521 case VMX_VMCS_GUEST_CS_BASE:
5522 case VMX_VMCS_GUEST_DS_BASE:
5523 case VMX_VMCS_GUEST_ES_BASE:
5524 case VMX_VMCS_GUEST_FS_BASE:
5525 case VMX_VMCS_GUEST_GS_BASE:
5526 case VMX_VMCS_GUEST_SS_BASE:
5527 return true;
5528 }
5529 return false;
5530}
5531# endif /* VBOX_STRICT */
5532
5533
5534/**
5535 * Executes the specified handler in 64-bit mode.
5536 *
5537 * @returns VBox status code.
5538 * @param pVM Pointer to the VM.
5539 * @param pVCpu Pointer to the VMCPU.
5540 * @param pCtx Pointer to the guest CPU context.
5541 * @param pfnHandler Pointer to the RC handler function.
5542 * @param cbParam Number of parameters.
5543 * @param paParam Array of 32-bit parameters.
5544 */
5545VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
5546 uint32_t *paParam)
5547{
5548 int rc, rc2;
5549 PHMGLOBLCPUINFO pCpu;
5550 RTHCPHYS HCPhysCpuPage;
5551 RTHCUINTREG uOldEFlags;
5552
5553 AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5554 Assert(pfnHandler);
5555 Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField));
5556 Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField));
5557
5558#ifdef VBOX_STRICT
5559 for (unsigned i=0;i<pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries;i++)
5560 Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i]));
5561
5562 for (unsigned i=0;i<pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries;i++)
5563 Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i]));
5564#endif
5565
5566 /* Disable interrupts. */
5567 uOldEFlags = ASMIntDisableFlags();
5568
5569#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5570 RTCPUID idHostCpu = RTMpCpuId();
5571 CPUMR0SetLApic(pVM, idHostCpu);
5572#endif
5573
5574 pCpu = HMR0GetCurrentCpu();
5575 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5576
5577    /* Clear the VMCS, marking it inactive, clearing implementation-specific data and writing the VMCS data back to memory. */
5578 VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
5579
5580 /* Leave VMX Root Mode. */
5581 VMXDisable();
5582
5583 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5584
5585 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5586 CPUMSetHyperEIP(pVCpu, pfnHandler);
5587 for (int i=(int)cbParam-1;i>=0;i--)
5588 CPUMPushHyper(pVCpu, paParam[i]);
5589
5590 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
5591
5592 /* Call switcher. */
5593 rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5594 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
5595
5596 /* Make sure the VMX instructions don't cause #UD faults. */
5597 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5598
5599 /* Enter VMX Root Mode */
5600 rc2 = VMXEnable(HCPhysCpuPage);
5601 if (RT_FAILURE(rc2))
5602 {
5603 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5604 ASMSetFlags(uOldEFlags);
5605 return VERR_VMX_VMXON_FAILED;
5606 }
5607
5608 rc2 = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
5609 AssertRC(rc2);
5610 Assert(!(ASMGetFlags() & X86_EFL_IF));
5611 ASMSetFlags(uOldEFlags);
5612 return rc;
5613}
5614#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5615
5616
5617#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5618/**
5619 * Executes VMWRITE.
5620 *
5621 * @returns VBox status code
5622 * @param pVCpu Pointer to the VMCPU.
5623 * @param idxField VMCS field index.
5624 * @param   idxField    VMCS field index.
5625 * @param   u64Val      16-, 32- or 64-bit value.
5625 */
5626VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5627{
5628 int rc;
5629 switch (idxField)
5630 {
5631 case VMX_VMCS64_CTRL_TSC_OFFSET_FULL:
5632 case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL:
5633 case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL:
5634 case VMX_VMCS64_CTRL_MSR_BITMAP_FULL:
5635 case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL:
5636 case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL:
5637 case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL:
5638 case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL:
5639 case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL:
5640 case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL:
5641 case VMX_VMCS64_GUEST_PDPTE0_FULL:
5642 case VMX_VMCS64_GUEST_PDPTE1_FULL:
5643 case VMX_VMCS64_GUEST_PDPTE2_FULL:
5644 case VMX_VMCS64_GUEST_PDPTE3_FULL:
5645 case VMX_VMCS64_GUEST_DEBUGCTL_FULL:
5646 case VMX_VMCS64_GUEST_EFER_FULL:
5647 case VMX_VMCS64_CTRL_EPTP_FULL:
5648            /* These fields consist of two parts, which are both writable in 32-bit mode. */
5649 rc = VMXWriteVmcs32(idxField, u64Val);
5650 rc |= VMXWriteVmcs32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5651 AssertRC(rc);
5652 return rc;
5653
5654 case VMX_VMCS_GUEST_LDTR_BASE:
5655 case VMX_VMCS_GUEST_TR_BASE:
5656 case VMX_VMCS_GUEST_GDTR_BASE:
5657 case VMX_VMCS_GUEST_IDTR_BASE:
5658 case VMX_VMCS_GUEST_SYSENTER_EIP:
5659 case VMX_VMCS_GUEST_SYSENTER_ESP:
5660 case VMX_VMCS_GUEST_CR0:
5661 case VMX_VMCS_GUEST_CR4:
5662 case VMX_VMCS_GUEST_CR3:
5663 case VMX_VMCS_GUEST_DR7:
5664 case VMX_VMCS_GUEST_RIP:
5665 case VMX_VMCS_GUEST_RSP:
5666 case VMX_VMCS_GUEST_CS_BASE:
5667 case VMX_VMCS_GUEST_DS_BASE:
5668 case VMX_VMCS_GUEST_ES_BASE:
5669 case VMX_VMCS_GUEST_FS_BASE:
5670 case VMX_VMCS_GUEST_GS_BASE:
5671 case VMX_VMCS_GUEST_SS_BASE:
5672            /* Queue the 64-bit value as we can't write it directly in 32-bit host mode. */
5673 if (u64Val >> 32ULL)
5674 rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val);
5675 else
5676 rc = VMXWriteVmcs32(idxField, (uint32_t)u64Val);
5677
5678 return rc;
5679
5680 default:
5681 AssertMsgFailed(("Unexpected field %x\n", idxField));
5682 return VERR_INVALID_PARAMETER;
5683 }
5684}
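
/*
 * Editor's note: illustrative only, not part of the original source. For the "_FULL" 64-bit
 * VMCS fields the encoding with the low bit set addresses the upper 32 bits, which is why
 * VMXWriteVmcs64Ex() writes idxField and idxField + 1 separately on 32-bit hosts. As an
 * example (error handling omitted, helper name hypothetical), writing the TSC offset:
 */
#if 0 /* illustrative only */
static int hmR0IllustrateWriteTscOffset(uint64_t u64TscOffset)
{
    int rc = VMXWriteVmcs32(VMX_VMCS64_CTRL_TSC_OFFSET_FULL,     (uint32_t)u64TscOffset);          /* Low half. */
    rc    |= VMXWriteVmcs32(VMX_VMCS64_CTRL_TSC_OFFSET_FULL + 1, (uint32_t)(u64TscOffset >> 32));  /* High half. */
    return rc;
}
#endif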
5685
5686
5687/**
5688 * Cache VMCS writes for running 64-bit guests on 32-bit hosts.
5689 *
5690 * @param pVCpu Pointer to the VMCPU.
5691 * @param idxField VMCS field index.
5692 * @param   u64Val      16-, 32- or 64-bit value.
5693 */
5694VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5695{
5696 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
5697
5698 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5699 ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5700
5701 /* Make sure there are no duplicates. */
5702 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
5703 {
5704 if (pCache->Write.aField[i] == idxField)
5705 {
5706 pCache->Write.aFieldVal[i] = u64Val;
5707 return VINF_SUCCESS;
5708 }
5709 }
5710
5711 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5712 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5713 pCache->Write.cValidEntries++;
5714 return VINF_SUCCESS;
5715}
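
/*
 * Editor's note: illustrative only, not part of the original source. The write cache filled
 * by VMXWriteCachedVmcsEx() is replayed once the CPU is in a mode where full 64-bit fields can
 * be written (the actual replay happens in the 64-bit switcher path). The sketch below assumes
 * a 64-bit VMCS write helper (here called VMXWriteVmcs64) and uses a hypothetical function name.
 */
#if 0 /* illustrative only */
static void hmR0IllustrateReplayCachedWrites(PVMCPU pVCpu)
{
    PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
    for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
        VMXWriteVmcs64(pCache->Write.aField[i], pCache->Write.aFieldVal[i]);  /* Full 64-bit write, no splitting. */
    pCache->Write.cValidEntries = 0;                                          /* Cache consumed. */
}
#endif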
5716
5717#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_HYBRID_32BIT_KERNEL */
5718