VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@43737

Last change on this file since 43737 was 43737, checked in by vboxsync, 12 years ago

VMM/VMMR0/HWVMXR0: Fix a typo while saving the host IDTR into the VMCS.
(It worked previously, as we restore it manually, but still having the right value in the VMCS is good
in case of VM-entry exceptions.)
Renamed a function to camel case.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 221.1 KB
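
For context, the host-IDTR handling mentioned in the change description lives in the GDTR & IDTR block of VMXR0SaveHostState() further down in this file. A minimal, editorial sketch of the corrected pattern, reduced to the IDTR write alone and using only helpers that already appear in this file (RTIDTR, ASMGetIDTR, VMXWriteVMCS, VMX_VMCS_HOST_IDTR_BASE, AssertRC), looks roughly like this:

    /* Editorial sketch (not part of the checked-in file): write the host IDTR
       base into the VMCS host-state area so that a failed VM entry still sees
       the correct value, even though it is also restored manually. */
    RTIDTR idtr;
    ASMGetIDTR(&idtr);                                          /* Read the host IDTR (limit + base). */
    int rc = VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);  /* Store its base in the VMCS. */
    AssertRC(rc);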
1/* $Id: HWVMXR0.cpp 43737 2012-10-25 11:00:40Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
55#if defined(RT_ARCH_AMD64)
56# define VMX_IS_64BIT_HOST_MODE() (true)
57#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
58# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
59#else
60# define VMX_IS_64BIT_HOST_MODE() (false)
61#endif
62
63
64/*******************************************************************************
65* Global Variables *
66*******************************************************************************/
67/* IO operation lookup arrays. */
68static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
69static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
70
71#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
72/** See HMR0A.asm. */
73extern "C" uint32_t g_fVMXIs64bitHost;
74#endif
75
76
77/*******************************************************************************
78* Local Functions *
79*******************************************************************************/
80static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
81static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
82static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
83static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
84static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
85static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
86static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
87static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
88static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
89
90
91/**
92 * Updates error from VMCS to HMCPU's lasterror record.
93 *
94 * @param pVM Pointer to the VM.
95 * @param pVCpu Pointer to the VMCPU.
96 * @param rc The error code.
97 */
98static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
99{
100 if (rc == VERR_VMX_GENERIC)
101 {
102 RTCCUINTREG instrError;
103
104 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
105 pVCpu->hm.s.vmx.lasterror.ulInstrError = instrError;
106 }
107 pVM->hm.s.lLastError = rc;
108}
109
110
111/**
112 * Sets up and activates VT-x on the current CPU.
113 *
114 * @returns VBox status code.
115 * @param pCpu Pointer to the CPU info struct.
116 * @param pVM Pointer to the VM. (can be NULL after a resume!!)
117 * @param pvCpuPage Pointer to the global CPU page.
118 * @param HCPhysCpuPage Physical address of the global CPU page.
119 * @param fEnabledByHost Set if SUPR0EnableVTx or similar was used to enable
120 * VT-x/AMD-V on the host.
121 */
122VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost)
123{
124 if (!fEnabledByHost)
125 {
126 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
127 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
128
129 if (pVM)
130 {
131 /* Set revision dword at the beginning of the VMXON structure. */
132 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.msr.vmx_basic_info);
133 }
134
135 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
136 * (which can have very bad consequences!!!)
137 */
138
139 /** @todo r=bird: Why is this code different than the probing code earlier
140 * on? It just sets VMXE if needed and doesn't check that it isn't
141 * set. Mac OS X host_vmxoff may leave this set and we'll fail here
142 * and debug-assert in the calling code. This is what caused the
143 * "regression" after backing out the SUPR0EnableVTx code hours before
144 * 4.2.0GA (reboot fixed the issue). I've changed here to do the same
145 * as the init code. */
146 uint64_t uCr4 = ASMGetCR4();
147 if (!(uCr4 & X86_CR4_VMXE))
148 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */
149
150 /*
151 * Enter VM root mode.
152 */
153 int rc = VMXEnable(HCPhysCpuPage);
154 if (RT_FAILURE(rc))
155 {
156 ASMSetCR4(uCr4);
157 return VERR_VMX_VMXON_FAILED;
158 }
159 }
160
161 /*
162 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
163 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
164 * each time while reusing a VPID after hitting the MaxASID limit once.
165 */
166 if ( pVM
167 && pVM->hm.s.vmx.fVpid
168 && (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS))
169 {
170 hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
171 pCpu->fFlushAsidBeforeUse = false;
172 }
173 else
174 pCpu->fFlushAsidBeforeUse = true;
175
176 /*
177 * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}.
178 */
179 ++pCpu->cTlbFlushes;
180
181 return VINF_SUCCESS;
182}
183
184
185/**
186 * Deactivates VT-x on the current CPU.
187 *
188 * @returns VBox status code.
189 * @param pCpu Pointer to the CPU info struct.
190 * @param pvCpuPage Pointer to the global CPU page.
191 * @param HCPhysCpuPage Physical address of the global CPU page.
192 */
193VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
194{
195 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
196 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
197 NOREF(pCpu);
198
199 /* If we're somehow not in VMX root mode, then we shouldn't dare to leave it. */
200 if (!(ASMGetCR4() & X86_CR4_VMXE))
201 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
202
203 /* Leave VMX Root Mode. */
204 VMXDisable();
205
206 /* And clear the X86_CR4_VMXE bit. */
207 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
208 return VINF_SUCCESS;
209}
210
211
212/**
213 * Does Ring-0 per VM VT-x initialization.
214 *
215 * @returns VBox status code.
216 * @param pVM Pointer to the VM.
217 */
218VMMR0DECL(int) VMXR0InitVM(PVM pVM)
219{
220 int rc;
221
222#ifdef LOG_ENABLED
223 SUPR0Printf("VMXR0InitVM %p\n", pVM);
224#endif
225
226 pVM->hm.s.vmx.hMemObjApicAccess = NIL_RTR0MEMOBJ;
227
228 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
229 {
230 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
231 rc = RTR0MemObjAllocCont(&pVM->hm.s.vmx.hMemObjApicAccess, PAGE_SIZE, false /* fExecutable */);
232 AssertRC(rc);
233 if (RT_FAILURE(rc))
234 return rc;
235
236 pVM->hm.s.vmx.pbApicAccess = (uint8_t *)RTR0MemObjAddress(pVM->hm.s.vmx.hMemObjApicAccess);
237 pVM->hm.s.vmx.HCPhysApicAccess = RTR0MemObjGetPagePhysAddr(pVM->hm.s.vmx.hMemObjApicAccess, 0);
238 ASMMemZero32(pVM->hm.s.vmx.pbApicAccess, PAGE_SIZE);
239 }
240 else
241 {
242 pVM->hm.s.vmx.hMemObjApicAccess = 0;
243 pVM->hm.s.vmx.pbApicAccess = 0;
244 pVM->hm.s.vmx.HCPhysApicAccess = 0;
245 }
246
247#ifdef VBOX_WITH_CRASHDUMP_MAGIC
248 {
249 rc = RTR0MemObjAllocCont(&pVM->hm.s.vmx.hMemObjScratch, PAGE_SIZE, false /* fExecutable */);
250 AssertRC(rc);
251 if (RT_FAILURE(rc))
252 return rc;
253
254 pVM->hm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hm.s.vmx.hMemObjScratch);
255 pVM->hm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hm.s.vmx.hMemObjScratch, 0);
256
257 ASMMemZero32(pVM->hm.s.vmx.pbScratch, PAGE_SIZE);
258 strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
259 *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
260 }
261#endif
262
263 /* Allocate VMCSs for all guest CPUs. */
264 for (VMCPUID i = 0; i < pVM->cCpus; i++)
265 {
266 PVMCPU pVCpu = &pVM->aCpus[i];
267
268 pVCpu->hm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
269
270 /* Allocate one page for the VM control structure (VMCS). */
271 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjVMCS, PAGE_SIZE, false /* fExecutable */);
272 AssertRC(rc);
273 if (RT_FAILURE(rc))
274 return rc;
275
276 pVCpu->hm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjVMCS);
277 pVCpu->hm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjVMCS, 0);
278 ASMMemZeroPage(pVCpu->hm.s.vmx.pvVMCS);
279
280 pVCpu->hm.s.vmx.cr0_mask = 0;
281 pVCpu->hm.s.vmx.cr4_mask = 0;
282
283 /* Allocate one page for the virtual APIC page for TPR caching. */
284 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjVAPIC, PAGE_SIZE, false /* fExecutable */);
285 AssertRC(rc);
286 if (RT_FAILURE(rc))
287 return rc;
288
289 pVCpu->hm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjVAPIC);
290 pVCpu->hm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjVAPIC, 0);
291 ASMMemZeroPage(pVCpu->hm.s.vmx.pbVAPIC);
292
293 /* Allocate the MSR bitmap if this feature is supported. */
294 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
295 {
296 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, PAGE_SIZE, false /* fExecutable */);
297 AssertRC(rc);
298 if (RT_FAILURE(rc))
299 return rc;
300
301 pVCpu->hm.s.vmx.pvMsrBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjMsrBitmap);
302 pVCpu->hm.s.vmx.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjMsrBitmap, 0);
303 memset(pVCpu->hm.s.vmx.pvMsrBitmap, 0xff, PAGE_SIZE);
304 }
305
306#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
307 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
308 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjGuestMsr, PAGE_SIZE, false /* fExecutable */);
309 AssertRC(rc);
310 if (RT_FAILURE(rc))
311 return rc;
312
313 pVCpu->hm.s.vmx.pvGuestMsr = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjGuestMsr);
314 pVCpu->hm.s.vmx.HCPhysGuestMsr = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjGuestMsr, 0);
315 Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf));
316 memset(pVCpu->hm.s.vmx.pvGuestMsr, 0, PAGE_SIZE);
317
318 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
319 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjHostMsr, PAGE_SIZE, false /* fExecutable */);
320 AssertRC(rc);
321 if (RT_FAILURE(rc))
322 return rc;
323
324 pVCpu->hm.s.vmx.pvHostMsr = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjHostMsr);
325 pVCpu->hm.s.vmx.HCPhysHostMsr = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjHostMsr, 0);
326 Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf));
327 memset(pVCpu->hm.s.vmx.pvHostMsr, 0, PAGE_SIZE);
328#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
329
330 /* Current guest paging mode. */
331 pVCpu->hm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
332
333#ifdef LOG_ENABLED
334 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hm.s.vmx.pvVMCS, (uint32_t)pVCpu->hm.s.vmx.HCPhysVMCS);
335#endif
336 }
337
338 return VINF_SUCCESS;
339}
340
341
342/**
343 * Does Ring-0 per VM VT-x termination.
344 *
345 * @returns VBox status code.
346 * @param pVM Pointer to the VM.
347 */
348VMMR0DECL(int) VMXR0TermVM(PVM pVM)
349{
350 for (VMCPUID i = 0; i < pVM->cCpus; i++)
351 {
352 PVMCPU pVCpu = &pVM->aCpus[i];
353
354 if (pVCpu->hm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ)
355 {
356 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjVMCS, false);
357 pVCpu->hm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
358 pVCpu->hm.s.vmx.pvVMCS = 0;
359 pVCpu->hm.s.vmx.HCPhysVMCS = 0;
360 }
361 if (pVCpu->hm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ)
362 {
363 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjVAPIC, false);
364 pVCpu->hm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ;
365 pVCpu->hm.s.vmx.pbVAPIC = 0;
366 pVCpu->hm.s.vmx.HCPhysVAPIC = 0;
367 }
368 if (pVCpu->hm.s.vmx.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
369 {
370 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjMsrBitmap, false);
371 pVCpu->hm.s.vmx.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
372 pVCpu->hm.s.vmx.pvMsrBitmap = 0;
373 pVCpu->hm.s.vmx.HCPhysMsrBitmap = 0;
374 }
375#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
376 if (pVCpu->hm.s.vmx.hMemObjHostMsr != NIL_RTR0MEMOBJ)
377 {
378 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjHostMsr, false);
379 pVCpu->hm.s.vmx.hMemObjHostMsr = NIL_RTR0MEMOBJ;
380 pVCpu->hm.s.vmx.pvHostMsr = 0;
381 pVCpu->hm.s.vmx.HCPhysHostMsr = 0;
382 }
383 if (pVCpu->hm.s.vmx.hMemObjGuestMsr != NIL_RTR0MEMOBJ)
384 {
385 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjGuestMsr, false);
386 pVCpu->hm.s.vmx.hMemObjGuestMsr = NIL_RTR0MEMOBJ;
387 pVCpu->hm.s.vmx.pvGuestMsr = 0;
388 pVCpu->hm.s.vmx.HCPhysGuestMsr = 0;
389 }
390#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
391 }
392 if (pVM->hm.s.vmx.hMemObjApicAccess != NIL_RTR0MEMOBJ)
393 {
394 RTR0MemObjFree(pVM->hm.s.vmx.hMemObjApicAccess, false);
395 pVM->hm.s.vmx.hMemObjApicAccess = NIL_RTR0MEMOBJ;
396 pVM->hm.s.vmx.pbApicAccess = 0;
397 pVM->hm.s.vmx.HCPhysApicAccess = 0;
398 }
399#ifdef VBOX_WITH_CRASHDUMP_MAGIC
400 if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
401 {
402 ASMMemZero32(pVM->hm.s.vmx.pScratch, PAGE_SIZE);
403 RTR0MemObjFree(pVM->hm.s.vmx.hMemObjScratch, false);
404 pVM->hm.s.vmx.hMemObjScratch = NIL_RTR0MEMOBJ;
405 pVM->hm.s.vmx.pScratch = 0;
406 pVM->hm.s.vmx.pScratchPhys = 0;
407 }
408#endif
409 return VINF_SUCCESS;
410}
411
412
413/**
414 * Sets up VT-x for the specified VM.
415 *
416 * @returns VBox status code.
417 * @param pVM Pointer to the VM.
418 */
419VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
420{
421 int rc = VINF_SUCCESS;
422 uint32_t val;
423
424 AssertReturn(pVM, VERR_INVALID_PARAMETER);
425
426 /* Initialize these always, see hmR3InitFinalizeR0().*/
427 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NONE;
428 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NONE;
429
430 /* Determine optimal flush type for EPT. */
431 if (pVM->hm.s.fNestedPaging)
432 {
433 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT)
434 {
435 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_SINGLE_CONTEXT)
436 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_SINGLE_CONTEXT;
437 else if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_ALL_CONTEXTS)
438 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_ALL_CONTEXTS;
439 else
440 {
441 /*
442 * Should never really happen: EPT is supported but no suitable flush types are supported.
443 * We cannot ignore EPT at this point as we've already set up Unrestricted Guest execution.
444 */
445 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
446 return VERR_VMX_GENERIC;
447 }
448 }
449 else
450 {
451 /*
452 * Should never really happen: EPT is supported but the INVEPT instruction is not supported.
453 */
454 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
455 return VERR_VMX_GENERIC;
456 }
457 }
458
459 /* Determine optimal flush type for VPID. */
460 if (pVM->hm.s.vmx.fVpid)
461 {
462 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID)
463 {
464 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
465 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_SINGLE_CONTEXT;
466 else if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS)
467 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_ALL_CONTEXTS;
468 else
469 {
470 /*
471 * Neither SINGLE nor ALL context flush types for VPID supported by the CPU.
472 * We do not handle other flush type combinations, ignore VPID capabilities.
473 */
474 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
475 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
476 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT_RETAIN_GLOBALS)
477 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
478 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
479 pVM->hm.s.vmx.fVpid = false;
480 }
481 }
482 else
483 {
484 /*
485 * Should not really happen: VPID is supported but the INVVPID instruction is not supported.
486 * Ignore VPID capabilities as our code relies on using INVVPID for selective flushing.
487 */
488 Log(("VMXR0SetupVM: VPID supported without INVVPID support. Ignoring VPID.\n"));
489 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
490 pVM->hm.s.vmx.fVpid = false;
491 }
492 }
493
494 for (VMCPUID i = 0; i < pVM->cCpus; i++)
495 {
496 PVMCPU pVCpu = &pVM->aCpus[i];
497
498 AssertPtr(pVCpu->hm.s.vmx.pvVMCS);
499
500 /* Set revision dword at the beginning of the VMCS structure. */
501 *(uint32_t *)pVCpu->hm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.msr.vmx_basic_info);
502
503 /*
504 * Clear and activate the VMCS.
505 */
506 Log(("HCPhysVMCS = %RHp\n", pVCpu->hm.s.vmx.HCPhysVMCS));
507 rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
508 if (RT_FAILURE(rc))
509 goto vmx_end;
510
511 rc = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
512 if (RT_FAILURE(rc))
513 goto vmx_end;
514
515 /*
516 * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
517 * Set required bits to one and zero according to the MSR capabilities.
518 */
519 val = pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
520 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */
521 | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */
522
523 /*
524 * Enable the VMX preemption timer.
525 */
526 if (pVM->hm.s.vmx.fUsePreemptTimer)
527 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
528 val &= pVM->hm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
529
530 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, val);
531 AssertRC(rc);
532
533 /*
534 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
535 * Set required bits to one and zero according to the MSR capabilities.
536 */
537 val = pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
538 /* Program which events cause VM-exits and which features we want to use. */
539 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
540 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
541 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
542 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
543 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
544 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
545 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside
546 the guest (host thinks the cpu load is high) */
547
548 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
549 if (!pVM->hm.s.fNestedPaging)
550 {
551 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
552 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
553 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
554 }
555
556 /*
557 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a VMLAUNCH
558 * failure with an invalid control fields error (combined with some other exit reasons).
559 */
560 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
561 {
562 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold */
563 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
564 Assert(pVM->hm.s.vmx.pbApicAccess);
565 }
566 else
567 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
568 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
569
570 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
571 {
572 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
573 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
574 }
575
576 /* We will use the secondary control if it's present. */
577 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
578
579 /* Mask away the bits that the CPU doesn't support */
580 /** @todo make sure they don't conflict with the above requirements. */
581 val &= pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
582 pVCpu->hm.s.vmx.proc_ctls = val;
583
584 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, val);
585 AssertRC(rc);
586
587 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
588 {
589 /*
590 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
591 * Set required bits to one and zero according to the MSR capabilities.
592 */
593 val = pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
594 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
595
596 if (pVM->hm.s.fNestedPaging)
597 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
598
599 if (pVM->hm.s.vmx.fVpid)
600 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
601
602 if (pVM->hm.s.fHasIoApic)
603 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
604
605 if (pVM->hm.s.vmx.fUnrestrictedGuest)
606 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
607
608 if (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
609 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP;
610
611 /* Mask away the bits that the CPU doesn't support */
612 /** @todo make sure they don't conflict with the above requirements. */
613 val &= pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
614 pVCpu->hm.s.vmx.proc_ctls2 = val;
615 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS2, val);
616 AssertRC(rc);
617 }
618
619 /*
620 * VMX_VMCS_CTRL_CR3_TARGET_COUNT
621 * Set required bits to one and zero according to the MSR capabilities.
622 */
623 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0);
624 AssertRC(rc);
625
626 /*
627 * Forward all exceptions except #NM & #PF to the guest.
628 * We always need to check page faults since our shadow page table can be out of sync.
629 * And we always lazily sync the FPU & XMM state.
630 */
631
632 /** @todo Possible optimization:
633 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
634 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
635 * registers ourselves of course.
636 *
637 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
638 */
639
640 /*
641 * Don't filter page faults; all of them should cause a world switch.
642 */
643 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0);
644 AssertRC(rc);
645 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0);
646 AssertRC(rc);
647
648 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0);
649 AssertRC(rc);
650 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0);
651 AssertRC(rc);
652 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0);
653 AssertRC(rc);
654
655 /*
656 * Set the MSR bitmap address.
657 */
658 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
659 {
660 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
661
662 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap);
663 AssertRC(rc);
664
665 /*
666 * Allow the guest to directly modify these MSRs; they are loaded/stored automatically
667 * using MSR-load/store areas in the VMCS.
668 */
669 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
670 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
671 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
672 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
673 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
674 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
675 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
676 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
677 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
678 if (pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
679 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_TSC_AUX, true, true);
680 }
681
682#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
683 /*
684 * Set the guest & host MSR load/store physical addresses.
685 */
686 Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr);
687 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
688 AssertRC(rc);
689 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
690 AssertRC(rc);
691 Assert(pVCpu->hm.s.vmx.HCPhysHostMsr);
692 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr);
693 AssertRC(rc);
694#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
695
696 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
697 AssertRC(rc);
698 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0);
699 AssertRC(rc);
700 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0);
701 AssertRC(rc);
702
703 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
704 {
705 Assert(pVM->hm.s.vmx.hMemObjApicAccess);
706 /* Optional */
707 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0);
708 rc |= VMXWriteVMCS64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVAPIC);
709
710 if (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
711 rc |= VMXWriteVMCS64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess);
712
713 AssertRC(rc);
714 }
715
716 /* Set link pointer to -1. Not currently used. */
717 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
718 AssertRC(rc);
719
720 /*
721 * Clear the VMCS, marking it inactive. This clears implementation-specific data and writes the
722 * VMCS data back to memory.
723 */
724 rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
725 AssertRC(rc);
726
727 /*
728 * Configure the VMCS read cache.
729 */
730 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
731
732 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RIP);
733 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RSP);
734 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
735 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
736 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
737 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_CR0);
738 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
739 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_CR4);
740 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_DR7);
741 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
742 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_SYSENTER_EIP);
743 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_SYSENTER_ESP);
744 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
745 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_GDTR_BASE);
746 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
747 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_IDTR_BASE);
748
749 VMX_SETUP_SELREG(ES, pCache);
750 VMX_SETUP_SELREG(SS, pCache);
751 VMX_SETUP_SELREG(CS, pCache);
752 VMX_SETUP_SELREG(DS, pCache);
753 VMX_SETUP_SELREG(FS, pCache);
754 VMX_SETUP_SELREG(GS, pCache);
755 VMX_SETUP_SELREG(LDTR, pCache);
756 VMX_SETUP_SELREG(TR, pCache);
757
758 /*
759 * Status code VMCS reads.
760 */
761 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
762 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
763 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
764 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
765 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
766 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
767 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
768 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
769 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
770
771 if (pVM->hm.s.fNestedPaging)
772 {
773 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_CR3);
774 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL);
775 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
776 }
777 else
778 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
779 } /* for each VMCPU */
780
781 /*
782 * Setup the right TLB function based on CPU capabilities.
783 */
784 if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
785 pVM->hm.s.vmx.pfnSetupTaggedTlb = hmR0VmxSetupTLBBoth;
786 else if (pVM->hm.s.fNestedPaging)
787 pVM->hm.s.vmx.pfnSetupTaggedTlb = hmR0VmxSetupTLBEPT;
788 else if (pVM->hm.s.vmx.fVpid)
789 pVM->hm.s.vmx.pfnSetupTaggedTlb = hmR0VmxSetupTLBVPID;
790 else
791 pVM->hm.s.vmx.pfnSetupTaggedTlb = hmR0VmxSetupTLBDummy;
792
793vmx_end:
794 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
795 return rc;
796}
797
798
799/**
800 * Sets the permission bits for the specified MSR.
801 *
802 * @param pVCpu Pointer to the VMCPU.
803 * @param ulMSR The MSR value.
804 * @param fRead Whether reading is allowed.
805 * @param fWrite Whether writing is allowed.
806 */
807static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
808{
809 unsigned ulBit;
810 uint8_t *pvMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
811
812 /*
813 * Layout:
814 * 0x000 - 0x3ff - Low MSR read bits
815 * 0x400 - 0x7ff - High MSR read bits
816 * 0x800 - 0xbff - Low MSR write bits
817 * 0xc00 - 0xfff - High MSR write bits
818 */
819 if (ulMSR <= 0x00001FFF)
820 {
821 /* Pentium-compatible MSRs */
822 ulBit = ulMSR;
823 }
824 else if ( ulMSR >= 0xC0000000
825 && ulMSR <= 0xC0001FFF)
826 {
827 /* AMD Sixth Generation x86 Processor MSRs */
828 ulBit = (ulMSR - 0xC0000000);
829 pvMsrBitmap += 0x400;
830 }
831 else
832 {
833 AssertFailed();
834 return;
835 }
836
837 Assert(ulBit <= 0x1fff);
838 if (fRead)
839 ASMBitClear(pvMsrBitmap, ulBit);
840 else
841 ASMBitSet(pvMsrBitmap, ulBit);
842
843 if (fWrite)
844 ASMBitClear(pvMsrBitmap + 0x800, ulBit);
845 else
846 ASMBitSet(pvMsrBitmap + 0x800, ulBit);
847}
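
[Editorial usage note, not part of the checked-in file.] The helper above is driven from VMXR0SetupVM() earlier in this file, which passes fRead = fWrite = true for the MSRs it wants the guest to touch directly; a cleared bitmap bit means "no intercept" and a set bit forces a VM-exit. As a hypothetical illustration of a mixed setting (reads allowed, writes still intercepted) one could call:

    hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true /* fRead */, false /* fWrite */);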
848
849
850/**
851 * Injects an event (trap or external interrupt).
852 *
853 * @returns VBox status code. Note that it may return VINF_EM_RESET to
854 * indicate a triple fault when injecting X86_XCPT_DF.
855 *
856 * @param pVM Pointer to the VM.
857 * @param pVCpu Pointer to the VMCPU.
858 * @param pCtx Pointer to the guest CPU Context.
859 * @param intInfo VMX interrupt info.
860 * @param cbInstr Opcode length of faulting instruction.
861 * @param errCode Error code (optional).
862 */
863static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
864{
865 int rc;
866 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
867
868#ifdef VBOX_WITH_STATISTICS
869 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
870#endif
871
872#ifdef VBOX_STRICT
873 if (iGate == 0xE)
874 {
875 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate,
876 (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
877 }
878 else if (iGate < 0x20)
879 {
880 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip,
881 errCode));
882 }
883 else
884 {
885 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
886 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
887 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
888 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
889 || pCtx->eflags.u32 & X86_EFL_IF);
890 }
891#endif
892
893 if ( CPUMIsGuestInRealModeEx(pCtx)
894 && pVM->hm.s.vmx.pRealModeTSS)
895 {
896 RTGCPHYS GCPhysHandler;
897 uint16_t offset, ip;
898 RTSEL sel;
899
900 /*
901 * Injecting events doesn't work right with real mode emulation.
902 * (#GP if we try to inject external hardware interrupts)
903 * Inject the interrupt or trap directly instead.
904 *
905 * ASSUMES no access handlers for the bits we read or write below (should be safe).
906 */
907 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
908
909 /*
910 * Check if the interrupt handler is present.
911 */
912 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
913 {
914 Log(("IDT cbIdt violation\n"));
915 if (iGate != X86_XCPT_DF)
916 {
917 uint32_t intInfo2;
918
919 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
920 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
921 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
922 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
923
924 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
925 }
926 Log(("Triple fault -> reset the VM!\n"));
927 return VINF_EM_RESET;
928 }
929 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
930 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
931 || iGate == 4)
932 {
933 ip = pCtx->ip + cbInstr;
934 }
935 else
936 ip = pCtx->ip;
937
938 /*
939 * Read the selector:offset pair of the interrupt handler.
940 */
941 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
942 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
943 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
944
945 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
946
947 /*
948 * Construct the stack frame.
949 */
950 /** @todo Check stack limit. */
951 pCtx->sp -= 2;
952 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->eflags.u));
953 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
954 pCtx->sp -= 2;
955 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->cs.Sel));
956 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
957 pCtx->sp -= 2;
958 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss.Sel, pCtx->sp, ip));
959 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
960
961 /*
962 * Update the CPU state for executing the handler.
963 */
964 pCtx->rip = offset;
965 pCtx->cs.Sel = sel;
966 pCtx->cs.u64Base = sel << 4;
967 pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
968
969 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SEGMENT_REGS;
970 return VINF_SUCCESS;
971 }
972
973 /*
974 * Set event injection state.
975 */
976 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
977 rc |= VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
978 rc |= VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
979
980 AssertRC(rc);
981 return rc;
982}
983
984
985/**
986 * Checks for pending guest interrupts and injects them.
987 *
988 * @returns VBox status code.
989 * @param pVM Pointer to the VM.
990 * @param pVCpu Pointer to the VMCPU.
991 * @param pCtx Pointer to the guest CPU context.
992 */
993static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
994{
995 int rc;
996
997 /*
998 * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely).
999 */
1000 if (pVCpu->hm.s.Event.fPending)
1001 {
1002 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hm.s.Event.intInfo,
1003 pVCpu->hm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
1004 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntReinject);
1005 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hm.s.Event.intInfo, 0, pVCpu->hm.s.Event.errCode);
1006 AssertRC(rc);
1007
1008 pVCpu->hm.s.Event.fPending = false;
1009 return VINF_SUCCESS;
1010 }
1011
1012 /*
1013 * If an active trap is already pending, we must forward it first!
1014 */
1015 if (!TRPMHasTrap(pVCpu))
1016 {
1017 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
1018 {
1019 RTGCUINTPTR intInfo;
1020
1021 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
1022
1023 intInfo = X86_XCPT_NMI;
1024 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1025 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1026
1027 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
1028 AssertRC(rc);
1029
1030 return VINF_SUCCESS;
1031 }
1032
1033 /** @todo SMI interrupts. */
1034
1035 /*
1036 * When external interrupts are pending, we should exit the VM when IF is set.
1037 */
1038 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
1039 {
1040 if (!(pCtx->eflags.u32 & X86_EFL_IF))
1041 {
1042 if (!(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
1043 {
1044 LogFlow(("Enable irq window exit!\n"));
1045 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
1046 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
1047 AssertRC(rc);
1048 }
1049 /* else nothing to do but wait */
1050 }
1051 else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1052 {
1053 uint8_t u8Interrupt;
1054
1055 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
1056 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu,
1057 u8Interrupt, u8Interrupt, rc, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
1058 if (RT_SUCCESS(rc))
1059 {
1060 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
1061 AssertRC(rc);
1062 }
1063 else
1064 {
1065 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
1066 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
1067 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
1068 /* Just continue */
1069 }
1070 }
1071 else
1072 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
1073 }
1074 }
1075
1076#ifdef VBOX_STRICT
1077 if (TRPMHasTrap(pVCpu))
1078 {
1079 uint8_t u8Vector;
1080 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
1081 AssertRC(rc);
1082 }
1083#endif
1084
1085 if ( (pCtx->eflags.u32 & X86_EFL_IF)
1086 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1087 && TRPMHasTrap(pVCpu)
1088 )
1089 {
1090 uint8_t u8Vector;
1091 TRPMEVENT enmType;
1092 RTGCUINTPTR intInfo;
1093 RTGCUINT errCode;
1094
1095 /*
1096 * If a new event is pending, dispatch it now.
1097 */
1098 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
1099 AssertRC(rc);
1100 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
1101 Assert(enmType != TRPM_SOFTWARE_INT);
1102
1103 /*
1104 * Clear the pending trap.
1105 */
1106 rc = TRPMResetTrap(pVCpu);
1107 AssertRC(rc);
1108
1109 intInfo = u8Vector;
1110 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1111
1112 if (enmType == TRPM_TRAP)
1113 {
1114 switch (u8Vector)
1115 {
1116 case X86_XCPT_DF:
1117 case X86_XCPT_TS:
1118 case X86_XCPT_NP:
1119 case X86_XCPT_SS:
1120 case X86_XCPT_GP:
1121 case X86_XCPT_PF:
1122 case X86_XCPT_AC:
1123 {
1124 /* Valid error codes. */
1125 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1126 break;
1127 }
1128
1129 default:
1130 break;
1131 }
1132
1133 if ( u8Vector == X86_XCPT_BP
1134 || u8Vector == X86_XCPT_OF)
1135 {
1136 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1137 }
1138 else
1139 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1140 }
1141 else
1142 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1143
1144 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntInject);
1145 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1146 AssertRC(rc);
1147 } /* if (interrupts can be dispatched) */
1148
1149 return VINF_SUCCESS;
1150}
1151
1152/**
1153 * Checks for pending VMX events and converts them to TRPM. Before we execute any instruction
1154 * outside of VMX, any pending VMX event must be converted so that it can be delivered properly.
1155 *
1156 * @returns VBox status code.
1157 * @param pVCpu Pointer to the VMCPU.
1158 */
1159static int hmR0VmxCheckPendingEvent(PVMCPU pVCpu)
1160{
1161 if (pVCpu->hm.s.Event.fPending)
1162 {
1163 TRPMEVENT enmTrapType;
1164
1165 /* If a trap was already pending, we did something wrong! */
1166 Assert((TRPMQueryTrap(pVCpu, NULL, NULL) == VERR_TRPM_NO_ACTIVE_TRAP));
1167
1168 /*
1169 * Clear the pending event and move it over to TRPM for the rest
1170 * of the world to see.
1171 */
1172 pVCpu->hm.s.Event.fPending = false;
1173 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.intInfo))
1174 {
1175 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT:
1176 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI:
1177 enmTrapType = TRPM_HARDWARE_INT;
1178 break;
1179 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW:
1180 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT:
1181 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT:
1182 enmTrapType = TRPM_SOFTWARE_INT;
1183 break;
1184 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT:
1185 enmTrapType = TRPM_TRAP;
1186 break;
1187 default:
1188 enmTrapType = TRPM_32BIT_HACK; /* Can't get here. */
1189 AssertFailed();
1190 }
1191 TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.intInfo), enmTrapType);
1192 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.intInfo))
1193 TRPMSetErrorCode(pVCpu, pVCpu->hm.s.Event.errCode);
1194 //@todo: Is there any situation where we need to call TRPMSetFaultAddress()?
1195 }
1196 return VINF_SUCCESS;
1197}
1198
1199/**
1200 * Save the host state into the VMCS.
1201 *
1202 * @returns VBox status code.
1203 * @param pVM Pointer to the VM.
1204 * @param pVCpu Pointer to the VMCPU.
1205 */
1206VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1207{
1208 int rc = VINF_SUCCESS;
1209 NOREF(pVM);
1210
1211 /*
1212 * Host CPU Context.
1213 */
1214 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
1215 {
1216 RTIDTR idtr;
1217 RTGDTR gdtr;
1218 RTSEL SelTR;
1219 PCX86DESCHC pDesc;
1220 uintptr_t trBase;
1221 RTSEL cs;
1222 RTSEL ss;
1223 uint64_t cr3;
1224
1225 /*
1226 * Control registers.
1227 */
1228 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1229 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1230#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1231 if (VMX_IS_64BIT_HOST_MODE())
1232 {
1233 cr3 = hmR0Get64bitCR3();
1234 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1235 }
1236 else
1237#endif
1238 {
1239 cr3 = ASMGetCR3();
1240 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1241 }
1242 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1243 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1244 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1245 AssertRC(rc);
1246
1247 /*
1248 * Selector registers.
1249 */
1250#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1251 if (VMX_IS_64BIT_HOST_MODE())
1252 {
1253 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1254 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1255 }
1256 else
1257 {
1258 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1259 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1260 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1261 }
1262#else
1263 cs = ASMGetCS();
1264 ss = ASMGetSS();
1265#endif
1266 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1267 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1268 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1269 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1270 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1271 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1272#if HC_ARCH_BITS == 32
1273 if (!VMX_IS_64BIT_HOST_MODE())
1274 {
1275 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1276 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1277 }
1278#endif
1279 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1280 SelTR = ASMGetTR();
1281 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1282 AssertRC(rc);
1283 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1284 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1285 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1286 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1287 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1288 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1289 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1290
1291 /*
1292 * GDTR & IDTR.
1293 */
1294#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1295 if (VMX_IS_64BIT_HOST_MODE())
1296 {
1297 X86XDTR64 gdtr64, idtr64;
1298 hmR0Get64bitGdtrAndIdtr(&gdtr64, &idtr64);
1299 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1300 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1301 AssertRC(rc);
1302 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1303 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1304 gdtr.cbGdt = gdtr64.cb;
1305 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1306 }
1307 else
1308#endif
1309 {
1310 ASMGetGDTR(&gdtr);
1311 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1312 ASMGetIDTR(&idtr);
1313 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1314 AssertRC(rc);
1315 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1316 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1317 }
1318
1319 /*
1320 * Save the base address of the TR selector.
1321 */
1322 if (SelTR > gdtr.cbGdt)
1323 {
1324 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1325 return VERR_VMX_INVALID_HOST_STATE;
1326 }
1327
1328 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1329#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1330 if (VMX_IS_64BIT_HOST_MODE())
1331 {
1332 uint64_t trBase64 = X86DESC64_BASE((PX86DESC64)pDesc);
1333 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1334 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1335 AssertRC(rc);
1336 }
1337 else
1338#endif
1339 {
1340#if HC_ARCH_BITS == 64
1341 trBase = X86DESC64_BASE(pDesc);
1342#else
1343 trBase = X86DESC_BASE(pDesc);
1344#endif
1345 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1346 AssertRC(rc);
1347 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1348 }
1349
1350 /*
1351 * FS base and GS base.
1352 */
1353#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1354 if (VMX_IS_64BIT_HOST_MODE())
1355 {
1356 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1357 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1358 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1359 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1360 }
1361#endif
1362 AssertRC(rc);
1363
1364 /*
1365 * Sysenter MSRs.
1366 */
1367 /** @todo expensive!! */
1368 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1369 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1370#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1371 if (VMX_IS_64BIT_HOST_MODE())
1372 {
1373 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1374 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1375 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1376 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1377 }
1378 else
1379 {
1380 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1381 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1382 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1383 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1384 }
1385#elif HC_ARCH_BITS == 32
1386 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1387 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1388 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1389 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1390#else
1391 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1392 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1393 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1394 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1395#endif
1396 AssertRC(rc);
1397
1398
1399#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1400 /*
1401 * Store all host MSRs in the VM-Exit load area, so they will be reloaded after
1402 * the world switch back to the host.
1403 */
1404 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvHostMsr;
1405 unsigned idxMsr = 0;
1406
1407 uint32_t u32HostExtFeatures = ASMCpuId_EDX(0x80000001);
1408 if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
1409 {
1410 pMsr->u32IndexMSR = MSR_K6_EFER;
1411 pMsr->u32Reserved = 0;
1412# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1413 if (CPUMIsGuestInLongMode(pVCpu))
1414 {
1415 /* Must match the EFER value in our 64 bits switcher. */
1416 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1417 }
1418 else
1419# endif
1420 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1421 pMsr++; idxMsr++;
1422 }
1423
1424# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1425 if (VMX_IS_64BIT_HOST_MODE())
1426 {
1427 pMsr->u32IndexMSR = MSR_K6_STAR;
1428 pMsr->u32Reserved = 0;
1429 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1430 pMsr++; idxMsr++;
1431 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1432 pMsr->u32Reserved = 0;
1433 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1434 pMsr++; idxMsr++;
1435 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1436 pMsr->u32Reserved = 0;
1437 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1438 pMsr++; idxMsr++;
1439
1440 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
1441#if 0
1442 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1443 pMsr->u32Reserved = 0;
1444 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1445 pMsr++; idxMsr++;
1446#endif
1447 }
1448# endif
1449
1450 if (pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
1451 {
1452 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
1453 pMsr->u32Reserved = 0;
1454 pMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX);
1455 pMsr++; idxMsr++;
1456 }
1457
1458 /** @todo r=ramshankar: check IA32_VMX_MISC bits 27:25 for valid idxMsr
1459 * range. */
1460 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1461 AssertRC(rc);
1462#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1463
1464 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_HOST_CONTEXT;
1465 }
1466 return rc;
1467}
1468
1469
1470/**
1471 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1472 * guest operates in PAE mode.
1473 *
1474 * @returns VBox status code.
1475 * @param pVCpu Pointer to the VMCPU.
1476 * @param pCtx Pointer to the guest CPU context.
1477 */
1478static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1479{
1480 if (CPUMIsGuestInPAEModeEx(pCtx))
1481 {
1482 X86PDPE aPdpes[4];
1483 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1484 AssertRCReturn(rc, rc);
1485
1486 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_PDPTE0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1487 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_PDPTE1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1488 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_PDPTE2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1489 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_PDPTE3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1490 }
1491 return VINF_SUCCESS;
1492}
1493
1494
1495/**
1496 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1497 * guest operates in PAE mode.
1498 *
1499 * @returns VBox status code.
1500 * @param pVCpu Pointer to the VM CPU.
1501 * @param pCtx Pointer to the guest CPU context.
1502 *
1503 * @remarks Tell PGM about CR3 changes before calling this helper.
1504 */
1505static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1506{
1507 if (CPUMIsGuestInPAEModeEx(pCtx))
1508 {
1509 int rc;
1510 X86PDPE aPdpes[4];
1511 rc = VMXReadVMCS64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1512 rc = VMXReadVMCS64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1513 rc = VMXReadVMCS64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1514 rc = VMXReadVMCS64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1515
1516 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1517 AssertRCReturn(rc, rc);
1518 }
1519 return VINF_SUCCESS;
1520}
1521
1522
1523/**
1524 * Update the exception bitmap according to the current CPU state.
1525 *
1526 * @param pVM Pointer to the VM.
1527 * @param pVCpu Pointer to the VMCPU.
1528 * @param pCtx Pointer to the guest CPU context.
1529 */
1530static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1531{
1532 uint32_t u32TrapMask;
1533 Assert(pCtx);
1534
1535 /*
1536 * Set up a mask for intercepting traps.
1537 */
1538 /** @todo Do we really need to always intercept #DB? */
1539 u32TrapMask = RT_BIT(X86_XCPT_DB)
1540 | RT_BIT(X86_XCPT_NM)
1541#ifdef VBOX_ALWAYS_TRAP_PF
1542 | RT_BIT(X86_XCPT_PF)
1543#endif
1544#ifdef VBOX_STRICT
1545 | RT_BIT(X86_XCPT_BP)
1546 | RT_BIT(X86_XCPT_DB)
1547 | RT_BIT(X86_XCPT_DE)
1548 | RT_BIT(X86_XCPT_NM)
1549 | RT_BIT(X86_XCPT_UD)
1550 | RT_BIT(X86_XCPT_NP)
1551 | RT_BIT(X86_XCPT_SS)
1552 | RT_BIT(X86_XCPT_GP)
1553 | RT_BIT(X86_XCPT_MF)
1554#endif
1555 ;
1556
1557 /*
1558 * Without nested paging, #PF must be intercepted to implement shadow paging.
1559 */
1560 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1561 if (!pVM->hm.s.fNestedPaging)
1562 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1563
1564 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
1565 if (!(pCtx->cr0 & X86_CR0_NE))
1566 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1567
1568#ifdef VBOX_STRICT
1569 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1570#endif
1571
1572 /*
1573 * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise).
1574 */
1575 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1576 if ( CPUMIsGuestInRealModeEx(pCtx)
1577 && pVM->hm.s.vmx.pRealModeTSS)
1578 {
1579 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1580 | RT_BIT(X86_XCPT_DB)
1581 | RT_BIT(X86_XCPT_NMI)
1582 | RT_BIT(X86_XCPT_BP)
1583 | RT_BIT(X86_XCPT_OF)
1584 | RT_BIT(X86_XCPT_BR)
1585 | RT_BIT(X86_XCPT_UD)
1586 | RT_BIT(X86_XCPT_DF)
1587 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1588 | RT_BIT(X86_XCPT_TS)
1589 | RT_BIT(X86_XCPT_NP)
1590 | RT_BIT(X86_XCPT_SS)
1591 | RT_BIT(X86_XCPT_GP)
1592 | RT_BIT(X86_XCPT_MF)
1593 | RT_BIT(X86_XCPT_AC)
1594 | RT_BIT(X86_XCPT_MC)
1595 | RT_BIT(X86_XCPT_XF)
1596 ;
1597 }
1598
1599 int rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1600 AssertRC(rc);
1601}
1602
1603
1604/**
1605 * Loads a minimal guest state.
1606 *
1607 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1608 *
1609 * @param pVM Pointer to the VM.
1610 * @param pVCpu Pointer to the VMCPU.
1611 * @param pCtx Pointer to the guest CPU context.
1612 */
1613VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1614{
1615 int rc;
1616 X86EFLAGS eflags;
1617
1618 Assert(!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_ALL_GUEST));
1619
1620 /*
1621 * Load EIP, ESP and EFLAGS.
1622 */
1623 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_RIP, pCtx->rip);
1624 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_RSP, pCtx->rsp);
1625 AssertRC(rc);
1626
1627 /*
1628 * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1.
1629 */
1630 eflags = pCtx->eflags;
1631 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1632 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1633
1634 /*
1635 * Check whether we are emulating real mode using v86 mode.
1636 */
1637 if ( CPUMIsGuestInRealModeEx(pCtx)
1638 && pVM->hm.s.vmx.pRealModeTSS)
1639 {
1640 pVCpu->hm.s.vmx.RealMode.eflags = eflags;
1641
1642 eflags.Bits.u1VM = 1;
1643 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1644 }
1645 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1646 AssertRC(rc);
1647}
1648
1649
1650/**
1651 * Loads the guest state.
1652 *
1653 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1654 *
1655 * @returns VBox status code.
1656 * @param pVM Pointer to the VM.
1657 * @param pVCpu Pointer to the VMCPU.
1658 * @param pCtx Pointer to the guest CPU context.
1659 */
1660VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1661{
1662 int rc = VINF_SUCCESS;
1663 RTGCUINTPTR val;
1664
1665 /*
1666 * VMX_VMCS_CTRL_ENTRY_CONTROLS
1667 * Set required bits to one and zero according to the MSR capabilities.
1668 */
1669 val = pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0;
1670
1671 /*
1672 * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR).
1673 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes even the newest Nehalem CPUs.
1674 */
1675 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1676
1677 if (CPUMIsGuestInLongModeEx(pCtx))
1678 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1679 /* else: must be zero when AMD64 is not available. */
1680
1681 /*
1682 * Mask away the bits that the CPU doesn't support.
1683 */
1684 val &= pVM->hm.s.vmx.msr.vmx_entry.n.allowed1;
1685 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_CONTROLS, val);
1686 AssertRC(rc);
1687
1688 /*
1689 * VMX_VMCS_CTRL_EXIT_CONTROLS
1690 * Set required bits to one and zero according to the MSR capabilities.
1691 */
1692 val = pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0;
1693
1694 /*
1695 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR).
1696 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes even the newest Nehalem CPUs.
1697 */
1698 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1699
1700#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1701 if (VMX_IS_64BIT_HOST_MODE())
1702 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1703 /* else: must be zero when AMD64 is not available. */
1704#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1705 if (CPUMIsGuestInLongModeEx(pCtx))
1706 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1707 else
1708 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1709#endif
1710 val &= pVM->hm.s.vmx.msr.vmx_exit.n.allowed1;
1711
1712 /*
1713 * Don't acknowledge external interrupts on VM-exit.
1714 */
1715 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_CONTROLS, val);
1716 AssertRC(rc);
1717
1718 /*
1719 * Guest CPU context: ES, CS, SS, DS, FS, GS.
1720 */
1721 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SEGMENT_REGS)
1722 {
1723 if (pVM->hm.s.vmx.pRealModeTSS)
1724 {
1725 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1726 if (pVCpu->hm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1727 {
1728 /*
1729 * Correct weird requirements for switching to protected mode.
1730 */
1731 if ( pVCpu->hm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1732 && enmGuestMode >= PGMMODE_PROTECTED)
1733 {
1734#ifdef VBOX_WITH_REM
1735 /*
1736 * Flush the recompiler code cache, as the guest may well rewrite code that
1737 * it will later execute in real mode (OpenBSD 4.0 is one such example).
1738 */
1739 REMFlushTBs(pVM);
1740#endif
1741
1742 /*
1743 * DPL of all hidden selector registers must match the current CPL (0).
1744 */
1745 pCtx->cs.Attr.n.u2Dpl = 0;
1746 pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1747
1748 pCtx->ds.Attr.n.u2Dpl = 0;
1749 pCtx->es.Attr.n.u2Dpl = 0;
1750 pCtx->fs.Attr.n.u2Dpl = 0;
1751 pCtx->gs.Attr.n.u2Dpl = 0;
1752 pCtx->ss.Attr.n.u2Dpl = 0;
1753 }
1754 pVCpu->hm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1755 }
1756 }
1757
1758 VMX_WRITE_SELREG(ES, es);
1759 AssertRC(rc);
1760
1761 VMX_WRITE_SELREG(CS, cs);
1762 AssertRC(rc);
1763
1764 VMX_WRITE_SELREG(SS, ss);
1765 AssertRC(rc);
1766
1767 VMX_WRITE_SELREG(DS, ds);
1768 AssertRC(rc);
1769
1770 VMX_WRITE_SELREG(FS, fs);
1771 AssertRC(rc);
1772
1773 VMX_WRITE_SELREG(GS, gs);
1774 AssertRC(rc);
1775 }
1776
1777 /*
1778 * Guest CPU context: LDTR.
1779 */
1780 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_LDTR)
1781 {
1782 if (pCtx->ldtr.Sel == 0)
1783 {
1784 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1785 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1786 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_LDTR_BASE, 0); /** @todo using the non-"64" variant of the function should yield the same result. */
1787 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1788 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
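/* 0x82 = present (bit 7) + system descriptor type 2, i.e. an LDT descriptor. */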
1789 }
1790 else
1791 {
1792 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr.Sel);
1793 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit);
1794 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtr.u64Base); /** @todo using the non-"64" variant of the function should yield the same result. */
1795 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtr.Attr.u);
1796 }
1797 AssertRC(rc);
1798 }
1799
1800 /*
1801 * Guest CPU context: TR.
1802 */
1803 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_TR)
1804 {
1805 /*
1806 * Real mode emulation using v86 mode with CR4.VME (interrupt redirection
1807 * using the int bitmap in the TSS).
1808 */
1809 if ( CPUMIsGuestInRealModeEx(pCtx)
1810 && pVM->hm.s.vmx.pRealModeTSS)
1811 {
1812 RTGCPHYS GCPhys;
1813
1814 /* We convert it here every time as PCI regions could be reconfigured. */
1815 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
1816 AssertRC(rc);
1817
1818 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1819 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HM_VTX_TSS_SIZE);
1820 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1821
1822 X86DESCATTR attr;
1823
1824 attr.u = 0;
1825 attr.n.u1Present = 1;
1826 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1827 val = attr.u;
1828 }
1829 else
1830 {
1831 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr.Sel);
1832 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->tr.u32Limit);
1833 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_TR_BASE, pCtx->tr.u64Base);
1834
1835 val = pCtx->tr.Attr.u;
1836
1837 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
1838 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1839 {
1840 if (val & 0xf)
1841 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1842 else
1843 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1844 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1845 }
1846 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY,
1847 ("%#x\n", val));
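/*
 * The VM-entry guest-state checks require TR to refer to a busy (286 or 386 style) TSS,
 * which is why the attribute value is patched up above when necessary.
 */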
1848 }
1849 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1850 AssertRC(rc);
1851 }
1852
1853 /*
1854 * Guest CPU context: GDTR.
1855 */
1856 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_GDTR)
1857 {
1858 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1859 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1860 AssertRC(rc);
1861 }
1862
1863 /*
1864 * Guest CPU context: IDTR.
1865 */
1866 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_IDTR)
1867 {
1868 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1869 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1870 AssertRC(rc);
1871 }
1872
1873 /*
1874 * Sysenter MSRs.
1875 */
1876 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_MSR)
1877 {
1878 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1879 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1880 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1881 AssertRC(rc);
1882 }
1883
1884 /*
1885 * Guest CPU context: Control registers.
1886 */
1887 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0)
1888 {
1889 val = pCtx->cr0;
1890 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1891 Log2(("Guest CR0-shadow %08x\n", val));
1892 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1893 {
1894 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1895 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1896 }
1897 else
1898 {
1899 /** @todo check if we support the old style mess correctly. */
1900 if (!(val & X86_CR0_NE))
1901 Log(("Forcing X86_CR0_NE!!!\n"));
1902
1903 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1904 }
1905 /* Protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1906 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
1907 val |= X86_CR0_PE | X86_CR0_PG;
1908
1909 if (pVM->hm.s.fNestedPaging)
1910 {
1911 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1912 {
1913 /* Disable CR3 read/write monitoring as we don't need it for EPT. */
1914 pVCpu->hm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1915 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1916 }
1917 else
1918 {
1919 /* Reenable CR3 read/write monitoring as our identity mapped page table is active. */
1920 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1921 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1922 }
1923 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
1924 AssertRC(rc);
1925 }
1926 else
1927 {
1928 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1929 val |= X86_CR0_WP;
1930 }
1931
1932 /* Always enable caching. */
1933 val &= ~(X86_CR0_CD|X86_CR0_NW);
1934
1935 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_CR0, val);
1936 Log2(("Guest CR0 %08x\n", val));
1937
1938 /*
1939 * CR0 flags owned by the host; if the guest attempts to change them, then the VM will exit.
1940 */
1941 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1942 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1943 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1944 | X86_CR0_CD /* Bit not restored during VM-exit! */
1945 | X86_CR0_NW /* Bit not restored during VM-exit! */
1946 | X86_CR0_NE;
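/*
 * Bits set in this CR0 guest/host mask are owned by the host: guest reads of them return
 * the read shadow written above and guest writes to them cause a VM-exit.
 */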
1947
1948 /*
1949 * When the guest's FPU state is active, we no longer care about the FPU-related bits.
1950 */
1951 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1952 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1953
1954 pVCpu->hm.s.vmx.cr0_mask = val;
1955
1956 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1957 Log2(("Guest CR0-mask %08x\n", val));
1958 AssertRC(rc);
1959 }
1960
1961 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR4)
1962 {
1963 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1964 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1965 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1966 val = pCtx->cr4 | (uint32_t)pVM->hm.s.vmx.msr.vmx_cr4_fixed0;
1967
1968 if (!pVM->hm.s.fNestedPaging)
1969 {
1970 switch (pVCpu->hm.s.enmShadowMode)
1971 {
1972 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1973 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1974 case PGMMODE_32_BIT: /* 32-bit paging. */
1975 val &= ~X86_CR4_PAE;
1976 break;
1977
1978 case PGMMODE_PAE: /* PAE paging. */
1979 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1980 /* Must use PAE paging as we could use physical memory > 4 GB. */
1981 val |= X86_CR4_PAE;
1982 break;
1983
1984 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1985 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1986#ifdef VBOX_ENABLE_64_BITS_GUESTS
1987 break;
1988#else
1989 AssertFailed();
1990 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1991#endif
1992 default: /* shut up gcc */
1993 AssertFailed();
1994 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1995 }
1996 }
1997 else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1998 && !pVM->hm.s.vmx.fUnrestrictedGuest)
1999 {
2000 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
2001 val |= X86_CR4_PSE;
2002 /* Our identity mapping is a 32-bit page directory. */
2003 val &= ~X86_CR4_PAE;
2004 }
2005
2006 /*
2007 * Turn off VME if we're in emulated real mode.
2008 */
2009 if ( CPUMIsGuestInRealModeEx(pCtx)
2010 && pVM->hm.s.vmx.pRealModeTSS)
2011 {
2012 val &= ~X86_CR4_VME;
2013 }
2014
2015 rc |= VMXWriteVMCS64(VMX_VMCS_GUEST_CR4, val);
2016 Log2(("Guest CR4 %08x\n", val));
2017
2018 /*
2019 * CR4 flags owned by the host; if the guest attempts to change them, then the VM will exit.
2020 */
2021 val = 0
2022 | X86_CR4_VME
2023 | X86_CR4_PAE
2024 | X86_CR4_PGE
2025 | X86_CR4_PSE
2026 | X86_CR4_VMXE;
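/*
 * Same mechanism as for CR0 above: the guest sees the CR4 read shadow for these host-owned
 * bits and any attempt to change them causes a VM-exit.
 */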
2027 pVCpu->hm.s.vmx.cr4_mask = val;
2028
2029 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
2030 Log2(("Guest CR4-mask %08x\n", val));
2031 AssertRC(rc);
2032 }
2033
2034#if 0
2035 /* Enable single stepping if requested and CPU supports it. */
2036 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
2037 if (DBGFIsStepping(pVCpu))
2038 {
2039 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
2040 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2041 AssertRC(rc);
2042 }
2043#endif
2044
2045 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR3)
2046 {
2047 if (pVM->hm.s.fNestedPaging)
2048 {
2049 Assert(PGMGetHyperCR3(pVCpu));
2050 pVCpu->hm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
2051
2052 Assert(!(pVCpu->hm.s.vmx.GCPhysEPTP & 0xfff));
2053 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
2054 pVCpu->hm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
2055 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
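/*
 * The EPTP combines the physical address of the EPT PML4 table with format bits: bits 2:0
 * select the memory type (write-back here) and bits 5:3 hold the page-walk length minus
 * one, which is what the two constants OR-ed in above supply.
 */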
2056
2057 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.GCPhysEPTP);
2058 AssertRC(rc);
2059
2060 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
2061 && !pVM->hm.s.vmx.fUnrestrictedGuest)
2062 {
2063 RTGCPHYS GCPhys;
2064
2065 /* We convert it here every time as PCI regions could be reconfigured. */
2066 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
2067 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hm.s.vmx.pNonPagingModeEPTPageTable));
2068
2069 /*
2070 * We use our identity mapping page table here as we need to map guest virtual to
2071 * guest physical addresses; EPT will take care of the translation to host physical addresses.
2072 */
2073 val = GCPhys;
2074 }
2075 else
2076 {
2077 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
2078 val = pCtx->cr3;
2079 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
2080 AssertRCReturn(rc, rc);
2081 }
2082 }
2083 else
2084 {
2085 val = PGMGetHyperCR3(pVCpu);
2086 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
2087 }
2088
2089 /* Commit the CR3 value determined above to the VMCS (guest CR3 with nested paging, shadow CR3 otherwise). */
2090 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_CR3, val);
2091 AssertRC(rc);
2092 }
2093
2094 /*
2095 * Guest CPU context: Debug registers.
2096 */
2097 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_DEBUG)
2098 {
2099 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
2100 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
2101
2102 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2103 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2104 pCtx->dr[7] |= 0x400; /* must be one */
2105
2106 /* Resync DR7 */
2107 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
2108 AssertRC(rc);
2109
2110#ifdef DEBUG
2111 /* Sync the hypervisor debug state now if any breakpoint is armed. */
2112 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
2113 && !CPUMIsHyperDebugStateActive(pVCpu)
2114 && !DBGFIsStepping(pVCpu))
2115 {
2116 /* Save the host and load the hypervisor debug state. */
2117 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2118 AssertRC(rc);
2119
2120 /* DRx intercepts remain enabled. */
2121
2122 /* Override dr7 with the hypervisor value. */
2123 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
2124 AssertRC(rc);
2125 }
2126 else
2127#endif
2128 /* Sync the debug state now if any breakpoint is armed. */
2129 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
2130 && !CPUMIsGuestDebugStateActive(pVCpu)
2131 && !DBGFIsStepping(pVCpu))
2132 {
2133 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
2134
2135 /* Disable DRx move intercepts. */
2136 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2137 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2138 AssertRC(rc);
2139
2140 /* Save the host and load the guest debug state. */
2141 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2142 AssertRC(rc);
2143 }
2144
2145 /* IA32_DEBUGCTL MSR. */
2146 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0);
2147 AssertRC(rc);
2148
2149 /** @todo do we really ever need this? */
2150 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
2151 AssertRC(rc);
2152 }
2153
2154 /*
2155 * 64-bit guest mode.
2156 */
2157 if (CPUMIsGuestInLongModeEx(pCtx))
2158 {
2159#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
2160 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2161#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2162 pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
2163#else
2164# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2165 if (!pVM->hm.s.fAllow64BitGuests)
2166 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2167# endif
2168 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64;
2169#endif
2170 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_MSR)
2171 {
2172 /* Update these as wrmsr might have changed them. */
2173 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_FS_BASE, pCtx->fs.u64Base);
2174 AssertRC(rc);
2175 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_GS_BASE, pCtx->gs.u64Base);
2176 AssertRC(rc);
2177 }
2178 }
2179 else
2180 {
2181 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
2182 }
2183
2184 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
2185
2186#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2187 /*
2188 * Store all guest MSRs in the VM-entry load area, so they will be loaded
2189 * during VM-entry and restored into the VM-exit store area during VM-exit.
2190 */
2191 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvGuestMsr;
2192 unsigned idxMsr = 0;
2193
2194 uint32_t u32GstExtFeatures;
2195 uint32_t u32Temp;
2196 CPUMGetGuestCpuId(pVCpu, 0x80000001, &u32Temp, &u32Temp, &u32Temp, &u32GstExtFeatures);
2197
2198 if (u32GstExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2199 {
2200 pMsr->u32IndexMSR = MSR_K6_EFER;
2201 pMsr->u32Reserved = 0;
2202 pMsr->u64Value = pCtx->msrEFER;
2203 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
2204 if (!CPUMIsGuestInLongModeEx(pCtx))
2205 pMsr->u64Value &= ~(MSR_K6_EFER_LMA | MSR_K6_EFER_LME);
2206 pMsr++; idxMsr++;
2207
2208 if (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)
2209 {
2210 pMsr->u32IndexMSR = MSR_K8_LSTAR;
2211 pMsr->u32Reserved = 0;
2212 pMsr->u64Value = pCtx->msrLSTAR; /* 64-bit mode syscall rip */
2213 pMsr++; idxMsr++;
2214 pMsr->u32IndexMSR = MSR_K6_STAR;
2215 pMsr->u32Reserved = 0;
2216 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
2217 pMsr++; idxMsr++;
2218 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
2219 pMsr->u32Reserved = 0;
2220 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
2221 pMsr++; idxMsr++;
2222
2223 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2224#if 0
2225 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
2226 pMsr->u32Reserved = 0;
2227 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
2228 pMsr++; idxMsr++;
2229#endif
2230 }
2231 }
2232
2233 if ( pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP
2234 && (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP))
2235 {
2236 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
2237 pMsr->u32Reserved = 0;
2238 rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pMsr->u64Value);
2239 AssertRC(rc);
2240 pMsr++; idxMsr++;
2241 }
2242
2243 pVCpu->hm.s.vmx.cCachedMsrs = idxMsr;
2244
2245 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
2246 AssertRC(rc);
2247
2248 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
2249 AssertRC(rc);
2250#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2251
2252 bool fOffsettedTsc;
2253 if (pVM->hm.s.vmx.fUsePreemptTimer)
2254 {
2255 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset);
2256
2257 /* Make sure the returned values have sane upper and lower boundaries. */
2258 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
2259
2260 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
2261 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
2262
2263 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
2264 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
2265 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
2266 AssertRC(rc);
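/*
 * The preemption timer counts down at the TSC rate divided by 2^cPreemptTimerShift (a ratio
 * the CPU advertises in its VMX misc. capabilities), hence the shift above. The clamping
 * above keeps the deadline between roughly 0.5 ms (1/2048 s) and 15.6 ms (1/64 s) of guest
 * execution.
 */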
2267 }
2268 else
2269 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset);
2270
2271 if (fOffsettedTsc)
2272 {
2273 uint64_t u64CurTSC = ASMReadTSC();
2274 if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
2275 {
2276 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
2277 rc = VMXWriteVMCS64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset);
2278 AssertRC(rc);
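/*
 * With RDTSC exiting disabled the guest reads host TSC + u64TSCOffset, so the check above
 * guarantees the offsetted value never falls behind the last TSC value the guest has
 * already seen, i.e. the guest TSC stays monotonic.
 */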
2279
2280 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2281 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2282 AssertRC(rc);
2283 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
2284 }
2285 else
2286 {
2287 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
2288 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
2289 pVCpu->hm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset,
2290 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hm.s.vmx.u64TSCOffset,
2291 TMCpuTickGet(pVCpu)));
2292 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2293 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2294 AssertRC(rc);
2295 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
2296 }
2297 }
2298 else
2299 {
2300 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2301 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
2302 AssertRC(rc);
2303 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
2304 }
2305
2306 /* Done with the major changes */
2307 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_ALL_GUEST;
2308
2309 /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */
2310 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2311 return rc;
2312}
2313
2314
2315/**
2316 * Syncs back the guest state from VMCS.
2317 *
2318 * @returns VBox status code.
2319 * @param pVM Pointer to the VM.
2320 * @param pVCpu Pointer to the VMCPU.
2321 * @param pCtx Pointer to the guest CPU context.
2322 */
2323DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2324{
2325 RTGCUINTREG val, valShadow;
2326 RTGCUINTPTR uInterruptState;
2327 int rc;
2328
2329 /* First sync back EIP, ESP, and EFLAGS. */
2330 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RIP, &val);
2331 AssertRC(rc);
2332 pCtx->rip = val;
2333 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RSP, &val);
2334 AssertRC(rc);
2335 pCtx->rsp = val;
2336 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2337 AssertRC(rc);
2338 pCtx->eflags.u32 = val;
2339
2340 /* Take care of interrupt inhibition due to instruction fusing (sti, mov ss). */
2341 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2342 uInterruptState = val;
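/* In the interruptibility state, bit 0 means blocking by STI and bit 1 blocking by MOV SS/POP SS;
   the assertion below only expects those two cases. */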
2343 if (uInterruptState != 0)
2344 {
2345 Assert(uInterruptState <= 2); /* only sti & mov ss */
2346 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2347 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2348 }
2349 else
2350 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2351
2352 /* Control registers. */
2353 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2354 VMXReadCachedVMCS(VMX_VMCS_GUEST_CR0, &val);
2355 val = (valShadow & pVCpu->hm.s.vmx.cr0_mask) | (val & ~pVCpu->hm.s.vmx.cr0_mask);
2356 CPUMSetGuestCR0(pVCpu, val);
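/* Host-owned bits (those set in cr0_mask) are taken from the read shadow, i.e. the value the
   guest thinks is in CR0, while the remaining bits come from the real VMCS guest CR0. */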
2357
2358 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2359 VMXReadCachedVMCS(VMX_VMCS_GUEST_CR4, &val);
2360 val = (valShadow & pVCpu->hm.s.vmx.cr4_mask) | (val & ~pVCpu->hm.s.vmx.cr4_mask);
2361 CPUMSetGuestCR4(pVCpu, val);
2362
2363 /*
2364 * No reason to sync back the CRx registers: they can't be changed by the guest, except in
2365 * the nested paging case where CR3 & CR4 can be changed by the guest directly.
2366 */
2367 if ( pVM->hm.s.fNestedPaging
2368 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2369 {
2370 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
2371
2372 /* Can be updated behind our back in the nested paging case. */
2373 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2374
2375 VMXReadCachedVMCS(VMX_VMCS_GUEST_CR3, &val);
2376
2377 if (val != pCtx->cr3)
2378 {
2379 CPUMSetGuestCR3(pVCpu, val);
2380 PGMUpdateCR3(pVCpu, val);
2381 }
2382 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2383 AssertRCReturn(rc, rc);
2384 }
2385
2386 /* Sync back DR7. */
2387 VMXReadCachedVMCS(VMX_VMCS_GUEST_DR7, &val);
2388 pCtx->dr[7] = val;
2389
2390 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2391 VMX_READ_SELREG(ES, es);
2392 VMX_READ_SELREG(SS, ss);
2393 VMX_READ_SELREG(CS, cs);
2394 VMX_READ_SELREG(DS, ds);
2395 VMX_READ_SELREG(FS, fs);
2396 VMX_READ_SELREG(GS, gs);
2397
2398 /* System MSRs */
2399 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2400 pCtx->SysEnter.cs = val;
2401 VMXReadCachedVMCS(VMX_VMCS_GUEST_SYSENTER_EIP, &val);
2402 pCtx->SysEnter.eip = val;
2403 VMXReadCachedVMCS(VMX_VMCS_GUEST_SYSENTER_ESP, &val);
2404 pCtx->SysEnter.esp = val;
2405
2406 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2407 VMX_READ_SELREG(LDTR, ldtr);
2408
2409 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2410 pCtx->gdtr.cbGdt = val;
2411 VMXReadCachedVMCS(VMX_VMCS_GUEST_GDTR_BASE, &val);
2412 pCtx->gdtr.pGdt = val;
2413
2414 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2415 pCtx->idtr.cbIdt = val;
2416 VMXReadCachedVMCS(VMX_VMCS_GUEST_IDTR_BASE, &val);
2417 pCtx->idtr.pIdt = val;
2418
2419 /* Real mode emulation using v86 mode. */
2420 if ( CPUMIsGuestInRealModeEx(pCtx)
2421 && pVM->hm.s.vmx.pRealModeTSS)
2422 {
2423 /* Hide our emulation flags */
2424 pCtx->eflags.Bits.u1VM = 0;
2425
2426 /* Restore original IOPL setting as we always use 0. */
2427 pCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2428
2429 /* Force a TR resync every time in case we switch modes. */
2430 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_TR;
2431 }
2432 else
2433 {
2434 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2435 VMX_READ_SELREG(TR, tr);
2436 }
2437
2438#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2439 /*
2440 * Save the possibly changed MSRs that we automatically restore and save during a world switch.
2441 */
2442 for (unsigned i = 0; i < pVCpu->hm.s.vmx.cCachedMsrs; i++)
2443 {
2444 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvGuestMsr;
2445 pMsr += i;
2446
2447 switch (pMsr->u32IndexMSR)
2448 {
2449 case MSR_K8_LSTAR:
2450 pCtx->msrLSTAR = pMsr->u64Value;
2451 break;
2452 case MSR_K6_STAR:
2453 pCtx->msrSTAR = pMsr->u64Value;
2454 break;
2455 case MSR_K8_SF_MASK:
2456 pCtx->msrSFMASK = pMsr->u64Value;
2457 break;
2458 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2459#if 0
2460 case MSR_K8_KERNEL_GS_BASE:
2461 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2462 break;
2463#endif
2464 case MSR_K8_TSC_AUX:
2465 CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value);
2466 break;
2467
2468 case MSR_K6_EFER:
2469 /* EFER can't be changed without causing a VM-exit. */
2470 /* Assert(pCtx->msrEFER == pMsr->u64Value); */
2471 break;
2472
2473 default:
2474 AssertFailed();
2475 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2476 }
2477 }
2478#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2479 return VINF_SUCCESS;
2480}
2481
2482
2483/**
2484 * Dummy placeholder for TLB flush handling before VM-entry. Used in the case
2485 * where neither EPT nor VPID is supported by the CPU.
2486 *
2487 * @param pVM Pointer to the VM.
2488 * @param pVCpu Pointer to the VMCPU.
2489 */
2490static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2491{
2492 NOREF(pVM);
2493 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2494 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2495 pVCpu->hm.s.TlbShootdown.cPages = 0;
2496 return;
2497}
2498
2499
2500/**
2501 * Sets up the tagged TLB for EPT+VPID.
2502 *
2503 * @param pVM Pointer to the VM.
2504 * @param pVCpu Pointer to the VMCPU.
2505 */
2506static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2507{
2508 PHMGLOBLCPUINFO pCpu;
2509
2510 Assert(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid);
2511
2512 pCpu = HMR0GetCurrentCpu();
2513
2514 /*
2515 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2516 * This can happen both for start & resume due to long jumps back to ring-3.
2517 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2518 * or the host CPU has come online again after a suspend/resume, so we cannot reuse the current ASID anymore.
2519 */
2520 bool fNewAsid = false;
2521 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2522 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2523 {
2524 pVCpu->hm.s.fForceTLBFlush = true;
2525 fNewAsid = true;
2526 }
2527
2528 /*
2529 * Check for explicit TLB shootdowns.
2530 */
2531 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2532 pVCpu->hm.s.fForceTLBFlush = true;
2533
2534 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2535
2536 if (pVCpu->hm.s.fForceTLBFlush)
2537 {
2538 if (fNewAsid)
2539 {
2540 ++pCpu->uCurrentAsid;
2541 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2542 {
2543 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
2544 pCpu->cTlbFlushes++;
2545 pCpu->fFlushAsidBeforeUse = true;
2546 }
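/* Wrapping the ASID space bumps cTlbFlushes; every VCPU still carrying an older flush count
   will then force a full flush before it reuses an ASID. */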
2547
2548 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2549 if (pCpu->fFlushAsidBeforeUse)
2550 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2551 }
2552 else
2553 {
2554 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
2555 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2556 else
2557 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2558 }
2559
2560 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2561 pVCpu->hm.s.fForceTLBFlush = false;
2562 }
2563 else
2564 {
2565 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
2566 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
2567 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
2568 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
2569
2570 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2571 * not be executed. See hmQueueInvlPage() where it is commented
2572 * out. Support individual entry flushing someday. */
2573 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2574 {
2575 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
2576
2577 /*
2578 * Flush individual guest TLB entries using VPID, or flush as little as possible with EPT,
2579 * depending on what the CPU supports.
2580 */
2581 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2582 {
2583 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
2584 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
2585 }
2586 else
2587 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2588 }
2589 else
2590 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2591 }
2592
2593 pVCpu->hm.s.TlbShootdown.cPages = 0;
2594 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2595
2596 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2597 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2598 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2599 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
2600 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2601 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2602
2603 /* Update VMCS with the VPID. */
2604 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2605 AssertRC(rc);
2606}
2607
2608
2609/**
2610 * Sets up the tagged TLB for EPT only.
2611 *
2613 * @param pVM Pointer to the VM.
2614 * @param pVCpu Pointer to the VMCPU.
2615 */
2616static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2617{
2618 PHMGLOBLCPUINFO pCpu;
2619
2620 Assert(pVM->hm.s.fNestedPaging);
2621 Assert(!pVM->hm.s.vmx.fVpid);
2622
2623 pCpu = HMR0GetCurrentCpu();
2624
2625 /*
2626 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2627 * This can happen both for start & resume due to long jumps back to ring-3.
2628 * A change in the TLB flush count implies the host CPU is online again after a suspend/resume.
2629 */
2630 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2631 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2632 {
2633 pVCpu->hm.s.fForceTLBFlush = true;
2634 }
2635
2636 /*
2637 * Check for explicit TLB shootdown flushes.
2638 */
2639 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2640 pVCpu->hm.s.fForceTLBFlush = true;
2641
2642 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2643 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2644
2645 if (pVCpu->hm.s.fForceTLBFlush)
2646 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2647 else
2648 {
2649 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2650 * not be executed. See hmQueueInvlPage() where it is commented
2651 * out. Support individual entry flushing someday. */
2652 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2653 {
2654 /*
2655 * We cannot flush individual entries without VPID support. Flush using EPT.
2656 */
2657 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
2658 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2659 }
2660 }
2661 pVCpu->hm.s.TlbShootdown.cPages = 0;
2662 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2663
2664#ifdef VBOX_WITH_STATISTICS
2665 /** @todo r=ramshankar: this is not accurate anymore with the VPID+EPT
2666 * handling. Should be fixed later. */
2667 if (pVCpu->hm.s.fForceTLBFlush)
2668 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2669 else
2670 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2671#endif
2672}
2673
2674
2675/**
2676 * Sets up the tagged TLB for VPID.
2677 *
2679 * @param pVM Pointer to the VM.
2680 * @param pVCpu Pointer to the VMCPU.
2681 */
2682static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2683{
2684 PHMGLOBLCPUINFO pCpu;
2685
2686 Assert(pVM->hm.s.vmx.fVpid);
2687 Assert(!pVM->hm.s.fNestedPaging);
2688
2689 pCpu = HMR0GetCurrentCpu();
2690
2691 /*
2692 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2693 * This can happen both for start & resume due to long jumps back to ring-3.
2694 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2695 * or the host CPU has come online again after a suspend/resume, so we cannot reuse the current ASID anymore.
2696 */
2697 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2698 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2699 {
2700 /* Force a TLB flush on VM entry. */
2701 pVCpu->hm.s.fForceTLBFlush = true;
2702 }
2703
2704 /*
2705 * Check for explicit TLB shootdown flushes.
2706 */
2707 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2708 pVCpu->hm.s.fForceTLBFlush = true;
2709
2710 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2711
2712 if (pVCpu->hm.s.fForceTLBFlush)
2713 {
2714 ++pCpu->uCurrentAsid;
2715 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2716 {
2717 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
2718 pCpu->cTlbFlushes++;
2719 pCpu->fFlushAsidBeforeUse = true;
2720 }
2721
2722 pVCpu->hm.s.fForceTLBFlush = false;
2723 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2724 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2725 if (pCpu->fFlushAsidBeforeUse)
2726 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2727 }
2728 else
2729 {
2730 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
2731 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
2732 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
2733 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
2734
2735 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2736 * not be executed. See hmQueueInvlPage() where it is commented
2737 * out. Support individual entry flushing someday. */
2738 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2739 {
2740 /*
2741 * Flush individual guest TLB entries using VPID, or flush as little as possible with EPT,
2742 * depending on what the CPU supports.
2743 */
2744 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2745 {
2746 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
2747 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
2748 }
2749 else
2750 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2751 }
2752 }
2753 pVCpu->hm.s.TlbShootdown.cPages = 0;
2754 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2755
2756 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2757 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2758 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2759 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
2760 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2761 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2762
2763 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2764 AssertRC(rc);
2765
2766#ifdef VBOX_WITH_STATISTICS
2767 /** @todo r=ramshankar: this is not accurate anymore with EPT+VPID handling.
2768 * Should be fixed later. */
2769 if (pVCpu->hm.s.fForceTLBFlush)
2770 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2771 else
2772 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2773#endif
2774}
2775
2776
2777/**
2778 * Runs guest code in a VT-x VM.
2779 *
2780 * @returns VBox status code.
2781 * @param pVM Pointer to the VM.
2782 * @param pVCpu Pointer to the VMCPU.
2783 * @param pCtx Pointer to the guest CPU context.
2784 */
2785VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2786{
2787 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
2788 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1);
2789 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2);
2790
2791 VBOXSTRICTRC rc = VINF_SUCCESS;
2792 int rc2;
2793 RTGCUINTREG val;
2794 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2795 RTGCUINTREG instrError, cbInstr;
2796 RTGCUINTPTR exitQualification = 0;
2797 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2798 RTGCUINTPTR errCode, instrInfo;
2799 bool fSetupTPRCaching = false;
2800 uint64_t u64OldLSTAR = 0;
2801 uint8_t u8LastTPR = 0;
2802 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2803 unsigned cResume = 0;
2804#ifdef VBOX_STRICT
2805 RTCPUID idCpuCheck;
2806 bool fWasInLongMode = false;
2807#endif
2808#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2809 uint64_t u64LastTime = RTTimeMilliTS();
2810#endif
2811
2812 Assert(!(pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2813 || (pVCpu->hm.s.vmx.pbVAPIC && pVM->hm.s.vmx.pbApicAccess));
2814
2815 /*
2816 * Check if we need to use TPR shadowing.
2817 */
2818 if ( CPUMIsGuestInLongModeEx(pCtx)
2819 || ( (( pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2820 || pVM->hm.s.fTRPPatchingAllowed)
2821 && pVM->hm.s.fHasIoApic)
2822 )
2823 {
2824 fSetupTPRCaching = true;
2825 }
2826
2827 Log2(("\nE"));
2828
2829 /* This is not ideal, but if we don't clear the event injection in the VMCS right here,
2830 * we may end up injecting some stale event into a VM, including injecting an event that
2831 * originated before a VM reset *after* the VM has been reset. See @bugref{6220}.
2832 */
2833 VMXWriteVMCS(VMX_VMCS32_CTRL_ENTRY_IRQ_INFO, 0);
2834
2835#ifdef VBOX_STRICT
2836 {
2837 RTCCUINTREG val2;
2838
2839 rc2 = VMXReadVMCS(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, &val2);
2840 AssertRC(rc2);
2841 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2842
2843 /* allowed zero */
2844 if ((val2 & pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2845 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2846
2847 /* allowed one */
2848 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2849 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2850
2851 rc2 = VMXReadVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, &val2);
2852 AssertRC(rc2);
2853 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2854
2855 /*
2856 * Must be set according to the MSR, but can be cleared if nested paging is used.
2857 */
2858 if (pVM->hm.s.fNestedPaging)
2859 {
2860 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2861 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2862 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2863 }
2864
2865 /* allowed zero */
2866 if ((val2 & pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2867 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2868
2869 /* allowed one */
2870 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2871 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2872
2873 rc2 = VMXReadVMCS(VMX_VMCS32_CTRL_ENTRY_CONTROLS, &val2);
2874 AssertRC(rc2);
2875 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2876
2877 /* allowed zero */
2878 if ((val2 & pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0)
2879 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2880
2881 /* allowed one */
2882 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2883 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2884
2885 rc2 = VMXReadVMCS(VMX_VMCS32_CTRL_EXIT_CONTROLS, &val2);
2886 AssertRC(rc2);
2887 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2888
2889 /* allowed zero */
2890 if ((val2 & pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0)
2891 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2892
2893 /* allowed one */
2894 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2895 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2896 }
2897 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2898#endif /* VBOX_STRICT */
2899
2900#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2901 pVCpu->hm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2902#endif
2903
2904 /*
2905 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
2906 */
2907ResumeExecution:
2908 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hm.s.StatEntry))
2909 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit2, &pVCpu->hm.s.StatEntry, x);
2910 AssertMsg(pVCpu->hm.s.idEnteredCpu == RTMpCpuId(),
2911 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2912 (int)pVCpu->hm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2913 Assert(!HMR0SuspendPending());
2914 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2915 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2916
2917 /*
2918 * Safety precaution; looping for too long here can have a very bad effect on the host.
2919 */
2920 if (RT_UNLIKELY(++cResume > pVM->hm.s.cMaxResumeLoops))
2921 {
2922 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
2923 rc = VINF_EM_RAW_INTERRUPT;
2924 goto end;
2925 }
2926
2927 /*
2928 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
2929 */
2930 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2931 {
2932 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2933 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2934 {
2935 /*
2936 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2937 * Before we are able to execute this instruction in raw mode (iret to guest code), an external interrupt might
2938 * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process. This could
2939 * break the guest. It sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
2940 */
2941 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2942 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2943 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2944 AssertRC(rc2);
2945 }
2946 }
2947 else
2948 {
2949 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2950 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2951 AssertRC(rc2);
2952 }
2953
2954#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2955 if (RT_UNLIKELY((cResume & 0xf) == 0))
2956 {
2957 uint64_t u64CurTime = RTTimeMilliTS();
2958
2959 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2960 {
2961 u64LastTime = u64CurTime;
2962 TMTimerPollVoid(pVM, pVCpu);
2963 }
2964 }
2965#endif
2966
2967 /*
2968 * Check for pending actions that force us to go back to ring-3.
2969 */
2970 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2971 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2972 {
2973 /* Check if a sync operation is pending. */
2974 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2975 {
2976 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2977 if (rc != VINF_SUCCESS)
2978 {
2979 AssertRC(VBOXSTRICTRC_VAL(rc));
2980 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2981 goto end;
2982 }
2983 }
2984
2985#ifdef DEBUG
2986 /* Intercept X86_XCPT_DB if stepping is enabled */
2987 if (!DBGFIsStepping(pVCpu))
2988#endif
2989 {
2990 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK)
2991 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
2992 {
2993 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchToR3);
2994 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2995 goto end;
2996 }
2997 }
2998
2999 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
3000 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
3001 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
3002 {
3003 rc = VINF_EM_PENDING_REQUEST;
3004 goto end;
3005 }
3006
3007 /* Check if a pgm pool flush is in progress. */
3008 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
3009 {
3010 rc = VINF_PGM_POOL_FLUSH_PENDING;
3011 goto end;
3012 }
3013
3014 /* Check if DMA work is pending (2nd+ run). */
3015 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
3016 {
3017 rc = VINF_EM_RAW_TO_R3;
3018 goto end;
3019 }
3020 }
3021
3022#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3023 /*
3024 * Exit to ring-3 if preemption or other work is pending.
3025 *
3026 * Interrupts are disabled before the call to make sure we don't miss any interrupt
3027 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
3028 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
3029 *
3030 * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
3031 * shootdowns rely on this.
3032 */
3033 uOldEFlags = ASMIntDisableFlags();
3034 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
3035 {
3036 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptPending);
3037 rc = VINF_EM_RAW_INTERRUPT;
3038 goto end;
3039 }
3040 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3041#endif
3042
3043 /*
3044 * When external interrupts are pending, we should exit the VM when IF is set.
3045 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!
3046 */
3047 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
3048 if (RT_FAILURE(rc))
3049 goto end;
3050
3051 /** @todo check timers?? */
3052
3053 /*
3054 * TPR caching using CR8 is only available in 64-bit mode.
3055 * Note: AMD CPUs have a 32-bit exception to this (X86_CPUID_AMD_FEATURE_ECX_CR8L), but it appears to be missing on Intel CPUs.
3056 * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (no longer true).
3057 */
3058 /** @todo query and update the TPR only when it could have been changed (mmio
3059 * access & wrmsr (x2apic)). */
3060 if (fSetupTPRCaching)
3061 {
3062 /* TPR caching in CR8 */
3063 bool fPending;
3064
3065 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
3066 AssertRC(rc2);
3067 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
3068 pVCpu->hm.s.vmx.pbVAPIC[0x80] = u8LastTPR;
3069
3070 /*
3071 * Two options here:
3072 * - external interrupt pending, but masked by the TPR value.
3073 * -> a CR8 update that lowers the current TPR value should cause an exit.
3074 * - no pending interrupts
3075 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
3076 */
3077
3078 /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
3079 rc = VMXWriteVMCS(VMX_VMCS32_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0);
3080 AssertRC(VBOXSTRICTRC_VAL(rc));
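/*
 * The TPR threshold makes the CPU exit as soon as the guest lowers its virtual TPR below the
 * threshold, so a pending but currently masked interrupt gets noticed; with nothing pending
 * the threshold is simply left at 0.
 */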
3081
3082 if (pVM->hm.s.fTPRPatchingActive)
3083 {
3084 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3085 /* Our patch code uses LSTAR for TPR caching. */
3086 pCtx->msrLSTAR = u8LastTPR;
3087
3088 /** @todo r=ramshankar: we should check for MSR-bitmap support here. */
3089 if (fPending)
3090 {
3091 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
3092 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
3093 }
3094 else
3095 {
3096 /*
3097 * No interrupts are pending, so we don't need to be explicitly notified.
3098 * There are enough world switches for detecting pending interrupts.
3099 */
3100 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
3101 }
3102 }
3103 }
3104
3105#ifdef LOG_ENABLED
3106 if ( pVM->hm.s.fNestedPaging
3107 || pVM->hm.s.vmx.fVpid)
3108 {
3109 PHMGLOBLCPUINFO pCpu = HMR0GetCurrentCpu();
3110 if (pVCpu->hm.s.idLastCpu != pCpu->idCpu)
3111 {
3112 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hm.s.idLastCpu,
3113 pCpu->idCpu));
3114 }
3115 else if (pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
3116 {
3117 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hm.s.cTlbFlushes,
3118 pCpu->cTlbFlushes));
3119 }
3120 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
3121 LogFlow(("Manual TLB flush\n"));
3122 }
3123#endif
3124#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3125 PGMRZDynMapFlushAutoSet(pVCpu);
3126#endif
3127
3128 /*
3129 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3!
3130 * (until the actual world switch)
3131 */
3132#ifdef VBOX_STRICT
3133 idCpuCheck = RTMpCpuId();
3134#endif
3135#ifdef LOG_ENABLED
3136 VMMR0LogFlushDisable(pVCpu);
3137#endif
3138
3139 /*
3140 * Save the host state first.
3141 */
3142 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
3143 {
3144 rc = VMXR0SaveHostState(pVM, pVCpu);
3145 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3146 {
3147 VMMR0LogFlushEnable(pVCpu);
3148 goto end;
3149 }
3150 }
3151
3152 /*
3153 * Load the guest state.
3154 */
3155 if (!pVCpu->hm.s.fContextUseFlags)
3156 {
3157 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
3158 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal);
3159 }
3160 else
3161 {
3162 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
3163 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3164 {
3165 VMMR0LogFlushEnable(pVCpu);
3166 goto end;
3167 }
3168 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
3169 }
3170
3171#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3172 /*
3173 * Disable interrupts to make sure a poke will interrupt execution.
3174 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
3175 */
3176 uOldEFlags = ASMIntDisableFlags();
3177 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3178#endif
3179
3180 /* Non-register state Guest Context */
3181 /** @todo change me according to cpu state */
3182 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
3183 AssertRC(rc2);
3184
3185 /* Set TLB flush state as checked until we return from the world switch. */
3186 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);
3187 /* Deal with tagged TLB setup and invalidation. */
3188 pVM->hm.s.vmx.pfnSetupTaggedTlb(pVM, pVCpu);
3189
3190 /*
3191 * Manual save and restore:
3192 * - General purpose registers except RIP, RSP
3193 *
3194 * Trashed:
3195 * - CR2 (we don't care)
3196 * - LDTR (reset to 0)
3197 * - DRx (presumably not changed at all)
3198 * - DR7 (reset to 0x400)
3199 * - EFLAGS (reset to RT_BIT(1); not relevant)
3200 */
3201
3202 /* All done! Let's start VM execution. */
3203 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
3204 Assert(idCpuCheck == RTMpCpuId());
3205
3206#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3207 pVCpu->hm.s.vmx.VMCSCache.cResume = cResume;
3208 pVCpu->hm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
3209#endif
3210
3211 /*
3212 * Save the current TPR value in the LSTAR MSR so our patches can access it.
3213 */
3214 if (pVM->hm.s.fTPRPatchingActive)
3215 {
3216 Assert(pVM->hm.s.fTPRPatchingActive);
3217 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3218 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
3219 }
3220
3221 TMNotifyStartOfExecution(pVCpu);
3222
3223#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3224 /*
3225 * Save the current host TSC_AUX and write the guest TSC_AUX to the host, so that
3226 * RDTSCPs (that don't cause exits) read the guest MSR. See @bugref{3324}.
3227 */
3228 if ( (pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3229 && !(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3230 {
3231 pVCpu->hm.s.u64HostTSCAux = ASMRdMsr(MSR_K8_TSC_AUX);
3232 uint64_t u64GuestTSCAux = 0;
3233 rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTSCAux);
3234 AssertRC(rc2);
3235 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTSCAux);
3236 }
3237#endif
3238
3239#ifdef VBOX_WITH_KERNEL_USING_XMM
3240 rc = hmR0VMXStartVMWrapXMM(pVCpu->hm.s.fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM);
3241#else
3242 rc = pVCpu->hm.s.vmx.pfnStartVM(pVCpu->hm.s.fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu);
3243#endif
3244 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false);
3245 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits);
3246
3247     /* Possibly the last TSC value seen by the guest (too high); only relevant when we're in TSC-offsetting mode. */
3248 if (!(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3249 {
3250#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3251 /* Restore host's TSC_AUX. */
3252 if (pVCpu->hm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3253 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTSCAux);
3254#endif
3255
3256 TMCpuTickSetLastSeen(pVCpu,
3257 ASMReadTSC() + pVCpu->hm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
3258 }
3259
3260 TMNotifyEndOfExecution(pVCpu);
3261 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
3262 Assert(!(ASMGetFlags() & X86_EFL_IF));
3263
3264 /*
3265 * Restore the host LSTAR MSR if the guest could have changed it.
3266 */
3267 if (pVM->hm.s.fTPRPatchingActive)
3268 {
3269 Assert(pVM->hm.s.fTPRPatchingActive);
3270 pVCpu->hm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3271 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
3272 }
3273
3274 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
3275 ASMSetFlags(uOldEFlags);
3276#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3277 uOldEFlags = ~(RTCCUINTREG)0;
3278#endif
3279
3280 AssertMsg(!pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries=%d\n",
3281 pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries));
3282
3283 /* In case we execute a goto ResumeExecution later on. */
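    /* The VMCS has been launched, so the next entry can use VMRESUME rather than VMLAUNCH; no TLB flush is forced either. */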
3284 pVCpu->hm.s.fResumeVM = true;
3285 pVCpu->hm.s.fForceTLBFlush = false;
3286
3287 /*
3288 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3289 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
3290 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3291 */
3292
3293 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3294 {
3295 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
3296 VMMR0LogFlushEnable(pVCpu);
3297 goto end;
3298 }
3299
3300 /* Success. Query the guest state and figure out what has happened. */
3301
3302 /* Investigate why there was a VM-exit. */
3303 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3304 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
3305
3306     exitReason &= 0xffff;   /* bits 0-15 contain the basic exit reason. */
3307 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3308 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
3309 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
3310 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
3311 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
3312 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
3313 rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
3314 AssertRC(rc2);
3315
3316 /*
3317 * Sync back the guest state.
3318 */
3319 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
3320 AssertRC(rc2);
3321
3322 /* Note! NOW IT'S SAFE FOR LOGGING! */
3323 VMMR0LogFlushEnable(pVCpu);
3324 Log2(("Raw exit reason %08x\n", exitReason));
3325#if ARCH_BITS == 64 /* for the time being */
3326 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
3327#endif
3328
3329 /*
3330 * Check if an injected event was interrupted prematurely.
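     * The IDT-vectoring information is only valid when this VM-exit occurred while a prior event was being
     * delivered through the guest IDT; such an event must be remembered and re-injected on the next entry.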
3331 */
3332 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
3333 AssertRC(rc2);
3334 pVCpu->hm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
3335 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hm.s.Event.intInfo)
3336 /* Ignore 'int xx' as they'll be restarted anyway. */
3337 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
3338 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3339 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3340 {
3341 Assert(!pVCpu->hm.s.Event.fPending);
3342 pVCpu->hm.s.Event.fPending = true;
3343 /* Error code present? */
3344 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.intInfo))
3345 {
3346 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
3347 AssertRC(rc2);
3348 pVCpu->hm.s.Event.errCode = val;
3349 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n",
3350 pVCpu->hm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
3351 }
3352 else
3353 {
3354 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hm.s.Event.intInfo,
3355 (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3356 pVCpu->hm.s.Event.errCode = 0;
3357 }
3358 }
3359#ifdef VBOX_STRICT
3360 else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hm.s.Event.intInfo)
3361              /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3362 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3363 {
3364 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n",
3365 pVCpu->hm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3366 }
3367
3368 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
3369 HMDumpRegs(pVM, pVCpu, pCtx);
3370#endif
3371
3372 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3373 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3374 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3375 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3376 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3377
3378 /*
3379 * Sync back the TPR if it was changed.
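     * Offset 0x80 into the virtual-APIC page is the TPR, which the guest may have changed without causing an exit.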
3380 */
3381 if ( fSetupTPRCaching
3382 && u8LastTPR != pVCpu->hm.s.vmx.pbVAPIC[0x80])
3383 {
3384 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVAPIC[0x80]);
3385 AssertRC(rc2);
3386 }
3387
3388#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3389 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3390 exitReason, (uint64_t)exitQualification, pCtx->cs.Sel, pCtx->rip, (uint64_t)intInfo);
3391#endif
3392 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
3393
3394 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3395 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3396 switch (exitReason)
3397 {
3398 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3399 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3400 {
3401 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3402
3403 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3404 {
3405 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
3406 /* External interrupt; leave to allow it to be dispatched again. */
3407 rc = VINF_EM_RAW_INTERRUPT;
3408 break;
3409 }
3410 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub3, y3);
3411 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3412 {
3413 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3414 /* External interrupt; leave to allow it to be dispatched again. */
3415 rc = VINF_EM_RAW_INTERRUPT;
3416 break;
3417
3418 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
3419 AssertFailed(); /* can't come here; fails the first check. */
3420 break;
3421
3422 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
3423 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
3424 Assert(vector == 1 || vector == 3 || vector == 4);
3425 /* no break */
3426 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
3427 Log2(("Hardware/software interrupt %d\n", vector));
3428 switch (vector)
3429 {
3430 case X86_XCPT_NM:
3431 {
3432 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3433
3434 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3435 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3436 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3437 if (rc == VINF_SUCCESS)
3438 {
3439 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3440
3441 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
3442
3443 /* Continue execution. */
3444 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
3445
3446 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3447 goto ResumeExecution;
3448 }
3449
3450 Log(("Forward #NM fault to the guest\n"));
3451 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
3452 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3453 cbInstr, 0);
3454 AssertRC(rc2);
3455 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3456 goto ResumeExecution;
3457 }
3458
3459 case X86_XCPT_PF: /* Page fault */
3460 {
3461#ifdef VBOX_ALWAYS_TRAP_PF
3462 if (pVM->hm.s.fNestedPaging)
3463 {
3464 /*
3465 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3466 */
3467 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification,
3468 errCode, (RTGCPTR)pCtx->rsp));
3469
3470 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3471
3472 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3473
3474 /* Now we must update CR2. */
3475 pCtx->cr2 = exitQualification;
3476 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3477 cbInstr, errCode);
3478 AssertRC(rc2);
3479
3480 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3481 goto ResumeExecution;
3482 }
3483#else
3484 Assert(!pVM->hm.s.fNestedPaging);
3485#endif
3486
3487#ifdef VBOX_HM_WITH_GUEST_PATCHING
3488             /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
3489 if ( pVM->hm.s.fTRPPatchingAllowed
3490 && pVM->hm.s.pGuestPatchMem
3491 && (exitQualification & 0xfff) == 0x080
3492 && !(errCode & X86_TRAP_PF_P) /* not present */
3493 && CPUMGetGuestCPL(pVCpu) == 0
3494 && !CPUMIsGuestInLongModeEx(pCtx)
3495 && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
3496 {
3497 RTGCPHYS GCPhysApicBase, GCPhys;
3498 GCPhysApicBase = pCtx->msrApicBase;
3499 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3500
3501 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3502 if ( rc == VINF_SUCCESS
3503 && GCPhys == GCPhysApicBase)
3504 {
3505 /* Only attempt to patch the instruction once. */
3506 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3507 if (!pPatch)
3508 {
3509 rc = VINF_EM_HM_PATCH_TPR_INSTR;
3510 break;
3511 }
3512 }
3513 }
3514#endif
3515
3516 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3517 /* Exit qualification contains the linear address of the page fault. */
3518 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3519 TRPMSetErrorCode(pVCpu, errCode);
3520 TRPMSetFaultAddress(pVCpu, exitQualification);
3521
3522 /* Shortcut for APIC TPR reads and writes. */
3523 if ( (exitQualification & 0xfff) == 0x080
3524 && !(errCode & X86_TRAP_PF_P) /* not present */
3525 && fSetupTPRCaching
3526 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3527 {
3528 RTGCPHYS GCPhysApicBase, GCPhys;
3529 GCPhysApicBase = pCtx->msrApicBase;
3530 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3531
3532 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3533 if ( rc == VINF_SUCCESS
3534 && GCPhys == GCPhysApicBase)
3535 {
3536 Log(("Enable VT-x virtual APIC access filtering\n"));
3537 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
3538 AssertRC(rc2);
3539 }
3540 }
3541
3542 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3543 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3544 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3545
3546 if (rc == VINF_SUCCESS)
3547 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3548                 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3549 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
3550
3551 TRPMResetTrap(pVCpu);
3552 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3553 goto ResumeExecution;
3554 }
3555 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3556 {
3557 /*
3558 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3559 */
3560 Log2(("Forward page fault to the guest\n"));
3561
3562 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3563 /* The error code might have been changed. */
3564 errCode = TRPMGetErrorCode(pVCpu);
3565
3566 TRPMResetTrap(pVCpu);
3567
3568 /* Now we must update CR2. */
3569 pCtx->cr2 = exitQualification;
3570 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3571 cbInstr, errCode);
3572 AssertRC(rc2);
3573
3574 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3575 goto ResumeExecution;
3576 }
3577#ifdef VBOX_STRICT
3578 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3579 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3580#endif
3581 /* Need to go back to the recompiler to emulate the instruction. */
3582 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
3583 TRPMResetTrap(pVCpu);
3584
3585 /* If event delivery caused the #PF (shadow or not), tell TRPM. */
3586 hmR0VmxCheckPendingEvent(pVCpu);
3587 break;
3588 }
3589
3590 case X86_XCPT_MF: /* Floating point exception. */
3591 {
3592 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
3593 if (!(pCtx->cr0 & X86_CR0_NE))
3594 {
3595 /* old style FPU error reporting needs some extra work. */
3596 /** @todo don't fall back to the recompiler, but do it manually. */
3597 rc = VINF_EM_RAW_EMULATE_INSTR;
3598 break;
3599 }
3600 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3601 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3602 cbInstr, errCode);
3603 AssertRC(rc2);
3604
3605 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3606 goto ResumeExecution;
3607 }
3608
3609 case X86_XCPT_DB: /* Debug exception. */
3610 {
3611 uint64_t uDR6;
3612
3613 /*
3614 * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3615 *
3616 * Exit qualification bits:
3617 * 3:0 B0-B3 which breakpoint condition was met
3618 * 12:4 Reserved (0)
3619 * 13 BD - debug register access detected
3620 * 14 BS - single step execution or branch taken
3621 * 63:15 Reserved (0)
3622 */
3623 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
3624
3625 /* Note that we don't support guest and host-initiated debugging at the same time. */
3626
3627 uDR6 = X86_DR6_INIT_VAL;
3628 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3629 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3630 if (rc == VINF_EM_RAW_GUEST_TRAP)
3631 {
3632 /* Update DR6 here. */
3633 pCtx->dr[6] = uDR6;
3634
3635 /* Resync DR6 if the debug state is active. */
3636 if (CPUMIsGuestDebugStateActive(pVCpu))
3637 ASMSetDR6(pCtx->dr[6]);
3638
3639 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
3640 pCtx->dr[7] &= ~X86_DR7_GD;
3641
3642 /* Paranoia. */
3643 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3644 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3645 pCtx->dr[7] |= 0x400; /* must be one */
3646
3647 /* Resync DR7 */
3648 rc2 = VMXWriteVMCS64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
3649 AssertRC(rc2);
3650
3651 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip,
3652 exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3653 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3654 cbInstr, errCode);
3655 AssertRC(rc2);
3656
3657 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3658 goto ResumeExecution;
3659 }
3660 /* Return to ring 3 to deal with the debug exit code. */
3661 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3662 break;
3663 }
3664
3665 case X86_XCPT_BP: /* Breakpoint. */
3666 {
3667 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
3668 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3669 if (rc == VINF_EM_RAW_GUEST_TRAP)
3670 {
3671 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs.Sel, pCtx->rip));
3672 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3673 cbInstr, errCode);
3674 AssertRC(rc2);
3675 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3676 goto ResumeExecution;
3677 }
3678 if (rc == VINF_SUCCESS)
3679 {
3680 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3681 goto ResumeExecution;
3682 }
3683 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3684 break;
3685 }
3686
3687 case X86_XCPT_GP: /* General protection failure exception. */
3688 {
3689 uint32_t cbOp;
3690 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
3691
3692 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
3693#ifdef VBOX_STRICT
3694 if ( !CPUMIsGuestInRealModeEx(pCtx)
3695 || !pVM->hm.s.vmx.pRealModeTSS)
3696 {
3697 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, errCode));
3698 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3699 cbInstr, errCode);
3700 AssertRC(rc2);
3701 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3702 goto ResumeExecution;
3703 }
3704#endif
3705 Assert(CPUMIsGuestInRealModeEx(pCtx));
3706
3707 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3708
3709 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
3710 if (RT_SUCCESS(rc2))
3711 {
3712 bool fUpdateRIP = true;
3713
3714 rc = VINF_SUCCESS;
3715 Assert(cbOp == pDis->cbInstr);
3716 switch (pDis->pCurInstr->uOpcode)
3717 {
3718 case OP_CLI:
3719 pCtx->eflags.Bits.u1IF = 0;
3720 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli);
3721 break;
3722
3723 case OP_STI:
3724 pCtx->eflags.Bits.u1IF = 1;
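                            /* STI creates an interrupt shadow: interrupts remain blocked until after the next
                               instruction; we model this with the inhibit-interrupts flag and the BLOCK_STI state. */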
3725 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->cbInstr);
3726 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3727 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE,
3728 VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3729 AssertRC(rc2);
3730 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti);
3731 break;
3732
3733 case OP_HLT:
3734 fUpdateRIP = false;
3735 rc = VINF_EM_HALT;
3736 pCtx->rip += pDis->cbInstr;
3737 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
3738 break;
3739
3740 case OP_POPF:
3741 {
3742 RTGCPTR GCPtrStack;
3743 uint32_t cbParm;
3744 uint32_t uMask;
3745 X86EFLAGS eflags;
3746
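                            /* An operand-size prefix selects a 32-bit POPF; the real-mode default is a 16-bit pop. */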
3747 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3748 {
3749 cbParm = 4;
3750 uMask = 0xffffffff;
3751 }
3752 else
3753 {
3754 cbParm = 2;
3755 uMask = 0xffff;
3756 }
3757
3758 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3759 if (RT_FAILURE(rc2))
3760 {
3761 rc = VERR_EM_INTERPRETER;
3762 break;
3763 }
3764 eflags.u = 0;
3765 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3766 if (RT_FAILURE(rc2))
3767 {
3768 rc = VERR_EM_INTERPRETER;
3769 break;
3770 }
3771 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3772 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3773 | (eflags.u & X86_EFL_POPF_BITS & uMask);
3774 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3775 pCtx->eflags.Bits.u1RF = 0;
3776 pCtx->esp += cbParm;
3777 pCtx->esp &= uMask;
3778
3779 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf);
3780 break;
3781 }
3782
3783 case OP_PUSHF:
3784 {
3785 RTGCPTR GCPtrStack;
3786 uint32_t cbParm;
3787 uint32_t uMask;
3788 X86EFLAGS eflags;
3789
3790 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3791 {
3792 cbParm = 4;
3793 uMask = 0xffffffff;
3794 }
3795 else
3796 {
3797 cbParm = 2;
3798 uMask = 0xffff;
3799 }
3800
3801 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0,
3802 &GCPtrStack);
3803 if (RT_FAILURE(rc2))
3804 {
3805 rc = VERR_EM_INTERPRETER;
3806 break;
3807 }
3808 eflags = pCtx->eflags;
3809 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3810 eflags.Bits.u1RF = 0;
3811 eflags.Bits.u1VM = 0;
3812
3813 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3814 if (RT_FAILURE(rc2))
3815 {
3816 rc = VERR_EM_INTERPRETER;
3817 break;
3818 }
3819 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3820 pCtx->esp -= cbParm;
3821 pCtx->esp &= uMask;
3822 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf);
3823 break;
3824 }
3825
3826 case OP_IRET:
3827 {
3828 RTGCPTR GCPtrStack;
3829 uint32_t uMask = 0xffff;
3830 uint16_t aIretFrame[3];
3831
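                            /* A real-mode IRET pops IP, CS and FLAGS as three 16-bit words off the stack. */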
3832 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
3833 {
3834 rc = VERR_EM_INTERPRETER;
3835 break;
3836 }
3837
3838 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3839 if (RT_FAILURE(rc2))
3840 {
3841 rc = VERR_EM_INTERPRETER;
3842 break;
3843 }
3844 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3845 if (RT_FAILURE(rc2))
3846 {
3847 rc = VERR_EM_INTERPRETER;
3848 break;
3849 }
3850 pCtx->ip = aIretFrame[0];
3851 pCtx->cs.Sel = aIretFrame[1];
3852 pCtx->cs.ValidSel = aIretFrame[1];
3853 pCtx->cs.u64Base = (uint32_t)pCtx->cs.Sel << 4;
3854 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3855 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3856 pCtx->sp += sizeof(aIretFrame);
3857
3858 LogFlow(("iret to %04x:%x\n", pCtx->cs.Sel, pCtx->ip));
3859 fUpdateRIP = false;
3860 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret);
3861 break;
3862 }
3863
3864 case OP_INT:
3865 {
3866 uint32_t intInfo2;
3867
3868 LogFlow(("Realmode: INT %x\n", pDis->Param1.uValue & 0xff));
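                            /* Build the VM-entry interruption info: bits 7:0 vector, bits 10:8 type (software interrupt), bit 31 valid. */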
3869 intInfo2 = pDis->Param1.uValue & 0xff;
3870 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3871 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3872
3873 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3874 AssertRC(VBOXSTRICTRC_VAL(rc));
3875 fUpdateRIP = false;
3876 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3877 break;
3878 }
3879
3880 case OP_INTO:
3881 {
3882 if (pCtx->eflags.Bits.u1OF)
3883 {
3884 uint32_t intInfo2;
3885
3886 LogFlow(("Realmode: INTO\n"));
3887 intInfo2 = X86_XCPT_OF;
3888 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3889 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3890
3891 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3892 AssertRC(VBOXSTRICTRC_VAL(rc));
3893 fUpdateRIP = false;
3894 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3895 }
3896 break;
3897 }
3898
3899 case OP_INT3:
3900 {
3901 uint32_t intInfo2;
3902
3903 LogFlow(("Realmode: INT 3\n"));
3904 intInfo2 = 3;
3905 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3906 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3907
3908 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3909 AssertRC(VBOXSTRICTRC_VAL(rc));
3910 fUpdateRIP = false;
3911 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
3912 break;
3913 }
3914
3915 default:
3916 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
3917 fUpdateRIP = false;
3918 break;
3919 }
3920
3921 if (rc == VINF_SUCCESS)
3922 {
3923 if (fUpdateRIP)
3924 pCtx->rip += cbOp; /* Move on to the next instruction. */
3925
3926 /*
3927 * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the
3928 * whole context to be done with it.
3929 */
3930 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
3931
3932 /* Only resume if successful. */
3933 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3934 goto ResumeExecution;
3935 }
3936 }
3937 else
3938 rc = VERR_EM_INTERPRETER;
3939
3940 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
3941 ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
3942 break;
3943 }
3944
3945#ifdef VBOX_STRICT
3946 case X86_XCPT_XF: /* SIMD exception. */
3947 case X86_XCPT_DE: /* Divide error. */
3948 case X86_XCPT_UD: /* Unknown opcode exception. */
3949 case X86_XCPT_SS: /* Stack segment exception. */
3950 case X86_XCPT_NP: /* Segment not present exception. */
3951 {
3952 switch (vector)
3953 {
3954 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); break;
3955 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); break;
3956 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); break;
3957 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); break;
3958 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); break;
3959 }
3960
3961 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3962 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3963 cbInstr, errCode);
3964 AssertRC(rc2);
3965
3966 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3967 goto ResumeExecution;
3968 }
3969#endif
3970 default:
3971 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
3972 if ( CPUMIsGuestInRealModeEx(pCtx)
3973 && pVM->hm.s.vmx.pRealModeTSS)
3974 {
3975 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs.Sel, pCtx->eip, errCode));
3976 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3977 cbInstr, errCode);
3978 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
3979
3980 /* Go back to ring-3 in case of a triple fault. */
3981 if ( vector == X86_XCPT_DF
3982 && rc == VINF_EM_RESET)
3983 {
3984 break;
3985 }
3986
3987 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3988 goto ResumeExecution;
3989 }
3990 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3991 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3992 break;
3993 } /* switch (vector) */
3994
3995 break;
3996
3997 default:
3998 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3999 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
4000 break;
4001 }
4002
4003 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4004 break;
4005 }
4006
4007 /*
4008 * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed
4009 * by the configuration of the EPT paging structures.
4010 */
4011 case VMX_EXIT_EPT_VIOLATION:
4012 {
4013 RTGCPHYS GCPhys;
4014
4015 Assert(pVM->hm.s.fNestedPaging);
4016
4017 rc2 = VMXReadVMCS64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
4018 AssertRC(rc2);
4019 Assert(((exitQualification >> 7) & 3) != 2);
4020
4021 /* Determine the kind of violation. */
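        /* Translate the EPT exit qualification into a #PF-style error code (ID, RW and P bits) so PGM can reuse its page-fault path. */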
4022 errCode = 0;
4023 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
4024 errCode |= X86_TRAP_PF_ID;
4025
4026 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
4027 errCode |= X86_TRAP_PF_RW;
4028
4029 /* If the page is present, then it's a page level protection fault. */
4030 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
4031 errCode |= X86_TRAP_PF_P;
4032 else
4033 {
4034 /* Shortcut for APIC TPR reads and writes. */
4035 if ( (GCPhys & 0xfff) == 0x080
4036 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4037 && fSetupTPRCaching
4038 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4039 {
4040 RTGCPHYS GCPhysApicBase;
4041 GCPhysApicBase = pCtx->msrApicBase;
4042 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4043 if (GCPhys == GCPhysApicBase + 0x80)
4044 {
4045 Log(("Enable VT-x virtual APIC access filtering\n"));
4046 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
4047 AssertRC(rc2);
4048 }
4049 }
4050 }
4051 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
4052
4053 /* GCPhys contains the guest physical address of the page fault. */
4054 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
4055 TRPMSetErrorCode(pVCpu, errCode);
4056 TRPMSetFaultAddress(pVCpu, GCPhys);
4057
4058 /* Handle the pagefault trap for the nested shadow table. */
4059 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
4060
4061 /*
4062 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
4063 */
4064 if ( rc == VINF_SUCCESS
4065 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4066 || rc == VERR_PAGE_NOT_PRESENT)
4067 {
4068 /* We've successfully synced our shadow pages, so let's just continue execution. */
4069             Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
4070 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
4071
4072 TRPMResetTrap(pVCpu);
4073 goto ResumeExecution;
4074 }
4075
4076#ifdef VBOX_STRICT
4077 if (rc != VINF_EM_RAW_EMULATE_INSTR)
4078 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4079#endif
4080 /* Need to go back to the recompiler to emulate the instruction. */
4081 TRPMResetTrap(pVCpu);
4082 break;
4083 }
4084
4085 case VMX_EXIT_EPT_MISCONFIG:
4086 {
4087 RTGCPHYS GCPhys;
4088
4089 Assert(pVM->hm.s.fNestedPaging);
4090
4091 rc2 = VMXReadVMCS64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
4092 AssertRC(rc2);
4093 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
4094
4095 /* Shortcut for APIC TPR reads and writes. */
4096 if ( (GCPhys & 0xfff) == 0x080
4097 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4098 && fSetupTPRCaching
4099 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4100 {
4101 RTGCPHYS GCPhysApicBase = pCtx->msrApicBase;
4102 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4103 if (GCPhys == GCPhysApicBase + 0x80)
4104 {
4105 Log(("Enable VT-x virtual APIC access filtering\n"));
4106 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P);
4107 AssertRC(rc2);
4108 }
4109 }
4110
4111 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
4112
4113 /*
4114 * If we succeed, resume execution.
4115          * Or, if we fail to interpret the instruction because we couldn't get the guest physical address
4116          * of the page containing the instruction via the guest's page tables (we would have invalidated the guest
4117          * page in the host TLB), resume execution anyway; that raises a guest page fault and lets the guest handle
4118          * this weird case. See @bugref{6043}.
4119 */
4120 if ( rc == VINF_SUCCESS
4121 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4122 || rc == VERR_PAGE_NOT_PRESENT)
4123 {
4124 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
4125 goto ResumeExecution;
4126 }
4127
4128 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4129 break;
4130 }
4131
4132 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4133 /* Clear VM-exit on IF=1 change. */
4134 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip,
4135 VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
4136 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
4137 rc2 = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
4138 AssertRC(rc2);
4139 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIrqWindow);
4140 goto ResumeExecution; /* we check for pending guest interrupts there */
4141
4142 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
4143 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
4144 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
4145 /* Skip instruction and continue directly. */
4146 pCtx->rip += cbInstr;
4147         /* Continue execution. */
4148 goto ResumeExecution;
4149
4150 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4151 {
4152 Log2(("VMX: Cpuid %x\n", pCtx->eax));
4153 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
4154 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4155 if (rc == VINF_SUCCESS)
4156 {
4157 /* Update EIP and continue execution. */
4158 Assert(cbInstr == 2);
4159 pCtx->rip += cbInstr;
4160 goto ResumeExecution;
4161 }
4162 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4163 rc = VINF_EM_RAW_EMULATE_INSTR;
4164 break;
4165 }
4166
4167 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4168 {
4169 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
4170 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
4171 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4172 if (rc == VINF_SUCCESS)
4173 {
4174 /* Update EIP and continue execution. */
4175 Assert(cbInstr == 2);
4176 pCtx->rip += cbInstr;
4177 goto ResumeExecution;
4178 }
4179 rc = VINF_EM_RAW_EMULATE_INSTR;
4180 break;
4181 }
4182
4183 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4184 {
4185 Log2(("VMX: Rdtsc\n"));
4186 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
4187 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4188 if (rc == VINF_SUCCESS)
4189 {
4190 /* Update EIP and continue execution. */
4191 Assert(cbInstr == 2);
4192 pCtx->rip += cbInstr;
4193 goto ResumeExecution;
4194 }
4195 rc = VINF_EM_RAW_EMULATE_INSTR;
4196 break;
4197 }
4198
4199 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4200 {
4201 Log2(("VMX: Rdtscp\n"));
4202 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtscp);
4203 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
4204 if (rc == VINF_SUCCESS)
4205 {
4206 /* Update EIP and continue execution. */
4207 Assert(cbInstr == 3);
4208 pCtx->rip += cbInstr;
4209 goto ResumeExecution;
4210 }
4211 rc = VINF_EM_RAW_EMULATE_INSTR;
4212 break;
4213 }
4214
4215 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4216 {
4217 Log2(("VMX: invlpg\n"));
4218 Assert(!pVM->hm.s.fNestedPaging);
4219
4220 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
4221 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
4222 if (rc == VINF_SUCCESS)
4223 {
4224 /* Update EIP and continue execution. */
4225 pCtx->rip += cbInstr;
4226 goto ResumeExecution;
4227 }
4228 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
4229 break;
4230 }
4231
4232 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4233 {
4234 Log2(("VMX: monitor\n"));
4235
4236 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
4237 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4238 if (rc == VINF_SUCCESS)
4239 {
4240 /* Update EIP and continue execution. */
4241 pCtx->rip += cbInstr;
4242 goto ResumeExecution;
4243 }
4244 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4245 break;
4246 }
4247
4248 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4249 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
4250 if ( pVM->hm.s.fTPRPatchingActive
4251 && pCtx->ecx == MSR_K8_LSTAR)
4252 {
4253 Assert(!CPUMIsGuestInLongModeEx(pCtx));
4254 if ((pCtx->eax & 0xff) != u8LastTPR)
4255 {
4256 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
4257
4258 /* Our patch code uses LSTAR for TPR caching. */
4259 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
4260 AssertRC(rc2);
4261 }
4262
4263 /* Skip the instruction and continue. */
4264 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
4265
4266 /* Only resume if successful. */
4267 goto ResumeExecution;
4268 }
4269 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_MSR;
4270 /* no break */
4271 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4272 {
4273 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hm.s.StatExitRdmsr : &pVCpu->hm.s.StatExitWrmsr);
4274
4275 /*
4276          * Note: The Intel spec. claims there's a REX version of RDMSR that's slightly different,
4277          * so we play it safe by completely disassembling the instruction.
4278 */
4279 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
4280 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
4281 if (rc == VINF_SUCCESS)
4282 {
4283 /* EIP has been updated already. */
4284 /* Only resume if successful. */
4285 goto ResumeExecution;
4286 }
4287 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n",
4288 (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
4289 break;
4290 }
4291
4292 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4293 {
4294 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub2, y2);
4295
4296 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
4297 {
4298 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
4299 {
4300 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
4301 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4302 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4303 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
4304 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
4305 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
4306 {
4307 case 0:
4308 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0 | HM_CHANGED_GUEST_CR3;
4309 break;
4310 case 2:
4311 break;
4312 case 3:
4313 Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
4314 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR3;
4315 break;
4316 case 4:
4317 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR4;
4318 break;
4319 case 8:
4320 /* CR8 contains the APIC TPR */
4321 Assert(!(pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1
4322 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4323 break;
4324
4325 default:
4326 AssertFailed();
4327 break;
4328 }
4329 break;
4330 }
4331
4332 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
4333 {
4334 Log2(("VMX: mov x, crx\n"));
4335 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4336
4337 Assert( !pVM->hm.s.fNestedPaging
4338 || !CPUMIsGuestInPagedProtectedModeEx(pCtx)
4339 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3);
4340
4341 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
4342 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8
4343 || !(pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4344
4345 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4346 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
4347 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
4348 break;
4349 }
4350
4351 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
4352 {
4353 Log2(("VMX: clts\n"));
4354 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
4355 rc = EMInterpretCLTS(pVM, pVCpu);
4356 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
4357 break;
4358 }
4359
4360 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
4361 {
4362 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
4363 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLMSW);
4364 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
4365 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
4366 break;
4367 }
4368 }
4369
4370 /* Update EIP if no error occurred. */
4371 if (RT_SUCCESS(rc))
4372 pCtx->rip += cbInstr;
4373
4374 if (rc == VINF_SUCCESS)
4375 {
4376 /* Only resume if successful. */
4377 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub2, y2);
4378 goto ResumeExecution;
4379 }
4380 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
4381 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub2, y2);
4382 break;
4383 }
4384
4385 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4386 {
4387 if ( !DBGFIsStepping(pVCpu)
4388 && !CPUMIsHyperDebugStateActive(pVCpu))
4389 {
4390 /* Disable DRx move intercepts. */
4391 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4392 rc2 = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
4393 AssertRC(rc2);
4394
4395 /* Save the host and load the guest debug state. */
4396 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
4397 AssertRC(rc2);
4398
4399#ifdef LOG_ENABLED
4400 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4401 {
4402 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4403 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4404 }
4405 else
4406 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
4407#endif
4408
4409#ifdef VBOX_WITH_STATISTICS
4410 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
4411 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4412 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
4413 else
4414 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
4415#endif
4416
4417 goto ResumeExecution;
4418 }
4419
4420 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first
4421 * time and restore DRx registers afterwards */
4422 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4423 {
4424 Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4425 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4426 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
4427 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4428 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4429 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4430 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
4431 Log2(("DR7=%08x\n", pCtx->dr[7]));
4432 }
4433 else
4434 {
4435 Log2(("VMX: mov x, DRx\n"));
4436 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
4437 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4438 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4439 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4440 }
4441 /* Update EIP if no error occurred. */
4442 if (RT_SUCCESS(rc))
4443 pCtx->rip += cbInstr;
4444
4445 if (rc == VINF_SUCCESS)
4446 {
4447 /* Only resume if successful. */
4448 goto ResumeExecution;
4449 }
4450 Assert(rc == VERR_EM_INTERPRETER);
4451 break;
4452 }
4453
4454     /* Note: We'll get a #GP if the I/O instruction isn't allowed (IOPL or TSS bitmap); no need to double-check. */
4455 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4456 {
4457 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub1, y1);
4458 uint32_t uPort;
4459 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4460 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4461
4462 /** @todo necessary to make the distinction? */
4463 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4464 uPort = pCtx->edx & 0xffff;
4465 else
4466 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
4467
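        /* Access-width encoding in the exit qualification: 0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; 2 is not defined. */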
4468 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */
4469 {
4470 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4471 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4472 break;
4473 }
4474
4475 uint32_t cbSize = g_aIOSize[uIOWidth];
4476 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4477 {
4478 /* ins/outs */
4479 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
4480
4481 /* Disassemble manually to deal with segment prefixes. */
4482 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
4483 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4484 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
4485                 if (RT_SUCCESS(rc2))
4486 {
4487 if (fIOWrite)
4488 {
4489 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4490 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
4491 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4492 }
4493 else
4494 {
4495 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4496 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
4497 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4498 }
4499 }
4500 else
4501 rc = VINF_EM_RAW_EMULATE_INSTR;
4502 }
4503 else
4504 {
4505 /* Normal in/out */
4506 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
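            /* uAndVal masks EAX down to just the bytes touched by the port access. */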
4507
4508 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4509
4510 if (fIOWrite)
4511 {
4512 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
4513 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4514 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4515 HMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4516 }
4517 else
4518 {
4519 uint32_t u32Val = 0;
4520
4521 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
4522 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4523 if (IOM_SUCCESS(rc))
4524 {
4525 /* Write back to the EAX register. */
4526 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4527 }
4528 else
4529 if (rc == VINF_IOM_R3_IOPORT_READ)
4530 HMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4531 }
4532 }
4533
4534 /*
4535          * Handle the I/O return codes.
4536 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4537 */
4538 if (IOM_SUCCESS(rc))
4539 {
4540 /* Update EIP and continue execution. */
4541 pCtx->rip += cbInstr;
4542 if (RT_LIKELY(rc == VINF_SUCCESS))
4543 {
4544 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4545 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4546 {
4547 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
4548 for (unsigned i = 0; i < 4; i++)
4549 {
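                        /* For I/O breakpoints, DR0..DR3 hold the port address; check whether this port hits an enabled I/O breakpoint. */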
4550 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
4551
4552 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4553 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4554 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4555 {
4556 uint64_t uDR6;
4557
4558 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4559
4560 uDR6 = ASMGetDR6();
4561
4562 /* Clear all breakpoint status flags and set the one we just hit. */
4563 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4564 uDR6 |= (uint64_t)RT_BIT(i);
4565
4566 /*
4567 * Note: AMD64 Architecture Programmer's Manual 13.1:
4568                              * Bits 15:13 of the DR6 register are never cleared by the processor and must
4569 * be cleared by software after the contents have been read.
4570 */
4571 ASMSetDR6(uDR6);
4572
4573 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
4574 pCtx->dr[7] &= ~X86_DR7_GD;
4575
4576 /* Paranoia. */
4577 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4578 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4579 pCtx->dr[7] |= 0x400; /* must be one */
4580
4581 /* Resync DR7 */
4582 rc2 = VMXWriteVMCS64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
4583 AssertRC(rc2);
4584
4585 /* Construct inject info. */
4586 intInfo = X86_XCPT_DB;
4587 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4588 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4589
4590 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4591 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4592 0 /* cbInstr */, 0 /* errCode */);
4593 AssertRC(rc2);
4594
4595 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4596 goto ResumeExecution;
4597 }
4598 }
4599 }
4600 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4601 goto ResumeExecution;
4602 }
4603 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4604 break;
4605 }
4606
4607#ifdef VBOX_STRICT
4608 if (rc == VINF_IOM_R3_IOPORT_READ)
4609 Assert(!fIOWrite);
4610 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4611 Assert(fIOWrite);
4612 else
4613 {
4614 AssertMsg( RT_FAILURE(rc)
4615 || rc == VINF_EM_RAW_EMULATE_INSTR
4616 || rc == VINF_EM_RAW_GUEST_TRAP
4617 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4618 }
4619#endif
4620 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4621 break;
4622 }
4623
4624 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4625 LogFlow(("VMX_EXIT_TPR\n"));
4626 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4627 goto ResumeExecution;
4628
4629 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4630 on the APIC-access page. */
4631 {
4632 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4633 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4634
4635 switch (uAccessType)
4636 {
4637 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4638 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4639 {
4640 RTGCPHYS GCPhys = pCtx->msrApicBase;
4641 GCPhys &= PAGE_BASE_GC_MASK;
4642 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4643
4644 LogFlow(("Apic access at %RGp\n", GCPhys));
4645 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
4646 CPUMCTX2CORE(pCtx), GCPhys);
4647 if (rc == VINF_SUCCESS)
4648 goto ResumeExecution; /* rip already updated */
4649 break;
4650 }
4651
4652 default:
4653 rc = VINF_EM_RAW_EMULATE_INSTR;
4654 break;
4655 }
4656 break;
4657 }
4658
4659 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
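            /* If TM reports nothing that needs servicing yet, just continue; otherwise return to ring-3 to run the timer queues. */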
4660 if (!TMTimerPollBool(pVM, pVCpu))
4661 goto ResumeExecution;
4662 rc = VINF_EM_RAW_TIMER_PENDING;
4663 break;
4664
4665 default:
4666 /* The rest is handled after syncing the entire CPU state. */
4667 break;
4668 }
4669
4670
4671 /*
4672 * Note: The guest state is not entirely synced back at this stage!
4673 */
4674
4675 /* Investigate why there was a VM-exit. (part 2) */
4676 switch (exitReason)
4677 {
4678 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
4679 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
4680 case VMX_EXIT_EPT_VIOLATION:
4681 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4682 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4683 /* Already handled above. */
4684 break;
4685
4686 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4687 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4688 break;
4689
4690 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4691 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4692 rc = VINF_EM_RAW_INTERRUPT;
4693 AssertFailed(); /* Can't happen. Yet. */
4694 break;
4695
4696 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
4697 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
4698 rc = VINF_EM_RAW_INTERRUPT;
4699 AssertFailed(); /* Can't happen afaik. */
4700 break;
4701
4702 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4703 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4704 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4705 && pVCpu->hm.s.Event.fPending)
4706 {
4707 /* Caused by an injected interrupt. */
4708 pVCpu->hm.s.Event.fPending = false;
4709
4710 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.intInfo)));
4711 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.intInfo));
4712             /** @todo Why do we assume this had to be a hardware interrupt? What about software interrupts or exceptions? */
4713 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.intInfo), TRPM_HARDWARE_INT);
4714 AssertRC(rc2);
4715 }
4716 /* else Exceptions and software interrupts can just be restarted. */
4717 rc = VERR_EM_INTERPRETER;
4718 break;
4719
4720 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4721 /* Check if external interrupts are pending; if so, don't switch back. */
4722 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
4723 pCtx->rip++; /* skip hlt */
4724 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4725 goto ResumeExecution;
4726
4727 rc = VINF_EM_HALT;
4728 break;
4729
4730 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4731 Log2(("VMX: mwait\n"));
4732 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
4733 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4734 if ( rc == VINF_EM_HALT
4735 || rc == VINF_SUCCESS)
4736 {
4737 /* Update EIP and continue execution. */
4738 pCtx->rip += cbInstr;
4739
4740 /* Check if external interrupts are pending; if so, don't switch back. */
4741 if ( rc == VINF_SUCCESS
4742 || ( rc == VINF_EM_HALT
4743 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4744 )
4745 goto ResumeExecution;
4746 }
4747 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4748 break;
4749
4750 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4751 AssertFailed(); /* can't happen. */
4752 rc = VERR_EM_INTERPRETER;
4753 break;
4754
4755 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4756 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4757 pVCpu->hm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4758 rc2 = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
4759 AssertRC(rc2);
4760 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf);
4761#if 0
4762 DBGFDoneStepping(pVCpu);
4763#endif
4764 rc = VINF_EM_DBG_STOP;
4765 break;
4766
4767 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4768 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4769 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4770 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4771 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4772 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4773 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4774 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4775 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4776 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4777 /** @todo inject #UD immediately */
4778 rc = VERR_EM_INTERPRETER;
4779 break;
4780
4781 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4782 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4783 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4784 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4785 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4786 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4787 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4788 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4789 /* already handled above */
4790 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4791 || rc == VINF_EM_RAW_INTERRUPT
4792 || rc == VERR_EM_INTERPRETER
4793 || rc == VINF_EM_RAW_EMULATE_INSTR
4794 || rc == VINF_PGM_SYNC_CR3
4795 || rc == VINF_IOM_R3_IOPORT_READ
4796 || rc == VINF_IOM_R3_IOPORT_WRITE
4797 || rc == VINF_EM_RAW_GUEST_TRAP
4798 || rc == VINF_TRPM_XCPT_DISPATCHED
4799 || rc == VINF_EM_RESCHEDULE_REM,
4800 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4801 break;
4802
4803 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4804 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4805 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4806 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4807 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4808 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4809 on the APIC-access page. */
4810 {
4811 /*
4812 * If we decided to emulate them here, then we must sync the MSRs that could have been changed (sysenter, FS/GS base)
4813 */
4814 rc = VERR_EM_INTERPRETER;
4815 break;
4816 }
4817
4818 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4819 Assert(rc == VINF_EM_RAW_INTERRUPT);
4820 break;
4821
4822 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4823 {
4824#ifdef VBOX_STRICT
4825 RTCCUINTREG val2 = 0;
4826
4827 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4828
4829 VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val2);
4830 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4831
4832 VMXReadVMCS(VMX_VMCS_GUEST_CR0, &val2);
4833 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4834
4835 VMXReadVMCS(VMX_VMCS_GUEST_CR3, &val2);
4836 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4837
4838 VMXReadVMCS(VMX_VMCS_GUEST_CR4, &val2);
4839 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4840
4841 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4842 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4843
4844 VMX_LOG_SELREG(CS, "CS", val2);
4845 VMX_LOG_SELREG(DS, "DS", val2);
4846 VMX_LOG_SELREG(ES, "ES", val2);
4847 VMX_LOG_SELREG(FS, "FS", val2);
4848 VMX_LOG_SELREG(GS, "GS", val2);
4849 VMX_LOG_SELREG(SS, "SS", val2);
4850 VMX_LOG_SELREG(TR, "TR", val2);
4851 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4852
4853 VMXReadVMCS(VMX_VMCS_GUEST_GDTR_BASE, &val2);
4854 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4855 VMXReadVMCS(VMX_VMCS_GUEST_IDTR_BASE, &val2);
4856 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4857#endif /* VBOX_STRICT */
4858 rc = VERR_VMX_INVALID_GUEST_STATE;
4859 break;
4860 }
4861
4862 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4863 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4864 default:
4865 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4866 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4867 break;
4868
4869 }
4870
4871end:
4872    /* We are now going back to ring-3, so clear the action flag. */
4873 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4874
4875 /*
4876 * Signal changes for the recompiler.
4877 */
4878 CPUMSetChangedFlags(pVCpu,
4879 CPUM_CHANGED_SYSENTER_MSR
4880 | CPUM_CHANGED_LDTR
4881 | CPUM_CHANGED_GDTR
4882 | CPUM_CHANGED_IDTR
4883 | CPUM_CHANGED_TR
4884 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4885
4886 /*
4887 * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time.
4888 */
4889 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4890 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4891 {
4892 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
4893 /* On the next entry we'll only sync the host context. */
4894 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT;
4895 }
4896 else
4897 {
4898 /* On the next entry we'll sync everything. */
4899 /** @todo we can do better than this */
4900 /* Not in the VINF_PGM_CHANGE_MODE though! */
4901 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
4902 }
4903
4904 /* Translate into a less severe return code */
4905 if (rc == VERR_EM_INTERPRETER)
4906 rc = VINF_EM_RAW_EMULATE_INSTR;
4907 else if (rc == VERR_VMX_INVALID_VMCS_PTR)
4908 {
4909 /* Try to extract more information about what might have gone wrong here. */
4910 VMXGetActivateVMCS(&pVCpu->hm.s.vmx.lasterror.u64VMCSPhys);
4911 pVCpu->hm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVMCS;
4912 pVCpu->hm.s.vmx.lasterror.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
4913 pVCpu->hm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4914 }
4915
4916 /* Just set the correct state here instead of trying to catch every goto above. */
4917 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4918
4919#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4920 /* Restore interrupts if we exited after disabling them. */
4921 if (uOldEFlags != ~(RTCCUINTREG)0)
4922 ASMSetFlags(uOldEFlags);
4923#endif
4924
4925 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
4926 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
4927 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
4928 Log2(("X"));
4929 return VBOXSTRICTRC_TODO(rc);
4930}
4931
4932
4933/**
4934 * Enters the VT-x session.
4935 *
4936 * @returns VBox status code.
4937 * @param pVM Pointer to the VM.
4938 * @param pVCpu Pointer to the VMCPU.
4939 * @param pCpu Pointer to the CPU info struct.
4940 */
4941VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
4942{
4943 Assert(pVM->hm.s.vmx.fSupported);
4944 NOREF(pCpu);
4945
4946 unsigned cr4 = ASMGetCR4();
4947 if (!(cr4 & X86_CR4_VMXE))
4948 {
4949 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4950 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4951 }
4952
4953 /* Activate the VMCS. */
4954 int rc = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
4955 if (RT_FAILURE(rc))
4956 return rc;
4957
4958 pVCpu->hm.s.fResumeVM = false;
4959 return VINF_SUCCESS;
4960}
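/*
 * Illustrative note (not part of the original file): VMXActivateVMCS (VMPTRLD) makes this
 * VCPU's VMCS current on the host CPU, and clearing fResumeVM forces the next world switch
 * to use VMLAUNCH rather than VMRESUME, which the CPU requires after the VMCS has been
 * VMCLEARed (see VMXR0Leave below).
 */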
4961
4962
4963/**
4964 * Leaves the VT-x session.
4965 *
4966 * @returns VBox status code.
4967 * @param pVM Pointer to the VM.
4968 * @param pVCpu Pointer to the VMCPU.
4969 * @param   pCtx        Pointer to the guest CPU context.
4970 */
4971VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4972{
4973 Assert(pVM->hm.s.vmx.fSupported);
4974
4975#ifdef DEBUG
4976 if (CPUMIsHyperDebugStateActive(pVCpu))
4977 {
4978 CPUMR0LoadHostDebugState(pVM, pVCpu);
4979 Assert(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4980 }
4981 else
4982#endif
4983
4984 /*
4985 * Save the guest debug state if necessary.
4986 */
4987 if (CPUMIsGuestDebugStateActive(pVCpu))
4988 {
4989 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4990
4991 /* Enable DRx move intercepts again. */
4992 pVCpu->hm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4993 int rc = VMXWriteVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.proc_ctls);
4994 AssertRC(rc);
4995
4996 /* Resync the debug registers the next time. */
4997 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
4998 }
4999 else
5000 Assert(pVCpu->hm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
5001
5002 /*
5003 * Clear VMCS, marking it inactive, clearing implementation-specific data and writing
5004 * VMCS data back to memory.
5005 */
5006 int rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
5007 AssertRC(rc);
5008
5009 return VINF_SUCCESS;
5010}
5011
5012
5013/**
5014 * Flush the TLB using EPT.
5015 *
5016 * @remarks Returns void; the INVEPT result is only asserted, not returned.
5017 * @param pVM Pointer to the VM.
5018 * @param pVCpu Pointer to the VMCPU.
5019 * @param enmFlush Type of flush.
5020 */
5021static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
5022{
5023 uint64_t descriptor[2];
5024
5025 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
5026 Assert(pVM->hm.s.fNestedPaging);
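    /* INVEPT descriptor layout (Intel SDM): qword 0 = EPT pointer, qword 1 = reserved, must be zero. */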
5027 descriptor[0] = pVCpu->hm.s.vmx.GCPhysEPTP;
5028 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
5029 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
5030 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hm.s.vmx.GCPhysEPTP, rc));
5031#ifdef VBOX_WITH_STATISTICS
5032 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
5033#endif
5034}
5035
5036
5037/**
5038 * Flush the TLB using VPID.
5039 *
5040 * @remarks Returns void; the INVVPID result is only asserted, not returned.
5041 * @param pVM Pointer to the VM.
5042 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
5043 * enmFlush).
5044 * @param enmFlush Type of flush.
5045 * @param GCPtr Virtual address of the page to flush (can be 0 depending
5046 * on @a enmFlush).
5047 */
5048static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
5049{
5050 uint64_t descriptor[2];
5051
5052 Assert(pVM->hm.s.vmx.fVpid);
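    /* INVVPID descriptor layout (Intel SDM): bits 15:0 of qword 0 = VPID (remaining bits must be zero), qword 1 = linear address. */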
5053 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
5054 {
5055 descriptor[0] = 0;
5056 descriptor[1] = 0;
5057 }
5058 else
5059 {
5060 AssertPtr(pVCpu);
5061 AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
5062 AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
5063 descriptor[0] = pVCpu->hm.s.uCurrentAsid;
5064 descriptor[1] = GCPtr;
5065 }
5066 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
5067 AssertMsg(rc == VINF_SUCCESS,
5068 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
5069#ifdef VBOX_WITH_STATISTICS
5070 if (pVCpu)
5071 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
5072#endif
5073}
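/*
 * Illustrative usage (not part of the original file): flush all VPID contexts when no VCPU
 * context is at hand, or a single guest-linear address for the current ASID when the CPU
 * supports individual-address invalidation:
 *
 *     hmR0VmxFlushVPID(pVM, NULL,  VMX_FLUSH_VPID_ALL_CONTEXTS, 0);
 *     hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR,   GCVirt);
 */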
5074
5075
5076/**
5077 * Invalidates a guest page by guest virtual address. Only relevant for
5078 * EPT/VPID, otherwise there is nothing really to invalidate.
5079 *
5080 * @returns VBox status code.
5081 * @param pVM Pointer to the VM.
5082 * @param pVCpu Pointer to the VMCPU.
5083 * @param GCVirt Guest virtual address of the page to invalidate.
5084 */
5085VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
5086{
5087 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
5088
5089 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
5090
5091 if (!fFlushPending)
5092 {
5093 /*
5094         * We must invalidate the guest TLB entry in either case; we cannot ignore it even for the EPT case.
5095         * See @bugref{6043} and @bugref{6177}.
5096         *
5097         * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*() as this
5098         * function may be called in a loop with individual addresses.
5099 */
5100 if (pVM->hm.s.vmx.fVpid)
5101 {
5102 /* If we can flush just this page do it, otherwise flush as little as possible. */
5103 if (pVM->hm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
5104 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
5105 else
5106 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5107 }
5108 else if (pVM->hm.s.fNestedPaging)
5109 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5110 }
5111
5112 return VINF_SUCCESS;
5113}
5114
5115
5116/**
5117 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
5118 * otherwise there is nothing really to invalidate.
5119 *
5120 * NOTE: Assumes the current instruction references this physical page through a virtual address!
5121 *
5122 * @returns VBox status code.
5123 * @param pVM Pointer to the VM.
5124 * @param pVCpu Pointer to the VMCPU.
5125 * @param GCPhys Guest physical address of the page to invalidate.
5126 */
5127VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
5128{
5129 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
5130
5131 /*
5132 * We cannot flush a page by guest-physical address. invvpid takes only a linear address
5133     * while invept flushes only entire EPT contexts, not individual addresses. We update the force flag here
5134 * and flush before VMENTRY in hmR0VmxSetupTLB*(). This function might be called in a loop.
5135 */
5136 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5137 return VINF_SUCCESS;
5138}
5139
5140
5141/**
5142 * Report world switch error and dump some useful debug info.
5143 *
5144 * @param pVM Pointer to the VM.
5145 * @param pVCpu Pointer to the VMCPU.
5146 * @param rc Return code.
5147 * @param pCtx Pointer to the current guest CPU context (not updated).
5148 */
5149static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
5150{
5151 NOREF(pVM);
5152
5153 switch (VBOXSTRICTRC_VAL(rc))
5154 {
5155 case VERR_VMX_INVALID_VMXON_PTR:
5156 AssertFailed();
5157 break;
5158
5159 case VERR_VMX_UNABLE_TO_START_VM:
5160 case VERR_VMX_UNABLE_TO_RESUME_VM:
5161 {
5162 int rc2;
5163 RTCCUINTREG exitReason, instrError;
5164
5165 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
5166 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
5167 AssertRC(rc2);
5168 if (rc2 == VINF_SUCCESS)
5169 {
5170 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason,
5171 (uint32_t)instrError));
5172 Log(("Current stack %08x\n", &rc2));
5173
5174 pVCpu->hm.s.vmx.lasterror.ulInstrError = instrError;
5175 pVCpu->hm.s.vmx.lasterror.ulExitReason = exitReason;
5176
5177#ifdef VBOX_STRICT
5178 RTGDTR gdtr;
5179 PCX86DESCHC pDesc;
5180 RTCCUINTREG val;
5181
5182 ASMGetGDTR(&gdtr);
5183
5184 VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val);
5185 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
5186 VMXReadVMCS(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, &val);
5187 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
5188 VMXReadVMCS(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, &val);
5189 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
5190 VMXReadVMCS(VMX_VMCS32_CTRL_ENTRY_CONTROLS, &val);
5191 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
5192 VMXReadVMCS(VMX_VMCS32_CTRL_EXIT_CONTROLS, &val);
5193 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
5194
5195 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
5196 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
5197 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
5198 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
5199 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
5200 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
5201
5202 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
5203 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
5204 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
5205 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
5206
5207 if (val < gdtr.cbGdt)
5208 {
5209 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5210 HMR0DumpDescriptor(pDesc, val, "CS: ");
5211 }
5212
5213 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
5214 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
5215 if (val < gdtr.cbGdt)
5216 {
5217 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5218 HMR0DumpDescriptor(pDesc, val, "DS: ");
5219 }
5220
5221 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
5222 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
5223 if (val < gdtr.cbGdt)
5224 {
5225 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5226 HMR0DumpDescriptor(pDesc, val, "ES: ");
5227 }
5228
5229 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
5230 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
5231 if (val < gdtr.cbGdt)
5232 {
5233 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5234 HMR0DumpDescriptor(pDesc, val, "FS: ");
5235 }
5236
5237 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
5238 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
5239 if (val < gdtr.cbGdt)
5240 {
5241 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5242 HMR0DumpDescriptor(pDesc, val, "GS: ");
5243 }
5244
5245 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
5246 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
5247 if (val < gdtr.cbGdt)
5248 {
5249 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5250 HMR0DumpDescriptor(pDesc, val, "SS: ");
5251 }
5252
5253 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
5254 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
5255 if (val < gdtr.cbGdt)
5256 {
5257 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5258 HMR0DumpDescriptor(pDesc, val, "TR: ");
5259 }
5260
5261 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
5262 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
5263 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
5264 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
5265 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
5266 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
5267 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
5268 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
5269 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
5270 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
5271 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
5272 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
5273 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
5274 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
5275 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
5276 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
5277# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5278 if (VMX_IS_64BIT_HOST_MODE())
5279 {
5280 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
5281 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
5282 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5283 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5284 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5285 Log(("MSR_K8_KERNEL_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
5286 }
5287# endif
5288#endif /* VBOX_STRICT */
5289 }
5290 break;
5291 }
5292
5293 default:
5294 /* impossible */
5295 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
5296 break;
5297 }
5298}
5299
5300
5301#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5302/**
5303 * Prepares for and executes VMLAUNCH (64-bit guest mode).
5304 *
5305 * @returns VBox status code.
5306 * @param   fResume     Whether to vmlaunch/vmresume.
5307 * @param pCtx Pointer to the guest CPU context.
5308 * @param pCache Pointer to the VMCS cache.
5309 * @param pVM Pointer to the VM.
5310 * @param pVCpu Pointer to the VMCPU.
5311 */
5312DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5313{
5314 uint32_t aParam[6];
5315 PHMGLOBLCPUINFO pCpu;
5316 RTHCPHYS HCPhysCpuPage;
5317 int rc;
5318
5319 pCpu = HMR0GetCurrentCpu();
5320 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5321
5322#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5323 pCache->uPos = 1;
5324 pCache->interPD = PGMGetInterPaeCR3(pVM);
5325 pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0;
5326#endif
5327
5328#ifdef DEBUG
5329 pCache->TestIn.HCPhysCpuPage= 0;
5330 pCache->TestIn.HCPhysVMCS = 0;
5331 pCache->TestIn.pCache = 0;
5332 pCache->TestOut.HCPhysVMCS = 0;
5333 pCache->TestOut.pCache = 0;
5334 pCache->TestOut.pCtx = 0;
5335 pCache->TestOut.eflags = 0;
5336#endif
5337
5338 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5339 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5340 aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */
5341 aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. */
5342 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache);
5343 aParam[5] = 0;
5344
5345#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5346 pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8;
5347 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1;
5348#endif
5349 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
5350
5351#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5352 Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5);
5353 Assert(pCtx->dr[4] == 10);
5354 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff;
5355#endif
5356
5357#ifdef DEBUG
5358    AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5359 AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5360 pVCpu->hm.s.vmx.HCPhysVMCS));
5361 AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5362 pCache->TestOut.HCPhysVMCS));
5363 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5364 pCache->TestOut.pCache));
5365 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache),
5366 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache)));
5367 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5368 pCache->TestOut.pCtx));
5369 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5370#endif
5371 return rc;
5372}
5373
5374
5375# ifdef VBOX_STRICT
5376static bool hmR0VmxIsValidReadField(uint32_t idxField)
5377{
5378 switch (idxField)
5379 {
5380 case VMX_VMCS_GUEST_RIP:
5381 case VMX_VMCS_GUEST_RSP:
5382 case VMX_VMCS_GUEST_RFLAGS:
5383 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
5384 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
5385 case VMX_VMCS_GUEST_CR0:
5386 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
5387 case VMX_VMCS_GUEST_CR4:
5388 case VMX_VMCS_GUEST_DR7:
5389 case VMX_VMCS32_GUEST_SYSENTER_CS:
5390 case VMX_VMCS_GUEST_SYSENTER_EIP:
5391 case VMX_VMCS_GUEST_SYSENTER_ESP:
5392 case VMX_VMCS32_GUEST_GDTR_LIMIT:
5393 case VMX_VMCS_GUEST_GDTR_BASE:
5394 case VMX_VMCS32_GUEST_IDTR_LIMIT:
5395 case VMX_VMCS_GUEST_IDTR_BASE:
5396 case VMX_VMCS16_GUEST_FIELD_CS:
5397 case VMX_VMCS32_GUEST_CS_LIMIT:
5398 case VMX_VMCS_GUEST_CS_BASE:
5399 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
5400 case VMX_VMCS16_GUEST_FIELD_DS:
5401 case VMX_VMCS32_GUEST_DS_LIMIT:
5402 case VMX_VMCS_GUEST_DS_BASE:
5403 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
5404 case VMX_VMCS16_GUEST_FIELD_ES:
5405 case VMX_VMCS32_GUEST_ES_LIMIT:
5406 case VMX_VMCS_GUEST_ES_BASE:
5407 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
5408 case VMX_VMCS16_GUEST_FIELD_FS:
5409 case VMX_VMCS32_GUEST_FS_LIMIT:
5410 case VMX_VMCS_GUEST_FS_BASE:
5411 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
5412 case VMX_VMCS16_GUEST_FIELD_GS:
5413 case VMX_VMCS32_GUEST_GS_LIMIT:
5414 case VMX_VMCS_GUEST_GS_BASE:
5415 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
5416 case VMX_VMCS16_GUEST_FIELD_SS:
5417 case VMX_VMCS32_GUEST_SS_LIMIT:
5418 case VMX_VMCS_GUEST_SS_BASE:
5419 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
5420 case VMX_VMCS16_GUEST_FIELD_LDTR:
5421 case VMX_VMCS32_GUEST_LDTR_LIMIT:
5422 case VMX_VMCS_GUEST_LDTR_BASE:
5423 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
5424 case VMX_VMCS16_GUEST_FIELD_TR:
5425 case VMX_VMCS32_GUEST_TR_LIMIT:
5426 case VMX_VMCS_GUEST_TR_BASE:
5427 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
5428 case VMX_VMCS32_RO_EXIT_REASON:
5429 case VMX_VMCS32_RO_VM_INSTR_ERROR:
5430 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
5431 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
5432 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
5433 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
5434 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5435 case VMX_VMCS32_RO_IDT_INFO:
5436 case VMX_VMCS32_RO_IDT_ERRCODE:
5437 case VMX_VMCS_GUEST_CR3:
5438 case VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL:
5439 return true;
5440 }
5441 return false;
5442}
5443
5444
5445static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5446{
5447 switch (idxField)
5448 {
5449 case VMX_VMCS_GUEST_LDTR_BASE:
5450 case VMX_VMCS_GUEST_TR_BASE:
5451 case VMX_VMCS_GUEST_GDTR_BASE:
5452 case VMX_VMCS_GUEST_IDTR_BASE:
5453 case VMX_VMCS_GUEST_SYSENTER_EIP:
5454 case VMX_VMCS_GUEST_SYSENTER_ESP:
5455 case VMX_VMCS_GUEST_CR0:
5456 case VMX_VMCS_GUEST_CR4:
5457 case VMX_VMCS_GUEST_CR3:
5458 case VMX_VMCS_GUEST_DR7:
5459 case VMX_VMCS_GUEST_RIP:
5460 case VMX_VMCS_GUEST_RSP:
5461 case VMX_VMCS_GUEST_CS_BASE:
5462 case VMX_VMCS_GUEST_DS_BASE:
5463 case VMX_VMCS_GUEST_ES_BASE:
5464 case VMX_VMCS_GUEST_FS_BASE:
5465 case VMX_VMCS_GUEST_GS_BASE:
5466 case VMX_VMCS_GUEST_SS_BASE:
5467 return true;
5468 }
5469 return false;
5470}
5471# endif /* VBOX_STRICT */
5472
5473
5474/**
5475 * Executes the specified handler in 64-bit mode.
5476 *
5477 * @returns VBox status code.
5478 * @param pVM Pointer to the VM.
5479 * @param pVCpu Pointer to the VMCPU.
5480 * @param pCtx Pointer to the guest CPU context.
5481 * @param pfnHandler Pointer to the RC handler function.
5482 * @param cbParam Number of parameters.
5483 * @param paParam Array of 32-bit parameters.
5484 */
5485VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
5486 uint32_t *paParam)
5487{
5488 int rc, rc2;
5489 PHMGLOBLCPUINFO pCpu;
5490 RTHCPHYS HCPhysCpuPage;
5491 RTHCUINTREG uOldEFlags;
5492
5493 AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5494 Assert(pfnHandler);
5495 Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField));
5496 Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField));
5497
5498#ifdef VBOX_STRICT
5499    for (unsigned i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries; i++)
5500 Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i]));
5501
5502    for (unsigned i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries; i++)
5503 Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i]));
5504#endif
5505
5506 /* Disable interrupts. */
5507 uOldEFlags = ASMIntDisableFlags();
5508
5509#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5510 RTCPUID idHostCpu = RTMpCpuId();
5511 CPUMR0SetLApic(pVM, idHostCpu);
5512#endif
5513
5514 pCpu = HMR0GetCurrentCpu();
5515 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5516
5517    /* Clear the VMCS, marking it inactive, clearing implementation-specific data and writing the VMCS data back to memory. */
5518 VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
5519
5520 /* Leave VMX Root Mode. */
5521 VMXDisable();
5522
5523 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5524
5525 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5526 CPUMSetHyperEIP(pVCpu, pfnHandler);
5527    for (int i = (int)cbParam - 1; i >= 0; i--)
5528 CPUMPushHyper(pVCpu, paParam[i]);
5529
5530 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
5531
5532 /* Call switcher. */
5533 rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5534 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
5535
5536 /* Make sure the VMX instructions don't cause #UD faults. */
5537 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5538
5539 /* Enter VMX Root Mode */
5540 rc2 = VMXEnable(HCPhysCpuPage);
5541 if (RT_FAILURE(rc2))
5542 {
5543 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5544 ASMSetFlags(uOldEFlags);
5545 return VERR_VMX_VMXON_FAILED;
5546 }
5547
5548 rc2 = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVMCS);
5549 AssertRC(rc2);
5550 Assert(!(ASMGetFlags() & X86_EFL_IF));
5551 ASMSetFlags(uOldEFlags);
5552 return rc;
5553}
5554#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5555
5556
5557#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5558/**
5559 * Executes VMWRITE.
5560 *
5561 * @returns VBox status code
5562 * @param pVCpu Pointer to the VMCPU.
5563 * @param idxField VMCS field index.
5564 * @param   u64Val      16-, 32- or 64-bit value.
5565 */
5566VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5567{
5568 int rc;
5569 switch (idxField)
5570 {
5571 case VMX_VMCS64_CTRL_TSC_OFFSET_FULL:
5572 case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL:
5573 case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL:
5574 case VMX_VMCS64_CTRL_MSR_BITMAP_FULL:
5575 case VMX_VMCS64_CTRL_VMEXIT_MSR_STORE_FULL:
5576 case VMX_VMCS64_CTRL_VMEXIT_MSR_LOAD_FULL:
5577 case VMX_VMCS64_CTRL_VMENTRY_MSR_LOAD_FULL:
5578 case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL:
5579 case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL:
5580 case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL:
5581 case VMX_VMCS64_GUEST_PDPTE0_FULL:
5582 case VMX_VMCS64_GUEST_PDPTE1_FULL:
5583 case VMX_VMCS64_GUEST_PDPTE2_FULL:
5584 case VMX_VMCS64_GUEST_PDPTE3_FULL:
5585 case VMX_VMCS64_GUEST_DEBUGCTL_FULL:
5586 case VMX_VMCS64_GUEST_EFER_FULL:
5587 case VMX_VMCS64_CTRL_EPTP_FULL:
5588            /* These fields consist of two parts, which are both writable in 32-bit mode. */
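            /* The upper dword is written through the companion "high" field, encoded as idxField + 1. */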
5589 rc = VMXWriteVMCS32(idxField, u64Val);
5590 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5591 AssertRC(rc);
5592 return rc;
5593
5594 case VMX_VMCS_GUEST_LDTR_BASE:
5595 case VMX_VMCS_GUEST_TR_BASE:
5596 case VMX_VMCS_GUEST_GDTR_BASE:
5597 case VMX_VMCS_GUEST_IDTR_BASE:
5598 case VMX_VMCS_GUEST_SYSENTER_EIP:
5599 case VMX_VMCS_GUEST_SYSENTER_ESP:
5600 case VMX_VMCS_GUEST_CR0:
5601 case VMX_VMCS_GUEST_CR4:
5602 case VMX_VMCS_GUEST_CR3:
5603 case VMX_VMCS_GUEST_DR7:
5604 case VMX_VMCS_GUEST_RIP:
5605 case VMX_VMCS_GUEST_RSP:
5606 case VMX_VMCS_GUEST_CS_BASE:
5607 case VMX_VMCS_GUEST_DS_BASE:
5608 case VMX_VMCS_GUEST_ES_BASE:
5609 case VMX_VMCS_GUEST_FS_BASE:
5610 case VMX_VMCS_GUEST_GS_BASE:
5611 case VMX_VMCS_GUEST_SS_BASE:
5612            /* Queue a 64-bit value as we can't set it in 32-bit host mode. */
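            /* The queued value is applied to the VMCS once the 64-bit switcher context is entered
               (the VMCS cache is handed to it by VMXR0SwitcherStartVM64 via aParam[4]). */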
5613 if (u64Val >> 32ULL)
5614 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
5615 else
5616 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
5617
5618 return rc;
5619
5620 default:
5621 AssertMsgFailed(("Unexpected field %x\n", idxField));
5622 return VERR_INVALID_PARAMETER;
5623 }
5624}
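/*
 * Illustrative example (not part of the original file): writing a natural-width guest field
 * from a 32-bit host, assuming a 64-bit guest CR3 value in pCtx->cr3:
 *
 *     int rc = VMXWriteVMCS64Ex(pVCpu, VMX_VMCS_GUEST_CR3, pCtx->cr3);
 *     // upper dword zero     -> written directly with VMXWriteVMCS32()
 *     // upper dword non-zero -> queued with VMXWriteCachedVMCSEx() for the 64-bit switcher
 */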
5625
5626
5627/**
5628 * Cache VMCS writes for running 64-bit guests on 32-bit hosts.
5629 *
5630 * @param pVCpu Pointer to the VMCPU.
5631 * @param idxField VMCS field index.
5632 * @param   u64Val      16-, 32- or 64-bit value.
5633 */
5634VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5635{
5636 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
5637
5638 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5639 ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5640
5641 /* Make sure there are no duplicates. */
5642 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
5643 {
5644 if (pCache->Write.aField[i] == idxField)
5645 {
5646 pCache->Write.aFieldVal[i] = u64Val;
5647 return VINF_SUCCESS;
5648 }
5649 }
5650
5651 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5652 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5653 pCache->Write.cValidEntries++;
5654 return VINF_SUCCESS;
5655}
5656
5657#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_HYBRID_32BIT_KERNEL */
5658