VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@ 43379

Last change on this file since 43379 was 43379, checked in by vboxsync, 12 years ago

SUPDrv,VMM: Prepared for SUPR0EnableVTx on darwin.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 222.6 KB
1/* $Id: HWVMXR0.cpp 43379 2012-09-20 23:29:12Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HWACCM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hwaccm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HWACCMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
55#if defined(RT_ARCH_AMD64)
56# define VMX_IS_64BIT_HOST_MODE() (true)
57#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
58# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
59#else
60# define VMX_IS_64BIT_HOST_MODE() (false)
61#endif
62
63
64/*******************************************************************************
65* Global Variables *
66*******************************************************************************/
67/* IO operation lookup arrays. */
68static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
69static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
70
71#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
72/** See HWACCMR0A.asm. */
73extern "C" uint32_t g_fVMXIs64bitHost;
74#endif
75
76
77/*******************************************************************************
78* Local Functions *
79*******************************************************************************/
80static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
81static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
82static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
83static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
84static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
85static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
86static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
87static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
88static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
89
90
91/**
92 * Updates the error from the VMCS into HWACCMCPU's lasterror record.
93 *
94 * @param pVM Pointer to the VM.
95 * @param pVCpu Pointer to the VMCPU.
96 * @param rc The error code.
97 */
98static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
99{
100 if (rc == VERR_VMX_GENERIC)
101 {
102 RTCCUINTREG instrError;
103
104 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
105 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
106 }
107 pVM->hwaccm.s.lLastError = rc;
108}
109
110
111/**
112 * Sets up and activates VT-x on the current CPU.
113 *
114 * @returns VBox status code.
115 * @param pCpu Pointer to the CPU info struct.
116 * @param pVM Pointer to the VM. (can be NULL after a resume!!)
117 * @param pvCpuPage Pointer to the global CPU page.
118 * @param HCPhysCpuPage Physical address of the global CPU page.
119 * @param fEnabledByHost Set if SUPR0EnableVTx or similar was used to enable
120 * VT-x/AMD-V on the host.
121 */
122VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost)
123{
124 if (!fEnabledByHost)
125 {
126 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
127 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
128
129 if (pVM)
130 {
131 /* Set revision dword at the beginning of the VMXON structure. */
132 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
133 }
134
135 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
136 * (which can have very bad consequences!!!)
137 */
138
139 /** @todo r=bird: Why is this code different than the probing code earlier
140 * on? It just sets VMXE if needed and doesn't check that it isn't
141 * set. Mac OS X host_vmxoff may leave this set and we'll fail here
142 * and debug-assert in the calling code. This is what caused the
143 * "regression" after backing out the SUPR0EnableVTx code hours before
144 * 4.2.0GA (reboot fixed the issue). I've changed here to do the same
145 * as the init code. */
146 uint64_t uCr4 = ASMGetCR4();
147 if (!(uCr4 & X86_CR4_VMXE))
148 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */
149
150 /*
151 * Enter VM root mode.
152 */
153 int rc = VMXEnable(HCPhysCpuPage);
154 if (RT_FAILURE(rc))
155 {
156 ASMSetCR4(uCr4);
157 return VERR_VMX_VMXON_FAILED;
158 }
159 }
160
161 /*
162 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
163 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
164 * each time while reusing a VPID after hitting the MaxASID limit once.
165 */
166 if ( pVM
167 && pVM->hwaccm.s.vmx.fVPID
168 && (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS))
169 {
170 hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
171 pCpu->fFlushASIDBeforeUse = false;
172 }
173 else
174 pCpu->fFlushASIDBeforeUse = true;
175
176 /*
177 * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}.
178 */
179 ++pCpu->cTLBFlushes;
180
181 return VINF_SUCCESS;
182}
183
184
185/**
186 * Deactivates VT-x on the current CPU.
187 *
188 * @returns VBox status code.
189 * @param pCpu Pointer to the CPU info struct.
190 * @param pvCpuPage Pointer to the global CPU page.
191 * @param HCPhysCpuPage Physical address of the global CPU page.
192 */
193VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
194{
195 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
196 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
197 NOREF(pCpu);
198
199 /* If we're somehow not in VMX root mode, then we shouldn't dare leave it. */
200 if (!(ASMGetCR4() & X86_CR4_VMXE))
201 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
202
203 /* Leave VMX Root Mode. */
204 VMXDisable();
205
206 /* And clear the X86_CR4_VMXE bit. */
207 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
208 return VINF_SUCCESS;
209}
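/*
 * Illustrative sketch only (not upstream code): how a ring-0 caller could allocate the
 * VMXON page and bracket VT-x usage with VMXR0EnableCpu/VMXR0DisableCpu. The real caller
 * lives in HWACCMR0.cpp; the helper name and the origin of pCpu/pVM below are assumptions
 * made purely for illustration.
 */
#if 0
static int vmxExampleEnableDisableOnThisCpu(PHMGLOBLCPUINFO pCpu, PVM pVM)
{
    RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
    int rc = RTR0MemObjAllocCont(&hMemObj, PAGE_SIZE, false /* executable R0 mapping */);
    if (RT_FAILURE(rc))
        return rc;

    void     *pvCpuPage     = RTR0MemObjAddress(hMemObj);
    RTHCPHYS  HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(hMemObj, 0);
    ASMMemZeroPage(pvCpuPage);

    rc = VMXR0EnableCpu(pCpu, pVM, pvCpuPage, HCPhysCpuPage, false /* fEnabledByHost */);
    if (RT_SUCCESS(rc))
    {
        /* ... set up and run guest code on this CPU ... */
        rc = VMXR0DisableCpu(pCpu, pvCpuPage, HCPhysCpuPage);
    }

    RTR0MemObjFree(hMemObj, false /* fFreeMappings */);
    return rc;
}
#endif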
210
211
212/**
213 * Does Ring-0 per VM VT-x initialization.
214 *
215 * @returns VBox status code.
216 * @param pVM Pointer to the VM.
217 */
218VMMR0DECL(int) VMXR0InitVM(PVM pVM)
219{
220 int rc;
221
222#ifdef LOG_ENABLED
223 SUPR0Printf("VMXR0InitVM %p\n", pVM);
224#endif
225
226 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
227
228 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
229 {
230 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
231 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, PAGE_SIZE, false /* executable R0 mapping */);
232 AssertRC(rc);
233 if (RT_FAILURE(rc))
234 return rc;
235
236 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
237 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
238 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
239 }
240 else
241 {
242 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
243 pVM->hwaccm.s.vmx.pAPIC = 0;
244 pVM->hwaccm.s.vmx.pAPICPhys = 0;
245 }
246
247#ifdef VBOX_WITH_CRASHDUMP_MAGIC
248 {
249 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, PAGE_SIZE, false /* executable R0 mapping */);
250 AssertRC(rc);
251 if (RT_FAILURE(rc))
252 return rc;
253
254 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
255 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
256
257 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
258 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
259 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
260 }
261#endif
262
263 /* Allocate VMCSs for all guest CPUs. */
264 for (VMCPUID i = 0; i < pVM->cCpus; i++)
265 {
266 PVMCPU pVCpu = &pVM->aCpus[i];
267
268 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
269
270 /* Allocate one page for the VM control structure (VMCS). */
271 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVMCS, PAGE_SIZE, false /* executable R0 mapping */);
272 AssertRC(rc);
273 if (RT_FAILURE(rc))
274 return rc;
275
276 pVCpu->hwaccm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVMCS);
277 pVCpu->hwaccm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVMCS, 0);
278 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pvVMCS);
279
280 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
281 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
282
283 /* Allocate one page for the virtual APIC page for TPR caching. */
284 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVAPIC, PAGE_SIZE, false /* executable R0 mapping */);
285 AssertRC(rc);
286 if (RT_FAILURE(rc))
287 return rc;
288
289 pVCpu->hwaccm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVAPIC);
290 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, 0);
291 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pbVAPIC);
292
293 /* Allocate the MSR bitmap if this feature is supported. */
294 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
295 {
296 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, PAGE_SIZE, false /* executable R0 mapping */);
297 AssertRC(rc);
298 if (RT_FAILURE(rc))
299 return rc;
300
301 pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap);
302 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
303 memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
304 }
305
306#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
307 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
308 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, PAGE_SIZE, false /* executable R0 mapping */);
309 AssertRC(rc);
310 if (RT_FAILURE(rc))
311 return rc;
312
313 pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR);
314 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0);
315 Assert(!(pVCpu->hwaccm.s.vmx.pGuestMSRPhys & 0xf));
316 memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE);
317
318 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
319 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, PAGE_SIZE, false /* executable R0 mapping */);
320 AssertRC(rc);
321 if (RT_FAILURE(rc))
322 return rc;
323
324 pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR);
325 pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0);
326 Assert(!(pVCpu->hwaccm.s.vmx.pHostMSRPhys & 0xf));
327 memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE);
328#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
329
330 /* Current guest paging mode. */
331 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
332
333#ifdef LOG_ENABLED
334 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pvVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.HCPhysVMCS);
335#endif
336 }
337
338 return VINF_SUCCESS;
339}
340
341
342/**
343 * Does Ring-0 per VM VT-x termination.
344 *
345 * @returns VBox status code.
346 * @param pVM Pointer to the VM.
347 */
348VMMR0DECL(int) VMXR0TermVM(PVM pVM)
349{
350 for (VMCPUID i = 0; i < pVM->cCpus; i++)
351 {
352 PVMCPU pVCpu = &pVM->aCpus[i];
353
354 if (pVCpu->hwaccm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ)
355 {
356 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVMCS, false);
357 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
358 pVCpu->hwaccm.s.vmx.pvVMCS = 0;
359 pVCpu->hwaccm.s.vmx.HCPhysVMCS = 0;
360 }
361 if (pVCpu->hwaccm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ)
362 {
363 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, false);
364 pVCpu->hwaccm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ;
365 pVCpu->hwaccm.s.vmx.pbVAPIC = 0;
366 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = 0;
367 }
368 if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
369 {
370 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false);
371 pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
372 pVCpu->hwaccm.s.vmx.pMSRBitmap = 0;
373 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0;
374 }
375#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
376 if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ)
377 {
378 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false);
379 pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ;
380 pVCpu->hwaccm.s.vmx.pHostMSR = 0;
381 pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0;
382 }
383 if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ)
384 {
385 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false);
386 pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ;
387 pVCpu->hwaccm.s.vmx.pGuestMSR = 0;
388 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0;
389 }
390#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
391 }
392 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
393 {
394 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
395 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
396 pVM->hwaccm.s.vmx.pAPIC = 0;
397 pVM->hwaccm.s.vmx.pAPICPhys = 0;
398 }
399#ifdef VBOX_WITH_CRASHDUMP_MAGIC
400 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
401 {
402 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
403 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
404 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
405 pVM->hwaccm.s.vmx.pScratch = 0;
406 pVM->hwaccm.s.vmx.pScratchPhys = 0;
407 }
408#endif
409 return VINF_SUCCESS;
410}
411
412
413/**
414 * Sets up VT-x for the specified VM.
415 *
416 * @returns VBox status code.
417 * @param pVM Pointer to the VM.
418 */
419VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
420{
421 int rc = VINF_SUCCESS;
422 uint32_t val;
423
424 AssertReturn(pVM, VERR_INVALID_PARAMETER);
425
426 /* Initialize these always, see hwaccmR3InitFinalizeR0().*/
427 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NONE;
428 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NONE;
429
430 /* Determine optimal flush type for EPT. */
431 if (pVM->hwaccm.s.fNestedPaging)
432 {
433 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT)
434 {
435 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_SINGLE_CONTEXT)
436 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_SINGLE_CONTEXT;
437 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_ALL_CONTEXTS)
438 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_ALL_CONTEXTS;
439 else
440 {
441 /*
442 * Should never really happen. EPT is supported but no suitable flush types are supported.
443 * We cannot ignore EPT at this point as we've already set up Unrestricted Guest execution.
444 */
445 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED;
446 return VERR_VMX_GENERIC;
447 }
448 }
449 else
450 {
451 /*
452 * Should never really happen. EPT is supported but the INVEPT instruction is not supported.
453 */
454 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED;
455 return VERR_VMX_GENERIC;
456 }
457 }
458
459 /* Determine optimal flush type for VPID. */
460 if (pVM->hwaccm.s.vmx.fVPID)
461 {
462 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID)
463 {
464 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
465 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_SINGLE_CONTEXT;
466 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS)
467 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_ALL_CONTEXTS;
468 else
469 {
470 /*
471 * Neither SINGLE nor ALL context flush types for VPID are supported by the CPU.
472 * We do not handle other flush type combinations, so ignore the VPID capabilities.
473 */
474 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
475 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
476 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT_RETAIN_GLOBALS)
477 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
478 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED;
479 pVM->hwaccm.s.vmx.fVPID = false;
480 }
481 }
482 else
483 {
484 /*
485 * Should not really happen. VPID is supported but the INVVPID instruction is not.
486 * Ignore the VPID capabilities as our code relies on INVVPID for selective flushing.
487 */
488 Log(("VMXR0SetupVM: VPID supported without INVVPID support. Ignoring VPID.\n"));
489 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED;
490 pVM->hwaccm.s.vmx.fVPID = false;
491 }
492 }
493
494 for (VMCPUID i = 0; i < pVM->cCpus; i++)
495 {
496 PVMCPU pVCpu = &pVM->aCpus[i];
497
498 AssertPtr(pVCpu->hwaccm.s.vmx.pvVMCS);
499
500 /* Set revision dword at the beginning of the VMCS structure. */
501 *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
502
503 /*
504 * Clear and activate the VMCS.
505 */
506 Log(("HCPhysVMCS = %RHp\n", pVCpu->hwaccm.s.vmx.HCPhysVMCS));
507 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
508 if (RT_FAILURE(rc))
509 goto vmx_end;
510
511 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
512 if (RT_FAILURE(rc))
513 goto vmx_end;
514
515 /*
516 * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
517 * Set required bits to one and zero according to the MSR capabilities.
518 */
519 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
520 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */
521 | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */
522
523 /*
524 * Enable the VMX preemption timer.
525 */
526 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
527 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
528 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
529
530 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
531 AssertRC(rc);
532
533 /*
534 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
535 * Set required bits to one and zero according to the MSR capabilities.
536 */
537 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
538 /* Program which events cause VM-exits and which features we want to use. */
539 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
540 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
541 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
542 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
543 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
544 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
545 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside
546 the guest (host thinks the cpu load is high) */
547
548 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
549 if (!pVM->hwaccm.s.fNestedPaging)
550 {
551 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
552 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
553 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
554 }
555
556 /*
557 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch
558 * failure with an invalid control fields error. (combined with some other exit reasons)
559 */
560 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
561 {
562 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
563 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
564 Assert(pVM->hwaccm.s.vmx.pAPIC);
565 }
566 else
567 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
568 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
569
570 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
571 {
572 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
573 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
574 }
575
576 /* We will use the secondary control if it's present. */
577 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
578
579 /* Mask away the bits that the CPU doesn't support */
580 /** @todo make sure they don't conflict with the above requirements. */
581 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
582 pVCpu->hwaccm.s.vmx.proc_ctls = val;
583
584 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
585 AssertRC(rc);
586
587 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
588 {
589 /*
590 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
591 * Set required bits to one and zero according to the MSR capabilities.
592 */
593 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
594 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
595
596 if (pVM->hwaccm.s.fNestedPaging)
597 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
598
599 if (pVM->hwaccm.s.vmx.fVPID)
600 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
601
602 if (pVM->hwaccm.s.fHasIoApic)
603 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
604
605 if (pVM->hwaccm.s.vmx.fUnrestrictedGuest)
606 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
607
608 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
609 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP;
610
611 /* Mask away the bits that the CPU doesn't support */
612 /** @todo make sure they don't conflict with the above requirements. */
613 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
614 pVCpu->hwaccm.s.vmx.proc_ctls2 = val;
615 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
616 AssertRC(rc);
617 }
618
619 /*
620 * VMX_VMCS_CTRL_CR3_TARGET_COUNT
621 * Set required bits to one and zero according to the MSR capabilities.
622 */
623 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
624 AssertRC(rc);
625
626 /*
627 * Forward all exceptions except #NM & #PF to the guest.
628 * We always need to check page faults since our shadow page tables can be out of sync.
629 * And we always lazily sync the FPU & XMM state.
630 */
631
632 /** @todo Possible optimization:
633 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
634 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
635 * registers ourselves of course.
636 *
637 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
638 */
639
640 /*
641 * Don't filter page faults, all of them should cause a world switch.
642 */
643 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
644 AssertRC(rc);
645 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
646 AssertRC(rc);
647
648 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
649 AssertRC(rc);
650 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
651 AssertRC(rc);
652 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
653 AssertRC(rc);
654
655 /*
656 * Set the MSR bitmap address.
657 */
658 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
659 {
660 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
661
662 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
663 AssertRC(rc);
664
665 /*
666 * Allow the guest to directly modify these MSRs; they are loaded/stored automatically
667 * using MSR-load/store areas in the VMCS.
668 */
669 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
670 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
671 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
672 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
673 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
674 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
675 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
676 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
677 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
678 if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
679 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_TSC_AUX, true, true);
680 }
681
682#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
683 /*
684 * Set the guest & host MSR load/store physical addresses.
685 */
686 Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
687 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
688 AssertRC(rc);
689 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
690 AssertRC(rc);
691 Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys);
692 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys);
693 AssertRC(rc);
694#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
695
696 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
697 AssertRC(rc);
698 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
699 AssertRC(rc);
700 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, 0);
701 AssertRC(rc);
702
703 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
704 {
705 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
706 /* Optional */
707 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
708 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.HCPhysVAPIC);
709
710 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
711 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
712
713 AssertRC(rc);
714 }
715
716 /* Set link pointer to -1. Not currently used. */
717 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
718 AssertRC(rc);
719
720 /*
721 * Clear the VMCS, marking it inactive. This clears implementation-specific data and writes
722 * the VMCS data back to memory.
723 */
724 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
725 AssertRC(rc);
726
727 /*
728 * Configure the VMCS read cache.
729 */
730 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
731
732 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
733 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
734 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
735 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
736 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
737 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
738 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
739 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
740 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
741 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
742 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
743 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
744 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
745 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
746 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
747 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
748
749 VMX_SETUP_SELREG(ES, pCache);
750 VMX_SETUP_SELREG(SS, pCache);
751 VMX_SETUP_SELREG(CS, pCache);
752 VMX_SETUP_SELREG(DS, pCache);
753 VMX_SETUP_SELREG(FS, pCache);
754 VMX_SETUP_SELREG(GS, pCache);
755 VMX_SETUP_SELREG(LDTR, pCache);
756 VMX_SETUP_SELREG(TR, pCache);
757
758 /*
759 * Status code VMCS reads.
760 */
761 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
762 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
763 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
764 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
765 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
766 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
767 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
768 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
769 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
770
771 if (pVM->hwaccm.s.fNestedPaging)
772 {
773 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
774 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
775 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
776 }
777 else
778 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
779 } /* for each VMCPU */
780
781 /*
782 * Setup the right TLB function based on CPU capabilities.
783 */
784 if (pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID)
785 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBBoth;
786 else if (pVM->hwaccm.s.fNestedPaging)
787 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBEPT;
788 else if (pVM->hwaccm.s.vmx.fVPID)
789 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBVPID;
790 else
791 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBDummy;
792
793vmx_end:
794 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
795 return rc;
796}
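/*
 * Illustrative sketch: the control-field pattern VMXR0SetupVM() repeats for the pin-based,
 * processor-based, entry and exit controls -- start from the must-be-one bits (disallowed0),
 * OR in the features we want, then mask with the may-be-one bits (allowed1). The helper
 * name below is hypothetical.
 */
#if 0
static uint32_t vmxExampleMakeCtls(uint32_t fDisallowed0, uint32_t fAllowed1, uint32_t fDesired)
{
    uint32_t val = fDisallowed0;    /* bits the CPU requires to be 1 */
    val |= fDesired;                /* bits we would like to enable */
    val &= fAllowed1;               /* drop bits the CPU doesn't support */
    return val;
}

/* E.g. for the pin-based execution controls: */
uint32_t u32PinCtls = vmxExampleMakeCtls(pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0,
                                         pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1,
                                           VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT
                                         | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT);
#endif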
797
798
799/**
800 * Sets the permission bits for the specified MSR.
801 *
802 * @param pVCpu Pointer to the VMCPU.
803 * @param ulMSR The MSR value.
804 * @param fRead Whether reading is allowed.
805 * @param fWrite Whether writing is allowed.
806 */
807static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
808{
809 unsigned ulBit;
810 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap;
811
812 /*
813 * Layout:
814 * 0x000 - 0x3ff - Low MSR read bits
815 * 0x400 - 0x7ff - High MSR read bits
816 * 0x800 - 0xbff - Low MSR write bits
817 * 0xc00 - 0xfff - High MSR write bits
818 */
819 if (ulMSR <= 0x00001FFF)
820 {
821 /* Pentium-compatible MSRs */
822 ulBit = ulMSR;
823 }
824 else if ( ulMSR >= 0xC0000000
825 && ulMSR <= 0xC0001FFF)
826 {
827 /* AMD Sixth Generation x86 Processor MSRs */
828 ulBit = (ulMSR - 0xC0000000);
829 pMSRBitmap += 0x400;
830 }
831 else
832 {
833 AssertFailed();
834 return;
835 }
836
837 Assert(ulBit <= 0x1fff);
838 if (fRead)
839 ASMBitClear(pMSRBitmap, ulBit);
840 else
841 ASMBitSet(pMSRBitmap, ulBit);
842
843 if (fWrite)
844 ASMBitClear(pMSRBitmap + 0x800, ulBit);
845 else
846 ASMBitSet(pMSRBitmap + 0x800, ulBit);
847}
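/*
 * Worked example for the bitmap layout documented in hmR0VmxSetMSRPermission() above.
 * The offsets follow directly from the code; the call at the end is just an illustration.
 */
#if 0
    /* MSR_IA32_SYSENTER_CS (0x174) is a "low" MSR:
       read intercept = bit 0x174 at offset 0x000, write intercept = bit 0x174 at offset 0x800. */

    /* MSR_K8_LSTAR (0xC0000082) is a "high" MSR:
       read intercept = bit 0x82 at offset 0x400, write intercept = bit 0x82 at offset 0xC00. */

    /* Give the guest direct read & write access to LSTAR (clears both intercept bits): */
    hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true /* fRead */, true /* fWrite */);
#endif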
848
849
850/**
851 * Injects an event (trap or external interrupt).
852 *
853 * @returns VBox status code. Note that it may return VINF_EM_RESET to
854 * indicate a triple fault when injecting X86_XCPT_DF.
855 *
856 * @param pVM Pointer to the VM.
857 * @param pVCpu Pointer to the VMCPU.
858 * @param pCtx Pointer to the guest CPU Context.
859 * @param intInfo VMX interrupt info.
860 * @param cbInstr Opcode length of faulting instruction.
861 * @param errCode Error code (optional).
862 */
863static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
864{
865 int rc;
866 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
867
868#ifdef VBOX_WITH_STATISTICS
869 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
870#endif
871
872#ifdef VBOX_STRICT
873 if (iGate == 0xE)
874 {
875 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate,
876 (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
877 }
878 else if (iGate < 0x20)
879 {
880 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip,
881 errCode));
882 }
883 else
884 {
885 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
886 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
887 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
888 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
889 || pCtx->eflags.u32 & X86_EFL_IF);
890 }
891#endif
892
893 if ( CPUMIsGuestInRealModeEx(pCtx)
894 && pVM->hwaccm.s.vmx.pRealModeTSS)
895 {
896 RTGCPHYS GCPhysHandler;
897 uint16_t offset, ip;
898 RTSEL sel;
899
900 /*
901 * Injecting events doesn't work right with real mode emulation.
902 * (#GP if we try to inject external hardware interrupts)
903 * Inject the interrupt or trap directly instead.
904 *
905 * ASSUMES no access handlers for the bits we read or write below (should be safe).
906 */
907 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
908
909 /*
910 * Check if the interrupt handler is present.
911 */
912 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
913 {
914 Log(("IDT cbIdt violation\n"));
915 if (iGate != X86_XCPT_DF)
916 {
917 uint32_t intInfo2;
918
919 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
920 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
921 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
922 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
923
924 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
925 }
926 Log(("Triple fault -> reset the VM!\n"));
927 return VINF_EM_RESET;
928 }
929 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
930 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
931 || iGate == 4)
932 {
933 ip = pCtx->ip + cbInstr;
934 }
935 else
936 ip = pCtx->ip;
937
938 /*
939 * Read the selector:offset pair of the interrupt handler.
940 */
941 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
942 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
943 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
944
945 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
946
947 /*
948 * Construct the stack frame.
949 */
950 /** @todo Check stack limit. */
951 pCtx->sp -= 2;
952 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->eflags.u));
953 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
954 pCtx->sp -= 2;
955 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->cs.Sel));
956 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
957 pCtx->sp -= 2;
958 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss.Sel, pCtx->sp, ip));
959 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
960
961 /*
962 * Update the CPU state for executing the handler.
963 */
964 pCtx->rip = offset;
965 pCtx->cs.Sel = sel;
966 pCtx->cs.u64Base = sel << 4;
967 pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
968
969 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
970 return VINF_SUCCESS;
971 }
972
973 /*
974 * Set event injection state.
975 */
976 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
977 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
978 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
979
980 AssertRC(rc);
981 return rc;
982}
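/*
 * Illustrative sketch: composing the VM-entry interruption-information word consumed by
 * hmR0VmxInjectEvent(), here for a #GP hardware exception that pushes an error code.
 * The surrounding call site is hypothetical; the bit layout mirrors the code above.
 */
#if 0
    uint32_t intInfo = X86_XCPT_GP;                                             /* vector 13 */
    intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);                   /* mark valid */
    intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
    intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;                     /* error code follows */

    rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0 /* cbInstr */, 0 /* errCode */);
#endif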
983
984
985/**
986 * Checks for pending guest interrupts and injects them.
987 *
988 * @returns VBox status code.
989 * @param pVM Pointer to the VM.
990 * @param pVCpu Pointer to the VMCPU.
991 * @param pCtx Pointer to the guest CPU context.
992 */
993static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
994{
995 int rc;
996
997 /*
998 * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely).
999 */
1000 if (pVCpu->hwaccm.s.Event.fPending)
1001 {
1002 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo,
1003 pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
1004 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
1005 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
1006 AssertRC(rc);
1007
1008 pVCpu->hwaccm.s.Event.fPending = false;
1009 return VINF_SUCCESS;
1010 }
1011
1012 /*
1013 * If an active trap is already pending, we must forward it first!
1014 */
1015 if (!TRPMHasTrap(pVCpu))
1016 {
1017 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
1018 {
1019 RTGCUINTPTR intInfo;
1020
1021 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
1022
1023 intInfo = X86_XCPT_NMI;
1024 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1025 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1026
1027 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
1028 AssertRC(rc);
1029
1030 return VINF_SUCCESS;
1031 }
1032
1033 /** @todo SMI interrupts. */
1034
1035 /*
1036 * When external interrupts are pending, we should exit the VM when IF is set.
1037 */
1038 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
1039 {
1040 if (!(pCtx->eflags.u32 & X86_EFL_IF))
1041 {
1042 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
1043 {
1044 LogFlow(("Enable irq window exit!\n"));
1045 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
1046 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1047 AssertRC(rc);
1048 }
1049 /* else nothing to do but wait */
1050 }
1051 else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1052 {
1053 uint8_t u8Interrupt;
1054
1055 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
1056 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu,
1057 u8Interrupt, u8Interrupt, rc, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
1058 if (RT_SUCCESS(rc))
1059 {
1060 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
1061 AssertRC(rc);
1062 }
1063 else
1064 {
1065 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
1066 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
1067 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
1068 /* Just continue */
1069 }
1070 }
1071 else
1072 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
1073 }
1074 }
1075
1076#ifdef VBOX_STRICT
1077 if (TRPMHasTrap(pVCpu))
1078 {
1079 uint8_t u8Vector;
1080 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
1081 AssertRC(rc);
1082 }
1083#endif
1084
1085 if ( (pCtx->eflags.u32 & X86_EFL_IF)
1086 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1087 && TRPMHasTrap(pVCpu)
1088 )
1089 {
1090 uint8_t u8Vector;
1091 TRPMEVENT enmType;
1092 RTGCUINTPTR intInfo;
1093 RTGCUINT errCode;
1094
1095 /*
1096 * If a new event is pending, dispatch it now.
1097 */
1098 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
1099 AssertRC(rc);
1100 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
1101 Assert(enmType != TRPM_SOFTWARE_INT);
1102
1103 /*
1104 * Clear the pending trap.
1105 */
1106 rc = TRPMResetTrap(pVCpu);
1107 AssertRC(rc);
1108
1109 intInfo = u8Vector;
1110 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1111
1112 if (enmType == TRPM_TRAP)
1113 {
1114 switch (u8Vector)
1115 {
1116 case X86_XCPT_DF:
1117 case X86_XCPT_TS:
1118 case X86_XCPT_NP:
1119 case X86_XCPT_SS:
1120 case X86_XCPT_GP:
1121 case X86_XCPT_PF:
1122 case X86_XCPT_AC:
1123 {
1124 /* Valid error codes. */
1125 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1126 break;
1127 }
1128
1129 default:
1130 break;
1131 }
1132
1133 if ( u8Vector == X86_XCPT_BP
1134 || u8Vector == X86_XCPT_OF)
1135 {
1136 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1137 }
1138 else
1139 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1140 }
1141 else
1142 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1143
1144 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
1145 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1146 AssertRC(rc);
1147 } /* if (interrupts can be dispatched) */
1148
1149 return VINF_SUCCESS;
1150}
1151
1152
1153/**
1154 * Saves the host state into the VMCS.
1155 *
1156 * @returns VBox status code.
1157 * @param pVM Pointer to the VM.
1158 * @param pVCpu Pointer to the VMCPU.
1159 */
1160VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1161{
1162 int rc = VINF_SUCCESS;
1163 NOREF(pVM);
1164
1165 /*
1166 * Host CPU Context.
1167 */
1168 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
1169 {
1170 RTIDTR idtr;
1171 RTGDTR gdtr;
1172 RTSEL SelTR;
1173 PCX86DESCHC pDesc;
1174 uintptr_t trBase;
1175 RTSEL cs;
1176 RTSEL ss;
1177 uint64_t cr3;
1178
1179 /*
1180 * Control registers.
1181 */
1182 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1183 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1184#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1185 if (VMX_IS_64BIT_HOST_MODE())
1186 {
1187 cr3 = hwaccmR0Get64bitCR3();
1188 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1189 }
1190 else
1191#endif
1192 {
1193 cr3 = ASMGetCR3();
1194 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1195 }
1196 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1197 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1198 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1199 AssertRC(rc);
1200
1201 /*
1202 * Selector registers.
1203 */
1204#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1205 if (VMX_IS_64BIT_HOST_MODE())
1206 {
1207 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1208 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1209 }
1210 else
1211 {
1212 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1213 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1214 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1215 }
1216#else
1217 cs = ASMGetCS();
1218 ss = ASMGetSS();
1219#endif
1220 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1221 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1222 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1223 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1224 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1225 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1226#if HC_ARCH_BITS == 32
1227 if (!VMX_IS_64BIT_HOST_MODE())
1228 {
1229 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1230 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1231 }
1232#endif
1233 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1234 SelTR = ASMGetTR();
1235 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1236 AssertRC(rc);
1237 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1238 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1239 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1240 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1241 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1242 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1243 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1244
1245 /*
1246 * GDTR & IDTR.
1247 */
1248#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1249 if (VMX_IS_64BIT_HOST_MODE())
1250 {
1251 X86XDTR64 gdtr64, idtr64;
1252 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
1253 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1254 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1255 AssertRC(rc);
1256 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1257 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1258 gdtr.cbGdt = gdtr64.cb;
1259 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1260 }
1261 else
1262#endif
1263 {
1264 ASMGetGDTR(&gdtr);
1265 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1266 ASMGetIDTR(&idtr);
1267 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1268 AssertRC(rc);
1269 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1270 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1271 }
1272
1273 /*
1274 * Save the base address of the TR selector.
1275 */
1276 if (SelTR > gdtr.cbGdt)
1277 {
1278 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1279 return VERR_VMX_INVALID_HOST_STATE;
1280 }
1281
1282 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1283#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1284 if (VMX_IS_64BIT_HOST_MODE())
1285 {
1286 uint64_t trBase64 = X86DESC64_BASE((PX86DESC64)pDesc);
1287 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1288 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1289 AssertRC(rc);
1290 }
1291 else
1292#endif
1293 {
1294#if HC_ARCH_BITS == 64
1295 trBase = X86DESC64_BASE(pDesc);
1296#else
1297 trBase = X86DESC_BASE(pDesc);
1298#endif
1299 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1300 AssertRC(rc);
1301 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1302 }
1303
1304 /*
1305 * FS base and GS base.
1306 */
1307#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1308 if (VMX_IS_64BIT_HOST_MODE())
1309 {
1310 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1311 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1312 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1313 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1314 }
1315#endif
1316 AssertRC(rc);
1317
1318 /*
1319 * Sysenter MSRs.
1320 */
1321 /** @todo expensive!! */
1322 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1323 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1324#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1325 if (VMX_IS_64BIT_HOST_MODE())
1326 {
1327 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1328 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1329 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1330 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1331 }
1332 else
1333 {
1334 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1335 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1336 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1337 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1338 }
1339#elif HC_ARCH_BITS == 32
1340 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1341 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1342 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1343 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1344#else
1345 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1346 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1347 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1348 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1349#endif
1350 AssertRC(rc);
1351
1352
1353#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1354 /*
1355 * Store all host MSRs in the VM-Exit load area, so they will be reloaded after
1356 * the world switch back to the host.
1357 */
1358 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR;
1359 unsigned idxMsr = 0;
1360
1361 uint32_t u32HostExtFeatures = ASMCpuId_EDX(0x80000001);
1362 if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
1363 {
1364#if 0
1365 pMsr->u32IndexMSR = MSR_K6_EFER;
1366 pMsr->u32Reserved = 0;
1367# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1368 if (CPUMIsGuestInLongMode(pVCpu))
1369 {
1370 /* Must match the EFER value in our 64 bits switcher. */
1371 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1372 }
1373 else
1374# endif
1375 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1376 pMsr++; idxMsr++;
1377#endif
1378 }
1379
1380# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1381 if (VMX_IS_64BIT_HOST_MODE())
1382 {
1383 pMsr->u32IndexMSR = MSR_K6_STAR;
1384 pMsr->u32Reserved = 0;
1385 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1386 pMsr++; idxMsr++;
1387 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1388 pMsr->u32Reserved = 0;
1389 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1390 pMsr++; idxMsr++;
1391 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1392 pMsr->u32Reserved = 0;
1393 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1394 pMsr++; idxMsr++;
1395
1396 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
1397#if 0
1398 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1399 pMsr->u32Reserved = 0;
1400 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1401 pMsr++; idxMsr++;
1402#endif
1403 }
1404# endif
1405
1406 if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
1407 {
1408 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
1409 pMsr->u32Reserved = 0;
1410 pMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX);
1411 pMsr++; idxMsr++;
1412 }
1413
1414 /** @todo r=ramshankar: check IA32_VMX_MISC bits 27:25 for valid idxMsr
1415 * range. */
1416 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1417 AssertRC(rc);
1418#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1419
1420 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1421 }
1422 return rc;
1423}
1424
1425
1426/**
1427 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1428 * guest operates in PAE mode.
1429 *
1430 * @returns VBox status code.
1431 * @param pVCpu Pointer to the VMCPU.
1432 * @param pCtx Pointer to the guest CPU context.
1433 */
1434static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1435{
1436 if (CPUMIsGuestInPAEModeEx(pCtx))
1437 {
1438 X86PDPE aPdpes[4];
1439 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1440 AssertRCReturn(rc, rc);
1441
1442 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1443 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1444 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1445 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1446 }
1447 return VINF_SUCCESS;
1448}
1449
1450
1451/**
1452 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1453 * guest operates in PAE mode.
1454 *
1455 * @returns VBox status code.
1456 * @param pVCpu Pointer to the VM CPU.
1457 * @param pCtx Pointer to the guest CPU context.
1458 *
1459 * @remarks Tell PGM about CR3 changes before calling this helper.
1460 */
1461static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1462{
1463 if (CPUMIsGuestInPAEModeEx(pCtx))
1464 {
1465 int rc;
1466 X86PDPE aPdpes[4];
1467 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1468 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1469 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1470 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1471
1472 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1473 AssertRCReturn(rc, rc);
1474 }
1475 return VINF_SUCCESS;
1476}
1477
1478
1479/**
1480 * Updates the exception bitmap according to the current CPU state.
1481 *
1482 * @param pVM Pointer to the VM.
1483 * @param pVCpu Pointer to the VMCPU.
1484 * @param pCtx Pointer to the guest CPU context.
1485 */
1486static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1487{
1488 uint32_t u32TrapMask;
1489 Assert(pCtx);
1490
1491 /*
1492 * Set up a mask for intercepting traps.
1493 */
1494 /** @todo Do we really need to always intercept #DB? */
1495 u32TrapMask = RT_BIT(X86_XCPT_DB)
1496 | RT_BIT(X86_XCPT_NM)
1497#ifdef VBOX_ALWAYS_TRAP_PF
1498 | RT_BIT(X86_XCPT_PF)
1499#endif
1500#ifdef VBOX_STRICT
1501 | RT_BIT(X86_XCPT_BP)
1502 | RT_BIT(X86_XCPT_DB)
1503 | RT_BIT(X86_XCPT_DE)
1504 | RT_BIT(X86_XCPT_NM)
1505 | RT_BIT(X86_XCPT_UD)
1506 | RT_BIT(X86_XCPT_NP)
1507 | RT_BIT(X86_XCPT_SS)
1508 | RT_BIT(X86_XCPT_GP)
1509 | RT_BIT(X86_XCPT_MF)
1510#endif
1511 ;
1512
1513 /*
1514 * Without nested paging, #PF must be intercepted to implement shadow paging.
1515 */
1516 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1517 if (!pVM->hwaccm.s.fNestedPaging)
1518 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1519
1520 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
1521 if (!(pCtx->cr0 & X86_CR0_NE))
1522 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1523
1524#ifdef VBOX_STRICT
1525 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1526#endif
1527
1528 /*
1529 * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise).
1530 */
1531 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1532 if ( CPUMIsGuestInRealModeEx(pCtx)
1533 && pVM->hwaccm.s.vmx.pRealModeTSS)
1534 {
1535 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1536 | RT_BIT(X86_XCPT_DB)
1537 | RT_BIT(X86_XCPT_NMI)
1538 | RT_BIT(X86_XCPT_BP)
1539 | RT_BIT(X86_XCPT_OF)
1540 | RT_BIT(X86_XCPT_BR)
1541 | RT_BIT(X86_XCPT_UD)
1542 | RT_BIT(X86_XCPT_DF)
1543 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1544 | RT_BIT(X86_XCPT_TS)
1545 | RT_BIT(X86_XCPT_NP)
1546 | RT_BIT(X86_XCPT_SS)
1547 | RT_BIT(X86_XCPT_GP)
1548 | RT_BIT(X86_XCPT_MF)
1549 | RT_BIT(X86_XCPT_AC)
1550 | RT_BIT(X86_XCPT_MC)
1551 | RT_BIT(X86_XCPT_XF)
1552 ;
1553 }
1554
1555 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1556 AssertRC(rc);
1557}
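/*
 * Illustrative sketch: the exception bitmap is a plain vector-indexed 32-bit mask, so
 * intercepting one more exception (here #UD, vector 6) is just setting its bit and
 * rewriting the VMCS field. The placement of this snippet is hypothetical.
 */
#if 0
    u32TrapMask |= RT_BIT(X86_XCPT_UD);
    rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
    AssertRC(rc);
#endif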
1558
1559
1560/**
1561 * Loads a minimal guest state.
1562 *
1563 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1564 *
1565 * @param pVM Pointer to the VM.
1566 * @param pVCpu Pointer to the VMCPU.
1567 * @param pCtx Pointer to the guest CPU context.
1568 */
1569VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1570{
1571 int rc;
1572 X86EFLAGS eflags;
1573
1574 Assert(!(pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_ALL_GUEST));
1575
1576 /*
1577 * Load EIP, ESP and EFLAGS.
1578 */
1579 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1580 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1581 AssertRC(rc);
1582
1583 /*
1584 * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1.
1585 */
1586 eflags = pCtx->eflags;
1587 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1588 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1589
1590 /*
1591 * Check if real mode emulation using v86 mode.
1592 */
1593 if ( CPUMIsGuestInRealModeEx(pCtx)
1594 && pVM->hwaccm.s.vmx.pRealModeTSS)
1595 {
1596 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1597
1598 eflags.Bits.u1VM = 1;
1599 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1600 }
1601 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1602 AssertRC(rc);
1603}
1604
1605
1606/**
1607 * Loads the guest state.
1608 *
1609 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1610 *
1611 * @returns VBox status code.
1612 * @param pVM Pointer to the VM.
1613 * @param pVCpu Pointer to the VMCPU.
1614 * @param pCtx Pointer to the guest CPU context.
1615 */
1616VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1617{
1618 int rc = VINF_SUCCESS;
1619 RTGCUINTPTR val;
1620
1621 /*
1622 * VMX_VMCS_CTRL_ENTRY_CONTROLS
1623 * Set required bits to one and zero according to the MSR capabilities.
1624 */
1625 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1626
1627 /*
1628 * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR).
1629 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs.
1630 */
1631 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1632
1633 if (CPUMIsGuestInLongModeEx(pCtx))
1634 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1635 /* else: must be zero when AMD64 is not available. */
1636
1637 /*
1638 * Mask away the bits that the CPU doesn't support.
1639 */
1640 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1641 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1642 AssertRC(rc);
1643
1644 /*
1645 * VMX_VMCS_CTRL_EXIT_CONTROLS
1646 * Set required bits to one and zero according to the MSR capabilities.
1647 */
1648 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1649
1650 /*
1651 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR).
1652 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs.
1653 */
1654 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1655
1656#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1657 if (VMX_IS_64BIT_HOST_MODE())
1658 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1659 /* else: must be zero when AMD64 is not available. */
1660#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1661 if (CPUMIsGuestInLongModeEx(pCtx))
1662 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1663 else
1664 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1665#endif
1666 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1667
1668 /*
1669 * Don't acknowledge external interrupts on VM-exit.
1670 */
1671 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1672 AssertRC(rc);
1673
1674 /*
1675 * Guest CPU context: ES, CS, SS, DS, FS, GS.
1676 */
1677 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1678 {
1679 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1680 {
1681 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1682 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1683 {
1684 /*
1685 * Correct weird requirements for switching to protected mode.
1686 */
1687 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1688 && enmGuestMode >= PGMMODE_PROTECTED)
1689 {
1690#ifdef VBOX_WITH_REM
1691 /*
1692 * Flush the recompiler code cache as the guest may well rewrite code that it will
1693 * later execute in real mode (OpenBSD 4.0 is one such example).
1694 */
1695 REMFlushTBs(pVM);
1696#endif
1697
1698 /*
1699 * DPL of all hidden selector registers must match the current CPL (0).
1700 */
1701 pCtx->cs.Attr.n.u2Dpl = 0;
1702 pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1703
1704 pCtx->ds.Attr.n.u2Dpl = 0;
1705 pCtx->es.Attr.n.u2Dpl = 0;
1706 pCtx->fs.Attr.n.u2Dpl = 0;
1707 pCtx->gs.Attr.n.u2Dpl = 0;
1708 pCtx->ss.Attr.n.u2Dpl = 0;
1709 }
1710 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1711 }
1712 else if ( CPUMIsGuestInRealModeEx(pCtx)
1713 && pCtx->cs.u64Base == 0xffff0000)
1714 {
1715 /* VT-x will otherwise fail with an invalid guest state... (CPU state after a reset) */
1716 pCtx->cs.u64Base = 0xf0000;
1717 pCtx->cs.Sel = 0xf000;
1718 }
1719 }
1720
1721 VMX_WRITE_SELREG(ES, es);
1722 AssertRC(rc);
1723
1724 VMX_WRITE_SELREG(CS, cs);
1725 AssertRC(rc);
1726
1727 VMX_WRITE_SELREG(SS, ss);
1728 AssertRC(rc);
1729
1730 VMX_WRITE_SELREG(DS, ds);
1731 AssertRC(rc);
1732
1733 VMX_WRITE_SELREG(FS, fs);
1734 AssertRC(rc);
1735
1736 VMX_WRITE_SELREG(GS, gs);
1737 AssertRC(rc);
1738 }
1739
1740 /*
1741 * Guest CPU context: LDTR.
1742 */
1743 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1744 {
1745 if (pCtx->ldtr.Sel == 0)
1746 {
1747 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1748 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1749 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1750 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1751 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1752 }
1753 else
1754 {
1755 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr.Sel);
1756 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit);
1757 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtr.u64Base);
1758 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtr.Attr.u);
1759 }
1760 AssertRC(rc);
1761 }
1762
1763 /*
1764 * Guest CPU context: TR.
1765 */
1766 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1767 {
1768 /*
1769 * Real mode emulation using v86 mode with CR4.VME (interrupt redirection
1770 * using the int bitmap in the TSS).
1771 */
1772 if ( CPUMIsGuestInRealModeEx(pCtx)
1773 && pVM->hwaccm.s.vmx.pRealModeTSS)
1774 {
1775 RTGCPHYS GCPhys;
1776
1777 /* We convert it here every time as PCI regions could be reconfigured. */
1778 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1779 AssertRC(rc);
1780
1781 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1782 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1783 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1784
1785 X86DESCATTR attr;
1786
1787 attr.u = 0;
1788 attr.n.u1Present = 1;
1789 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1790 val = attr.u;
1791 }
1792 else
1793 {
1794 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr.Sel);
1795 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->tr.u32Limit);
1796 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->tr.u64Base);
1797
1798 val = pCtx->tr.Attr.u;
1799
1800 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
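 /* VM-entry guest-state checks require TR to reference a busy TSS: type 3 (16-bit busy)
  * or type 11 (32/64-bit busy); anything else makes VM-entry fail. */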
1801 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1802 {
1803 if (val & 0xf)
1804 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1805 else
1806 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1807 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1808 }
1809 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY,
1810 ("%#x\n", val));
1811 }
1812 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1813 AssertRC(rc);
1814 }
1815
1816 /*
1817 * Guest CPU context: GDTR.
1818 */
1819 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1820 {
1821 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1822 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1823 AssertRC(rc);
1824 }
1825
1826 /*
1827 * Guest CPU context: IDTR.
1828 */
1829 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1830 {
1831 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1832 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1833 AssertRC(rc);
1834 }
1835
1836 /*
1837 * Sysenter MSRs.
1838 */
1839 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
1840 {
1841 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1842 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1843 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1844 AssertRC(rc);
1845 }
1846
1847 /*
1848 * Guest CPU context: Control registers.
1849 */
1850 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1851 {
1852 val = pCtx->cr0;
1853 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1854 Log2(("Guest CR0-shadow %08x\n", val));
1855 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1856 {
1857 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1858 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1859 }
1860 else
1861 {
1862 /** @todo check if we support the old style mess correctly. */
1863 if (!(val & X86_CR0_NE))
1864 Log(("Forcing X86_CR0_NE!!!\n"));
1865
1866 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1867 }
1868 /* Protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1869 if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1870 val |= X86_CR0_PE | X86_CR0_PG;
1871
1872 if (pVM->hwaccm.s.fNestedPaging)
1873 {
1874 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1875 {
1876 /* Disable CR3 read/write monitoring as we don't need it for EPT. */
1877 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1878 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1879 }
1880 else
1881 {
1882 /* Reenable CR3 read/write monitoring as our identity mapped page table is active. */
1883 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1884 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1885 }
1886 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1887 AssertRC(rc);
1888 }
1889 else
1890 {
1891 /* Note: We must also set CR0.WP as we rely on write-protecting various pages for which supervisor writes must be caught. */
1892 val |= X86_CR0_WP;
1893 }
1894
1895 /* Always enable caching. */
1896 val &= ~(X86_CR0_CD|X86_CR0_NW);
1897
1898 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1899 Log2(("Guest CR0 %08x\n", val));
1900
1901 /*
1902 * CR0 flags owned by the host; if the guest attempts to change them, the VM will exit.
1903 */
1904 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1905 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1906 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1907 | X86_CR0_CD /* Bit not restored during VM-exit! */
1908 | X86_CR0_NW /* Bit not restored during VM-exit! */
1909 | X86_CR0_NE;
1910
1911 /*
1912 * When the guest's FPU state is active, then we no longer care about the FPU related bits.
1913 */
1914 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1915 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1916
1917 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1918
1919 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1920 Log2(("Guest CR0-mask %08x\n", val));
1921 AssertRC(rc);
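 /* Guest/host mask semantics: for every CR0 bit set in the mask the guest reads the
  * read-shadow value, and a MOV to CR0 that tries to give such a bit a value different
  * from the read shadow causes a CR-access VM-exit. */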
1922 }
1923
1924 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1925 {
1926 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1927 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1928 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1929 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1930
1931 if (!pVM->hwaccm.s.fNestedPaging)
1932 {
1933 switch (pVCpu->hwaccm.s.enmShadowMode)
1934 {
1935 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1936 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1937 case PGMMODE_32_BIT: /* 32-bit paging. */
1938 val &= ~X86_CR4_PAE;
1939 break;
1940
1941 case PGMMODE_PAE: /* PAE paging. */
1942 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1943 /** Must use PAE paging as we could use physical memory > 4 GB */
1944 val |= X86_CR4_PAE;
1945 break;
1946
1947 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1948 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1949#ifdef VBOX_ENABLE_64_BITS_GUESTS
1950 break;
1951#else
1952 AssertFailed();
1953 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1954#endif
1955 default: /* shut up gcc */
1956 AssertFailed();
1957 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1958 }
1959 }
1960 else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1961 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1962 {
1963 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1964 val |= X86_CR4_PSE;
1965 /* Our identity mapping is a 32-bit page directory. */
1966 val &= ~X86_CR4_PAE;
1967 }
1968
1969 /*
1970 * Turn off VME if we're in emulated real mode.
1971 */
1972 if ( CPUMIsGuestInRealModeEx(pCtx)
1973 && pVM->hwaccm.s.vmx.pRealModeTSS)
1974 {
1975 val &= ~X86_CR4_VME;
1976 }
1977
1978 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1979 Log2(("Guest CR4 %08x\n", val));
1980
1981 /*
1982 * CR4 flags owned by the host; if the guest attempts to change them, the VM will exit.
1983 */
1984 val = 0
1985 | X86_CR4_VME
1986 | X86_CR4_PAE
1987 | X86_CR4_PGE
1988 | X86_CR4_PSE
1989 | X86_CR4_VMXE;
1990 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1991
1992 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1993 Log2(("Guest CR4-mask %08x\n", val));
1994 AssertRC(rc);
1995 }
1996
1997#if 0
1998 /* Enable single stepping if requested and CPU supports it. */
1999 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
2000 if (DBGFIsStepping(pVCpu))
2001 {
2002 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
2003 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2004 AssertRC(rc);
2005 }
2006#endif
2007
2008 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
2009 {
2010 if (pVM->hwaccm.s.fNestedPaging)
2011 {
2012 Assert(PGMGetHyperCR3(pVCpu));
2013 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
2014
2015 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
2016 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
2017 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
2018 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
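 /* EPTP layout for reference: bits 2:0 = memory type (6 = write-back), bits 5:3 =
  * page-walk length minus one (3 for a 4-level structure), bit 6 = enable EPT
  * accessed/dirty flags where supported, bits 12 and up = physical address of the PML4 table. */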
2019
2020 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
2021 AssertRC(rc);
2022
2023 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
2024 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
2025 {
2026 RTGCPHYS GCPhys;
2027
2028 /* We convert it here every time as PCI regions could be reconfigured. */
2029 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
2030 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable));
2031
2032 /*
2033 * We use our identity mapping page table here as we need to map guest virtual to
2034 * guest physical addresses; EPT will take care of the translation to host physical addresses.
2035 */
2036 val = GCPhys;
2037 }
2038 else
2039 {
2040 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
2041 val = pCtx->cr3;
2042 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
2043 AssertRCReturn(rc, rc);
2044 }
2045 }
2046 else
2047 {
2048 val = PGMGetHyperCR3(pVCpu);
2049 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
2050 }
2051
2052 /* Save our shadow CR3 register. */
2053 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
2054 AssertRC(rc);
2055 }
2056
2057 /*
2058 * Guest CPU context: Debug registers.
2059 */
2060 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
2061 {
2062 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
2063 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
2064
2065 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2066 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2067 pCtx->dr[7] |= 0x400; /* must be one */
2068
2069 /* Resync DR7 */
2070 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
2071 AssertRC(rc);
2072
2073#ifdef DEBUG
2074 /* Sync the hypervisor debug state now if any breakpoint is armed. */
2075 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
2076 && !CPUMIsHyperDebugStateActive(pVCpu)
2077 && !DBGFIsStepping(pVCpu))
2078 {
2079 /* Save the host and load the hypervisor debug state. */
2080 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2081 AssertRC(rc);
2082
2083 /* DRx intercepts remain enabled. */
2084
2085 /* Override dr7 with the hypervisor value. */
2086 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
2087 AssertRC(rc);
2088 }
2089 else
2090#endif
2091 /* Sync the debug state now if any breakpoint is armed. */
2092 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
2093 && !CPUMIsGuestDebugStateActive(pVCpu)
2094 && !DBGFIsStepping(pVCpu))
2095 {
2096 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
2097
2098 /* Disable DRx move intercepts. */
2099 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2100 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2101 AssertRC(rc);
2102
2103 /* Save the host and load the guest debug state. */
2104 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2105 AssertRC(rc);
2106 }
2107
2108 /* IA32_DEBUGCTL MSR. */
2109 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
2110 AssertRC(rc);
2111
2112 /** @todo do we really ever need this? */
2113 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
2114 AssertRC(rc);
2115 }
2116
2117 /*
2118 * 64-bit guest mode.
2119 */
2120 if (CPUMIsGuestInLongModeEx(pCtx))
2121 {
2122#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
2123 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2124#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2125 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
2126#else
2127# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2128 if (!pVM->hwaccm.s.fAllow64BitGuests)
2129 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2130# endif
2131 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
2132#endif
2133 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
2134 {
2135 /* Update these as wrmsr might have changed them. */
2136 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fs.u64Base);
2137 AssertRC(rc);
2138 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gs.u64Base);
2139 AssertRC(rc);
2140 }
2141 }
2142 else
2143 {
2144 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
2145 }
2146
2147 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
2148
2149#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2150 /*
2151 * Store all guest MSRs in the VM-entry load area, so they will be loaded
2152 * during VM-entry and saved into the VM-exit MSR-store area during VM-exit.
2153 */
2154 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2155 unsigned idxMsr = 0;
2156
2157 uint32_t u32GstExtFeatures;
2158 uint32_t u32Temp;
2159 CPUMGetGuestCpuId(pVCpu, 0x80000001, &u32Temp, &u32Temp, &u32Temp, &u32GstExtFeatures);
2160
2161 if (u32GstExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2162 {
2163#if 0
2164 pMsr->u32IndexMSR = MSR_K6_EFER;
2165 pMsr->u32Reserved = 0;
2166 pMsr->u64Value = pCtx->msrEFER;
2167 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
2168 if (!CPUMIsGuestInLongModeEx(pCtx))
2169 pMsr->u64Value &= ~(MSR_K6_EFER_LMA | MSR_K6_EFER_LME);
2170 pMsr++; idxMsr++;
2171#endif
2172
2173 if (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)
2174 {
2175 pMsr->u32IndexMSR = MSR_K8_LSTAR;
2176 pMsr->u32Reserved = 0;
2177 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
2178 pMsr++; idxMsr++;
2179 pMsr->u32IndexMSR = MSR_K6_STAR;
2180 pMsr->u32Reserved = 0;
2181 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
2182 pMsr++; idxMsr++;
2183 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
2184 pMsr->u32Reserved = 0;
2185 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
2186 pMsr++; idxMsr++;
2187
2188 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2189#if 0
2190 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
2191 pMsr->u32Reserved = 0;
2192 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
2193 pMsr++; idxMsr++;
2194#endif
2195 }
2196 }
2197
2198 if ( pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP
2199 && (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP))
2200 {
2201 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
2202 pMsr->u32Reserved = 0;
2203 rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pMsr->u64Value);
2204 AssertRC(rc);
2205 pMsr++; idxMsr++;
2206 }
2207
2208 pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr;
2209
2210 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
2211 AssertRC(rc);
2212
2213 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
2214 AssertRC(rc);
2215#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2216
2217 bool fOffsettedTsc;
2218 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
2219 {
2220 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
2221
2222 /* Make sure the returned values have sane upper and lower boundaries. */
2223 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
2224
2225 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
2226 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
2227
2228 cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift;
2229 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
2230 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
2231 AssertRC(rc);
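 /* Back-of-the-envelope example (illustrative numbers only): with a 2.8 GHz TSC and a
  * preemption timer shift of 5, the 1/64 second upper clamp is ~43.7 million TSC ticks,
  * which programs roughly 43.7M >> 5 = ~1.37 million preemption timer ticks. */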
2232 }
2233 else
2234 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
2235
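 /* With RDTSC exiting disabled and TSC offsetting enabled, the CPU returns
  * host TSC + VMX_VMCS_CTRL_TSC_OFFSET_FULL for guest RDTSC/RDTSCP, so the check below
  * ensures the guest never sees its TSC move backwards past the last value it was given. */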
2236 if (fOffsettedTsc)
2237 {
2238 uint64_t u64CurTSC = ASMReadTSC();
2239 if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
2240 {
2241 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
2242 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset);
2243 AssertRC(rc);
2244
2245 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2246 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2247 AssertRC(rc);
2248 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
2249 }
2250 else
2251 {
2252 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
2253 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
2254 pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset,
2255 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset,
2256 TMCpuTickGet(pVCpu)));
2257 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2258 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2259 AssertRC(rc);
2260 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
2261 }
2262 }
2263 else
2264 {
2265 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2266 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2267 AssertRC(rc);
2268 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
2269 }
2270
2271 /* Done with the major changes */
2272 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
2273
2274 /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */
2275 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2276 return rc;
2277}
2278
2279
2280/**
2281 * Syncs back the guest state from VMCS.
2282 *
2283 * @returns VBox status code.
2284 * @param pVM Pointer to the VM.
2285 * @param pVCpu Pointer to the VMCPU.
2286 * @param pCtx Pointer to the guest CPU context.
2287 */
2288DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2289{
2290 RTGCUINTREG val, valShadow;
2291 RTGCUINTPTR uInterruptState;
2292 int rc;
2293
2294 /* First sync back EIP, ESP, and EFLAGS. */
2295 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
2296 AssertRC(rc);
2297 pCtx->rip = val;
2298 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
2299 AssertRC(rc);
2300 pCtx->rsp = val;
2301 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2302 AssertRC(rc);
2303 pCtx->eflags.u32 = val;
2304
2305 /* Take care of instruction fusing (sti, mov ss) */
2306 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2307 uInterruptState = val;
2308 if (uInterruptState != 0)
2309 {
2310 Assert(uInterruptState <= 2); /* only sti & mov ss */
2311 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2312 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2313 }
2314 else
2315 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2316
2317 /* Control registers. */
2318 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2319 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
2320 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
2321 CPUMSetGuestCR0(pVCpu, val);
2322
2323 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2324 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
2325 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
2326 CPUMSetGuestCR4(pVCpu, val);
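 /* For both CR0 and CR4 the guest-visible value is reassembled as
  * (read shadow & mask) | (VMCS value & ~mask): host-owned bits come from the shadow,
  * guest-owned bits from the real register. */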
2327
2328 /*
2329 * No reason to sync back the CRx registers; the guest can't change them without causing
2330 * an exit, except in the nested paging case where CR3 & CR4 can change behind our back.
2331 */
2332 if ( pVM->hwaccm.s.fNestedPaging
2333 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2334 {
2335 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
2336
2337 /* Can be updated behind our back in the nested paging case. */
2338 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2339
2340 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
2341
2342 if (val != pCtx->cr3)
2343 {
2344 CPUMSetGuestCR3(pVCpu, val);
2345 PGMUpdateCR3(pVCpu, val);
2346 }
2347 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2348 AssertRCReturn(rc, rc);
2349 }
2350
2351 /* Sync back DR7. */
2352 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
2353 pCtx->dr[7] = val;
2354
2355 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2356 VMX_READ_SELREG(ES, es);
2357 VMX_READ_SELREG(SS, ss);
2358 VMX_READ_SELREG(CS, cs);
2359 VMX_READ_SELREG(DS, ds);
2360 VMX_READ_SELREG(FS, fs);
2361 VMX_READ_SELREG(GS, gs);
2362
2363 /* System MSRs */
2364 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2365 pCtx->SysEnter.cs = val;
2366 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
2367 pCtx->SysEnter.eip = val;
2368 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
2369 pCtx->SysEnter.esp = val;
2370
2371 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2372 VMX_READ_SELREG(LDTR, ldtr);
2373
2374 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2375 pCtx->gdtr.cbGdt = val;
2376 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
2377 pCtx->gdtr.pGdt = val;
2378
2379 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2380 pCtx->idtr.cbIdt = val;
2381 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
2382 pCtx->idtr.pIdt = val;
2383
2384 /* Real mode emulation using v86 mode. */
2385 if ( CPUMIsGuestInRealModeEx(pCtx)
2386 && pVM->hwaccm.s.vmx.pRealModeTSS)
2387 {
2388 /* Hide our emulation flags */
2389 pCtx->eflags.Bits.u1VM = 0;
2390
2391 /* Restore original IOPL setting as we always use 0. */
2392 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2393
2394 /* Force a TR resync every time in case we switch modes. */
2395 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
2396 }
2397 else
2398 {
2399 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2400 VMX_READ_SELREG(TR, tr);
2401 }
2402
2403#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2404 /*
2405 * Save the possibly changed MSRs that we automatically restore and save during a world switch.
2406 */
2407 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++)
2408 {
2409 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2410 pMsr += i;
2411
2412 switch (pMsr->u32IndexMSR)
2413 {
2414 case MSR_K8_LSTAR:
2415 pCtx->msrLSTAR = pMsr->u64Value;
2416 break;
2417 case MSR_K6_STAR:
2418 pCtx->msrSTAR = pMsr->u64Value;
2419 break;
2420 case MSR_K8_SF_MASK:
2421 pCtx->msrSFMASK = pMsr->u64Value;
2422 break;
2423 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2424#if 0
2425 case MSR_K8_KERNEL_GS_BASE:
2426 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2427 break;
2428#endif
2429 case MSR_K8_TSC_AUX:
2430 CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value);
2431 break;
2432#if 0
2433 case MSR_K6_EFER:
2434 /* EFER can't be changed without causing a VM-exit. */
2435 /* Assert(pCtx->msrEFER == pMsr->u64Value); */
2436 break;
2437#endif
2438 default:
2439 AssertFailed();
2440 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2441 }
2442 }
2443#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2444 return VINF_SUCCESS;
2445}
2446
2447
2448/**
2449 * Dummy placeholder for TLB flush handling before VM-entry. Used in the case
2450 * where neither EPT nor VPID is supported by the CPU.
2451 *
2452 * @param pVM Pointer to the VM.
2453 * @param pVCpu Pointer to the VMCPU.
2454 */
2455static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2456{
2457 NOREF(pVM);
2458 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2459 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2460 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2461 return;
2462}
2463
2464
2465/**
2466 * Sets up the tagged TLB for EPT+VPID.
2467 *
2468 * @param pVM Pointer to the VM.
2469 * @param pVCpu Pointer to the VMCPU.
2470 */
2471static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2472{
2473 PHMGLOBLCPUINFO pCpu;
2474
2475 Assert(pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID);
2476
2477 pCpu = HWACCMR0GetCurrentCpu();
2478
2479 /*
2480 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2481 * This can happen both for start & resume due to long jumps back to ring-3.
2482 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2483 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2484 */
2485 bool fNewASID = false;
2486 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2487 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2488 {
2489 pVCpu->hwaccm.s.fForceTLBFlush = true;
2490 fNewASID = true;
2491 }
2492
2493 /*
2494 * Check for explicit TLB shootdowns.
2495 */
2496 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2497 pVCpu->hwaccm.s.fForceTLBFlush = true;
2498
2499 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2500
2501 if (pVCpu->hwaccm.s.fForceTLBFlush)
2502 {
2503 if (fNewASID)
2504 {
2505 ++pCpu->uCurrentASID;
2506 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2507 {
2508 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2509 pCpu->cTLBFlushes++;
2510 pCpu->fFlushASIDBeforeUse = true;
2511 }
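 /* The ASID space on this host CPU has wrapped: restart at 1 (0 is reserved for the host),
  * bump the flush generation so every VCPU picks up a fresh ASID, and remember to flush
  * before recycled ASIDs are used again. */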
2512
2513 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2514 if (pCpu->fFlushASIDBeforeUse)
2515 {
2516 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2517#ifdef VBOX_WITH_STATISTICS
2518 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2519#endif
2520 }
2521 }
2522 else
2523 {
2524 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
2525 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2526 else
2527 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2528
2529#ifdef VBOX_WITH_STATISTICS
2530 /*
2531 * This is not terribly accurate (i.e. we don't have any StatFlushEPT counter). We currently count these
2532 * as ASID flushes too, better than including them under StatFlushTLBWorldSwitch.
2533 */
2534 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2535#endif
2536 }
2537
2538 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2539 pVCpu->hwaccm.s.fForceTLBFlush = false;
2540 }
2541 else
2542 {
2543 AssertMsg(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID,
2544 ("hwaccm->uCurrentASID=%lu hwaccm->cTLBFlushes=%lu cpu->uCurrentASID=%lu cpu->cTLBFlushes=%lu\n",
2545 pVCpu->hwaccm.s.uCurrentASID, pVCpu->hwaccm.s.cTLBFlushes,
2546 pCpu->uCurrentASID, pCpu->cTLBFlushes));
2547
2548 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2549 * not be executed. See hwaccmQueueInvlPage() where it is commented
2550 * out. Support individual entry flushing someday. */
2551 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2552 {
2553 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2554
2555 /*
2556 * Flush individual guest TLB entries using VPID, or flush as little as possible with EPT,
2557 * depending on what the CPU supports.
2558 */
2559 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2560 {
2561 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2562 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2563 }
2564 else
2565 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2566 }
2567 else
2568 {
2569#ifdef VBOX_WITH_STATISTICS
2570 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2571#endif
2572 }
2573 }
2574 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2575 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2576
2577 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes,
2578 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2579 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID,
2580 ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2581 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID,
2582 ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2583
2584 /* Update VMCS with the VPID. */
2585 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2586 AssertRC(rc);
2587}
2588
2589
2590/**
2591 * Sets up the tagged TLB for EPT only.
2592 *
2594 * @param pVM Pointer to the VM.
2595 * @param pVCpu Pointer to the VMCPU.
2596 */
2597static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2598{
2599 PHMGLOBLCPUINFO pCpu;
2600
2601 Assert(pVM->hwaccm.s.fNestedPaging);
2602 Assert(!pVM->hwaccm.s.vmx.fVPID);
2603
2604 pCpu = HWACCMR0GetCurrentCpu();
2605
2606 /*
2607 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2608 * This can happen both for start & resume due to long jumps back to ring-3.
2609 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
2610 */
2611 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2612 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2613 {
2614 pVCpu->hwaccm.s.fForceTLBFlush = true;
2615 }
2616
2617 /*
2618 * Check for explicit TLB shootdown flushes.
2619 */
2620 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2621 pVCpu->hwaccm.s.fForceTLBFlush = true;
2622
2623 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2624 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2625
2626 if (pVCpu->hwaccm.s.fForceTLBFlush)
2627 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2628 else
2629 {
2630 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2631 * not be executed. See hwaccmQueueInvlPage() where it is commented
2632 * out. Support individual entry flushing someday. */
2633 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2634 {
2635 /*
2636 * We cannot flush individual entries without VPID support. Flush using EPT.
2637 */
2638 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2639 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2640 }
2641 }
2642 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2643 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2644
2645#ifdef VBOX_WITH_STATISTICS
2646 if (pVCpu->hwaccm.s.fForceTLBFlush)
2647 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2648 else
2649 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2650#endif
2651}
2652
2653
2654/**
2655 * Sets up the tagged TLB for VPID.
2656 *
2658 * @param pVM Pointer to the VM.
2659 * @param pVCpu Pointer to the VMCPU.
2660 */
2661static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2662{
2663 PHMGLOBLCPUINFO pCpu;
2664
2665 Assert(pVM->hwaccm.s.vmx.fVPID);
2666 Assert(!pVM->hwaccm.s.fNestedPaging);
2667
2668 pCpu = HWACCMR0GetCurrentCpu();
2669
2670 /*
2671 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2672 * This can happen both for start & resume due to long jumps back to ring-3.
2673 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2674 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2675 */
2676 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2677 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2678 {
2679 /* Force a TLB flush on VM entry. */
2680 pVCpu->hwaccm.s.fForceTLBFlush = true;
2681 }
2682
2683 /*
2684 * Check for explicit TLB shootdown flushes.
2685 */
2686 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2687 pVCpu->hwaccm.s.fForceTLBFlush = true;
2688
2689 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2690
2691 if (pVCpu->hwaccm.s.fForceTLBFlush)
2692 {
2693 ++pCpu->uCurrentASID;
2694 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2695 {
2696 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2697 pCpu->cTLBFlushes++;
2698 pCpu->fFlushASIDBeforeUse = true;
2699 }
2700 else
2701 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2702
2703 pVCpu->hwaccm.s.fForceTLBFlush = false;
2704 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2705 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2706 if (pCpu->fFlushASIDBeforeUse)
2707 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2708 }
2709 else
2710 {
2711 AssertMsg(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID,
2712 ("hwaccm->uCurrentASID=%lu hwaccm->cTLBFlushes=%lu cpu->uCurrentASID=%lu cpu->cTLBFlushes=%lu\n",
2713 pVCpu->hwaccm.s.uCurrentASID, pVCpu->hwaccm.s.cTLBFlushes,
2714 pCpu->uCurrentASID, pCpu->cTLBFlushes));
2715
2716 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2717 * not be executed. See hwaccmQueueInvlPage() where it is commented
2718 * out. Support individual entry flushing someday. */
2719 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2720 {
2721 /*
2722 * Flush individual guest TLB entries using VPID, or flush as little as possible with EPT,
2723 * depending on what the CPU supports.
2724 */
2725 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2726 {
2727 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2728 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2729 }
2730 else
2731 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2732 }
2733 }
2734 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2735 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2736
2737 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes,
2738 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2739 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID,
2740 ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2741 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID,
2742 ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2743
2744 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2745 AssertRC(rc);
2746
2747# ifdef VBOX_WITH_STATISTICS
2748 if (pVCpu->hwaccm.s.fForceTLBFlush)
2749 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2750 else
2751 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2752# endif
2753}
2754
2755
2756/**
2757 * Runs guest code in a VT-x VM.
2758 *
2759 * @returns VBox status code.
2760 * @param pVM Pointer to the VM.
2761 * @param pVCpu Pointer to the VMCPU.
2762 * @param pCtx Pointer to the guest CPU context.
2763 */
2764VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2765{
2766 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
2767 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1);
2768 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2);
2769
2770 VBOXSTRICTRC rc = VINF_SUCCESS;
2771 int rc2;
2772 RTGCUINTREG val;
2773 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2774 RTGCUINTREG instrError, cbInstr;
2775 RTGCUINTPTR exitQualification = 0;
2776 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2777 RTGCUINTPTR errCode, instrInfo;
2778 bool fSetupTPRCaching = false;
2779 uint64_t u64OldLSTAR = 0;
2780 uint8_t u8LastTPR = 0;
2781 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2782 unsigned cResume = 0;
2783#ifdef VBOX_STRICT
2784 RTCPUID idCpuCheck;
2785 bool fWasInLongMode = false;
2786#endif
2787#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2788 uint64_t u64LastTime = RTTimeMilliTS();
2789#endif
2790
2791 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2792 || (pVCpu->hwaccm.s.vmx.pbVAPIC && pVM->hwaccm.s.vmx.pAPIC));
2793
2794 /*
2795 * Check if we need to use TPR shadowing.
2796 */
2797 if ( CPUMIsGuestInLongModeEx(pCtx)
2798 || ( (( pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2799 || pVM->hwaccm.s.fTRPPatchingAllowed)
2800 && pVM->hwaccm.s.fHasIoApic)
2801 )
2802 {
2803 fSetupTPRCaching = true;
2804 }
2805
2806 Log2(("\nE"));
2807
2808 /* This is not ideal, but if we don't clear the event injection in the VMCS right here,
2809 * we may end up injecting some stale event into a VM, including injecting an event that
2810 * originated before a VM reset *after* the VM has been reset. See @bugref{6220}.
2811 */
2812 VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, 0);
2813
2814#ifdef VBOX_STRICT
2815 {
2816 RTCCUINTREG val2;
2817
2818 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2);
2819 AssertRC(rc2);
2820 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2821
2822 /* allowed zero */
2823 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2824 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2825
2826 /* allowed one */
2827 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2828 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2829
2830 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2);
2831 AssertRC(rc2);
2832 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2833
2834 /*
2835 * These controls must be set according to the MSR, but may legitimately be cleared in the VMCS when nested paging is used; pretend they are set for the checks below.
2836 */
2837 if (pVM->hwaccm.s.fNestedPaging)
2838 {
2839 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2840 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2841 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2842 }
2843
2844 /* allowed zero */
2845 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2846 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2847
2848 /* allowed one */
2849 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2850 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2851
2852 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2);
2853 AssertRC(rc2);
2854 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2855
2856 /* allowed zero */
2857 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
2858 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2859
2860 /* allowed one */
2861 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2862 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2863
2864 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2);
2865 AssertRC(rc2);
2866 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2867
2868 /* allowed zero */
2869 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2870 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2871
2872 /* allowed one */
2873 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2874 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2875 }
2876 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2877#endif /* VBOX_STRICT */
2878
2879#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2880 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2881#endif
2882
2883 /*
2884 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
2885 */
2886ResumeExecution:
2887 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry))
2888 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x);
2889 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2890 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2891 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2892 Assert(!HWACCMR0SuspendPending());
2893 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2894 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2895
2896 /*
2897 * Safety precaution; looping for too long here can have a very bad effect on the host.
2898 */
2899 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
2900 {
2901 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2902 rc = VINF_EM_RAW_INTERRUPT;
2903 goto end;
2904 }
2905
2906 /*
2907 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
2908 */
2909 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2910 {
2911 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2912 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2913 {
2914 /*
2915 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2916 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2917 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
2918 * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
2919 */
2920 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2921 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2922 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2923 AssertRC(rc2);
2924 }
2925 }
2926 else
2927 {
2928 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2929 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2930 AssertRC(rc2);
2931 }
2932
2933#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2934 if (RT_UNLIKELY((cResume & 0xf) == 0))
2935 {
2936 uint64_t u64CurTime = RTTimeMilliTS();
2937
2938 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2939 {
2940 u64LastTime = u64CurTime;
2941 TMTimerPollVoid(pVM, pVCpu);
2942 }
2943 }
2944#endif
2945
2946 /*
2947 * Check for pending actions that force us to go back to ring-3.
2948 */
2949 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2950 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2951 {
2952 /* Check if a sync operation is pending. */
2953 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2954 {
2955 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2956 if (rc != VINF_SUCCESS)
2957 {
2958 AssertRC(VBOXSTRICTRC_VAL(rc));
2959 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2960 goto end;
2961 }
2962 }
2963
2964#ifdef DEBUG
2965 /* Intercept X86_XCPT_DB if stepping is enabled */
2966 if (!DBGFIsStepping(pVCpu))
2967#endif
2968 {
2969 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2970 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2971 {
2972 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2973 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2974 goto end;
2975 }
2976 }
2977
2978 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2979 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
2980 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
2981 {
2982 rc = VINF_EM_PENDING_REQUEST;
2983 goto end;
2984 }
2985
2986 /* Check if a pgm pool flush is in progress. */
2987 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2988 {
2989 rc = VINF_PGM_POOL_FLUSH_PENDING;
2990 goto end;
2991 }
2992
2993 /* Check if DMA work is pending (2nd+ run). */
2994 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
2995 {
2996 rc = VINF_EM_RAW_TO_R3;
2997 goto end;
2998 }
2999 }
3000
3001#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3002 /*
3003 * Exit to ring-3 if preemption or other work is pending.
3004 *
3005 * Interrupts are disabled before the call to make sure we don't miss any interrupt
3006 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
3007 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
3008 *
3009 * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
3010 * shootdowns rely on this.
3011 */
3012 uOldEFlags = ASMIntDisableFlags();
3013 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
3014 {
3015 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
3016 rc = VINF_EM_RAW_INTERRUPT;
3017 goto end;
3018 }
3019 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3020#endif
3021
3022 /*
3023 * When external interrupts are pending, we should exit the VM when IF is set.
3024 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!
3025 */
3026 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
3027 if (RT_FAILURE(rc))
3028 goto end;
3029
3030 /** @todo check timers?? */
3031
3032 /*
3033 * TPR caching using CR8 is only available in 64-bit mode.
3034 * Note: AMD CPUs can also expose CR8 in 32-bit mode (X86_CPUID_AMD_FEATURE_ECX_CR8L), but Intel CPUs appear to lack this.
3035 * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (no longer true).
3036 */
3037 /** @todo query and update the TPR only when it could have been changed (mmio
3038 * access & wrmsr (x2apic)). */
3039 if (fSetupTPRCaching)
3040 {
3041 /* TPR caching in CR8 */
3042 bool fPending;
3043
3044 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
3045 AssertRC(rc2);
3046 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
3047 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = u8LastTPR;
3048
3049 /*
3050 * Two options here:
3051 * - external interrupt pending, but masked by the TPR value.
3052 * -> a CR8 update that lowers the current TPR value should cause an exit.
3053 * - no pending interrupts
3054 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
3055 */
3056
3057 /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
3058 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0);
3059 AssertRC(VBOXSTRICTRC_VAL(rc));
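 /* With TPR shadowing active the CPU raises a TPR-below-threshold VM-exit once the guest
  * lowers its TPR (via CR8 or the virtual-APIC page) below this value, giving a masked
  * pending interrupt another chance to be delivered. */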
3060
3061 if (pVM->hwaccm.s.fTPRPatchingActive)
3062 {
3063 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3064 /* Our patch code uses LSTAR for TPR caching. */
3065 pCtx->msrLSTAR = u8LastTPR;
3066
3067 /** @todo r=ramshankar: we should check for MSR-bitmap support here. */
3068 if (fPending)
3069 {
3070 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
3071 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
3072 }
3073 else
3074 {
3075 /*
3076 * No interrupts are pending, so we don't need to be explicitly notified.
3077 * There are enough world switches for detecting pending interrupts.
3078 */
3079 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
3080 }
3081 }
3082 }
3083
3084#ifdef LOG_ENABLED
3085 if ( pVM->hwaccm.s.fNestedPaging
3086 || pVM->hwaccm.s.vmx.fVPID)
3087 {
3088 PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu();
3089 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
3090 {
3091 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu,
3092 pCpu->idCpu));
3093 }
3094 else if (pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
3095 {
3096 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes,
3097 pCpu->cTLBFlushes));
3098 }
3099 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
3100 LogFlow(("Manual TLB flush\n"));
3101 }
3102#endif
3103#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3104 PGMRZDynMapFlushAutoSet(pVCpu);
3105#endif
3106
3107 /*
3108 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3!
3109 * (until the actual world switch)
3110 */
3111#ifdef VBOX_STRICT
3112 idCpuCheck = RTMpCpuId();
3113#endif
3114#ifdef LOG_ENABLED
3115 VMMR0LogFlushDisable(pVCpu);
3116#endif
3117
3118 /*
3119 * Save the host state first.
3120 */
3121 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
3122 {
3123 rc = VMXR0SaveHostState(pVM, pVCpu);
3124 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3125 {
3126 VMMR0LogFlushEnable(pVCpu);
3127 goto end;
3128 }
3129 }
3130
3131 /*
3132 * Load the guest state.
3133 */
3134 if (!pVCpu->hwaccm.s.fContextUseFlags)
3135 {
3136 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
3137 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadMinimal);
3138 }
3139 else
3140 {
3141 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
3142 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3143 {
3144 VMMR0LogFlushEnable(pVCpu);
3145 goto end;
3146 }
3147 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadFull);
3148 }
3149
3150#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3151 /*
3152 * Disable interrupts to make sure a poke will interrupt execution.
3153 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
3154 */
3155 uOldEFlags = ASMIntDisableFlags();
3156 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3157#endif
3158
3159 /* Non-register state Guest Context */
3160 /** @todo change me according to cpu state */
3161 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
3162 AssertRC(rc2);
3163
3164 /* Set TLB flush state as checked until we return from the world switch. */
3165 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
3166 /* Deal with tagged TLB setup and invalidation. */
3167 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
3168
3169 /*
3170 * Manual save and restore:
3171 * - General purpose registers except RIP, RSP
3172 *
3173 * Trashed:
3174 * - CR2 (we don't care)
3175 * - LDTR (reset to 0)
3176 * - DRx (presumably not changed at all)
3177 * - DR7 (reset to 0x400)
3178 * - EFLAGS (reset to RT_BIT(1); not relevant)
3179 */
3180
3181 /* All done! Let's start VM execution. */
3182 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x);
3183 Assert(idCpuCheck == RTMpCpuId());
3184
3185#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3186 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
3187 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
3188#endif
3189
3190 /*
3191 * Save the current TPR value in the LSTAR MSR so our patches can access it.
3192 */
3193 if (pVM->hwaccm.s.fTPRPatchingActive)
3194 {
3195 Assert(pVM->hwaccm.s.fTPRPatchingActive);
3196 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3197 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
3198 }
3199
3200 TMNotifyStartOfExecution(pVCpu);
3201
3202#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3203 /*
3204 * Save the current host TSC_AUX and write the guest TSC_AUX to the host, so that
3205 * RDTSCPs (that don't cause exits) read the guest MSR. See @bugref{3324}.
3206 */
3207 if ( (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3208 && !(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3209 {
3210 pVCpu->hwaccm.s.u64HostTSCAux = ASMRdMsr(MSR_K8_TSC_AUX);
3211 uint64_t u64GuestTSCAux = 0;
3212 rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTSCAux);
3213 AssertRC(rc2);
3214 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTSCAux);
3215 }
3216#endif
3217
3218#ifdef VBOX_WITH_KERNEL_USING_XMM
3219 rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM);
3220#else
3221 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
3222#endif
3223 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
3224 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits);
3225
3226    /* Possibly the last TSC value seen by the guest (may be slightly too high); only relevant when we're in TSC offsetting mode. */
3227 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3228 {
3229#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3230 /* Restore host's TSC_AUX. */
3231 if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3232 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hwaccm.s.u64HostTSCAux);
3233#endif
3234
3235 TMCpuTickSetLastSeen(pVCpu,
3236 ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
3237 }
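    /* (TMCpuTickSetLastSeen records an upper estimate of the TSC value the guest may have observed,
        which TM uses to keep the guest TSC from appearing to jump backwards later on.) */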
3238
3239 TMNotifyEndOfExecution(pVCpu);
3240 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
3241 Assert(!(ASMGetFlags() & X86_EFL_IF));
3242
3243 /*
3244 * Restore the host LSTAR MSR if the guest could have changed it.
3245 */
3246 if (pVM->hwaccm.s.fTPRPatchingActive)
3247 {
3248 Assert(pVM->hwaccm.s.fTPRPatchingActive);
3249 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3250 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
3251 }
3252
3253 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x);
3254 ASMSetFlags(uOldEFlags);
3255#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3256 uOldEFlags = ~(RTCCUINTREG)0;
3257#endif
3258
3259 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n",
3260 pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
3261
3262 /* In case we execute a goto ResumeExecution later on. */
3263 pVCpu->hwaccm.s.fResumeVM = true;
3264 pVCpu->hwaccm.s.fForceTLBFlush = false;
3265
3266 /*
3267 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3268 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
3269 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3270 */
3271
3272 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3273 {
3274 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
3275 VMMR0LogFlushEnable(pVCpu);
3276 goto end;
3277 }
3278
3279 /* Success. Query the guest state and figure out what has happened. */
3280
3281 /* Investigate why there was a VM-exit. */
3282 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3283 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
3284
3285 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
3286 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3287 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
3288 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
3289 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
3290 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
3291 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
3292 rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
3293 AssertRC(rc2);
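    /* (Only bits 15:0 of the exit reason - the basic exit reason - survive the masking above; the high
        bits, e.g. the VM-entry failure flag in bit 31, are deliberately not used below.) */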
3294
3295 /*
3296 * Sync back the guest state.
3297 */
3298 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
3299 AssertRC(rc2);
3300
3301 /* Note! NOW IT'S SAFE FOR LOGGING! */
3302 VMMR0LogFlushEnable(pVCpu);
3303 Log2(("Raw exit reason %08x\n", exitReason));
3304#if ARCH_BITS == 64 /* for the time being */
3305 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
3306#endif
3307
3308 /*
3309 * Check if an injected event was interrupted prematurely.
3310 */
3311 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
3312 AssertRC(rc2);
3313 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
3314 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
3315 /* Ignore 'int xx' as they'll be restarted anyway. */
3316 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
3317 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3318 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3319 {
3320 Assert(!pVCpu->hwaccm.s.Event.fPending);
3321 pVCpu->hwaccm.s.Event.fPending = true;
3322 /* Error code present? */
3323 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
3324 {
3325 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
3326 AssertRC(rc2);
3327 pVCpu->hwaccm.s.Event.errCode = val;
3328 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n",
3329 pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
3330 }
3331 else
3332 {
3333 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo,
3334 (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3335 pVCpu->hwaccm.s.Event.errCode = 0;
3336 }
3337 }
3338#ifdef VBOX_STRICT
3339 else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
3340              /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3341 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3342 {
3343 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n",
3344 pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3345 }
3346
3347 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
3348 HWACCMDumpRegs(pVM, pVCpu, pCtx);
3349#endif
3350
3351 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3352 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3353 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3354 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3355 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3356
3357 /*
3358 * Sync back the TPR if it was changed.
3359 */
3360 if ( fSetupTPRCaching
3361 && u8LastTPR != pVCpu->hwaccm.s.vmx.pbVAPIC[0x80])
3362 {
3363 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pbVAPIC[0x80]);
3364 AssertRC(rc2);
3365 }
3366
3367#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3368 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3369 exitReason, (uint64_t)exitQualification, pCtx->cs.Sel, pCtx->rip, (uint64_t)intInfo);
3370#endif
3371 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x);
3372
3373 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3374 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3375 switch (exitReason)
3376 {
3377 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3378 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3379 {
3380 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3381
3382 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3383 {
3384 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
3385#if 0 //def VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3386 if ( RTThreadPreemptIsPendingTrusty()
3387 && !RTThreadPreemptIsPending(NIL_RTTHREAD))
3388 goto ResumeExecution;
3389#endif
3390 /* External interrupt; leave to allow it to be dispatched again. */
3391 rc = VINF_EM_RAW_INTERRUPT;
3392 break;
3393 }
3394 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3395 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3396 {
3397 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3398 /* External interrupt; leave to allow it to be dispatched again. */
3399 rc = VINF_EM_RAW_INTERRUPT;
3400 break;
3401
3402 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
3403 AssertFailed(); /* can't come here; fails the first check. */
3404 break;
3405
3406 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
3407 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
3408 Assert(vector == 1 || vector == 3 || vector == 4);
3409 /* no break */
3410 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
3411 Log2(("Hardware/software interrupt %d\n", vector));
3412 switch (vector)
3413 {
3414 case X86_XCPT_NM:
3415 {
3416 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3417
3418 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3419 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3420 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3421 if (rc == VINF_SUCCESS)
3422 {
3423 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3424
3425 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
3426
3427 /* Continue execution. */
3428 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3429
3430 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3431 goto ResumeExecution;
3432 }
3433
3434 Log(("Forward #NM fault to the guest\n"));
3435 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
3436 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3437 cbInstr, 0);
3438 AssertRC(rc2);
3439 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3440 goto ResumeExecution;
3441 }
3442
3443 case X86_XCPT_PF: /* Page fault */
3444 {
3445#ifdef VBOX_ALWAYS_TRAP_PF
3446 if (pVM->hwaccm.s.fNestedPaging)
3447 {
3448 /*
3449 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3450 */
3451 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification,
3452 errCode, (RTGCPTR)pCtx->rsp));
3453
3454 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3455
3456 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3457
3458 /* Now we must update CR2. */
3459 pCtx->cr2 = exitQualification;
3460 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3461 cbInstr, errCode);
3462 AssertRC(rc2);
3463
3464 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3465 goto ResumeExecution;
3466 }
3467#else
3468 Assert(!pVM->hwaccm.s.fNestedPaging);
3469#endif
3470
3471#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
3472                     /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
3473 if ( pVM->hwaccm.s.fTRPPatchingAllowed
3474 && pVM->hwaccm.s.pGuestPatchMem
3475 && (exitQualification & 0xfff) == 0x080
3476 && !(errCode & X86_TRAP_PF_P) /* not present */
3477 && CPUMGetGuestCPL(pVCpu) == 0
3478 && !CPUMIsGuestInLongModeEx(pCtx)
3479 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
3480 {
3481 RTGCPHYS GCPhysApicBase, GCPhys;
3482 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3483 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3484
3485 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3486 if ( rc == VINF_SUCCESS
3487 && GCPhys == GCPhysApicBase)
3488 {
3489 /* Only attempt to patch the instruction once. */
3490 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3491 if (!pPatch)
3492 {
3493 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
3494 break;
3495 }
3496 }
3497 }
3498#endif
3499
3500 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3501 /* Exit qualification contains the linear address of the page fault. */
3502 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3503 TRPMSetErrorCode(pVCpu, errCode);
3504 TRPMSetFaultAddress(pVCpu, exitQualification);
3505
3506 /* Shortcut for APIC TPR reads and writes. */
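                    /* (Offset 0x80 in the APIC MMIO page is the TPR register; a not-present fault there is
                        the typical first access that lets us map the virtual APIC page below.) */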
3507 if ( (exitQualification & 0xfff) == 0x080
3508 && !(errCode & X86_TRAP_PF_P) /* not present */
3509 && fSetupTPRCaching
3510 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3511 {
3512 RTGCPHYS GCPhysApicBase, GCPhys;
3513                         PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3514 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3515
3516 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3517 if ( rc == VINF_SUCCESS
3518 && GCPhys == GCPhysApicBase)
3519 {
3520 Log(("Enable VT-x virtual APIC access filtering\n"));
3521 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3522 AssertRC(rc2);
3523 }
3524 }
3525
3526 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3527 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3528 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3529
3530 if (rc == VINF_SUCCESS)
3531 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3532                         Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3533 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
3534
3535 TRPMResetTrap(pVCpu);
3536 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3537 goto ResumeExecution;
3538 }
3539 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3540 {
3541 /*
3542 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3543 */
3544 Log2(("Forward page fault to the guest\n"));
3545
3546 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3547 /* The error code might have been changed. */
3548 errCode = TRPMGetErrorCode(pVCpu);
3549
3550 TRPMResetTrap(pVCpu);
3551
3552 /* Now we must update CR2. */
3553 pCtx->cr2 = exitQualification;
3554 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3555 cbInstr, errCode);
3556 AssertRC(rc2);
3557
3558 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3559 goto ResumeExecution;
3560 }
3561#ifdef VBOX_STRICT
3562 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3563 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3564#endif
3565 /* Need to go back to the recompiler to emulate the instruction. */
3566 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPFEM);
3567 TRPMResetTrap(pVCpu);
3568 break;
3569 }
3570
3571 case X86_XCPT_MF: /* Floating point exception. */
3572 {
3573 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
3574 if (!(pCtx->cr0 & X86_CR0_NE))
3575 {
3576 /* old style FPU error reporting needs some extra work. */
3577 /** @todo don't fall back to the recompiler, but do it manually. */
3578 rc = VINF_EM_RAW_EMULATE_INSTR;
3579 break;
3580 }
3581 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3582 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3583 cbInstr, errCode);
3584 AssertRC(rc2);
3585
3586 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3587 goto ResumeExecution;
3588 }
3589
3590 case X86_XCPT_DB: /* Debug exception. */
3591 {
3592 uint64_t uDR6;
3593
3594 /*
3595 * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3596 *
3597 * Exit qualification bits:
3598 * 3:0 B0-B3 which breakpoint condition was met
3599 * 12:4 Reserved (0)
3600 * 13 BD - debug register access detected
3601 * 14 BS - single step execution or branch taken
3602 * 63:15 Reserved (0)
3603 */
3604 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
3605
3606 /* Note that we don't support guest and host-initiated debugging at the same time. */
3607
3608 uDR6 = X86_DR6_INIT_VAL;
3609 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3610 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3611 if (rc == VINF_EM_RAW_GUEST_TRAP)
3612 {
3613 /* Update DR6 here. */
3614 pCtx->dr[6] = uDR6;
3615
3616 /* Resync DR6 if the debug state is active. */
3617 if (CPUMIsGuestDebugStateActive(pVCpu))
3618 ASMSetDR6(pCtx->dr[6]);
3619
3620 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
3621 pCtx->dr[7] &= ~X86_DR7_GD;
3622
3623 /* Paranoia. */
3624 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3625 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3626 pCtx->dr[7] |= 0x400; /* must be one */
3627
3628 /* Resync DR7 */
3629 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3630 AssertRC(rc2);
3631
3632 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip,
3633 exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3634 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3635 cbInstr, errCode);
3636 AssertRC(rc2);
3637
3638 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3639 goto ResumeExecution;
3640 }
3641 /* Return to ring 3 to deal with the debug exit code. */
3642 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3643 break;
3644 }
3645
3646 case X86_XCPT_BP: /* Breakpoint. */
3647 {
3648 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestBP);
3649 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3650 if (rc == VINF_EM_RAW_GUEST_TRAP)
3651 {
3652 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs.Sel, pCtx->rip));
3653 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3654 cbInstr, errCode);
3655 AssertRC(rc2);
3656 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3657 goto ResumeExecution;
3658 }
3659 if (rc == VINF_SUCCESS)
3660 {
3661 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3662 goto ResumeExecution;
3663 }
3664 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3665 break;
3666 }
3667
3668 case X86_XCPT_GP: /* General protection failure exception. */
3669 {
3670 uint32_t cbOp;
3671 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3672
3673 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
3674#ifdef VBOX_STRICT
3675 if ( !CPUMIsGuestInRealModeEx(pCtx)
3676 || !pVM->hwaccm.s.vmx.pRealModeTSS)
3677 {
3678 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, errCode));
3679 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3680 cbInstr, errCode);
3681 AssertRC(rc2);
3682 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3683 goto ResumeExecution;
3684 }
3685#endif
3686 Assert(CPUMIsGuestInRealModeEx(pCtx));
3687
3688 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3689
3690 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
3691 if (RT_SUCCESS(rc2))
3692 {
3693 bool fUpdateRIP = true;
3694
3695 rc = VINF_SUCCESS;
3696 Assert(cbOp == pDis->cbInstr);
3697 switch (pDis->pCurInstr->uOpcode)
3698 {
3699 case OP_CLI:
3700 pCtx->eflags.Bits.u1IF = 0;
3701 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
3702 break;
3703
3704 case OP_STI:
3705 pCtx->eflags.Bits.u1IF = 1;
3706 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->cbInstr);
3707 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3708 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE,
3709 VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3710 AssertRC(rc2);
3711 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
3712 break;
3713
3714 case OP_HLT:
3715 fUpdateRIP = false;
3716 rc = VINF_EM_HALT;
3717 pCtx->rip += pDis->cbInstr;
3718 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3719 break;
3720
3721 case OP_POPF:
3722 {
3723 RTGCPTR GCPtrStack;
3724 uint32_t cbParm;
3725 uint32_t uMask;
3726 X86EFLAGS eflags;
3727
3728 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3729 {
3730 cbParm = 4;
3731 uMask = 0xffffffff;
3732 }
3733 else
3734 {
3735 cbParm = 2;
3736 uMask = 0xffff;
3737 }
3738
3739 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3740 if (RT_FAILURE(rc2))
3741 {
3742 rc = VERR_EM_INTERPRETER;
3743 break;
3744 }
3745 eflags.u = 0;
3746 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3747 if (RT_FAILURE(rc2))
3748 {
3749 rc = VERR_EM_INTERPRETER;
3750 break;
3751 }
3752 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3753 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3754 | (eflags.u & X86_EFL_POPF_BITS & uMask);
3755 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3756 pCtx->eflags.Bits.u1RF = 0;
3757 pCtx->esp += cbParm;
3758 pCtx->esp &= uMask;
3759
3760 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
3761 break;
3762 }
3763
3764 case OP_PUSHF:
3765 {
3766 RTGCPTR GCPtrStack;
3767 uint32_t cbParm;
3768 uint32_t uMask;
3769 X86EFLAGS eflags;
3770
3771 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3772 {
3773 cbParm = 4;
3774 uMask = 0xffffffff;
3775 }
3776 else
3777 {
3778 cbParm = 2;
3779 uMask = 0xffff;
3780 }
3781
3782 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0,
3783 &GCPtrStack);
3784 if (RT_FAILURE(rc2))
3785 {
3786 rc = VERR_EM_INTERPRETER;
3787 break;
3788 }
3789 eflags = pCtx->eflags;
3790 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3791 eflags.Bits.u1RF = 0;
3792 eflags.Bits.u1VM = 0;
3793
3794 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3795 if (RT_FAILURE(rc2))
3796 {
3797 rc = VERR_EM_INTERPRETER;
3798 break;
3799 }
3800 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3801 pCtx->esp -= cbParm;
3802 pCtx->esp &= uMask;
3803 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
3804 break;
3805 }
3806
3807 case OP_IRET:
3808 {
3809 RTGCPTR GCPtrStack;
3810 uint32_t uMask = 0xffff;
3811 uint16_t aIretFrame[3];
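                            /* Real-mode IRET pops IP, CS and FLAGS as three consecutive 16-bit words. */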
3812
3813 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
3814 {
3815 rc = VERR_EM_INTERPRETER;
3816 break;
3817 }
3818
3819 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3820 if (RT_FAILURE(rc2))
3821 {
3822 rc = VERR_EM_INTERPRETER;
3823 break;
3824 }
3825 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3826 if (RT_FAILURE(rc2))
3827 {
3828 rc = VERR_EM_INTERPRETER;
3829 break;
3830 }
3831 pCtx->ip = aIretFrame[0];
3832 pCtx->cs.Sel = aIretFrame[1];
3833 pCtx->cs.ValidSel = aIretFrame[1];
3834 pCtx->cs.u64Base = (uint32_t)pCtx->cs.Sel << 4;
3835 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3836 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3837 pCtx->sp += sizeof(aIretFrame);
3838
3839 LogFlow(("iret to %04x:%x\n", pCtx->cs.Sel, pCtx->ip));
3840 fUpdateRIP = false;
3841 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
3842 break;
3843 }
3844
3845 case OP_INT:
3846 {
3847 uint32_t intInfo2;
3848
3849 LogFlow(("Realmode: INT %x\n", pDis->Param1.uValue & 0xff));
3850 intInfo2 = pDis->Param1.uValue & 0xff;
3851 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3852 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3853
3854 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3855 AssertRC(VBOXSTRICTRC_VAL(rc));
3856 fUpdateRIP = false;
3857 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3858 break;
3859 }
3860
3861 case OP_INTO:
3862 {
3863 if (pCtx->eflags.Bits.u1OF)
3864 {
3865 uint32_t intInfo2;
3866
3867 LogFlow(("Realmode: INTO\n"));
3868 intInfo2 = X86_XCPT_OF;
3869 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3870 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3871
3872 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3873 AssertRC(VBOXSTRICTRC_VAL(rc));
3874 fUpdateRIP = false;
3875 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3876 }
3877 break;
3878 }
3879
3880 case OP_INT3:
3881 {
3882 uint32_t intInfo2;
3883
3884 LogFlow(("Realmode: INT 3\n"));
3885 intInfo2 = 3;
3886 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3887 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3888
3889 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3890 AssertRC(VBOXSTRICTRC_VAL(rc));
3891 fUpdateRIP = false;
3892 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3893 break;
3894 }
3895
3896 default:
3897 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
3898 fUpdateRIP = false;
3899 break;
3900 }
3901
3902 if (rc == VINF_SUCCESS)
3903 {
3904 if (fUpdateRIP)
3905 pCtx->rip += cbOp; /* Move on to the next instruction. */
3906
3907 /*
3908 * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the
3909 * whole context to be done with it.
3910 */
3911 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3912
3913 /* Only resume if successful. */
3914 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3915 goto ResumeExecution;
3916 }
3917 }
3918 else
3919 rc = VERR_EM_INTERPRETER;
3920
3921 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
3922 ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
3923 break;
3924 }
3925
3926#ifdef VBOX_STRICT
3927 case X86_XCPT_XF: /* SIMD exception. */
3928 case X86_XCPT_DE: /* Divide error. */
3929 case X86_XCPT_UD: /* Unknown opcode exception. */
3930 case X86_XCPT_SS: /* Stack segment exception. */
3931 case X86_XCPT_NP: /* Segment not present exception. */
3932 {
3933 switch (vector)
3934 {
3935 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE); break;
3936 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD); break;
3937 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS); break;
3938 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP); break;
3939 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXF); break;
3940 }
3941
3942 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3943 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3944 cbInstr, errCode);
3945 AssertRC(rc2);
3946
3947 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3948 goto ResumeExecution;
3949 }
3950#endif
3951 default:
3952 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXcpUnk);
3953 if ( CPUMIsGuestInRealModeEx(pCtx)
3954 && pVM->hwaccm.s.vmx.pRealModeTSS)
3955 {
3956 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs.Sel, pCtx->eip, errCode));
3957 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3958 cbInstr, errCode);
3959 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
3960
3961 /* Go back to ring-3 in case of a triple fault. */
3962 if ( vector == X86_XCPT_DF
3963 && rc == VINF_EM_RESET)
3964 {
3965 break;
3966 }
3967
3968 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3969 goto ResumeExecution;
3970 }
3971 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3972 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3973 break;
3974 } /* switch (vector) */
3975
3976 break;
3977
3978 default:
3979 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3980 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
3981 break;
3982 }
3983
3984 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3985 break;
3986 }
3987
3988 /*
3989 * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed
3990 * by the configuration of the EPT paging structures.
3991 */
3992 case VMX_EXIT_EPT_VIOLATION:
3993 {
3994 RTGCPHYS GCPhys;
3995
3996 Assert(pVM->hwaccm.s.fNestedPaging);
3997
3998 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3999 AssertRC(rc2);
4000 Assert(((exitQualification >> 7) & 3) != 2);
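            /* (Bit 7 of the qualification indicates whether the guest linear address is valid and bit 8 only
                has meaning when bit 7 is set, so the combination 2 is not expected to occur.) */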
4001
4002 /* Determine the kind of violation. */
4003 errCode = 0;
4004 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
4005 errCode |= X86_TRAP_PF_ID;
4006
4007 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
4008 errCode |= X86_TRAP_PF_RW;
4009
4010 /* If the page is present, then it's a page level protection fault. */
4011 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
4012 errCode |= X86_TRAP_PF_P;
4013 else
4014 {
4015 /* Shortcut for APIC TPR reads and writes. */
4016 if ( (GCPhys & 0xfff) == 0x080
4017 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4018 && fSetupTPRCaching
4019 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4020 {
4021 RTGCPHYS GCPhysApicBase;
4022                     PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
4023 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4024 if (GCPhys == GCPhysApicBase + 0x80)
4025 {
4026 Log(("Enable VT-x virtual APIC access filtering\n"));
4027 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
4028 AssertRC(rc2);
4029 }
4030 }
4031 }
4032 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
4033
4034 /* GCPhys contains the guest physical address of the page fault. */
4035 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
4036 TRPMSetErrorCode(pVCpu, errCode);
4037 TRPMSetFaultAddress(pVCpu, GCPhys);
4038
4039 /* Handle the pagefault trap for the nested shadow table. */
4040 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
4041
4042 /*
4043 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
4044 */
4045 if ( rc == VINF_SUCCESS
4046 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4047 || rc == VERR_PAGE_NOT_PRESENT)
4048 {
4049 /* We've successfully synced our shadow pages, so let's just continue execution. */
4050                 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
4051 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
4052
4053 TRPMResetTrap(pVCpu);
4054 goto ResumeExecution;
4055 }
4056
4057#ifdef VBOX_STRICT
4058 if (rc != VINF_EM_RAW_EMULATE_INSTR)
4059 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4060#endif
4061 /* Need to go back to the recompiler to emulate the instruction. */
4062 TRPMResetTrap(pVCpu);
4063 break;
4064 }
4065
4066 case VMX_EXIT_EPT_MISCONFIG:
4067 {
4068 RTGCPHYS GCPhys;
4069
4070 Assert(pVM->hwaccm.s.fNestedPaging);
4071
4072 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
4073 AssertRC(rc2);
4074 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
4075
4076 /* Shortcut for APIC TPR reads and writes. */
4077 if ( (GCPhys & 0xfff) == 0x080
4078 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4079 && fSetupTPRCaching
4080 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4081 {
4082 RTGCPHYS GCPhysApicBase;
4083                 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
4084 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4085 if (GCPhys == GCPhysApicBase + 0x80)
4086 {
4087 Log(("Enable VT-x virtual APIC access filtering\n"));
4088 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
4089 AssertRC(rc2);
4090 }
4091 }
4092
4093 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
4094
4095 /*
4096 * If we succeed, resume execution.
4097              * Or, if we fail to interpret the instruction because we couldn't get the guest physical address
4098              * of the page containing the instruction via the guest's page tables (we would have invalidated the
4099              * guest page in the host TLB), resume execution anyway; the resulting guest page fault lets the
4100              * guest handle this weird case. See @bugref{6043}.
4101 */
4102 if ( rc == VINF_SUCCESS
4103 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4104 || rc == VERR_PAGE_NOT_PRESENT)
4105 {
4106 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
4107 goto ResumeExecution;
4108 }
4109
4110 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4111 break;
4112 }
4113
4114 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4115 /* Clear VM-exit on IF=1 change. */
4116 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip,
4117 VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
4118 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
4119 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4120 AssertRC(rc2);
4121 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
4122 goto ResumeExecution; /* we check for pending guest interrupts there */
4123
4124 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
4125 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
4126 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
4127 /* Skip instruction and continue directly. */
4128 pCtx->rip += cbInstr;
4129             /* Continue execution. */
4130 goto ResumeExecution;
4131
4132 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4133 {
4134 Log2(("VMX: Cpuid %x\n", pCtx->eax));
4135 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
4136 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4137 if (rc == VINF_SUCCESS)
4138 {
4139 /* Update EIP and continue execution. */
4140 Assert(cbInstr == 2);
4141 pCtx->rip += cbInstr;
4142 goto ResumeExecution;
4143 }
4144 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4145 rc = VINF_EM_RAW_EMULATE_INSTR;
4146 break;
4147 }
4148
4149 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4150 {
4151 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
4152 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
4153 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4154 if (rc == VINF_SUCCESS)
4155 {
4156 /* Update EIP and continue execution. */
4157 Assert(cbInstr == 2);
4158 pCtx->rip += cbInstr;
4159 goto ResumeExecution;
4160 }
4161 rc = VINF_EM_RAW_EMULATE_INSTR;
4162 break;
4163 }
4164
4165 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4166 {
4167 Log2(("VMX: Rdtsc\n"));
4168 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
4169 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4170 if (rc == VINF_SUCCESS)
4171 {
4172 /* Update EIP and continue execution. */
4173 Assert(cbInstr == 2);
4174 pCtx->rip += cbInstr;
4175 goto ResumeExecution;
4176 }
4177 rc = VINF_EM_RAW_EMULATE_INSTR;
4178 break;
4179 }
4180
4181 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4182 {
4183 Log2(("VMX: Rdtscp\n"));
4184 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtscp);
4185 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
4186 if (rc == VINF_SUCCESS)
4187 {
4188 /* Update EIP and continue execution. */
4189 Assert(cbInstr == 3);
4190 pCtx->rip += cbInstr;
4191 goto ResumeExecution;
4192 }
4193 rc = VINF_EM_RAW_EMULATE_INSTR;
4194 break;
4195 }
4196
4197 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4198 {
4199 Log2(("VMX: invlpg\n"));
4200 Assert(!pVM->hwaccm.s.fNestedPaging);
4201
4202 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvlpg);
4203 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
4204 if (rc == VINF_SUCCESS)
4205 {
4206 /* Update EIP and continue execution. */
4207 pCtx->rip += cbInstr;
4208 goto ResumeExecution;
4209 }
4210 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
4211 break;
4212 }
4213
4214 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4215 {
4216 Log2(("VMX: monitor\n"));
4217
4218 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
4219 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4220 if (rc == VINF_SUCCESS)
4221 {
4222 /* Update EIP and continue execution. */
4223 pCtx->rip += cbInstr;
4224 goto ResumeExecution;
4225 }
4226 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4227 break;
4228 }
4229
4230 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4231 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
4232 if ( pVM->hwaccm.s.fTPRPatchingActive
4233 && pCtx->ecx == MSR_K8_LSTAR)
4234 {
4235 Assert(!CPUMIsGuestInLongModeEx(pCtx));
4236 if ((pCtx->eax & 0xff) != u8LastTPR)
4237 {
4238 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
4239
4240 /* Our patch code uses LSTAR for TPR caching. */
4241 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
4242 AssertRC(rc2);
4243 }
4244
4245 /* Skip the instruction and continue. */
4246 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
4247
4248 /* Only resume if successful. */
4249 goto ResumeExecution;
4250 }
4251 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_MSR;
4252 /* no break */
4253 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4254 {
4255 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
4256
4257 /*
4258              * Note: The Intel spec. claims there's a REX version of RDMSR that's slightly different,
4259 * so we play safe by completely disassembling the instruction.
4260 */
4261 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
4262 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
4263 if (rc == VINF_SUCCESS)
4264 {
4265 /* EIP has been updated already. */
4266 /* Only resume if successful. */
4267 goto ResumeExecution;
4268 }
4269 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n",
4270 (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
4271 break;
4272 }
4273
4274 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4275 {
4276 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4277
4278 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
4279 {
4280 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
4281 {
4282 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
4283 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4284 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4285 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
4286 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
4287 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
4288 {
4289 case 0:
4290 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
4291 break;
4292 case 2:
4293 break;
4294 case 3:
4295 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
4296 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
4297 break;
4298 case 4:
4299 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
4300 break;
4301 case 8:
4302 /* CR8 contains the APIC TPR */
4303 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1
4304 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4305 break;
4306
4307 default:
4308 AssertFailed();
4309 break;
4310 }
4311 break;
4312 }
4313
4314 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
4315 {
4316 Log2(("VMX: mov x, crx\n"));
4317 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4318
4319 Assert( !pVM->hwaccm.s.fNestedPaging
4320 || !CPUMIsGuestInPagedProtectedModeEx(pCtx)
4321 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3);
4322
4323 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
4324 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8
4325 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4326
4327 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4328 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
4329 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
4330 break;
4331 }
4332
4333 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
4334 {
4335 Log2(("VMX: clts\n"));
4336 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
4337 rc = EMInterpretCLTS(pVM, pVCpu);
4338 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
4339 break;
4340 }
4341
4342 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
4343 {
4344 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
4345 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
4346 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
4347 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
4348 break;
4349 }
4350 }
4351
4352 /* Update EIP if no error occurred. */
4353 if (RT_SUCCESS(rc))
4354 pCtx->rip += cbInstr;
4355
4356 if (rc == VINF_SUCCESS)
4357 {
4358 /* Only resume if successful. */
4359 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4360 goto ResumeExecution;
4361 }
4362 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
4363 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4364 break;
4365 }
4366
4367 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4368 {
4369 if ( !DBGFIsStepping(pVCpu)
4370 && !CPUMIsHyperDebugStateActive(pVCpu))
4371 {
4372 /* Disable DRx move intercepts. */
4373 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4374 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4375 AssertRC(rc2);
4376
4377 /* Save the host and load the guest debug state. */
4378 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
4379 AssertRC(rc2);
4380
4381#ifdef LOG_ENABLED
4382 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4383 {
4384 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4385 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4386 }
4387 else
4388 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
4389#endif
4390
4391#ifdef VBOX_WITH_STATISTICS
4392 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
4393 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4394 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4395 else
4396 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4397#endif
4398
4399 goto ResumeExecution;
4400 }
4401
4402 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first
4403 * time and restore DRx registers afterwards */
4404 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4405 {
4406 Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4407 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4408 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4409 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4410 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4411 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4412 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4413 Log2(("DR7=%08x\n", pCtx->dr[7]));
4414 }
4415 else
4416 {
4417 Log2(("VMX: mov x, DRx\n"));
4418 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4419 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4420 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4421 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4422 }
4423 /* Update EIP if no error occurred. */
4424 if (RT_SUCCESS(rc))
4425 pCtx->rip += cbInstr;
4426
4427 if (rc == VINF_SUCCESS)
4428 {
4429 /* Only resume if successful. */
4430 goto ResumeExecution;
4431 }
4432 Assert(rc == VERR_EM_INTERPRETER);
4433 break;
4434 }
4435
4436 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
4437 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4438 {
4439 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4440 uint32_t uPort;
4441 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4442 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4443
4444 /** @todo necessary to make the distinction? */
4445 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4446 uPort = pCtx->edx & 0xffff;
4447 else
4448 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
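            /* (The port-number field of the exit qualification is documented to be valid for both the DX and
                the immediate forms, so the distinction above is probably redundant.) */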
4449
4450 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */
4451 {
4452 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4453 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4454 break;
4455 }
4456
4457 uint32_t cbSize = g_aIOSize[uIOWidth];
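            /* (The 2-bit width field encodes 0=1 byte, 1=2 bytes, 3=4 bytes; g_aIOSize maps it to the access
                size in bytes and g_aIOOpAnd to the matching register mask.) */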
4458 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4459 {
4460 /* ins/outs */
4461 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
4462
4463 /* Disassemble manually to deal with segment prefixes. */
4464 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
4465 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4466 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
4467                 if (RT_SUCCESS(rc2)) /* Check the disassembly status (rc2); rc is still VINF_SUCCESS at this point. */
4468 {
4469 if (fIOWrite)
4470 {
4471 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4472 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
4473 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4474 }
4475 else
4476 {
4477 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4478 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
4479 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4480 }
4481 }
4482 else
4483 rc = VINF_EM_RAW_EMULATE_INSTR;
4484 }
4485 else
4486 {
4487 /* Normal in/out */
4488 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
4489
4490 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4491
4492 if (fIOWrite)
4493 {
4494 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
4495 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4496 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4497 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4498 }
4499 else
4500 {
4501 uint32_t u32Val = 0;
4502
4503 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
4504 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4505 if (IOM_SUCCESS(rc))
4506 {
4507 /* Write back to the EAX register. */
4508 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4509 }
4510 else
4511 if (rc == VINF_IOM_R3_IOPORT_READ)
4512 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4513 }
4514 }
4515
4516 /*
4517              * Handle the I/O return codes.
4518 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4519 */
4520 if (IOM_SUCCESS(rc))
4521 {
4522 /* Update EIP and continue execution. */
4523 pCtx->rip += cbInstr;
4524 if (RT_LIKELY(rc == VINF_SUCCESS))
4525 {
4526 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4527 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4528 {
4529 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
4530 for (unsigned i = 0; i < 4; i++)
4531 {
4532 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
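                            /* (DR7 LENn uses the same 0=1, 1=2, 3=4 byte encoding as the I/O width field, so
                                g_aIOSize doubles as the breakpoint-length lookup here.) */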
4533
4534 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4535 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4536 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4537 {
4538 uint64_t uDR6;
4539
4540 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4541
4542 uDR6 = ASMGetDR6();
4543
4544 /* Clear all breakpoint status flags and set the one we just hit. */
4545 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4546 uDR6 |= (uint64_t)RT_BIT(i);
4547
4548 /*
4549 * Note: AMD64 Architecture Programmer's Manual 13.1:
4550                              * Bits 15:13 of the DR6 register are never cleared by the processor and must
4551 * be cleared by software after the contents have been read.
4552 */
4553 ASMSetDR6(uDR6);
4554
4555 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
4556 pCtx->dr[7] &= ~X86_DR7_GD;
4557
4558 /* Paranoia. */
4559 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4560 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4561 pCtx->dr[7] |= 0x400; /* must be one */
4562
4563 /* Resync DR7 */
4564 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
4565 AssertRC(rc2);
4566
4567 /* Construct inject info. */
4568 intInfo = X86_XCPT_DB;
4569 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4570 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4571
4572 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4573 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4574 0 /* cbInstr */, 0 /* errCode */);
4575 AssertRC(rc2);
4576
4577 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4578 goto ResumeExecution;
4579 }
4580 }
4581 }
4582 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4583 goto ResumeExecution;
4584 }
4585 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4586 break;
4587 }
4588
4589#ifdef VBOX_STRICT
4590 if (rc == VINF_IOM_R3_IOPORT_READ)
4591 Assert(!fIOWrite);
4592 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4593 Assert(fIOWrite);
4594 else
4595 {
4596 AssertMsg( RT_FAILURE(rc)
4597 || rc == VINF_EM_RAW_EMULATE_INSTR
4598 || rc == VINF_EM_RAW_GUEST_TRAP
4599 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4600 }
4601#endif
4602 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4603 break;
4604 }
4605
4606 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4607 LogFlow(("VMX_EXIT_TPR\n"));
4608 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4609 goto ResumeExecution;
4610
4611 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4612 on the APIC-access page. */
4613 {
4614 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4615 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4616
4617 switch (uAccessType)
4618 {
4619 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4620 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4621 {
4622 RTGCPHYS GCPhys;
4623 PDMApicGetBase(pVM, &GCPhys);
4624 GCPhys &= PAGE_BASE_GC_MASK;
4625 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4626
4627 LogFlow(("Apic access at %RGp\n", GCPhys));
4628 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
4629 CPUMCTX2CORE(pCtx), GCPhys);
4630 if (rc == VINF_SUCCESS)
4631 goto ResumeExecution; /* rip already updated */
4632 break;
4633 }
4634
4635 default:
4636 rc = VINF_EM_RAW_EMULATE_INSTR;
4637 break;
4638 }
4639 break;
4640 }
4641
4642 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4643 if (!TMTimerPollBool(pVM, pVCpu))
4644 goto ResumeExecution;
4645 rc = VINF_EM_RAW_TIMER_PENDING;
4646 break;
4647
4648 default:
4649 /* The rest is handled after syncing the entire CPU state. */
4650 break;
4651 }
4652
4653
4654 /*
4655 * Note: The guest state is not entirely synced back at this stage!
4656 */
4657
4658 /* Investigate why there was a VM-exit. (part 2) */
4659 switch (exitReason)
4660 {
4661 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
4662 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
4663 case VMX_EXIT_EPT_VIOLATION:
4664 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4665 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4666 /* Already handled above. */
4667 break;
4668
4669 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4670 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4671 break;
4672
4673 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4674 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4675 rc = VINF_EM_RAW_INTERRUPT;
4676 AssertFailed(); /* Can't happen. Yet. */
4677 break;
4678
4679 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
4680 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
4681 rc = VINF_EM_RAW_INTERRUPT;
4682 AssertFailed(); /* Can't happen afaik. */
4683 break;
4684
4685 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4686 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4687 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4688 && pVCpu->hwaccm.s.Event.fPending)
4689 {
4690 /* Caused by an injected interrupt. */
4691 pVCpu->hwaccm.s.Event.fPending = false;
4692
4693 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo)));
4694 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo));
4695 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT);
4696 AssertRC(rc2);
4697 }
4698 /* else Exceptions and software interrupts can just be restarted. */
4699 rc = VERR_EM_INTERPRETER;
4700 break;
4701
4702 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4703 /* Check if external interrupts are pending; if so, don't switch back. */
4704 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
4705 pCtx->rip++; /* skip hlt */
4706 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4707 goto ResumeExecution;
4708
4709 rc = VINF_EM_HALT;
4710 break;
4711
4712 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4713 Log2(("VMX: mwait\n"));
4714 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
4715 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4716 if ( rc == VINF_EM_HALT
4717 || rc == VINF_SUCCESS)
4718 {
4719 /* Update EIP and continue execution. */
4720 pCtx->rip += cbInstr;
4721
4722 /* Check if external interrupts are pending; if so, don't switch back. */
4723 if ( rc == VINF_SUCCESS
4724 || ( rc == VINF_EM_HALT
4725 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4726 )
4727 goto ResumeExecution;
4728 }
4729 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4730 break;
4731
4732 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4733 AssertFailed(); /* can't happen. */
4734 rc = VERR_EM_INTERPRETER;
4735 break;
4736
4737 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4738 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4739 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4740 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4741 AssertRC(rc2);
4742 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMTF);
4743#if 0
4744 DBGFDoneStepping(pVCpu);
4745#endif
4746 rc = VINF_EM_DBG_STOP;
4747 break;
4748
4749 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4750 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4751 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4752 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4753 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4754 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4755 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4756 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4757 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4758 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4759 /** @todo inject #UD immediately */
4760 rc = VERR_EM_INTERPRETER;
4761 break;
4762
4763 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4764 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4765 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4766 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4767 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4768 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4769 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4770 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4771 /* already handled above */
4772 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4773 || rc == VINF_EM_RAW_INTERRUPT
4774 || rc == VERR_EM_INTERPRETER
4775 || rc == VINF_EM_RAW_EMULATE_INSTR
4776 || rc == VINF_PGM_SYNC_CR3
4777 || rc == VINF_IOM_R3_IOPORT_READ
4778 || rc == VINF_IOM_R3_IOPORT_WRITE
4779 || rc == VINF_EM_RAW_GUEST_TRAP
4780 || rc == VINF_TRPM_XCPT_DISPATCHED
4781 || rc == VINF_EM_RESCHEDULE_REM,
4782 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4783 break;
4784
4785 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4786 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4787 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4788 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4789 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4790 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4791 on the APIC-access page. */
4792 {
4793 /*
4794             * If we decided to emulate them here, then we must sync the MSRs that could have been changed (sysenter, FS/GS base).
4795 */
4796 rc = VERR_EM_INTERPRETER;
4797 break;
4798 }
4799
4800 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4801 Assert(rc == VINF_EM_RAW_INTERRUPT);
4802 break;
4803
4804 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4805 {
4806#ifdef VBOX_STRICT
4807 RTCCUINTREG val2 = 0;
4808
4809 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4810
4811 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2);
4812 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4813
4814 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2);
4815 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4816
4817 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2);
4818 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4819
4820 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2);
4821 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4822
4823 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4824 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4825
4826 VMX_LOG_SELREG(CS, "CS", val2);
4827 VMX_LOG_SELREG(DS, "DS", val2);
4828 VMX_LOG_SELREG(ES, "ES", val2);
4829 VMX_LOG_SELREG(FS, "FS", val2);
4830 VMX_LOG_SELREG(GS, "GS", val2);
4831 VMX_LOG_SELREG(SS, "SS", val2);
4832 VMX_LOG_SELREG(TR, "TR", val2);
4833 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4834
4835 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2);
4836 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4837 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2);
4838 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4839#endif /* VBOX_STRICT */
4840 rc = VERR_VMX_INVALID_GUEST_STATE;
4841 break;
4842 }
4843
4844 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4845 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4846 default:
4847 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4848 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4849 break;
4850
4851 }
4852
4853end:
4854    /* We are now going back to ring-3, so clear the action flag. */
4855 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4856
4857 /*
4858 * Signal changes for the recompiler.
4859 */
4860 CPUMSetChangedFlags(pVCpu,
4861 CPUM_CHANGED_SYSENTER_MSR
4862 | CPUM_CHANGED_LDTR
4863 | CPUM_CHANGED_GDTR
4864 | CPUM_CHANGED_IDTR
4865 | CPUM_CHANGED_TR
4866 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4867
4868 /*
4869 * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time.
4870 */
4871 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4872 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4873 {
4874 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
4875 /* On the next entry we'll only sync the host context. */
4876 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
4877 }
4878 else
4879 {
4880 /* On the next entry we'll sync everything. */
4881 /** @todo we can do better than this */
4882 /* Not in the VINF_PGM_CHANGE_MODE though! */
4883 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
4884 }
4885
4886 /* Translate into a less severe return code */
4887 if (rc == VERR_EM_INTERPRETER)
4888 rc = VINF_EM_RAW_EMULATE_INSTR;
4889 else if (rc == VERR_VMX_INVALID_VMCS_PTR)
4890 {
4891 /* Try to extract more information about what might have gone wrong here. */
4892 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
4893 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS;
4894 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
4895 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4896 }
4897
4898 /* Just set the correct state here instead of trying to catch every goto above. */
4899 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4900
4901#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4902 /* Restore interrupts if we exited after disabling them. */
4903 if (uOldEFlags != ~(RTCCUINTREG)0)
4904 ASMSetFlags(uOldEFlags);
4905#endif
4906
4907 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x);
4908 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
4909 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
4910 Log2(("X"));
4911 return VBOXSTRICTRC_TODO(rc);
4912}
4913
4914
4915/**
4916 * Enters the VT-x session.
4917 *
4918 * @returns VBox status code.
4919 * @param pVM Pointer to the VM.
4920 * @param pVCpu Pointer to the VMCPU.
4921 * @param pCpu Pointer to the CPU info struct.
4922 */
4923VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
4924{
4925 Assert(pVM->hwaccm.s.vmx.fSupported);
4926 NOREF(pCpu);
4927
4928 unsigned cr4 = ASMGetCR4();
4929 if (!(cr4 & X86_CR4_VMXE))
4930 {
4931 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4932 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4933 }
4934
4935 /* Activate the VMCS. */
4936 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4937 if (RT_FAILURE(rc))
4938 return rc;
4939
4940 pVCpu->hwaccm.s.fResumeVM = false;
4941 return VINF_SUCCESS;
4942}
4943
4944
4945/**
4946 * Leaves the VT-x session.
4947 *
4948 * @returns VBox status code.
4949 * @param pVM Pointer to the VM.
4950 * @param pVCpu Pointer to the VMCPU.
4951 * @param pCtx Pointer to the guest CPU context.
4952 */
4953VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4954{
4955 Assert(pVM->hwaccm.s.vmx.fSupported);
4956
4957#ifdef DEBUG
4958 if (CPUMIsHyperDebugStateActive(pVCpu))
4959 {
4960 CPUMR0LoadHostDebugState(pVM, pVCpu);
4961 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4962 }
4963 else
4964#endif
4965
4966 /*
4967 * Save the guest debug state if necessary.
4968 */
4969 if (CPUMIsGuestDebugStateActive(pVCpu))
4970 {
4971 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4972
4973 /* Enable DRx move intercepts again. */
4974 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4975 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4976 AssertRC(rc);
4977
4978 /* Resync the debug registers the next time. */
4979 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4980 }
4981 else
4982 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4983
4984 /*
4985 * Clear VMCS, marking it inactive, clearing implementation-specific data and writing
4986 * VMCS data back to memory.
4987 */
4988 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4989 AssertRC(rc);
4990
4991 return VINF_SUCCESS;
4992}
4993
4994
4995/**
4996 * Flush the TLB using EPT.
4997 *
4999 * @param pVM Pointer to the VM.
5000 * @param pVCpu Pointer to the VMCPU.
5001 * @param enmFlush Type of flush.
5002 */
5003static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
5004{
5005 uint64_t descriptor[2];
5006
5007 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
5008 Assert(pVM->hwaccm.s.fNestedPaging);
5009 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
5010 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
5011 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
5012 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.vmx.GCPhysEPTP, rc));
5013}
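
/*
 * Illustrative sketch only (not used by the code above): the two-element
 * descriptor array built by hmR0VmxFlushEPT corresponds to the 128-bit
 * INVEPT descriptor. The struct and field names below are ours, chosen for
 * illustration, and are not VirtualBox definitions.
 *
 *     typedef struct INVEPTDESCSKETCH
 *     {
 *         uint64_t uEptp;        // descriptor[0]: the EPT pointer to flush by.
 *         uint64_t uReserved;    // descriptor[1]: must be zero.
 *     } INVEPTDESCSKETCH;
 */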
5014
5015
5016/**
5017 * Flush the TLB using VPID.
5018 *
5020 * @param pVM Pointer to the VM.
5021 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
5022 * enmFlush).
5023 * @param enmFlush Type of flush.
5024 * @param GCPtr Virtual address of the page to flush (can be 0 depending
5025 * on @a enmFlush).
5026 */
5027static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
5028{
5029 uint64_t descriptor[2];
5030
5031 Assert(pVM->hwaccm.s.vmx.fVPID);
5032 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
5033 {
5034 descriptor[0] = 0;
5035 descriptor[1] = 0;
5036 }
5037 else
5038 {
5039 AssertPtr(pVCpu);
5040 AssertMsg(pVCpu->hwaccm.s.uCurrentASID != 0, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hwaccm.s.uCurrentASID));
5041 AssertMsg(pVCpu->hwaccm.s.uCurrentASID <= UINT16_MAX, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hwaccm.s.uCurrentASID));
5042 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
5043 descriptor[1] = GCPtr;
5044 }
5045 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
5046 AssertMsg(rc == VINF_SUCCESS,
5047 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hwaccm.s.uCurrentASID : 0, GCPtr, rc));
5048}
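
/*
 * Illustrative sketch only: the descriptor built by hmR0VmxFlushVPID mirrors
 * the 128-bit INVVPID descriptor. The struct and field names are ours, for
 * illustration only.
 *
 *     typedef struct INVVPIDDESCSKETCH
 *     {
 *         uint64_t uVpid;        // descriptor[0]: the VPID/ASID (low 16 bits); zero for all-contexts flushes.
 *         uint64_t GCPtrLinear;  // descriptor[1]: linear address, only meaningful for individual-address flushes.
 *     } INVVPIDDESCSKETCH;
 */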
5049
5050
5051/**
5052 * Invalidates a guest page by guest virtual address. Only relevant for
5053 * EPT/VPID, otherwise there is nothing really to invalidate.
5054 *
5055 * @returns VBox status code.
5056 * @param pVM Pointer to the VM.
5057 * @param pVCpu Pointer to the VMCPU.
5058 * @param GCVirt Guest virtual address of the page to invalidate.
5059 */
5060VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
5061{
5062 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
5063
5064 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
5065
5066 if (!fFlushPending)
5067 {
5068 /*
5069         * We must invalidate the guest TLB entry in either case; we cannot ignore it even for the EPT case.
5070         * See @bugref{6043} and @bugref{6177}.
5071         *
5072         * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*() as this
5073         * function may be called in a loop with individual addresses.
5074 */
5075 if (pVM->hwaccm.s.vmx.fVPID)
5076 {
5077            /* If we can flush just this page, do it; otherwise flush as little as possible. */
5078 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
5079 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
5080 else
5081 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5082 }
5083 else if (pVM->hwaccm.s.fNestedPaging)
5084 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5085 }
5086
5087 return VINF_SUCCESS;
5088}
5089
5090
5091/**
5092 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
5093 * otherwise there is nothing really to invalidate.
5094 *
5095 * NOTE: Assumes the current instruction references this physical page through a virtual address!
5096 *
5097 * @returns VBox status code.
5098 * @param pVM Pointer to the VM.
5099 * @param pVCpu Pointer to the VMCPU.
5100 * @param GCPhys Guest physical address of the page to invalidate.
5101 */
5102VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
5103{
5104 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
5105
5106 /*
5107     * We cannot flush a page by guest-physical address: invvpid takes only a linear address,
5108     * while invept flushes whole EPT contexts rather than individual addresses. We update the force
5109     * flag here and flush before VMENTRY in hmR0VmxSetupTLB*(). This function might be called in a loop.
5110 */
5111 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5112 return VINF_SUCCESS;
5113}
5114
5115
5116/**
5117 * Report world switch error and dump some useful debug info.
5118 *
5119 * @param pVM Pointer to the VM.
5120 * @param pVCpu Pointer to the VMCPU.
5121 * @param rc Return code.
5122 * @param pCtx Pointer to the current guest CPU context (not updated).
5123 */
5124static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
5125{
5126 NOREF(pVM);
5127
5128 switch (VBOXSTRICTRC_VAL(rc))
5129 {
5130 case VERR_VMX_INVALID_VMXON_PTR:
5131 AssertFailed();
5132 break;
5133
5134 case VERR_VMX_UNABLE_TO_START_VM:
5135 case VERR_VMX_UNABLE_TO_RESUME_VM:
5136 {
5137 int rc2;
5138 RTCCUINTREG exitReason, instrError;
5139
5140 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
5141 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
5142 AssertRC(rc2);
5143 if (rc2 == VINF_SUCCESS)
5144 {
5145 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason,
5146 (uint32_t)instrError));
5147 Log(("Current stack %08x\n", &rc2));
5148
5149 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
5150 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
5151
5152#ifdef VBOX_STRICT
5153 RTGDTR gdtr;
5154 PCX86DESCHC pDesc;
5155 RTCCUINTREG val;
5156
5157 ASMGetGDTR(&gdtr);
5158
5159 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
5160 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
5161 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
5162 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
5163 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
5164 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
5165 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
5166 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
5167 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
5168 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
5169
5170 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
5171 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
5172 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
5173 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
5174 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
5175 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
5176
5177 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
5178 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
5179 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
5180 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
5181
5182 if (val < gdtr.cbGdt)
5183 {
5184 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5185 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
5186 }
5187
5188 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
5189 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
5190 if (val < gdtr.cbGdt)
5191 {
5192 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5193 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
5194 }
5195
5196 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
5197 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
5198 if (val < gdtr.cbGdt)
5199 {
5200 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5201 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
5202 }
5203
5204 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
5205 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
5206 if (val < gdtr.cbGdt)
5207 {
5208 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5209 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
5210 }
5211
5212 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
5213 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
5214 if (val < gdtr.cbGdt)
5215 {
5216 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5217 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
5218 }
5219
5220 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
5221 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
5222 if (val < gdtr.cbGdt)
5223 {
5224 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5225 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
5226 }
5227
5228 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
5229 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
5230 if (val < gdtr.cbGdt)
5231 {
5232 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5233 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
5234 }
5235
5236 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
5237 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
5238 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
5239 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
5240 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
5241 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
5242 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
5243 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
5244 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
5245 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
5246 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
5247 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
5248 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
5249 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
5250 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
5251 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
5252# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5253 if (VMX_IS_64BIT_HOST_MODE())
5254 {
5255 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
5256 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
5257 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5258 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5259 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5260 Log(("MSR_K8_KERNEL_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
5261 }
5262# endif
5263#endif /* VBOX_STRICT */
5264 }
5265 break;
5266 }
5267
5268 default:
5269 /* impossible */
5270 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
5271 break;
5272 }
5273}
5274
5275
5276#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5277/**
5278 * Prepares for and executes VMLAUNCH (64-bit guest mode).
5279 *
5280 * @returns VBox status code.
5281 * @param fResume Whether to vmlaunch/vmresume.
5282 * @param pCtx Pointer to the guest CPU context.
5283 * @param pCache Pointer to the VMCS cache.
5284 * @param pVM Pointer to the VM.
5285 * @param pVCpu Pointer to the VMCPU.
5286 */
5287DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5288{
5289 uint32_t aParam[6];
5290 PHMGLOBLCPUINFO pCpu;
5291 RTHCPHYS HCPhysCpuPage;
5292 int rc;
5293
5294 pCpu = HWACCMR0GetCurrentCpu();
5295 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5296
5297#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5298 pCache->uPos = 1;
5299 pCache->interPD = PGMGetInterPaeCR3(pVM);
5300 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
5301#endif
5302
5303#ifdef DEBUG
5304 pCache->TestIn.HCPhysCpuPage= 0;
5305 pCache->TestIn.HCPhysVMCS = 0;
5306 pCache->TestIn.pCache = 0;
5307 pCache->TestOut.HCPhysVMCS = 0;
5308 pCache->TestOut.pCache = 0;
5309 pCache->TestOut.pCtx = 0;
5310 pCache->TestOut.eflags = 0;
5311#endif
5312
5313 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5314 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5315 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */
5316 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. */
5317 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
5318 aParam[5] = 0;
5319
5320#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5321 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
5322 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
5323#endif
5324 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
5325
5326#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5327 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
5328 Assert(pCtx->dr[4] == 10);
5329 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
5330#endif
5331
5332#ifdef DEBUG
5333 AssertMsg(pCache->TestIn.HCPhysCpuPage== HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5334 AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hwaccm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5335 pVCpu->hwaccm.s.vmx.HCPhysVMCS));
5336 AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5337 pCache->TestOut.HCPhysVMCS));
5338 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5339 pCache->TestOut.pCache));
5340 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache),
5341 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
5342 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5343 pCache->TestOut.pCtx));
5344 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5345#endif
5346 return rc;
5347}
5348
5349
5350# ifdef VBOX_STRICT
5351static bool hmR0VmxIsValidReadField(uint32_t idxField)
5352{
5353 switch (idxField)
5354 {
5355 case VMX_VMCS64_GUEST_RIP:
5356 case VMX_VMCS64_GUEST_RSP:
5357 case VMX_VMCS_GUEST_RFLAGS:
5358 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
5359 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
5360 case VMX_VMCS64_GUEST_CR0:
5361 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
5362 case VMX_VMCS64_GUEST_CR4:
5363 case VMX_VMCS64_GUEST_DR7:
5364 case VMX_VMCS32_GUEST_SYSENTER_CS:
5365 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5366 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5367 case VMX_VMCS32_GUEST_GDTR_LIMIT:
5368 case VMX_VMCS64_GUEST_GDTR_BASE:
5369 case VMX_VMCS32_GUEST_IDTR_LIMIT:
5370 case VMX_VMCS64_GUEST_IDTR_BASE:
5371 case VMX_VMCS16_GUEST_FIELD_CS:
5372 case VMX_VMCS32_GUEST_CS_LIMIT:
5373 case VMX_VMCS64_GUEST_CS_BASE:
5374 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
5375 case VMX_VMCS16_GUEST_FIELD_DS:
5376 case VMX_VMCS32_GUEST_DS_LIMIT:
5377 case VMX_VMCS64_GUEST_DS_BASE:
5378 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
5379 case VMX_VMCS16_GUEST_FIELD_ES:
5380 case VMX_VMCS32_GUEST_ES_LIMIT:
5381 case VMX_VMCS64_GUEST_ES_BASE:
5382 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
5383 case VMX_VMCS16_GUEST_FIELD_FS:
5384 case VMX_VMCS32_GUEST_FS_LIMIT:
5385 case VMX_VMCS64_GUEST_FS_BASE:
5386 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
5387 case VMX_VMCS16_GUEST_FIELD_GS:
5388 case VMX_VMCS32_GUEST_GS_LIMIT:
5389 case VMX_VMCS64_GUEST_GS_BASE:
5390 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
5391 case VMX_VMCS16_GUEST_FIELD_SS:
5392 case VMX_VMCS32_GUEST_SS_LIMIT:
5393 case VMX_VMCS64_GUEST_SS_BASE:
5394 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
5395 case VMX_VMCS16_GUEST_FIELD_LDTR:
5396 case VMX_VMCS32_GUEST_LDTR_LIMIT:
5397 case VMX_VMCS64_GUEST_LDTR_BASE:
5398 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
5399 case VMX_VMCS16_GUEST_FIELD_TR:
5400 case VMX_VMCS32_GUEST_TR_LIMIT:
5401 case VMX_VMCS64_GUEST_TR_BASE:
5402 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
5403 case VMX_VMCS32_RO_EXIT_REASON:
5404 case VMX_VMCS32_RO_VM_INSTR_ERROR:
5405 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
5406 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
5407 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
5408 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
5409 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5410 case VMX_VMCS32_RO_IDT_INFO:
5411 case VMX_VMCS32_RO_IDT_ERRCODE:
5412 case VMX_VMCS64_GUEST_CR3:
5413 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
5414 return true;
5415 }
5416 return false;
5417}
5418
5419
5420static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5421{
5422 switch (idxField)
5423 {
5424 case VMX_VMCS64_GUEST_LDTR_BASE:
5425 case VMX_VMCS64_GUEST_TR_BASE:
5426 case VMX_VMCS64_GUEST_GDTR_BASE:
5427 case VMX_VMCS64_GUEST_IDTR_BASE:
5428 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5429 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5430 case VMX_VMCS64_GUEST_CR0:
5431 case VMX_VMCS64_GUEST_CR4:
5432 case VMX_VMCS64_GUEST_CR3:
5433 case VMX_VMCS64_GUEST_DR7:
5434 case VMX_VMCS64_GUEST_RIP:
5435 case VMX_VMCS64_GUEST_RSP:
5436 case VMX_VMCS64_GUEST_CS_BASE:
5437 case VMX_VMCS64_GUEST_DS_BASE:
5438 case VMX_VMCS64_GUEST_ES_BASE:
5439 case VMX_VMCS64_GUEST_FS_BASE:
5440 case VMX_VMCS64_GUEST_GS_BASE:
5441 case VMX_VMCS64_GUEST_SS_BASE:
5442 return true;
5443 }
5444 return false;
5445}
5446# endif /* VBOX_STRICT */
5447
5448
5449/**
5450 * Executes the specified handler in 64-bit mode.
5451 *
5452 * @returns VBox status code.
5453 * @param pVM Pointer to the VM.
5454 * @param pVCpu Pointer to the VMCPU.
5455 * @param pCtx Pointer to the guest CPU context.
5456 * @param pfnHandler Pointer to the RC handler function.
5457 * @param cbParam Number of parameters.
5458 * @param paParam Array of 32-bit parameters.
5459 */
5460VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
5461 uint32_t *paParam)
5462{
5463 int rc, rc2;
5464 PHMGLOBLCPUINFO pCpu;
5465 RTHCPHYS HCPhysCpuPage;
5466 RTHCUINTREG uOldEFlags;
5467
5468 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5469 Assert(pfnHandler);
5470 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
5471 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
5472
5473#ifdef VBOX_STRICT
5474 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries;i++)
5475 Assert(hmR0VmxIsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
5476
5477 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries;i++)
5478 Assert(hmR0VmxIsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
5479#endif
5480
5481 /* Disable interrupts. */
5482 uOldEFlags = ASMIntDisableFlags();
5483
5484#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5485 RTCPUID idHostCpu = RTMpCpuId();
5486 CPUMR0SetLApic(pVM, idHostCpu);
5487#endif
5488
5489 pCpu = HWACCMR0GetCurrentCpu();
5490 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5491
5492    /* Clear the VMCS, marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */
5493 VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5494
5495 /* Leave VMX Root Mode. */
5496 VMXDisable();
5497
5498 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5499
5500 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5501 CPUMSetHyperEIP(pVCpu, pfnHandler);
5502 for (int i=(int)cbParam-1;i>=0;i--)
5503 CPUMPushHyper(pVCpu, paParam[i]);
5504
5505 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5506
5507 /* Call switcher. */
5508 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5509 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5510
5511 /* Make sure the VMX instructions don't cause #UD faults. */
5512 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5513
5514 /* Enter VMX Root Mode */
5515 rc2 = VMXEnable(HCPhysCpuPage);
5516 if (RT_FAILURE(rc2))
5517 {
5518 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5519 ASMSetFlags(uOldEFlags);
5520 return VERR_VMX_VMXON_FAILED;
5521 }
5522
5523 rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5524 AssertRC(rc2);
5525 Assert(!(ASMGetFlags() & X86_EFL_IF));
5526 ASMSetFlags(uOldEFlags);
5527 return rc;
5528}
5529#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5530
5531
5532#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
5533/**
5534 * Executes VMWRITE.
5535 *
5536 * @returns VBox status code
5537 * @param pVCpu Pointer to the VMCPU.
5538 * @param idxField VMCS field index.
5539 * @param u64Val 16-, 32- or 64-bit value.
5540 */
5541VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5542{
5543 int rc;
5544 switch (idxField)
5545 {
5546 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
5547 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
5548 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
5549 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
5550 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
5551 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
5552 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
5553 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
5554 case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL:
5555 case VMX_VMCS_GUEST_LINK_PTR_FULL:
5556 case VMX_VMCS_GUEST_PDPTR0_FULL:
5557 case VMX_VMCS_GUEST_PDPTR1_FULL:
5558 case VMX_VMCS_GUEST_PDPTR2_FULL:
5559 case VMX_VMCS_GUEST_PDPTR3_FULL:
5560 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
5561 case VMX_VMCS_GUEST_EFER_FULL:
5562 case VMX_VMCS_CTRL_EPTP_FULL:
5563            /* These fields consist of two parts, which are both writable in 32-bit mode. */
5564 rc = VMXWriteVMCS32(idxField, u64Val);
5565 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5566 AssertRC(rc);
5567 return rc;
5568
5569 case VMX_VMCS64_GUEST_LDTR_BASE:
5570 case VMX_VMCS64_GUEST_TR_BASE:
5571 case VMX_VMCS64_GUEST_GDTR_BASE:
5572 case VMX_VMCS64_GUEST_IDTR_BASE:
5573 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5574 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5575 case VMX_VMCS64_GUEST_CR0:
5576 case VMX_VMCS64_GUEST_CR4:
5577 case VMX_VMCS64_GUEST_CR3:
5578 case VMX_VMCS64_GUEST_DR7:
5579 case VMX_VMCS64_GUEST_RIP:
5580 case VMX_VMCS64_GUEST_RSP:
5581 case VMX_VMCS64_GUEST_CS_BASE:
5582 case VMX_VMCS64_GUEST_DS_BASE:
5583 case VMX_VMCS64_GUEST_ES_BASE:
5584 case VMX_VMCS64_GUEST_FS_BASE:
5585 case VMX_VMCS64_GUEST_GS_BASE:
5586 case VMX_VMCS64_GUEST_SS_BASE:
5587            /* Queue a 64-bit value as we can't set it in 32-bit host mode. */
5588 if (u64Val >> 32ULL)
5589 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
5590 else
5591 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
5592
5593 return rc;
5594
5595 default:
5596 AssertMsgFailed(("Unexpected field %x\n", idxField));
5597 return VERR_INVALID_PARAMETER;
5598 }
5599}
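
/*
 * Illustrative usage sketch (hypothetical caller and values, not part of this
 * file): a "FULL" field is written as two 32-bit halves, while a 64-bit guest
 * base that does not fit in 32 bits is queued in the VMCS write cache for the
 * 64-bit switcher to apply.
 *
 *     rc = VMXWriteVMCS64Ex(pVCpu, VMX_VMCS_CTRL_TSC_OFFSET_FULL, UINT64_C(0x123456789));
 *     //  -> VMXWriteVMCS32(VMX_VMCS_CTRL_TSC_OFFSET_FULL,     0x23456789)
 *     //   + VMXWriteVMCS32(VMX_VMCS_CTRL_TSC_OFFSET_FULL + 1, 0x1)
 *
 *     rc = VMXWriteVMCS64Ex(pVCpu, VMX_VMCS64_GUEST_FS_BASE, UINT64_C(0xffffff0000000000));
 *     //  -> VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_FS_BASE, ...): queued, not written directly.
 */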
5600
5601
5602/**
5603 * Cache VMCS writes for performance reasons (Darwin) and for running 64-bit guests on 32-bit hosts.
5604 *
5605 * @param pVCpu Pointer to the VMCPU.
5606 * @param idxField VMCS field index.
5607 * @param u64Val 16-, 32- or 64-bit value.
5608 */
5609VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5610{
5611 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
5612
5613 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5614 ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5615
5616 /* Make sure there are no duplicates. */
5617 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
5618 {
5619 if (pCache->Write.aField[i] == idxField)
5620 {
5621 pCache->Write.aFieldVal[i] = u64Val;
5622 return VINF_SUCCESS;
5623 }
5624 }
5625
5626 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5627 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5628 pCache->Write.cValidEntries++;
5629 return VINF_SUCCESS;
5630}
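
/*
 * Illustrative usage sketch (hypothetical values): repeated writes to the same
 * field update the cached value in place, so Write.cValidEntries only grows
 * when a new field index is seen.
 *
 *     VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_FS_BASE, UINT64_C(0x100000000)); // appended as a new entry
 *     VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_FS_BASE, UINT64_C(0x200000000)); // overwrites the entry above
 */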
5631
5632#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
5633