VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@ 42914

Last change on this file since 42914 was 42894, checked in by vboxsync, 12 years ago

VMM: nits.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 221.6 KB
1/* $Id: HWVMXR0.cpp 42894 2012-08-21 08:00:10Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HWACCM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hwaccm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HWACCMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
55#if defined(RT_ARCH_AMD64)
56# define VMX_IS_64BIT_HOST_MODE() (true)
57#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
58# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
59#else
60# define VMX_IS_64BIT_HOST_MODE() (false)
61#endif
62
63
64/*******************************************************************************
65* Global Variables *
66*******************************************************************************/
67/* IO operation lookup arrays. */
68static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
69static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
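/* These arrays are indexed by the size-of-access field of the I/O-instruction exit
   qualification (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; index 2 is invalid, hence the
   zero entries). Example: a 16-bit OUT yields index 1, so the operand size is
   g_aIOSize[1] == 2 and the value written is (eax & g_aIOOpAnd[1]) == (eax & 0xffff). */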
70
71#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
72/** See HWACCMR0A.asm. */
73extern "C" uint32_t g_fVMXIs64bitHost;
74#endif
75
76
77/*******************************************************************************
78* Local Functions *
79*******************************************************************************/
80static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
81static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
82static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
83static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
84static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
85static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
86static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
87static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
88static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
89
90
91/**
 92 * Updates the error from the VMCS into HWACCMCPU's lasterror record.
93 *
94 * @param pVM Pointer to the VM.
95 * @param pVCpu Pointer to the VMCPU.
96 * @param rc The error code.
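 *
 * @remarks For VERR_VMX_GENERIC the VM-instruction error field is captured as well; the
 * values are defined in the Intel SDM (e.g. 7 = VM-entry with invalid control
 * field(s), 8 = VM-entry with invalid host-state field(s)).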
97 */
98static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
99{
100 if (rc == VERR_VMX_GENERIC)
101 {
102 RTCCUINTREG instrError;
103
104 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
105 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
106 }
107 pVM->hwaccm.s.lLastError = rc;
108}
109
110
111/**
112 * Sets up and activates VT-x on the current CPU.
113 *
114 * @returns VBox status code.
115 * @param pCpu Pointer to the CPU info struct.
116 * @param pVM Pointer to the VM. (can be NULL after a resume!!)
117 * @param pvCpuPage Pointer to the global CPU page.
118 * @param HCPhysCpuPage Physical address of the global CPU page.
119 */
120VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
121{
122 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
123 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
124
125 if (pVM)
126 {
127 /* Set revision dword at the beginning of the VMXON structure. */
128 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
129 }
130
131 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
132 * (which can have very bad consequences!!!)
133 */
134
135 if (ASMGetCR4() & X86_CR4_VMXE)
136 return VERR_VMX_IN_VMX_ROOT_MODE;
137
138 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */
139
140 /*
141 * Enter VM root mode.
142 */
143 int rc = VMXEnable(HCPhysCpuPage);
144 if (RT_FAILURE(rc))
145 {
146 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
147 return VERR_VMX_VMXON_FAILED;
148 }
149
150 /*
151 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
152 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
153 * each time while reusing a VPID after hitting the MaxASID limit once.
154 */
155 if ( pVM
156 && pVM->hwaccm.s.vmx.fVPID
157 && (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS))
158 {
159 hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
160 pCpu->fFlushASIDBeforeUse = false;
161 }
162 else
163 pCpu->fFlushASIDBeforeUse = true;
164
165 /*
166 * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}.
167 */
168 ++pCpu->cTLBFlushes;
169
170 return VINF_SUCCESS;
171}
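/* Rough calling order (simplified sketch; the actual wiring is in the generic HWACCMR0
   code): VMXR0EnableCpu() runs on each host CPU to execute VMXON, VMXR0InitVM() and
   VMXR0SetupVM() then prepare the per-VM/per-VCPU structures, and VMXR0DisableCpu()
   executes VMXOFF when hardware virtualization is torn down or the CPU is suspended. */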
172
173
174/**
175 * Deactivates VT-x on the current CPU.
176 *
177 * @returns VBox status code.
178 * @param pCpu Pointer to the CPU info struct.
179 * @param pvCpuPage Pointer to the global CPU page.
180 * @param HCPhysCpuPage Physical address of the global CPU page.
181 */
182VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
183{
184 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
185 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
186 NOREF(pCpu);
187
188 /* If we're somehow not in VMX root mode, then we shouldn't dare leaving it. */
189 if (!(ASMGetCR4() & X86_CR4_VMXE))
190 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
191
192 /* Leave VMX Root Mode. */
193 VMXDisable();
194
195 /* And clear the X86_CR4_VMXE bit. */
196 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
197 return VINF_SUCCESS;
198}
199
200
201/**
202 * Does Ring-0 per VM VT-x initialization.
203 *
204 * @returns VBox status code.
205 * @param pVM Pointer to the VM.
206 */
207VMMR0DECL(int) VMXR0InitVM(PVM pVM)
208{
209 int rc;
210
211#ifdef LOG_ENABLED
212 SUPR0Printf("VMXR0InitVM %p\n", pVM);
213#endif
214
215 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
216
217 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
218 {
219 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
220 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, PAGE_SIZE, true /* executable R0 mapping */);
221 AssertRC(rc);
222 if (RT_FAILURE(rc))
223 return rc;
224
225 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
226 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
227 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
228 }
229 else
230 {
231 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
232 pVM->hwaccm.s.vmx.pAPIC = 0;
233 pVM->hwaccm.s.vmx.pAPICPhys = 0;
234 }
235
236#ifdef VBOX_WITH_CRASHDUMP_MAGIC
237 {
238 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, PAGE_SIZE, true /* executable R0 mapping */);
239 AssertRC(rc);
240 if (RT_FAILURE(rc))
241 return rc;
242
243 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
244 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
245
246 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
247 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
248 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
249 }
250#endif
251
252 /* Allocate VMCSs for all guest CPUs. */
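 /* Per VCPU we allocate: a VMCS page, a virtual-APIC page for TPR caching, an MSR
    bitmap when the CPU supports it and, with VBOX_WITH_AUTO_MSR_LOAD_RESTORE, guest
    and host MSR load/store areas. All are page-sized, physically contiguous R0 pages. */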
253 for (VMCPUID i = 0; i < pVM->cCpus; i++)
254 {
255 PVMCPU pVCpu = &pVM->aCpus[i];
256
257 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
258
259 /* Allocate one page for the VM control structure (VMCS). */
260 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVMCS, PAGE_SIZE, true /* executable R0 mapping */);
261 AssertRC(rc);
262 if (RT_FAILURE(rc))
263 return rc;
264
265 pVCpu->hwaccm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVMCS);
266 pVCpu->hwaccm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVMCS, 0);
267 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pvVMCS);
268
269 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
270 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
271
272 /* Allocate one page for the virtual APIC page for TPR caching. */
273 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVAPIC, PAGE_SIZE, true /* executable R0 mapping */);
274 AssertRC(rc);
275 if (RT_FAILURE(rc))
276 return rc;
277
278 pVCpu->hwaccm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVAPIC);
279 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, 0);
280 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pbVAPIC);
281
282 /* Allocate the MSR bitmap if this feature is supported. */
283 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
284 {
285 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, PAGE_SIZE, true /* executable R0 mapping */);
286 AssertRC(rc);
287 if (RT_FAILURE(rc))
288 return rc;
289
290 pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap);
291 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
292 memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
293 }
294
295#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
296 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
297 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, PAGE_SIZE, true /* executable R0 mapping */);
298 AssertRC(rc);
299 if (RT_FAILURE(rc))
300 return rc;
301
302 pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR);
303 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0);
304 Assert(!(pVCpu->hwaccm.s.vmx.pGuestMSRPhys & 0xf));
305 memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE);
306
307 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
308 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, PAGE_SIZE, true /* executable R0 mapping */);
309 AssertRC(rc);
310 if (RT_FAILURE(rc))
311 return rc;
312
313 pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR);
314 pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0);
315 Assert(!(pVCpu->hwaccm.s.vmx.pHostMSRPhys & 0xf));
316 memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE);
317#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
318
319 /* Current guest paging mode. */
320 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
321
322#ifdef LOG_ENABLED
323 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pvVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.HCPhysVMCS);
324#endif
325 }
326
327 return VINF_SUCCESS;
328}
329
330
331/**
332 * Does Ring-0 per VM VT-x termination.
333 *
334 * @returns VBox status code.
335 * @param pVM Pointer to the VM.
336 */
337VMMR0DECL(int) VMXR0TermVM(PVM pVM)
338{
339 for (VMCPUID i = 0; i < pVM->cCpus; i++)
340 {
341 PVMCPU pVCpu = &pVM->aCpus[i];
342
343 if (pVCpu->hwaccm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ)
344 {
345 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVMCS, false);
346 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
347 pVCpu->hwaccm.s.vmx.pvVMCS = 0;
348 pVCpu->hwaccm.s.vmx.HCPhysVMCS = 0;
349 }
350 if (pVCpu->hwaccm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ)
351 {
352 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, false);
353 pVCpu->hwaccm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ;
354 pVCpu->hwaccm.s.vmx.pbVAPIC = 0;
355 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = 0;
356 }
357 if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
358 {
359 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false);
360 pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
361 pVCpu->hwaccm.s.vmx.pMSRBitmap = 0;
362 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0;
363 }
364#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
365 if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ)
366 {
367 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false);
368 pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ;
369 pVCpu->hwaccm.s.vmx.pHostMSR = 0;
370 pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0;
371 }
372 if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ)
373 {
374 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false);
375 pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ;
376 pVCpu->hwaccm.s.vmx.pGuestMSR = 0;
377 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0;
378 }
379#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
380 }
381 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
382 {
383 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
384 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
385 pVM->hwaccm.s.vmx.pAPIC = 0;
386 pVM->hwaccm.s.vmx.pAPICPhys = 0;
387 }
388#ifdef VBOX_WITH_CRASHDUMP_MAGIC
389 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
390 {
391 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
392 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
393 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
394 pVM->hwaccm.s.vmx.pScratch = 0;
395 pVM->hwaccm.s.vmx.pScratchPhys = 0;
396 }
397#endif
398 return VINF_SUCCESS;
399}
400
401
402/**
403 * Sets up VT-x for the specified VM.
404 *
405 * @returns VBox status code.
406 * @param pVM Pointer to the VM.
407 */
408VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
409{
410 int rc = VINF_SUCCESS;
411 uint32_t val;
412
413 AssertReturn(pVM, VERR_INVALID_PARAMETER);
414
 415 /* Always initialize these; see hwaccmR3InitFinalizeR0(). */
416 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NONE;
417 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NONE;
418
419 /* Determine optimal flush type for EPT. */
420 if (pVM->hwaccm.s.fNestedPaging)
421 {
422 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT)
423 {
424 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_SINGLE_CONTEXT)
425 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_SINGLE_CONTEXT;
426 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_ALL_CONTEXTS)
427 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_ALL_CONTEXTS;
428 else
429 {
430 /*
 431 * Should never really happen: EPT is supported but no suitable flush types are supported.
 432 * We cannot ignore EPT at this point as we've already set up Unrestricted Guest execution.
433 */
434 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED;
435 return VERR_VMX_GENERIC;
436 }
437 }
438 else
439 {
440 /*
 441 * Should never really happen: EPT is supported but the INVEPT instruction is not.
442 */
443 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED;
444 return VERR_VMX_GENERIC;
445 }
446 }
447
448 /* Determine optimal flush type for VPID. */
449 if (pVM->hwaccm.s.vmx.fVPID)
450 {
451 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID)
452 {
453 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
454 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_SINGLE_CONTEXT;
455 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS)
456 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_ALL_CONTEXTS;
457 else
458 {
459 /*
 460 * Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU.
 461 * We do not handle other flush type combinations, so ignore the VPID capabilities.
462 */
463 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
464 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
465 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT_RETAIN_GLOBALS)
466 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
467 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED;
468 pVM->hwaccm.s.vmx.fVPID = false;
469 }
470 }
471 else
472 {
473 /*
 474 * Should not really happen: VPID is supported but the INVVPID instruction is not.
 475 * Ignore the VPID capabilities as our code relies on INVVPID for selective flushing.
 476 */
 477 Log(("VMXR0SetupVM: VPID supported without INVVPID support. Ignoring VPID.\n"));
478 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED;
479 pVM->hwaccm.s.vmx.fVPID = false;
480 }
481 }
482
483 for (VMCPUID i = 0; i < pVM->cCpus; i++)
484 {
485 PVMCPU pVCpu = &pVM->aCpus[i];
486
487 AssertPtr(pVCpu->hwaccm.s.vmx.pvVMCS);
488
489 /* Set revision dword at the beginning of the VMCS structure. */
490 *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
491
492 /*
493 * Clear and activate the VMCS.
494 */
495 Log(("HCPhysVMCS = %RHp\n", pVCpu->hwaccm.s.vmx.HCPhysVMCS));
496 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
497 if (RT_FAILURE(rc))
498 goto vmx_end;
499
500 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
501 if (RT_FAILURE(rc))
502 goto vmx_end;
503
504 /*
505 * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
506 * Set required bits to one and zero according to the MSR capabilities.
507 */
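 /* The VMX capability MSRs report a pair of masks: the "disallowed0" bits must be set
    and only the "allowed1" bits may be set. The pattern used here and for the other
    control fields below is therefore val = (disallowed0 | desired bits) & allowed1. */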
508 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
509 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */
510 | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */
511
512 /*
513 * Enable the VMX preemption timer.
514 */
515 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
516 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
517 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
518
519 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
520 AssertRC(rc);
521
522 /*
523 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
524 * Set required bits to one and zero according to the MSR capabilities.
525 */
526 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
 527 /* Program which events cause VM-exits and which features we want to use. */
528 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
529 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
530 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
531 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
532 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
533 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
534 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside
535 the guest (host thinks the cpu load is high) */
536
537 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
538 if (!pVM->hwaccm.s.fNestedPaging)
539 {
540 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
541 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
542 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
543 }
544
545 /*
 546 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a VMLAUNCH failure
 547 * with an invalid control fields error (when combined with some other exit reasons).
548 */
549 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
550 {
 551 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
552 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
553 Assert(pVM->hwaccm.s.vmx.pAPIC);
554 }
555 else
556 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
557 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
558
559 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
560 {
561 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
562 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
563 }
564
565 /* We will use the secondary control if it's present. */
566 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
567
568 /* Mask away the bits that the CPU doesn't support */
569 /** @todo make sure they don't conflict with the above requirements. */
570 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
571 pVCpu->hwaccm.s.vmx.proc_ctls = val;
572
573 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
574 AssertRC(rc);
575
576 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
577 {
578 /*
579 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
580 * Set required bits to one and zero according to the MSR capabilities.
581 */
582 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
583 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
584
585 if (pVM->hwaccm.s.fNestedPaging)
586 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
587
588 if (pVM->hwaccm.s.vmx.fVPID)
589 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
590
591 if (pVM->hwaccm.s.fHasIoApic)
592 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
593
594 if (pVM->hwaccm.s.vmx.fUnrestrictedGuest)
595 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
596
597 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
598 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP;
599
600 /* Mask away the bits that the CPU doesn't support */
601 /** @todo make sure they don't conflict with the above requirements. */
602 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
603 pVCpu->hwaccm.s.vmx.proc_ctls2 = val;
604 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
605 AssertRC(rc);
606 }
607
608 /*
609 * VMX_VMCS_CTRL_CR3_TARGET_COUNT
610 * Set required bits to one and zero according to the MSR capabilities.
611 */
612 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
613 AssertRC(rc);
614
615 /*
 616 * Forward all exceptions except #NM & #PF to the guest.
 617 * We always need to check page faults since our shadow page table can be out of sync.
 618 * And we always lazily sync the FPU & XMM state.
619 */
620
621 /** @todo Possible optimization:
622 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
623 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
624 * registers ourselves of course.
625 *
626 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
627 */
628
629 /*
630 * Don't filter page faults, all of them should cause a world switch.
631 */
632 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
633 AssertRC(rc);
634 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
635 AssertRC(rc);
636
637 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
638 AssertRC(rc);
639 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
640 AssertRC(rc);
641 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
642 AssertRC(rc);
643
644 /*
645 * Set the MSR bitmap address.
646 */
647 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
648 {
649 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
650
651 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
652 AssertRC(rc);
653
654 /*
655 * Allow the guest to directly modify these MSRs; they are loaded/stored automatically
656 * using MSR-load/store areas in the VMCS.
657 */
658 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
659 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
660 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
661 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
662 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
663 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
664 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
665 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
666 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
667 if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
668 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_TSC_AUX, true, true);
669 }
670
671#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
672 /*
673 * Set the guest & host MSR load/store physical addresses.
674 */
675 Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
676 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
677 AssertRC(rc);
678 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
679 AssertRC(rc);
680 Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys);
681 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys);
682 AssertRC(rc);
683#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
684
685 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
686 AssertRC(rc);
687 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
688 AssertRC(rc);
689 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, 0);
690 AssertRC(rc);
691
692 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
693 {
694 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
695 /* Optional */
696 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
697 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.HCPhysVAPIC);
698
699 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
700 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
701
702 AssertRC(rc);
703 }
704
705 /* Set link pointer to -1. Not currently used. */
706 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
707 AssertRC(rc);
708
709 /*
 710 * Clear the VMCS, marking it inactive. This clears implementation-specific data and
 711 * writes the VMCS data back to memory.
712 */
713 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
714 AssertRC(rc);
715
716 /*
717 * Configure the VMCS read cache.
718 */
719 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
720
721 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
722 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
723 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
724 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
725 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
726 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
727 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
728 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
729 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
730 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
731 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
732 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
733 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
734 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
735 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
736 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
737
738 VMX_SETUP_SELREG(ES, pCache);
739 VMX_SETUP_SELREG(SS, pCache);
740 VMX_SETUP_SELREG(CS, pCache);
741 VMX_SETUP_SELREG(DS, pCache);
742 VMX_SETUP_SELREG(FS, pCache);
743 VMX_SETUP_SELREG(GS, pCache);
744 VMX_SETUP_SELREG(LDTR, pCache);
745 VMX_SETUP_SELREG(TR, pCache);
746
747 /*
748 * Status code VMCS reads.
749 */
750 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
751 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
752 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
753 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
754 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
755 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
756 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
757 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
758 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
759
760 if (pVM->hwaccm.s.fNestedPaging)
761 {
762 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
763 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
764 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
765 }
766 else
767 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
768 } /* for each VMCPU */
769
770 /*
771 * Setup the right TLB function based on CPU capabilities.
772 */
773 if (pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID)
774 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBBoth;
775 else if (pVM->hwaccm.s.fNestedPaging)
776 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBEPT;
777 else if (pVM->hwaccm.s.vmx.fVPID)
778 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBVPID;
779 else
780 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBDummy;
781
782vmx_end:
783 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
784 return rc;
785}
786
787
788/**
789 * Sets the permission bits for the specified MSR.
790 *
791 * @param pVCpu Pointer to the VMCPU.
792 * @param ulMSR The MSR value.
793 * @param fRead Whether reading is allowed.
794 * @param fWrite Whether writing is allowed.
795 */
796static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
797{
798 unsigned ulBit;
799 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap;
800
801 /*
802 * Layout:
803 * 0x000 - 0x3ff - Low MSR read bits
804 * 0x400 - 0x7ff - High MSR read bits
805 * 0x800 - 0xbff - Low MSR write bits
806 * 0xc00 - 0xfff - High MSR write bits
807 */
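 /* Worked example: for MSR_K8_LSTAR (0xC0000082) the high range is selected below, so
    ulBit = 0x82 and pMSRBitmap is advanced by 0x400; the read-permission bit then lives
    at byte offset 0x400 + (0x82 / 8) and the write-permission bit 0x800 bytes later. */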
808 if (ulMSR <= 0x00001FFF)
809 {
810 /* Pentium-compatible MSRs */
811 ulBit = ulMSR;
812 }
813 else if ( ulMSR >= 0xC0000000
814 && ulMSR <= 0xC0001FFF)
815 {
816 /* AMD Sixth Generation x86 Processor MSRs */
817 ulBit = (ulMSR - 0xC0000000);
818 pMSRBitmap += 0x400;
819 }
820 else
821 {
822 AssertFailed();
823 return;
824 }
825
826 Assert(ulBit <= 0x1fff);
827 if (fRead)
828 ASMBitClear(pMSRBitmap, ulBit);
829 else
830 ASMBitSet(pMSRBitmap, ulBit);
831
832 if (fWrite)
833 ASMBitClear(pMSRBitmap + 0x800, ulBit);
834 else
835 ASMBitSet(pMSRBitmap + 0x800, ulBit);
836}
837
838
839/**
840 * Injects an event (trap or external interrupt).
841 *
842 * @returns VBox status code. Note that it may return VINF_EM_RESET to
843 * indicate a triple fault when injecting X86_XCPT_DF.
844 *
845 * @param pVM Pointer to the VM.
846 * @param pVCpu Pointer to the VMCPU.
847 * @param pCtx Pointer to the guest CPU Context.
848 * @param intInfo VMX interrupt info.
849 * @param cbInstr Opcode length of faulting instruction.
850 * @param errCode Error code (optional).
851 */
852static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
853{
854 int rc;
855 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
856
857#ifdef VBOX_WITH_STATISTICS
858 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
859#endif
860
861#ifdef VBOX_STRICT
862 if (iGate == 0xE)
863 {
864 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate,
865 (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
866 }
867 else if (iGate < 0x20)
868 {
869 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip,
870 errCode));
871 }
872 else
873 {
874 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
875 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
876 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
877 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
878 || pCtx->eflags.u32 & X86_EFL_IF);
879 }
880#endif
881
882 if ( CPUMIsGuestInRealModeEx(pCtx)
883 && pVM->hwaccm.s.vmx.pRealModeTSS)
884 {
885 RTGCPHYS GCPhysHandler;
886 uint16_t offset, ip;
887 RTSEL sel;
888
889 /*
 890 * Injecting events doesn't work right with real mode emulation (we get a #GP
 891 * if we try to inject external hardware interrupts), so inject the interrupt
 892 * or trap directly instead.
893 *
894 * ASSUMES no access handlers for the bits we read or write below (should be safe).
895 */
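 /* Example (hypothetical values): injecting vector 0x08 with an IDTR base of 0 reads the
    handler's selector:offset pair from physical address 0x20, pushes FLAGS, CS and the
    return IP on the guest stack and transfers control to the handler with IF/TF/RF/AC
    cleared, mirroring what the CPU itself does for a real-mode interrupt. */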
896 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
897
898 /*
899 * Check if the interrupt handler is present.
900 */
901 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
902 {
903 Log(("IDT cbIdt violation\n"));
904 if (iGate != X86_XCPT_DF)
905 {
906 uint32_t intInfo2;
907
908 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
909 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
910 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
911 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
912
913 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
914 }
915 Log(("Triple fault -> reset the VM!\n"));
916 return VINF_EM_RESET;
917 }
918 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
919 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
920 || iGate == 4)
921 {
922 ip = pCtx->ip + cbInstr;
923 }
924 else
925 ip = pCtx->ip;
926
927 /*
928 * Read the selector:offset pair of the interrupt handler.
929 */
930 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
931 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
932 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
933
934 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
935
936 /*
937 * Construct the stack frame.
938 */
939 /** @todo Check stack limit. */
940 pCtx->sp -= 2;
941 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->eflags.u));
942 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
943 pCtx->sp -= 2;
944 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->cs.Sel));
945 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
946 pCtx->sp -= 2;
947 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss.Sel, pCtx->sp, ip));
948 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
949
950 /*
951 * Update the CPU state for executing the handler.
952 */
953 pCtx->rip = offset;
954 pCtx->cs.Sel = sel;
955 pCtx->cs.u64Base = sel << 4;
956 pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
957
958 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
959 return VINF_SUCCESS;
960 }
961
962 /*
963 * Set event injection state.
964 */
965 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
966 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
967 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
968
969 AssertRC(rc);
970 return rc;
971}
972
973
974/**
975 * Checks for pending guest interrupts and injects them.
976 *
977 * @returns VBox status code.
978 * @param pVM Pointer to the VM.
979 * @param pVCpu Pointer to the VMCPU.
980 * @param pCtx Pointer to the guest CPU context.
981 */
982static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
983{
984 int rc;
985
986 /*
987 * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely).
988 */
989 if (pVCpu->hwaccm.s.Event.fPending)
990 {
991 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo,
992 pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
993 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
994 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
995 AssertRC(rc);
996
997 pVCpu->hwaccm.s.Event.fPending = false;
998 return VINF_SUCCESS;
999 }
1000
1001 /*
1002 * If an active trap is already pending, we must forward it first!
1003 */
1004 if (!TRPMHasTrap(pVCpu))
1005 {
1006 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
1007 {
1008 RTGCUINTPTR intInfo;
1009
1010 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
1011
1012 intInfo = X86_XCPT_NMI;
1013 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1014 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1015
1016 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
1017 AssertRC(rc);
1018
1019 return VINF_SUCCESS;
1020 }
1021
1022 /** @todo SMI interrupts. */
1023
1024 /*
1025 * When external interrupts are pending, we should exit the VM when IF is set.
1026 */
1027 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
1028 {
1029 if (!(pCtx->eflags.u32 & X86_EFL_IF))
1030 {
1031 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
1032 {
1033 LogFlow(("Enable irq window exit!\n"));
1034 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
1035 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1036 AssertRC(rc);
1037 }
1038 /* else nothing to do but wait */
1039 }
1040 else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1041 {
1042 uint8_t u8Interrupt;
1043
1044 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
1045 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu,
1046 u8Interrupt, u8Interrupt, rc, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
1047 if (RT_SUCCESS(rc))
1048 {
1049 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
1050 AssertRC(rc);
1051 }
1052 else
1053 {
1054 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
1055 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
1056 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
1057 /* Just continue */
1058 }
1059 }
1060 else
1061 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
1062 }
1063 }
1064
1065#ifdef VBOX_STRICT
1066 if (TRPMHasTrap(pVCpu))
1067 {
1068 uint8_t u8Vector;
1069 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
1070 AssertRC(rc);
1071 }
1072#endif
1073
1074 if ( (pCtx->eflags.u32 & X86_EFL_IF)
1075 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1076 && TRPMHasTrap(pVCpu)
1077 )
1078 {
1079 uint8_t u8Vector;
1080 TRPMEVENT enmType;
1081 RTGCUINTPTR intInfo;
1082 RTGCUINT errCode;
1083
1084 /*
1085 * If a new event is pending, dispatch it now.
1086 */
1087 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
1088 AssertRC(rc);
1089 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
1090 Assert(enmType != TRPM_SOFTWARE_INT);
1091
1092 /*
1093 * Clear the pending trap.
1094 */
1095 rc = TRPMResetTrap(pVCpu);
1096 AssertRC(rc);
1097
1098 intInfo = u8Vector;
1099 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1100
1101 if (enmType == TRPM_TRAP)
1102 {
1103 switch (u8Vector)
1104 {
1105 case X86_XCPT_DF:
1106 case X86_XCPT_TS:
1107 case X86_XCPT_NP:
1108 case X86_XCPT_SS:
1109 case X86_XCPT_GP:
1110 case X86_XCPT_PF:
1111 case X86_XCPT_AC:
1112 {
1113 /* Valid error codes. */
1114 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1115 break;
1116 }
1117
1118 default:
1119 break;
1120 }
1121
1122 if ( u8Vector == X86_XCPT_BP
1123 || u8Vector == X86_XCPT_OF)
1124 {
1125 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1126 }
1127 else
1128 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1129 }
1130 else
1131 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
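 /* Example (field layout per the Intel SDM: bits 7:0 vector, 10:8 type, bit 11
    error-code valid, bit 31 valid): a #GP (vector 13) hardware exception with an
    error code yields intInfo = 13 | (3 << 8) | RT_BIT(11) | RT_BIT(31) = 0x80000B0D. */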
1132
1133 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
1134 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1135 AssertRC(rc);
1136 } /* if (interrupts can be dispatched) */
1137
1138 return VINF_SUCCESS;
1139}
1140
1141
1142/**
1143 * Save the host state into the VMCS.
1144 *
1145 * @returns VBox status code.
1146 * @param pVM Pointer to the VM.
1147 * @param pVCpu Pointer to the VMCPU.
1148 */
1149VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1150{
1151 int rc = VINF_SUCCESS;
1152 NOREF(pVM);
1153
1154 /*
1155 * Host CPU Context.
1156 */
1157 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
1158 {
1159 RTIDTR idtr;
1160 RTGDTR gdtr;
1161 RTSEL SelTR;
1162 PCX86DESCHC pDesc;
1163 uintptr_t trBase;
1164 RTSEL cs;
1165 RTSEL ss;
1166 uint64_t cr3;
1167
1168 /*
1169 * Control registers.
1170 */
1171 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1172 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1173#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1174 if (VMX_IS_64BIT_HOST_MODE())
1175 {
1176 cr3 = hwaccmR0Get64bitCR3();
1177 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1178 }
1179 else
1180#endif
1181 {
1182 cr3 = ASMGetCR3();
1183 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1184 }
1185 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1186 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1187 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1188 AssertRC(rc);
1189
1190 /*
1191 * Selector registers.
1192 */
1193#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1194 if (VMX_IS_64BIT_HOST_MODE())
1195 {
1196 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1197 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1198 }
1199 else
1200 {
1201 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1202 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1203 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1204 }
1205#else
1206 cs = ASMGetCS();
1207 ss = ASMGetSS();
1208#endif
1209 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1210 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1211 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1212 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1213 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1214 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1215#if HC_ARCH_BITS == 32
1216 if (!VMX_IS_64BIT_HOST_MODE())
1217 {
1218 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1219 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1220 }
1221#endif
1222 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1223 SelTR = ASMGetTR();
1224 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1225 AssertRC(rc);
1226 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1227 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1228 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1229 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1230 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1231 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1232 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1233
1234 /*
1235 * GDTR & IDTR.
1236 */
1237#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1238 if (VMX_IS_64BIT_HOST_MODE())
1239 {
1240 X86XDTR64 gdtr64, idtr64;
1241 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
1242 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
 1243 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1244 AssertRC(rc);
1245 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1246 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1247 gdtr.cbGdt = gdtr64.cb;
1248 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1249 }
1250 else
1251#endif
1252 {
1253 ASMGetGDTR(&gdtr);
1254 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1255 ASMGetIDTR(&idtr);
1256 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1257 AssertRC(rc);
1258 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1259 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1260 }
1261
1262 /*
1263 * Save the base address of the TR selector.
1264 */
1265 if (SelTR > gdtr.cbGdt)
1266 {
1267 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1268 return VERR_VMX_INVALID_HOST_STATE;
1269 }
1270
1271 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1272#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1273 if (VMX_IS_64BIT_HOST_MODE())
1274 {
1275 uint64_t trBase64 = X86DESC64_BASE((PX86DESC64)pDesc);
1276 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1277 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1278 AssertRC(rc);
1279 }
1280 else
1281#endif
1282 {
1283#if HC_ARCH_BITS == 64
1284 trBase = X86DESC64_BASE(pDesc);
1285#else
1286 trBase = X86DESC_BASE(pDesc);
1287#endif
1288 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1289 AssertRC(rc);
1290 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1291 }
1292
1293 /*
1294 * FS base and GS base.
1295 */
1296#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1297 if (VMX_IS_64BIT_HOST_MODE())
1298 {
1299 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1300 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1301 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1302 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1303 }
1304#endif
1305 AssertRC(rc);
1306
1307 /*
1308 * Sysenter MSRs.
1309 */
1310 /** @todo expensive!! */
1311 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1312 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1313#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1314 if (VMX_IS_64BIT_HOST_MODE())
1315 {
1316 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1317 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1318 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1319 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1320 }
1321 else
1322 {
1323 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1324 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1325 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1326 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1327 }
1328#elif HC_ARCH_BITS == 32
1329 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1330 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1331 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1332 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1333#else
1334 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1335 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1336 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1337 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1338#endif
1339 AssertRC(rc);
1340
1341
1342#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1343 /*
1344 * Store all host MSRs in the VM-Exit load area, so they will be reloaded after
1345 * the world switch back to the host.
1346 */
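 /* Each entry follows the VMX MSR-area format: a 32-bit MSR index, 32 reserved bits
    and the 64-bit value. idxMsr counts the entries written and ends up in
    VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT below. */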
1347 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR;
1348 unsigned idxMsr = 0;
1349
1350 uint32_t u32HostExtFeatures = ASMCpuId_EDX(0x80000001);
1351 if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
1352 {
1353 if (u32HostExtFeatures & X86_CPUID_EXT_FEATURE_EDX_SYSCALL)
1354 {
1355 pMsr->u32IndexMSR = MSR_K6_STAR;
1356 pMsr->u32Reserved = 0;
1357 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1358 pMsr++; idxMsr++;
1359 }
1360
1361#if 0
1362 pMsr->u32IndexMSR = MSR_K6_EFER;
1363 pMsr->u32Reserved = 0;
1364# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1365 if (CPUMIsGuestInLongMode(pVCpu))
1366 {
1367 /* Must match the EFER value in our 64 bits switcher. */
1368 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1369 }
1370 else
1371# endif
1372 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1373 pMsr++; idxMsr++;
1374#endif
1375 }
1376
1377# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1378 if (VMX_IS_64BIT_HOST_MODE())
1379 {
1380 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1381 pMsr->u32Reserved = 0;
1382 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1383 pMsr++; idxMsr++;
1384 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1385 pMsr->u32Reserved = 0;
1386 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1387 pMsr++; idxMsr++;
1388
1389 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
1390#if 0
1391 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1392 pMsr->u32Reserved = 0;
1393 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1394 pMsr++; idxMsr++;
1395#endif
1396 }
1397# endif
1398
1399 if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
1400 {
1401 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
1402 pMsr->u32Reserved = 0;
1403 pMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX);
1404 pMsr++; idxMsr++;
1405 }
1406
1407 /** @todo r=ramshankar: check IA32_VMX_MISC bits 27:25 for valid idxMsr
1408 * range. */
1409 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1410 AssertRC(rc);
1411#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1412
1413 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1414 }
1415 return rc;
1416}
1417
1418
1419/**
1420 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1421 * guest operates in PAE mode.
1422 *
1423 * @returns VBox status code.
1424 * @param pVCpu Pointer to the VMCPU.
1425 * @param pCtx Pointer to the guest CPU context.
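 *
 * @remarks With EPT, a PAE guest's four PDPTEs are loaded from the VMCS PDPTE0..3
 * fields at VM-entry rather than fetched from guest memory, which is why
 * they are pushed into the VMCS here.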
1426 */
1427static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1428{
1429 if (CPUMIsGuestInPAEModeEx(pCtx))
1430 {
1431 X86PDPE aPdpes[4];
1432 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1433 AssertRCReturn(rc, rc);
1434
1435 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1436 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1437 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1438 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1439 }
1440 return VINF_SUCCESS;
1441}
1442
1443
1444/**
1445 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1446 * guest operates in PAE mode.
1447 *
1448 * @returns VBox status code.
1449 * @param pVCpu Pointer to the VM CPU.
1450 * @param pCtx Pointer to the guest CPU context.
1451 *
1452 * @remarks Tell PGM about CR3 changes before calling this helper.
1453 */
1454static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1455{
1456 if (CPUMIsGuestInPAEModeEx(pCtx))
1457 {
1458 int rc;
1459 X86PDPE aPdpes[4];
1460 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1461 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1462 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1463 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1464
1465 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1466 AssertRCReturn(rc, rc);
1467 }
1468 return VINF_SUCCESS;
1469}
1470
1471
1472/**
1473 * Update the exception bitmap according to the current CPU state.
1474 *
1475 * @param pVM Pointer to the VM.
1476 * @param pVCpu Pointer to the VMCPU.
1477 * @param pCtx Pointer to the guest CPU context.
1478 */
1479static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1480{
1481 uint32_t u32TrapMask;
1482 Assert(pCtx);
1483
1484 /*
1485 * Set up a mask for intercepting traps.
1486 */
1487 /** @todo Do we really need to always intercept #DB? */
1488 u32TrapMask = RT_BIT(X86_XCPT_DB)
1489 | RT_BIT(X86_XCPT_NM)
1490#ifdef VBOX_ALWAYS_TRAP_PF
1491 | RT_BIT(X86_XCPT_PF)
1492#endif
1493#ifdef VBOX_STRICT
1494 | RT_BIT(X86_XCPT_BP)
1495 | RT_BIT(X86_XCPT_DB)
1496 | RT_BIT(X86_XCPT_DE)
1497 | RT_BIT(X86_XCPT_NM)
1498 | RT_BIT(X86_XCPT_UD)
1499 | RT_BIT(X86_XCPT_NP)
1500 | RT_BIT(X86_XCPT_SS)
1501 | RT_BIT(X86_XCPT_GP)
1502 | RT_BIT(X86_XCPT_MF)
1503#endif
1504 ;
1505
1506 /*
1507 * Without nested paging, #PF must be intercepted to implement shadow paging.
1508 */
1509 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1510 if (!pVM->hwaccm.s.fNestedPaging)
1511 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1512
1513 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
1514 if (!(pCtx->cr0 & X86_CR0_NE))
1515 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1516
1517#ifdef VBOX_STRICT
1518 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1519#endif
1520
1521 /*
1522 * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise).
1523 */
1524 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1525 if ( CPUMIsGuestInRealModeEx(pCtx)
1526 && pVM->hwaccm.s.vmx.pRealModeTSS)
1527 {
1528 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1529 | RT_BIT(X86_XCPT_DB)
1530 | RT_BIT(X86_XCPT_NMI)
1531 | RT_BIT(X86_XCPT_BP)
1532 | RT_BIT(X86_XCPT_OF)
1533 | RT_BIT(X86_XCPT_BR)
1534 | RT_BIT(X86_XCPT_UD)
1535 | RT_BIT(X86_XCPT_DF)
1536 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1537 | RT_BIT(X86_XCPT_TS)
1538 | RT_BIT(X86_XCPT_NP)
1539 | RT_BIT(X86_XCPT_SS)
1540 | RT_BIT(X86_XCPT_GP)
1541 | RT_BIT(X86_XCPT_MF)
1542 | RT_BIT(X86_XCPT_AC)
1543 | RT_BIT(X86_XCPT_MC)
1544 | RT_BIT(X86_XCPT_XF)
1545 ;
1546 }
1547
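 /* Each bit i in the exception bitmap forces a VM-exit for vector i. For instance, for a
    protected-mode guest in a non-strict build without nested paging and with CR0.NE set,
    the mask is RT_BIT(X86_XCPT_DB) | RT_BIT(X86_XCPT_NM) | RT_BIT(X86_XCPT_PF) == 0x4082. */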
1548 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1549 AssertRC(rc);
1550}
1551
1552
1553/**
1554 * Loads a minimal guest state.
1555 *
1556 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1557 *
1558 * @param pVM Pointer to the VM.
1559 * @param pVCpu Pointer to the VMCPU.
1560 * @param pCtx Pointer to the guest CPU context.
1561 */
1562VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1563{
1564 int rc;
1565 X86EFLAGS eflags;
1566
1567 Assert(!(pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_ALL_GUEST));
1568
1569 /*
1570 * Load EIP, ESP and EFLAGS.
1571 */
1572 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1573 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1574 AssertRC(rc);
1575
1576 /*
1577 * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1.
1578 */
1579 eflags = pCtx->eflags;
1580 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1581 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1582
1583 /*
1584 * Check if real mode emulation using v86 mode.
1585 */
1586 if ( CPUMIsGuestInRealModeEx(pCtx)
1587 && pVM->hwaccm.s.vmx.pRealModeTSS)
1588 {
1589 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1590
1591 eflags.Bits.u1VM = 1;
1592 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1593 }
1594 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1595 AssertRC(rc);
1596}
1597
1598
1599/**
1600 * Loads the guest state.
1601 *
1602 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1603 *
1604 * @returns VBox status code.
1605 * @param pVM Pointer to the VM.
1606 * @param pVCpu Pointer to the VMCPU.
1607 * @param pCtx Pointer to the guest CPU context.
1608 */
1609VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1610{
1611 int rc = VINF_SUCCESS;
1612 RTGCUINTPTR val;
1613
1614 /*
1615 * VMX_VMCS_CTRL_ENTRY_CONTROLS
1616 * Set required bits to one and zero according to the MSR capabilities.
1617 */
1618 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1619
1620 /*
1621 * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR).
1622     * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs.
1623 */
1624 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1625
1626 if (CPUMIsGuestInLongModeEx(pCtx))
1627 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1628 /* else Must be zero when AMD64 is not available. */
1629
1630 /*
1631 * Mask away the bits that the CPU doesn't support.
1632 */
1633 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1634 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1635 AssertRC(rc);
1636
1637 /*
1638 * VMX_VMCS_CTRL_EXIT_CONTROLS
1639 * Set required bits to one and zero according to the MSR capabilities.
1640 */
1641 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1642
1643 /*
1644 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR)
1645     * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs.
1646 */
1647 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1648
1649#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1650 if (VMX_IS_64BIT_HOST_MODE())
1651 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1652 /* else Must be zero when AMD64 is not available. */
1653#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1654 if (CPUMIsGuestInLongModeEx(pCtx))
1655 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1656 else
1657 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1658#endif
1659 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1660
1661 /*
1662 * Don't acknowledge external interrupts on VM-exit.
1663 */
1664 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1665 AssertRC(rc);
1666
1667 /*
1668 * Guest CPU context: ES, CS, SS, DS, FS, GS.
1669 */
1670 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1671 {
1672 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1673 {
1674 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1675 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1676 {
1677 /*
1678 * Correct weird requirements for switching to protected mode.
1679 */
1680 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1681 && enmGuestMode >= PGMMODE_PROTECTED)
1682 {
1683#ifdef VBOX_WITH_REM
1684 /*
1685                     * Flush the recompiler code cache, as the guest may well rewrite code that it
1686                     * will later execute in real mode (OpenBSD 4.0 is one such example).
1687 */
1688 REMFlushTBs(pVM);
1689#endif
1690
1691 /*
1692 * DPL of all hidden selector registers must match the current CPL (0).
1693 */
1694 pCtx->cs.Attr.n.u2Dpl = 0;
1695 pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1696
1697 pCtx->ds.Attr.n.u2Dpl = 0;
1698 pCtx->es.Attr.n.u2Dpl = 0;
1699 pCtx->fs.Attr.n.u2Dpl = 0;
1700 pCtx->gs.Attr.n.u2Dpl = 0;
1701 pCtx->ss.Attr.n.u2Dpl = 0;
1702 }
1703 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1704 }
1705 else if ( CPUMIsGuestInRealModeEx(pCtx)
1706 && pCtx->cs.u64Base == 0xffff0000)
1707 {
1708 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1709 pCtx->cs.u64Base = 0xf0000;
1710 pCtx->cs.Sel = 0xf000;
1711 }
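            /* Worked example (illustrative): after a reset the guest has CS.Sel=0xf000 with
               CS.base=0xffff0000; the VT-x guest-state checks for v86 mode require
               base == selector << 4, hence the adjustment to 0xf000 << 4 = 0xf0000 above. */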
1712 }
1713
1714 VMX_WRITE_SELREG(ES, es);
1715 AssertRC(rc);
1716
1717 VMX_WRITE_SELREG(CS, cs);
1718 AssertRC(rc);
1719
1720 VMX_WRITE_SELREG(SS, ss);
1721 AssertRC(rc);
1722
1723 VMX_WRITE_SELREG(DS, ds);
1724 AssertRC(rc);
1725
1726 VMX_WRITE_SELREG(FS, fs);
1727 AssertRC(rc);
1728
1729 VMX_WRITE_SELREG(GS, gs);
1730 AssertRC(rc);
1731 }
1732
1733 /*
1734 * Guest CPU context: LDTR.
1735 */
1736 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1737 {
1738 if (pCtx->ldtr.Sel == 0)
1739 {
1740 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1741 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1742 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1743 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1744 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1745 }
1746 else
1747 {
1748 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr.Sel);
1749 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit);
1750 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtr.u64Base);
1751 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtr.Attr.u);
1752 }
1753 AssertRC(rc);
1754 }
1755
1756 /*
1757 * Guest CPU context: TR.
1758 */
1759 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1760 {
1761 /*
1762 * Real mode emulation using v86 mode with CR4.VME (interrupt redirection
1763 * using the int bitmap in the TSS).
1764 */
1765 if ( CPUMIsGuestInRealModeEx(pCtx)
1766 && pVM->hwaccm.s.vmx.pRealModeTSS)
1767 {
1768 RTGCPHYS GCPhys;
1769
1770 /* We convert it here every time as PCI regions could be reconfigured. */
1771 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1772 AssertRC(rc);
1773
1774 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1775 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1776 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1777
1778 X86DESCATTR attr;
1779
1780 attr.u = 0;
1781 attr.n.u1Present = 1;
1782 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1783 val = attr.u;
1784 }
1785 else
1786 {
1787 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr.Sel);
1788 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->tr.u32Limit);
1789 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->tr.u64Base);
1790
1791 val = pCtx->tr.Attr.u;
1792
1793 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
1794 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1795 {
1796 if (val & 0xf)
1797 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1798 else
1799 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1800 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1801 }
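            /* Worked example (illustrative): an available 32-bit TSS type (0x9) picks up the
               busy bit (bit 1 of the type) and becomes 0xB; a value whose type nibble is zero
               gets X86_SEL_TYPE_SYS_386_TSS_BUSY filled in instead. */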
1802 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY,
1803 ("%#x\n", val));
1804 }
1805 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1806 AssertRC(rc);
1807 }
1808
1809 /*
1810 * Guest CPU context: GDTR.
1811 */
1812 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1813 {
1814 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1815 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1816 AssertRC(rc);
1817 }
1818
1819 /*
1820 * Guest CPU context: IDTR.
1821 */
1822 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1823 {
1824 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1825 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1826 AssertRC(rc);
1827 }
1828
1829 /*
1830 * Sysenter MSRs.
1831 */
1832 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
1833 {
1834 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1835 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1836 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1837 AssertRC(rc);
1838 }
1839
1840 /*
1841 * Guest CPU context: Control registers.
1842 */
1843 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1844 {
1845 val = pCtx->cr0;
1846 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1847 Log2(("Guest CR0-shadow %08x\n", val));
1848 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1849 {
1850 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1851 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1852 }
1853 else
1854 {
1855 /** @todo check if we support the old style mess correctly. */
1856 if (!(val & X86_CR0_NE))
1857 Log(("Forcing X86_CR0_NE!!!\n"));
1858
1859 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1860 }
1861 /* Protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1862 if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1863 val |= X86_CR0_PE | X86_CR0_PG;
1864
1865 if (pVM->hwaccm.s.fNestedPaging)
1866 {
1867 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1868 {
1869 /* Disable CR3 read/write monitoring as we don't need it for EPT. */
1870 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1871 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1872 }
1873 else
1874 {
1875                    /* Re-enable CR3 read/write monitoring as our identity-mapped page table is active. */
1876 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1877 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1878 }
1879 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1880 AssertRC(rc);
1881 }
1882 else
1883 {
1884 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1885 val |= X86_CR0_WP;
1886 }
1887
1888 /* Always enable caching. */
1889 val &= ~(X86_CR0_CD|X86_CR0_NW);
1890
1891 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1892 Log2(("Guest CR0 %08x\n", val));
1893
1894 /*
1895         * CR0 flags owned by the host; if the guest attempts to change them, the VM will exit.
1896 */
1897 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1898 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1899 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1900 | X86_CR0_CD /* Bit not restored during VM-exit! */
1901 | X86_CR0_NW /* Bit not restored during VM-exit! */
1902 | X86_CR0_NE;
1903
1904 /*
1905         * When the guest's FPU state is active, we no longer care about the FPU-related bits.
1906 */
1907 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1908 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1909
1910 pVCpu->hwaccm.s.vmx.cr0_mask = val;
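        /* Illustrative arithmetic: with the guest FPU state not yet active the mask works out to
           PE|MP|TS|ET|NE|WP|NW|CD|PG = 0xe001003b, otherwise 0xe0010021. Guest writes to any of
           these bits cause a VM-exit; guest reads of them return the CR0 read shadow set above. */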
1911
1912 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1913 Log2(("Guest CR0-mask %08x\n", val));
1914 AssertRC(rc);
1915 }
1916
1917 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1918 {
1919 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1920 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1921 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1922 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1923
1924 if (!pVM->hwaccm.s.fNestedPaging)
1925 {
1926 switch (pVCpu->hwaccm.s.enmShadowMode)
1927 {
1928 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1929 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1930 case PGMMODE_32_BIT: /* 32-bit paging. */
1931 val &= ~X86_CR4_PAE;
1932 break;
1933
1934 case PGMMODE_PAE: /* PAE paging. */
1935 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1936 /** Must use PAE paging as we could use physical memory > 4 GB */
1937 val |= X86_CR4_PAE;
1938 break;
1939
1940 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1941 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1942#ifdef VBOX_ENABLE_64_BITS_GUESTS
1943 break;
1944#else
1945 AssertFailed();
1946 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1947#endif
1948 default: /* shut up gcc */
1949 AssertFailed();
1950 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1951 }
1952 }
1953 else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1954 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1955 {
1956 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1957 val |= X86_CR4_PSE;
1958            /* Our identity mapping is a 32-bit page directory. */
1959 val &= ~X86_CR4_PAE;
1960 }
1961
1962 /*
1963 * Turn off VME if we're in emulated real mode.
1964 */
1965 if ( CPUMIsGuestInRealModeEx(pCtx)
1966 && pVM->hwaccm.s.vmx.pRealModeTSS)
1967 {
1968 val &= ~X86_CR4_VME;
1969 }
1970
1971 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1972 Log2(("Guest CR4 %08x\n", val));
1973
1974 /*
1975         * CR4 flags owned by the host; if the guest attempts to change them, the VM will exit.
1976 */
1977 val = 0
1978 | X86_CR4_VME
1979 | X86_CR4_PAE
1980 | X86_CR4_PGE
1981 | X86_CR4_PSE
1982 | X86_CR4_VMXE;
1983 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1984
1985 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1986 Log2(("Guest CR4-mask %08x\n", val));
1987 AssertRC(rc);
1988 }
1989
1990#if 0
1991    /* Enable single stepping if requested and the CPU supports it. */
1992 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
1993 if (DBGFIsStepping(pVCpu))
1994 {
1995 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
1996 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1997 AssertRC(rc);
1998 }
1999#endif
2000
2001 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
2002 {
2003 if (pVM->hwaccm.s.fNestedPaging)
2004 {
2005 Assert(PGMGetHyperCR3(pVCpu));
2006 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
2007
2008 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
2009 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
2010 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
2011 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
2012
2013 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
2014 AssertRC(rc);
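            /* Illustrative encoding (assuming VMX_EPT_MEMTYPE_WB is the architectural value 6 and
               VMX_EPT_PAGE_WALK_LENGTH_DEFAULT encodes a 4-level walk as 3 in bits 5:3): an EPT
               PML4 table at physical 0x12345000 yields an EPTP of 0x1234501e. */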
2015
2016 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
2017 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
2018 {
2019 RTGCPHYS GCPhys;
2020
2021 /* We convert it here every time as PCI regions could be reconfigured. */
2022 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
2023 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable));
2024
2025 /*
2026 * We use our identity mapping page table here as we need to map guest virtual to
2027 * guest physical addresses; EPT will take care of the translation to host physical addresses.
2028 */
2029 val = GCPhys;
2030 }
2031 else
2032 {
2033 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
2034 val = pCtx->cr3;
2035 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
2036 AssertRCReturn(rc, rc);
2037 }
2038 }
2039 else
2040 {
2041 val = PGMGetHyperCR3(pVCpu);
2042 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
2043 }
2044
2045 /* Save our shadow CR3 register. */
2046 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
2047 AssertRC(rc);
2048 }
2049
2050 /*
2051 * Guest CPU context: Debug registers.
2052 */
2053 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
2054 {
2055 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
2056 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
2057
2058 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2059 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2060 pCtx->dr[7] |= 0x400; /* must be one */
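        /* Worked example (illustrative): a guest DR7 of 0 leaves this block as 0x400, since
           bit 10 is the only must-be-one bit, before being written to the VMCS below. */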
2061
2062 /* Resync DR7 */
2063 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
2064 AssertRC(rc);
2065
2066#ifdef DEBUG
2067 /* Sync the hypervisor debug state now if any breakpoint is armed. */
2068 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
2069 && !CPUMIsHyperDebugStateActive(pVCpu)
2070 && !DBGFIsStepping(pVCpu))
2071 {
2072 /* Save the host and load the hypervisor debug state. */
2073 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2074 AssertRC(rc);
2075
2076 /* DRx intercepts remain enabled. */
2077
2078 /* Override dr7 with the hypervisor value. */
2079 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
2080 AssertRC(rc);
2081 }
2082 else
2083#endif
2084 /* Sync the debug state now if any breakpoint is armed. */
2085 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
2086 && !CPUMIsGuestDebugStateActive(pVCpu)
2087 && !DBGFIsStepping(pVCpu))
2088 {
2089 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
2090
2091 /* Disable DRx move intercepts. */
2092 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2093 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2094 AssertRC(rc);
2095
2096 /* Save the host and load the guest debug state. */
2097 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2098 AssertRC(rc);
2099 }
2100
2101 /* IA32_DEBUGCTL MSR. */
2102 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
2103 AssertRC(rc);
2104
2105 /** @todo do we really ever need this? */
2106 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
2107 AssertRC(rc);
2108 }
2109
2110 /*
2111 * 64-bit guest mode.
2112 */
2113 if (CPUMIsGuestInLongModeEx(pCtx))
2114 {
2115#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
2116 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2117#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2118 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
2119#else
2120# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2121 if (!pVM->hwaccm.s.fAllow64BitGuests)
2122 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2123# endif
2124 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
2125#endif
2126 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
2127 {
2128 /* Update these as wrmsr might have changed them. */
2129 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fs.u64Base);
2130 AssertRC(rc);
2131 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gs.u64Base);
2132 AssertRC(rc);
2133 }
2134 }
2135 else
2136 {
2137 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
2138 }
2139
2140 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
2141
2142#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2143 /*
2144 * Store all guest MSRs in the VM-entry load area, so they will be loaded
2145     * during VM-entry and stored back into the VM-exit store area during VM-exit.
2146 */
2147 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2148 unsigned idxMsr = 0;
2149
2150 uint32_t u32GstExtFeatures;
2151 uint32_t u32Temp;
2152 CPUMGetGuestCpuId(pVCpu, 0x80000001, &u32Temp, &u32Temp, &u32Temp, &u32GstExtFeatures);
2153
2154 if (u32GstExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2155 {
2156#if 0
2157 pMsr->u32IndexMSR = MSR_K6_EFER;
2158 pMsr->u32Reserved = 0;
2159 pMsr->u64Value = pCtx->msrEFER;
2160 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
2161 if (!CPUMIsGuestInLongModeEx(pCtx))
2162 pMsr->u64Value &= ~(MSR_K6_EFER_LMA | MSR_K6_EFER_LME);
2163 pMsr++; idxMsr++;
2164#endif
2165
2166 if (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)
2167 {
2168 pMsr->u32IndexMSR = MSR_K8_LSTAR;
2169 pMsr->u32Reserved = 0;
2170 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
2171 pMsr++; idxMsr++;
2172 pMsr->u32IndexMSR = MSR_K6_STAR;
2173 pMsr->u32Reserved = 0;
2174 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
2175 pMsr++; idxMsr++;
2176 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
2177 pMsr->u32Reserved = 0;
2178 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
2179 pMsr++; idxMsr++;
2180
2181 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2182#if 0
2183 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
2184 pMsr->u32Reserved = 0;
2185 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
2186 pMsr++; idxMsr++;
2187#endif
2188 }
2189 }
2190
2191 if ( pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP
2192 && (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP))
2193 {
2194 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
2195 pMsr->u32Reserved = 0;
2196 rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pMsr->u64Value);
2197 AssertRC(rc);
2198 pMsr++; idxMsr++;
2199 }
2200
2201 pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr;
2202
2203 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
2204 AssertRC(rc);
2205
2206 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
2207 AssertRC(rc);
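    /* Both counts refer to the same list built in pGuestMSR above (the entry-load and exit-store
       addresses are set up elsewhere to point at this area), so each MSR in the list is loaded on
       VM-entry and its current value written back into the same slot on VM-exit, which is what
       VMXR0SaveGuestState() relies on when it walks the area further down. */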
2208#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2209
2210 bool fOffsettedTsc;
2211 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
2212 {
2213 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
2214
2215 /* Make sure the returned values have sane upper and lower boundaries. */
2216 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
2217
2218 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
2219 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
2220
2221 cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift;
2222 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
2223 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
2224 AssertRC(rc);
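        /* Illustrative numbers: on a 2 GHz TSC with cPreemptTimerShift = 5 the clamps above keep
           the deadline between roughly 0.98M and 31.25M TSC ticks; a 10M tick deadline then
           becomes 10000000 >> 5 = 312500 timer ticks, as the VMX preemption timer counts down at
           the TSC rate divided by 2^shift. */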
2225 }
2226 else
2227 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
2228
2229 if (fOffsettedTsc)
2230 {
2231 uint64_t u64CurTSC = ASMReadTSC();
2232 if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
2233 {
2234 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
2235 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset);
2236 AssertRC(rc);
2237
2238 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2239 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2240 AssertRC(rc);
2241 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
2242 }
2243 else
2244 {
2245 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
2246 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
2247 pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset,
2248 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset,
2249 TMCpuTickGet(pVCpu)));
2250 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2251 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2252 AssertRC(rc);
2253 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
2254 }
2255 }
2256 else
2257 {
2258 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2259 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2260 AssertRC(rc);
2261 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
2262 }
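    /* To summarize the above: whenever RDTSC exiting is disabled the guest reads
       ASMReadTSC() + u64TSCOffset; as soon as that sum could fall below the last TSC value
       already seen by the guest, RDTSC/RDTSCP exiting is turned back on and the reads are
       emulated instead, so the guest never observes its TSC going backwards. */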
2263
2264 /* Done with the major changes */
2265 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
2266
2267 /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */
2268 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2269 return rc;
2270}
2271
2272
2273/**
2274 * Syncs back the guest state from VMCS.
2275 *
2276 * @returns VBox status code.
2277 * @param pVM Pointer to the VM.
2278 * @param pVCpu Pointer to the VMCPU.
2279 * @param pCtx Pointer to the guest CPU context.
2280 */
2281DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2282{
2283 RTGCUINTREG val, valShadow;
2284 RTGCUINTPTR uInterruptState;
2285 int rc;
2286
2287 /* First sync back EIP, ESP, and EFLAGS. */
2288 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
2289 AssertRC(rc);
2290 pCtx->rip = val;
2291 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
2292 AssertRC(rc);
2293 pCtx->rsp = val;
2294 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2295 AssertRC(rc);
2296 pCtx->eflags.u32 = val;
2297
2298 /* Take care of instruction fusing (sti, mov ss) */
2299 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2300 uInterruptState = val;
2301 if (uInterruptState != 0)
2302 {
2303 Assert(uInterruptState <= 2); /* only sti & mov ss */
2304 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2305 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2306 }
2307 else
2308 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2309
2310 /* Control registers. */
2311 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2312 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
2313 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
2314 CPUMSetGuestCR0(pVCpu, val);
2315
2316 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2317 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
2318 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
2319 CPUMSetGuestCR4(pVCpu, val);
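    /* The combine rule above: bits set in cr0_mask/cr4_mask are host-owned, so their
       guest-visible values come from the read shadows written in VMXR0LoadGuestState();
       all remaining bits are taken from the CR0/CR4 the guest actually ran with. */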
2320
2321 /*
2322     * No reason to sync back the CRx registers: the guest cannot change them, except in
2323     * the nested paging case where it can modify CR3 & CR4.
2324 */
2325 if ( pVM->hwaccm.s.fNestedPaging
2326 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2327 {
2328 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
2329
2330 /* Can be updated behind our back in the nested paging case. */
2331 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2332
2333 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
2334
2335 if (val != pCtx->cr3)
2336 {
2337 CPUMSetGuestCR3(pVCpu, val);
2338 PGMUpdateCR3(pVCpu, val);
2339 }
2340 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2341 AssertRCReturn(rc, rc);
2342 }
2343
2344 /* Sync back DR7. */
2345 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
2346 pCtx->dr[7] = val;
2347
2348 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2349 VMX_READ_SELREG(ES, es);
2350 VMX_READ_SELREG(SS, ss);
2351 VMX_READ_SELREG(CS, cs);
2352 VMX_READ_SELREG(DS, ds);
2353 VMX_READ_SELREG(FS, fs);
2354 VMX_READ_SELREG(GS, gs);
2355
2356 /* System MSRs */
2357 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2358 pCtx->SysEnter.cs = val;
2359 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
2360 pCtx->SysEnter.eip = val;
2361 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
2362 pCtx->SysEnter.esp = val;
2363
2364 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2365 VMX_READ_SELREG(LDTR, ldtr);
2366
2367 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2368 pCtx->gdtr.cbGdt = val;
2369 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
2370 pCtx->gdtr.pGdt = val;
2371
2372 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2373 pCtx->idtr.cbIdt = val;
2374 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
2375 pCtx->idtr.pIdt = val;
2376
2377 /* Real mode emulation using v86 mode. */
2378 if ( CPUMIsGuestInRealModeEx(pCtx)
2379 && pVM->hwaccm.s.vmx.pRealModeTSS)
2380 {
2381 /* Hide our emulation flags */
2382 pCtx->eflags.Bits.u1VM = 0;
2383
2384 /* Restore original IOPL setting as we always use 0. */
2385 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2386
2387 /* Force a TR resync every time in case we switch modes. */
2388 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
2389 }
2390 else
2391 {
2392 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2393 VMX_READ_SELREG(TR, tr);
2394 }
2395
2396#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2397 /*
2398 * Save the possibly changed MSRs that we automatically restore and save during a world switch.
2399 */
2400 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++)
2401 {
2402 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2403 pMsr += i;
2404
2405 switch (pMsr->u32IndexMSR)
2406 {
2407 case MSR_K8_LSTAR:
2408 pCtx->msrLSTAR = pMsr->u64Value;
2409 break;
2410 case MSR_K6_STAR:
2411 pCtx->msrSTAR = pMsr->u64Value;
2412 break;
2413 case MSR_K8_SF_MASK:
2414 pCtx->msrSFMASK = pMsr->u64Value;
2415 break;
2416 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2417#if 0
2418 case MSR_K8_KERNEL_GS_BASE:
2419 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2420 break;
2421#endif
2422 case MSR_K8_TSC_AUX:
2423 CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value);
2424 break;
2425#if 0
2426 case MSR_K6_EFER:
2427 /* EFER can't be changed without causing a VM-exit. */
2428 /* Assert(pCtx->msrEFER == pMsr->u64Value); */
2429 break;
2430#endif
2431 default:
2432 AssertFailed();
2433 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2434 }
2435 }
2436#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2437 return VINF_SUCCESS;
2438}
2439
2440
2441/**
2442 * Dummy placeholder for TLB flush handling before VM-entry. Used in the case
2443 * where neither EPT nor VPID is supported by the CPU.
2444 *
2445 * @param pVM Pointer to the VM.
2446 * @param pVCpu Pointer to the VMCPU.
2447 */
2448static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2449{
2450 NOREF(pVM);
2451 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2452 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2453 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2454 return;
2455}
2456
2457
2458/**
2459 * Setup the tagged TLB for EPT+VPID.
2460 *
2461 * @param pVM Pointer to the VM.
2462 * @param pVCpu Pointer to the VMCPU.
2463 */
2464static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2465{
2466 PHMGLOBLCPUINFO pCpu;
2467
2468 Assert(pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID);
2469
2470 pCpu = HWACCMR0GetCurrentCpu();
2471
2472 /*
2473     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2474     * This can happen both for start & resume due to long jumps back to ring-3.
2475     * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2476     * or the host CPU is online again after a suspend/resume, so we cannot reuse the current ASID anymore.
2477 */
2478 bool fNewASID = false;
2479 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2480 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2481 {
2482 pVCpu->hwaccm.s.fForceTLBFlush = true;
2483 fNewASID = true;
2484 }
2485
2486 /*
2487 * Check for explicit TLB shootdowns.
2488 */
2489 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2490 pVCpu->hwaccm.s.fForceTLBFlush = true;
2491
2492 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2493
2494 if (pVCpu->hwaccm.s.fForceTLBFlush)
2495 {
2496 if (fNewASID)
2497 {
2498 ++pCpu->uCurrentASID;
2499 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2500 {
2501 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2502 pCpu->cTLBFlushes++;
2503 pCpu->fFlushASIDBeforeUse = true;
2504 }
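                /* Worked example (illustrative): with uMaxASID = 64 the VPIDs 1..63 are handed
                   out (0 is reserved for the host); once the counter reaches 64 it wraps back to
                   1, cTLBFlushes is bumped so that VCPUs still on the old generation flush on
                   their next world switch, and fFlushASIDBeforeUse makes sure the recycled VPIDs
                   start out with a clean TLB. */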
2505
2506 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2507 if (pCpu->fFlushASIDBeforeUse)
2508 {
2509 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2510#ifdef VBOX_WITH_STATISTICS
2511 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2512#endif
2513 }
2514 }
2515 else
2516 {
2517 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
2518 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2519 else
2520 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2521
2522#ifdef VBOX_WITH_STATISTICS
2523 /*
2524 * This is not terribly accurate (i.e. we don't have any StatFlushEPT counter). We currently count these
2525 * as ASID flushes too, better than including them under StatFlushTLBWorldSwitch.
2526              * as ASID flushes too, which is better than including them under StatFlushTLBWorldSwitch.
2527 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2528#endif
2529 }
2530
2531 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2532 pVCpu->hwaccm.s.fForceTLBFlush = false;
2533 }
2534 else
2535 {
2536 AssertMsg(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID,
2537 ("hwaccm->uCurrentASID=%lu hwaccm->cTLBFlushes=%lu cpu->uCurrentASID=%lu cpu->cTLBFlushes=%lu\n",
2538 pVCpu->hwaccm.s.uCurrentASID, pVCpu->hwaccm.s.cTLBFlushes,
2539 pCpu->uCurrentASID, pCpu->cTLBFlushes));
2540
2541 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2542 * not be executed. See hwaccmQueueInvlPage() where it is commented
2543 * out. Support individual entry flushing someday. */
2544 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2545 {
2546 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2547
2548 /*
2549             * Flush individual guest entries from the TLB using VPID where the CPU supports
2550             * per-address INVVPID, otherwise flush as little as possible using EPT.
2551 */
2552 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2553 {
2554 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2555 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2556 }
2557 else
2558 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2559 }
2560 else
2561 {
2562#ifdef VBOX_WITH_STATISTICS
2563 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2564#endif
2565 }
2566 }
2567 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2568 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2569
2570 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes,
2571 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2572 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID,
2573 ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2574 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID,
2575 ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2576
2577 /* Update VMCS with the VPID. */
2578 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2579 AssertRC(rc);
2580}
2581
2582
2583/**
2584 * Setup the tagged TLB for EPT only.
2585 *
2587 * @param pVM Pointer to the VM.
2588 * @param pVCpu Pointer to the VMCPU.
2589 */
2590static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2591{
2592 PHMGLOBLCPUINFO pCpu;
2593
2594 Assert(pVM->hwaccm.s.fNestedPaging);
2595 Assert(!pVM->hwaccm.s.vmx.fVPID);
2596
2597 pCpu = HWACCMR0GetCurrentCpu();
2598
2599 /*
2600     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2601     * This can happen both for start & resume due to long jumps back to ring-3.
2602     * A change in the TLB flush count implies the host CPU is online again after a suspend/resume.
2603 */
2604 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2605 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2606 {
2607 pVCpu->hwaccm.s.fForceTLBFlush = true;
2608 }
2609
2610 /*
2611 * Check for explicit TLB shootdown flushes.
2612 */
2613 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2614 pVCpu->hwaccm.s.fForceTLBFlush = true;
2615
2616 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2617 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2618
2619 if (pVCpu->hwaccm.s.fForceTLBFlush)
2620 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2621 else
2622 {
2623 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2624 * not be executed. See hwaccmQueueInvlPage() where it is commented
2625 * out. Support individual entry flushing someday. */
2626 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2627 {
2628 /*
2629 * We cannot flush individual entries without VPID support. Flush using EPT.
2630 */
2631 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2632 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2633 }
2634 }
2635    pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2636 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2637
2638#ifdef VBOX_WITH_STATISTICS
2639 if (pVCpu->hwaccm.s.fForceTLBFlush)
2640 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2641 else
2642 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2643#endif
2644}
2645
2646
2647/**
2648 * Setup the tagged TLB for VPID.
2649 *
2651 * @param pVM Pointer to the VM.
2652 * @param pVCpu Pointer to the VMCPU.
2653 */
2654static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2655{
2656 PHMGLOBLCPUINFO pCpu;
2657
2658 Assert(pVM->hwaccm.s.vmx.fVPID);
2659 Assert(!pVM->hwaccm.s.fNestedPaging);
2660
2661 pCpu = HWACCMR0GetCurrentCpu();
2662
2663 /*
2664     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2665     * This can happen both for start & resume due to long jumps back to ring-3.
2666     * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2667     * or the host CPU is online again after a suspend/resume, so we cannot reuse the current ASID anymore.
2668 */
2669 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2670 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2671 {
2672 /* Force a TLB flush on VM entry. */
2673 pVCpu->hwaccm.s.fForceTLBFlush = true;
2674 }
2675
2676 /*
2677 * Check for explicit TLB shootdown flushes.
2678 */
2679 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2680 pVCpu->hwaccm.s.fForceTLBFlush = true;
2681
2682 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2683
2684 if (pVCpu->hwaccm.s.fForceTLBFlush)
2685 {
2686 ++pCpu->uCurrentASID;
2687 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2688 {
2689 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2690 pCpu->cTLBFlushes++;
2691 pCpu->fFlushASIDBeforeUse = true;
2692 }
2693 else
2694 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2695
2696 pVCpu->hwaccm.s.fForceTLBFlush = false;
2697 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2698 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2699 if (pCpu->fFlushASIDBeforeUse)
2700 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2701 }
2702 else
2703 {
2704 AssertMsg(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID,
2705 ("hwaccm->uCurrentASID=%lu hwaccm->cTLBFlushes=%lu cpu->uCurrentASID=%lu cpu->cTLBFlushes=%lu\n",
2706 pVCpu->hwaccm.s.uCurrentASID, pVCpu->hwaccm.s.cTLBFlushes,
2707 pCpu->uCurrentASID, pCpu->cTLBFlushes));
2708
2709 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2710 * not be executed. See hwaccmQueueInvlPage() where it is commented
2711 * out. Support individual entry flushing someday. */
2712 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2713 {
2714 /*
2715             * Flush individual guest entries from the TLB using VPID where the CPU supports
2716             * per-address INVVPID, otherwise fall back to the configured (wider) VPID flush.
2717 */
2718 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2719 {
2720 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2721 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2722 }
2723 else
2724 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2725 }
2726 }
2727 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2728 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2729
2730 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes,
2731 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2732 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID,
2733 ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2734 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID,
2735 ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2736
2737 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2738 AssertRC(rc);
2739
2740# ifdef VBOX_WITH_STATISTICS
2741 if (pVCpu->hwaccm.s.fForceTLBFlush)
2742 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2743 else
2744 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2745# endif
2746}
2747
2748
2749/**
2750 * Runs guest code in a VT-x VM.
2751 *
2752 * @returns VBox status code.
2753 * @param pVM Pointer to the VM.
2754 * @param pVCpu Pointer to the VMCPU.
2755 * @param pCtx Pointer to the guest CPU context.
2756 */
2757VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2758{
2759 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
2760 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1);
2761 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2);
2762
2763 VBOXSTRICTRC rc = VINF_SUCCESS;
2764 int rc2;
2765 RTGCUINTREG val;
2766 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2767 RTGCUINTREG instrError, cbInstr;
2768 RTGCUINTPTR exitQualification = 0;
2769 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2770 RTGCUINTPTR errCode, instrInfo;
2771 bool fSetupTPRCaching = false;
2772 uint64_t u64OldLSTAR = 0;
2773 uint8_t u8LastTPR = 0;
2774 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2775 unsigned cResume = 0;
2776#ifdef VBOX_STRICT
2777 RTCPUID idCpuCheck;
2778 bool fWasInLongMode = false;
2779#endif
2780#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2781 uint64_t u64LastTime = RTTimeMilliTS();
2782#endif
2783
2784 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2785 || (pVCpu->hwaccm.s.vmx.pbVAPIC && pVM->hwaccm.s.vmx.pAPIC));
2786
2787 /*
2788 * Check if we need to use TPR shadowing.
2789 */
2790 if ( CPUMIsGuestInLongModeEx(pCtx)
2791 || ( (( pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2792 || pVM->hwaccm.s.fTRPPatchingAllowed)
2793 && pVM->hwaccm.s.fHasIoApic)
2794 )
2795 {
2796 fSetupTPRCaching = true;
2797 }
2798
2799 Log2(("\nE"));
2800
2801#ifdef VBOX_STRICT
2802 {
2803 RTCCUINTREG val2;
2804
2805 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2);
2806 AssertRC(rc2);
2807 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2808
2809 /* allowed zero */
2810 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2811 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2812
2813 /* allowed one */
2814 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2815 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2816
2817 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2);
2818 AssertRC(rc2);
2819 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2820
2821 /*
2822 * Must be set according to the MSR, but can be cleared if nested paging is used.
2823 */
2824 if (pVM->hwaccm.s.fNestedPaging)
2825 {
2826 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2827 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2828 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2829 }
2830
2831 /* allowed zero */
2832 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2833 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2834
2835 /* allowed one */
2836 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2837 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2838
2839 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2);
2840 AssertRC(rc2);
2841 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2842
2843 /* allowed zero */
2844 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
2845 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2846
2847 /* allowed one */
2848 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2849 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2850
2851 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2);
2852 AssertRC(rc2);
2853 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2854
2855 /* allowed zero */
2856 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2857 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2858
2859 /* allowed one */
2860 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2861 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2862 }
2863 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2864#endif /* VBOX_STRICT */
2865
2866#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2867 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2868#endif
2869
2870 /*
2871 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
2872 */
2873ResumeExecution:
2874 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry))
2875 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x);
2876 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2877 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2878 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2879 Assert(!HWACCMR0SuspendPending());
2880 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2881 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2882
2883 /*
2884 * Safety precaution; looping for too long here can have a very bad effect on the host.
2885 */
2886 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
2887 {
2888 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2889 rc = VINF_EM_RAW_INTERRUPT;
2890 goto end;
2891 }
2892
2893 /*
2894 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
2895 */
2896 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2897 {
2898 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2899 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2900 {
2901 /*
2902 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2903 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2904             * Before we are able to execute this instruction in raw mode (iret to guest code), an external interrupt might
2905             * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process. This could
2906             * break the guest. It sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
2907 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2908 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2909 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2910 AssertRC(rc2);
2911 }
2912 }
2913 else
2914 {
2915 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2916 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2917 AssertRC(rc2);
2918 }
2919
2920#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2921 if (RT_UNLIKELY((cResume & 0xf) == 0))
2922 {
2923 uint64_t u64CurTime = RTTimeMilliTS();
2924
2925 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2926 {
2927 u64LastTime = u64CurTime;
2928 TMTimerPollVoid(pVM, pVCpu);
2929 }
2930 }
2931#endif
2932
2933 /*
2934 * Check for pending actions that force us to go back to ring-3.
2935 */
2936 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2937 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2938 {
2939 /* Check if a sync operation is pending. */
2940 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2941 {
2942 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2943 if (rc != VINF_SUCCESS)
2944 {
2945 AssertRC(VBOXSTRICTRC_VAL(rc));
2946 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2947 goto end;
2948 }
2949 }
2950
2951#ifdef DEBUG
2952 /* Intercept X86_XCPT_DB if stepping is enabled */
2953 if (!DBGFIsStepping(pVCpu))
2954#endif
2955 {
2956 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2957 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2958 {
2959 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2960 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2961 goto end;
2962 }
2963 }
2964
2965 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2966 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
2967 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
2968 {
2969 rc = VINF_EM_PENDING_REQUEST;
2970 goto end;
2971 }
2972
2973 /* Check if a pgm pool flush is in progress. */
2974 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2975 {
2976 rc = VINF_PGM_POOL_FLUSH_PENDING;
2977 goto end;
2978 }
2979
2980 /* Check if DMA work is pending (2nd+ run). */
2981 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
2982 {
2983 rc = VINF_EM_RAW_TO_R3;
2984 goto end;
2985 }
2986 }
2987
2988#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2989 /*
2990     * Exit to ring-3 when preemption or other work is pending.
2991 *
2992 * Interrupts are disabled before the call to make sure we don't miss any interrupt
2993 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
2994 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
2995 *
2996     * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
2997 * shootdowns rely on this.
2998 */
2999 uOldEFlags = ASMIntDisableFlags();
3000 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
3001 {
3002 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
3003 rc = VINF_EM_RAW_INTERRUPT;
3004 goto end;
3005 }
3006 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3007#endif
3008
3009 /*
3010     * When external interrupts are pending, we should exit the VM when IF is set.
3011 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!
3012 */
3013 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
3014 if (RT_FAILURE(rc))
3015 goto end;
3016
3017 /** @todo check timers?? */
3018
3019 /*
3020 * TPR caching using CR8 is only available in 64-bit mode.
3021     * Note: AMD has a 32-bit exception to this (X86_CPUID_AMD_FEATURE_ECX_CR8L), but it appears to be missing on Intel CPUs.
3022     * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (No longer true.)
3023 */
3024 /** @todo query and update the TPR only when it could have been changed (mmio
3025     *        access & wrmsr (x2apic)). */
3026 if (fSetupTPRCaching)
3027 {
3028 /* TPR caching in CR8 */
3029 bool fPending;
3030
3031 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
3032 AssertRC(rc2);
3033 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
3034 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = u8LastTPR;
3035
3036 /*
3037 * Two options here:
3038 * - external interrupt pending, but masked by the TPR value.
3039     *   -> a CR8 update that lowers the current TPR value should cause an exit
3040 * - no pending interrupts
3041     *   -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
3042 */
3043
3044 /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
3045 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0);
3046 AssertRC(VBOXSTRICTRC_VAL(rc));
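        /*
         * Illustrative sketch (disabled; not part of the original code): the TPR <-> CR8 mapping
         * the ">> 4" above relies on. The APIC TPR keeps the priority class in bits 7:4 while
         * CR8 holds only that 4-bit class. u8Cr8 is a name made up for this sketch.
         */
#if 0
        uint8_t const u8Cr8 = u8LastTPR >> 4;   /* e.g. a guest TPR of 0x50 gives CR8/threshold 5 */
        Assert(u8Cr8 <= 0xf);
#endif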
3047
3048 if (pVM->hwaccm.s.fTPRPatchingActive)
3049 {
3050 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3051 /* Our patch code uses LSTAR for TPR caching. */
3052 pCtx->msrLSTAR = u8LastTPR;
3053
3054 /** @todo r=ramshankar: we should check for MSR-bitmap support here. */
3055 if (fPending)
3056 {
3057 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
3058 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
3059 }
3060 else
3061 {
3062 /*
3063              * No interrupts are pending, so we don't need to be explicitly notified.
3064 * There are enough world switches for detecting pending interrupts.
3065 */
3066 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
3067 }
3068 }
3069 }
3070
3071#ifdef LOG_ENABLED
3072 if ( pVM->hwaccm.s.fNestedPaging
3073 || pVM->hwaccm.s.vmx.fVPID)
3074 {
3075 PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu();
3076 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
3077 {
3078 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu,
3079 pCpu->idCpu));
3080 }
3081 else if (pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
3082 {
3083 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes,
3084 pCpu->cTLBFlushes));
3085 }
3086 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
3087 LogFlow(("Manual TLB flush\n"));
3088 }
3089#endif
3090#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3091 PGMRZDynMapFlushAutoSet(pVCpu);
3092#endif
3093
3094 /*
3095 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3!
3096 * (until the actual world switch)
3097 */
3098#ifdef VBOX_STRICT
3099 idCpuCheck = RTMpCpuId();
3100#endif
3101#ifdef LOG_ENABLED
3102 VMMR0LogFlushDisable(pVCpu);
3103#endif
3104
3105 /*
3106 * Save the host state first.
3107 */
3108 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
3109 {
3110 rc = VMXR0SaveHostState(pVM, pVCpu);
3111 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3112 {
3113 VMMR0LogFlushEnable(pVCpu);
3114 goto end;
3115 }
3116 }
3117
3118 /*
3119 * Load the guest state.
3120 */
3121 if (!pVCpu->hwaccm.s.fContextUseFlags)
3122 {
3123 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
3124 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadMinimal);
3125 }
3126 else
3127 {
3128 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
3129 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3130 {
3131 VMMR0LogFlushEnable(pVCpu);
3132 goto end;
3133 }
3134 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadFull);
3135 }
3136
3137#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3138 /*
3139 * Disable interrupts to make sure a poke will interrupt execution.
3140 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
3141 */
3142 uOldEFlags = ASMIntDisableFlags();
3143 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3144#endif
3145
3146 /* Non-register state Guest Context */
3147 /** @todo change me according to cpu state */
3148 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
3149 AssertRC(rc2);
3150
3151 /* Set TLB flush state as checked until we return from the world switch. */
3152 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
3153 /* Deal with tagged TLB setup and invalidation. */
3154 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
3155
3156 /*
3157 * Manual save and restore:
3158 * - General purpose registers except RIP, RSP
3159 *
3160 * Trashed:
3161 * - CR2 (we don't care)
3162 * - LDTR (reset to 0)
3163 * - DRx (presumably not changed at all)
3164 * - DR7 (reset to 0x400)
3165 * - EFLAGS (reset to RT_BIT(1); not relevant)
3166 */
3167
3168 /* All done! Let's start VM execution. */
3169 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x);
3170 Assert(idCpuCheck == RTMpCpuId());
3171
3172#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3173 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
3174 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
3175#endif
3176
3177 /*
3178 * Save the current TPR value in the LSTAR MSR so our patches can access it.
3179 */
3180 if (pVM->hwaccm.s.fTPRPatchingActive)
3181 {
3182 Assert(pVM->hwaccm.s.fTPRPatchingActive);
3183 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3184 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
3185 }
3186
3187 TMNotifyStartOfExecution(pVCpu);
3188
3189#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3190 /*
3191     * Save the current host TSC_AUX and write the guest TSC_AUX to the host, so that
3192     * RDTSCP executions (which don't cause exits here) read the guest MSR. See @bugref{3324}.
3193 */
3194 if ( (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3195 && !(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3196 {
3197 pVCpu->hwaccm.s.u64HostTSCAux = ASMRdMsr(MSR_K8_TSC_AUX);
3198 uint64_t u64GuestTSCAux = 0;
3199 rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTSCAux);
3200 AssertRC(rc2);
3201 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTSCAux);
3202 }
3203#endif
3204
3205#ifdef VBOX_WITH_KERNEL_USING_XMM
3206 rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM);
3207#else
3208 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
3209#endif
3210 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
3211 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits);
3212
3213    /* Possibly the last TSC value seen by the guest (too high); only relevant when we're in TSC offset mode. */
3214 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3215 {
3216#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3217 /* Restore host's TSC_AUX. */
3218 if (pVCpu->hwaccm.s.vmx.proc_ctls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3219 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hwaccm.s.u64HostTSCAux);
3220#endif
3221
3222 TMCpuTickSetLastSeen(pVCpu,
3223 ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
3224 }
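    /* Note: handing TM a slight overestimate of the last guest-visible TSC keeps the virtual TSC
     *       from appearing to run backwards after the world switch. */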
3225
3226 TMNotifyEndOfExecution(pVCpu);
3227 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
3228 Assert(!(ASMGetFlags() & X86_EFL_IF));
3229
3230 /*
3231 * Restore the host LSTAR MSR if the guest could have changed it.
3232 */
3233 if (pVM->hwaccm.s.fTPRPatchingActive)
3234 {
3235 Assert(pVM->hwaccm.s.fTPRPatchingActive);
3236 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3237 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
3238 }
3239
3240 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x);
3241 ASMSetFlags(uOldEFlags);
3242#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3243 uOldEFlags = ~(RTCCUINTREG)0;
3244#endif
3245
3246 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n",
3247 pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
3248
3249 /* In case we execute a goto ResumeExecution later on. */
3250 pVCpu->hwaccm.s.fResumeVM = true;
3251 pVCpu->hwaccm.s.fForceTLBFlush = false;
3252
3253 /*
3254 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3255 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
3256 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3257 */
3258
3259 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3260 {
3261 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
3262 VMMR0LogFlushEnable(pVCpu);
3263 goto end;
3264 }
3265
3266 /* Success. Query the guest state and figure out what has happened. */
3267
3268 /* Investigate why there was a VM-exit. */
3269 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3270 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
3271
3272     exitReason &= 0xffff;   /* bits 0-15 contain the exit code. */
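    /* Note: bit 31 of the raw exit reason is set for failed VM-entries; only the low 16 bits
     *       identify the basic exit reason. */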
3273 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3274 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
3275 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
3276 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
3277 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
3278 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
3279 rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
3280 AssertRC(rc2);
3281
3282 /*
3283 * Sync back the guest state.
3284 */
3285 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
3286 AssertRC(rc2);
3287
3288 /* Note! NOW IT'S SAFE FOR LOGGING! */
3289 VMMR0LogFlushEnable(pVCpu);
3290 Log2(("Raw exit reason %08x\n", exitReason));
3291#if ARCH_BITS == 64 /* for the time being */
3292 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
3293#endif
3294
3295 /*
3296 * Check if an injected event was interrupted prematurely.
3297 */
3298 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
3299 AssertRC(rc2);
3300 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
3301 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
3302 /* Ignore 'int xx' as they'll be restarted anyway. */
3303 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
3304 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3305 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3306 {
3307 Assert(!pVCpu->hwaccm.s.Event.fPending);
3308 pVCpu->hwaccm.s.Event.fPending = true;
3309 /* Error code present? */
3310 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
3311 {
3312 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
3313 AssertRC(rc2);
3314 pVCpu->hwaccm.s.Event.errCode = val;
3315 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n",
3316 pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
3317 }
3318 else
3319 {
3320 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo,
3321 (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3322 pVCpu->hwaccm.s.Event.errCode = 0;
3323 }
3324 }
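    /* Note: the interrupted event is only marked pending here; it will be re-injected on the next
     *       VM-entry so the guest doesn't lose it. */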
3325#ifdef VBOX_STRICT
3326 else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
3327              /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3328 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3329 {
3330 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n",
3331 pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3332 }
3333
3334 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
3335 HWACCMDumpRegs(pVM, pVCpu, pCtx);
3336#endif
3337
3338 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3339 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3340 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3341 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3342 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3343
3344 /*
3345 * Sync back the TPR if it was changed.
3346 */
3347 if ( fSetupTPRCaching
3348 && u8LastTPR != pVCpu->hwaccm.s.vmx.pbVAPIC[0x80])
3349 {
3350 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pbVAPIC[0x80]);
3351 AssertRC(rc2);
3352 }
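    /* Note: offset 0x80 in the virtual-APIC page is the TPR register, so a mismatch with the value
     *       we entered with means the guest changed its TPR and the PDM APIC must be updated. */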
3353
3354#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3355 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3356 exitReason, (uint64_t)exitQualification, pCtx->cs.Sel, pCtx->rip, (uint64_t)intInfo);
3357#endif
3358 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x);
3359
3360 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3361 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3362 switch (exitReason)
3363 {
3364 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3365 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3366 {
3367 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3368
3369 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3370 {
3371 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
3372#if 0 //def VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3373 if ( RTThreadPreemptIsPendingTrusty()
3374 && !RTThreadPreemptIsPending(NIL_RTTHREAD))
3375 goto ResumeExecution;
3376#endif
3377 /* External interrupt; leave to allow it to be dispatched again. */
3378 rc = VINF_EM_RAW_INTERRUPT;
3379 break;
3380 }
3381 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3382 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3383 {
3384 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3385 /* External interrupt; leave to allow it to be dispatched again. */
3386 rc = VINF_EM_RAW_INTERRUPT;
3387 break;
3388
3389 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
3390 AssertFailed(); /* can't come here; fails the first check. */
3391 break;
3392
3393 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
3394 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
3395 Assert(vector == 1 || vector == 3 || vector == 4);
3396 /* no break */
3397 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
3398 Log2(("Hardware/software interrupt %d\n", vector));
3399 switch (vector)
3400 {
3401 case X86_XCPT_NM:
3402 {
3403 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3404
3405 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3406 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3407 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3408 if (rc == VINF_SUCCESS)
3409 {
3410 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3411
3412 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
3413
3414 /* Continue execution. */
3415 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3416
3417 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3418 goto ResumeExecution;
3419 }
3420
3421 Log(("Forward #NM fault to the guest\n"));
3422 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
3423 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3424 cbInstr, 0);
3425 AssertRC(rc2);
3426 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3427 goto ResumeExecution;
3428 }
3429
3430 case X86_XCPT_PF: /* Page fault */
3431 {
3432#ifdef VBOX_ALWAYS_TRAP_PF
3433 if (pVM->hwaccm.s.fNestedPaging)
3434 {
3435 /*
3436 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3437 */
3438 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification,
3439 errCode, (RTGCPTR)pCtx->rsp));
3440
3441 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3442
3443 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3444
3445 /* Now we must update CR2. */
3446 pCtx->cr2 = exitQualification;
3447 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3448 cbInstr, errCode);
3449 AssertRC(rc2);
3450
3451 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3452 goto ResumeExecution;
3453 }
3454#else
3455 Assert(!pVM->hwaccm.s.fNestedPaging);
3456#endif
3457
3458#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
3459             /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
3460 if ( pVM->hwaccm.s.fTRPPatchingAllowed
3461 && pVM->hwaccm.s.pGuestPatchMem
3462 && (exitQualification & 0xfff) == 0x080
3463 && !(errCode & X86_TRAP_PF_P) /* not present */
3464 && CPUMGetGuestCPL(pVCpu) == 0
3465 && !CPUMIsGuestInLongModeEx(pCtx)
3466 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
3467 {
3468 RTGCPHYS GCPhysApicBase, GCPhys;
3469 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3470 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3471
3472 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3473 if ( rc == VINF_SUCCESS
3474 && GCPhys == GCPhysApicBase)
3475 {
3476 /* Only attempt to patch the instruction once. */
3477 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3478 if (!pPatch)
3479 {
3480 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
3481 break;
3482 }
3483 }
3484 }
3485#endif
3486
3487 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3488 /* Exit qualification contains the linear address of the page fault. */
3489 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3490 TRPMSetErrorCode(pVCpu, errCode);
3491 TRPMSetFaultAddress(pVCpu, exitQualification);
3492
3493 /* Shortcut for APIC TPR reads and writes. */
3494 if ( (exitQualification & 0xfff) == 0x080
3495 && !(errCode & X86_TRAP_PF_P) /* not present */
3496 && fSetupTPRCaching
3497 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3498 {
3499 RTGCPHYS GCPhysApicBase, GCPhys;
3500                 PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
3501 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3502
3503 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3504 if ( rc == VINF_SUCCESS
3505 && GCPhys == GCPhysApicBase)
3506 {
3507 Log(("Enable VT-x virtual APIC access filtering\n"));
3508 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3509 AssertRC(rc2);
3510 }
3511 }
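                /* Note: mapping a host page over the guest's APIC MMIO page here means subsequent TPR
                 *       accesses presumably no longer have to fault into the hypervisor each time. */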
3512
3513 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3514 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3515 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3516
3517 if (rc == VINF_SUCCESS)
3518 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3519                 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3520 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
3521
3522 TRPMResetTrap(pVCpu);
3523 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3524 goto ResumeExecution;
3525 }
3526 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3527 {
3528 /*
3529 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3530 */
3531 Log2(("Forward page fault to the guest\n"));
3532
3533 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3534 /* The error code might have been changed. */
3535 errCode = TRPMGetErrorCode(pVCpu);
3536
3537 TRPMResetTrap(pVCpu);
3538
3539 /* Now we must update CR2. */
3540 pCtx->cr2 = exitQualification;
3541 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3542 cbInstr, errCode);
3543 AssertRC(rc2);
3544
3545 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3546 goto ResumeExecution;
3547 }
3548#ifdef VBOX_STRICT
3549 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3550 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3551#endif
3552 /* Need to go back to the recompiler to emulate the instruction. */
3553 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPFEM);
3554 TRPMResetTrap(pVCpu);
3555 break;
3556 }
3557
3558 case X86_XCPT_MF: /* Floating point exception. */
3559 {
3560 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
3561 if (!(pCtx->cr0 & X86_CR0_NE))
3562 {
3563                     /* Old-style FPU error reporting needs some extra work. */
3564 /** @todo don't fall back to the recompiler, but do it manually. */
3565 rc = VINF_EM_RAW_EMULATE_INSTR;
3566 break;
3567 }
3568 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3569 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3570 cbInstr, errCode);
3571 AssertRC(rc2);
3572
3573 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3574 goto ResumeExecution;
3575 }
3576
3577 case X86_XCPT_DB: /* Debug exception. */
3578 {
3579 uint64_t uDR6;
3580
3581 /*
3582 * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3583 *
3584 * Exit qualification bits:
3585 * 3:0 B0-B3 which breakpoint condition was met
3586 * 12:4 Reserved (0)
3587 * 13 BD - debug register access detected
3588 * 14 BS - single step execution or branch taken
3589 * 63:15 Reserved (0)
3590 */
3591 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
3592
3593 /* Note that we don't support guest and host-initiated debugging at the same time. */
3594
3595 uDR6 = X86_DR6_INIT_VAL;
3596 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3597 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3598 if (rc == VINF_EM_RAW_GUEST_TRAP)
3599 {
3600 /* Update DR6 here. */
3601 pCtx->dr[6] = uDR6;
3602
3603 /* Resync DR6 if the debug state is active. */
3604 if (CPUMIsGuestDebugStateActive(pVCpu))
3605 ASMSetDR6(pCtx->dr[6]);
3606
3607 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
3608 pCtx->dr[7] &= ~X86_DR7_GD;
3609
3610 /* Paranoia. */
3611 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3612 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3613 pCtx->dr[7] |= 0x400; /* must be one */
3614
3615 /* Resync DR7 */
3616 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3617 AssertRC(rc2);
3618
3619 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip,
3620 exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3621 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3622 cbInstr, errCode);
3623 AssertRC(rc2);
3624
3625 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3626 goto ResumeExecution;
3627 }
3628 /* Return to ring 3 to deal with the debug exit code. */
3629 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3630 break;
3631 }
3632
3633 case X86_XCPT_BP: /* Breakpoint. */
3634 {
3635 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestBP);
3636 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3637 if (rc == VINF_EM_RAW_GUEST_TRAP)
3638 {
3639 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs.Sel, pCtx->rip));
3640 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3641 cbInstr, errCode);
3642 AssertRC(rc2);
3643 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3644 goto ResumeExecution;
3645 }
3646 if (rc == VINF_SUCCESS)
3647 {
3648 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3649 goto ResumeExecution;
3650 }
3651 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3652 break;
3653 }
3654
3655             case X86_XCPT_GP:   /* General protection fault exception. */
3656 {
3657 uint32_t cbOp;
3658 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3659
3660 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
3661#ifdef VBOX_STRICT
3662 if ( !CPUMIsGuestInRealModeEx(pCtx)
3663 || !pVM->hwaccm.s.vmx.pRealModeTSS)
3664 {
3665 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, errCode));
3666 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3667 cbInstr, errCode);
3668 AssertRC(rc2);
3669 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3670 goto ResumeExecution;
3671 }
3672#endif
3673 Assert(CPUMIsGuestInRealModeEx(pCtx));
3674
3675 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3676
3677 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
3678 if (RT_SUCCESS(rc2))
3679 {
3680 bool fUpdateRIP = true;
3681
3682 rc = VINF_SUCCESS;
3683 Assert(cbOp == pDis->cbInstr);
3684 switch (pDis->pCurInstr->uOpcode)
3685 {
3686 case OP_CLI:
3687 pCtx->eflags.Bits.u1IF = 0;
3688 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
3689 break;
3690
3691 case OP_STI:
3692 pCtx->eflags.Bits.u1IF = 1;
3693 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->cbInstr);
3694 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3695 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE,
3696 VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3697 AssertRC(rc2);
3698 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
3699 break;
3700
3701 case OP_HLT:
3702 fUpdateRIP = false;
3703 rc = VINF_EM_HALT;
3704 pCtx->rip += pDis->cbInstr;
3705 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3706 break;
3707
3708 case OP_POPF:
3709 {
3710 RTGCPTR GCPtrStack;
3711 uint32_t cbParm;
3712 uint32_t uMask;
3713 X86EFLAGS eflags;
3714
3715 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3716 {
3717 cbParm = 4;
3718 uMask = 0xffffffff;
3719 }
3720 else
3721 {
3722 cbParm = 2;
3723 uMask = 0xffff;
3724 }
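                        /* An operand-size prefix in real mode turns this into POPFD (a 32-bit pop);
                         * otherwise only 16 bits are popped. */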
3725
3726 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3727 if (RT_FAILURE(rc2))
3728 {
3729 rc = VERR_EM_INTERPRETER;
3730 break;
3731 }
3732 eflags.u = 0;
3733 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3734 if (RT_FAILURE(rc2))
3735 {
3736 rc = VERR_EM_INTERPRETER;
3737 break;
3738 }
3739 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3740 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3741 | (eflags.u & X86_EFL_POPF_BITS & uMask);
3742 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3743 pCtx->eflags.Bits.u1RF = 0;
3744 pCtx->esp += cbParm;
3745 pCtx->esp &= uMask;
3746
3747 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
3748 break;
3749 }
3750
3751 case OP_PUSHF:
3752 {
3753 RTGCPTR GCPtrStack;
3754 uint32_t cbParm;
3755 uint32_t uMask;
3756 X86EFLAGS eflags;
3757
3758 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3759 {
3760 cbParm = 4;
3761 uMask = 0xffffffff;
3762 }
3763 else
3764 {
3765 cbParm = 2;
3766 uMask = 0xffff;
3767 }
3768
3769 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0,
3770 &GCPtrStack);
3771 if (RT_FAILURE(rc2))
3772 {
3773 rc = VERR_EM_INTERPRETER;
3774 break;
3775 }
3776 eflags = pCtx->eflags;
3777 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3778 eflags.Bits.u1RF = 0;
3779 eflags.Bits.u1VM = 0;
3780
3781 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3782 if (RT_FAILURE(rc2))
3783 {
3784 rc = VERR_EM_INTERPRETER;
3785 break;
3786 }
3787 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3788 pCtx->esp -= cbParm;
3789 pCtx->esp &= uMask;
3790 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
3791 break;
3792 }
3793
3794 case OP_IRET:
3795 {
3796 RTGCPTR GCPtrStack;
3797 uint32_t uMask = 0xffff;
3798 uint16_t aIretFrame[3];
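                        /* The real-mode IRET frame consists of three 16-bit words: IP, CS and FLAGS. */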
3799
3800 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
3801 {
3802 rc = VERR_EM_INTERPRETER;
3803 break;
3804 }
3805
3806 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3807 if (RT_FAILURE(rc2))
3808 {
3809 rc = VERR_EM_INTERPRETER;
3810 break;
3811 }
3812 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3813 if (RT_FAILURE(rc2))
3814 {
3815 rc = VERR_EM_INTERPRETER;
3816 break;
3817 }
3818 pCtx->ip = aIretFrame[0];
3819 pCtx->cs.Sel = aIretFrame[1];
3820 pCtx->cs.ValidSel = aIretFrame[1];
3821 pCtx->cs.u64Base = (uint32_t)pCtx->cs.Sel << 4;
3822 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3823 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3824 pCtx->sp += sizeof(aIretFrame);
3825
3826 LogFlow(("iret to %04x:%x\n", pCtx->cs.Sel, pCtx->ip));
3827 fUpdateRIP = false;
3828 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
3829 break;
3830 }
3831
3832 case OP_INT:
3833 {
3834 uint32_t intInfo2;
3835
3836 LogFlow(("Realmode: INT %x\n", pDis->Param1.uValue & 0xff));
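                        /* Construct the interruption info: vector in bits 7:0, type in bits 10:8,
                         * valid flag in bit 31. */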
3837 intInfo2 = pDis->Param1.uValue & 0xff;
3838 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3839 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3840
3841 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3842 AssertRC(VBOXSTRICTRC_VAL(rc));
3843 fUpdateRIP = false;
3844 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3845 break;
3846 }
3847
3848 case OP_INTO:
3849 {
3850 if (pCtx->eflags.Bits.u1OF)
3851 {
3852 uint32_t intInfo2;
3853
3854 LogFlow(("Realmode: INTO\n"));
3855 intInfo2 = X86_XCPT_OF;
3856 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3857 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3858
3859 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3860 AssertRC(VBOXSTRICTRC_VAL(rc));
3861 fUpdateRIP = false;
3862 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3863 }
3864 break;
3865 }
3866
3867 case OP_INT3:
3868 {
3869 uint32_t intInfo2;
3870
3871 LogFlow(("Realmode: INT 3\n"));
3872 intInfo2 = 3;
3873 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3874 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3875
3876 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3877 AssertRC(VBOXSTRICTRC_VAL(rc));
3878 fUpdateRIP = false;
3879 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3880 break;
3881 }
3882
3883 default:
3884 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
3885 fUpdateRIP = false;
3886 break;
3887 }
3888
3889 if (rc == VINF_SUCCESS)
3890 {
3891 if (fUpdateRIP)
3892 pCtx->rip += cbOp; /* Move on to the next instruction. */
3893
3894 /*
3895 * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the
3896 * whole context to be done with it.
3897 */
3898 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3899
3900 /* Only resume if successful. */
3901 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3902 goto ResumeExecution;
3903 }
3904 }
3905 else
3906 rc = VERR_EM_INTERPRETER;
3907
3908 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
3909 ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
3910 break;
3911 }
3912
3913#ifdef VBOX_STRICT
3914 case X86_XCPT_XF: /* SIMD exception. */
3915 case X86_XCPT_DE: /* Divide error. */
3916             case X86_XCPT_UD:   /* Invalid opcode exception. */
3917 case X86_XCPT_SS: /* Stack segment exception. */
3918 case X86_XCPT_NP: /* Segment not present exception. */
3919 {
3920 switch (vector)
3921 {
3922 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE); break;
3923 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD); break;
3924 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS); break;
3925 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP); break;
3926 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXF); break;
3927 }
3928
3929 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3930 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3931 cbInstr, errCode);
3932 AssertRC(rc2);
3933
3934 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3935 goto ResumeExecution;
3936 }
3937#endif
3938 default:
3939 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXcpUnk);
3940 if ( CPUMIsGuestInRealModeEx(pCtx)
3941 && pVM->hwaccm.s.vmx.pRealModeTSS)
3942 {
3943 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs.Sel, pCtx->eip, errCode));
3944 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3945 cbInstr, errCode);
3946 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
3947
3948 /* Go back to ring-3 in case of a triple fault. */
3949 if ( vector == X86_XCPT_DF
3950 && rc == VINF_EM_RESET)
3951 {
3952 break;
3953 }
3954
3955 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3956 goto ResumeExecution;
3957 }
3958 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3959 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3960 break;
3961 } /* switch (vector) */
3962
3963 break;
3964
3965 default:
3966 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3967 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
3968 break;
3969 }
3970
3971 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3972 break;
3973 }
3974
3975 /*
3976 * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed
3977 * by the configuration of the EPT paging structures.
3978 */
3979 case VMX_EXIT_EPT_VIOLATION:
3980 {
3981 RTGCPHYS GCPhys;
3982
3983 Assert(pVM->hwaccm.s.fNestedPaging);
3984
3985 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3986 AssertRC(rc2);
3987 Assert(((exitQualification >> 7) & 3) != 2);
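            /* Bits 8:7 of the qualification: bit 7 = guest linear-address field valid, bit 8 (only
             * defined when bit 7 is set) = the access was to the translated address rather than to a
             * paging-structure entry; the assert presumably rejects the meaningless 8-without-7 case. */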
3988
3989 /* Determine the kind of violation. */
3990 errCode = 0;
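            /* Build a #PF-style error code from the exit qualification so the code below can treat
             * this like an ordinary page fault. */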
3991 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
3992 errCode |= X86_TRAP_PF_ID;
3993
3994 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
3995 errCode |= X86_TRAP_PF_RW;
3996
3997 /* If the page is present, then it's a page level protection fault. */
3998 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
3999 errCode |= X86_TRAP_PF_P;
4000 else
4001 {
4002 /* Shortcut for APIC TPR reads and writes. */
4003 if ( (GCPhys & 0xfff) == 0x080
4004 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4005 && fSetupTPRCaching
4006 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4007 {
4008 RTGCPHYS GCPhysApicBase;
4009                 PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
4010 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4011 if (GCPhys == GCPhysApicBase + 0x80)
4012 {
4013 Log(("Enable VT-x virtual APIC access filtering\n"));
4014 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
4015 AssertRC(rc2);
4016 }
4017 }
4018 }
4019 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
4020
4021 /* GCPhys contains the guest physical address of the page fault. */
4022 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
4023 TRPMSetErrorCode(pVCpu, errCode);
4024 TRPMSetFaultAddress(pVCpu, GCPhys);
4025
4026 /* Handle the pagefault trap for the nested shadow table. */
4027 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
4028
4029 /*
4030 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
4031 */
4032 if ( rc == VINF_SUCCESS
4033 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4034 || rc == VERR_PAGE_NOT_PRESENT)
4035 {
4036 /* We've successfully synced our shadow pages, so let's just continue execution. */
4037             Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
4038 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
4039
4040 TRPMResetTrap(pVCpu);
4041 goto ResumeExecution;
4042 }
4043
4044#ifdef VBOX_STRICT
4045 if (rc != VINF_EM_RAW_EMULATE_INSTR)
4046 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4047#endif
4048 /* Need to go back to the recompiler to emulate the instruction. */
4049 TRPMResetTrap(pVCpu);
4050 break;
4051 }
4052
4053 case VMX_EXIT_EPT_MISCONFIG:
4054 {
4055 RTGCPHYS GCPhys;
4056
4057 Assert(pVM->hwaccm.s.fNestedPaging);
4058
4059 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
4060 AssertRC(rc2);
4061 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
4062
4063 /* Shortcut for APIC TPR reads and writes. */
4064 if ( (GCPhys & 0xfff) == 0x080
4065 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4066 && fSetupTPRCaching
4067 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4068 {
4069 RTGCPHYS GCPhysApicBase;
4070             PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
4071 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4072 if (GCPhys == GCPhysApicBase + 0x80)
4073 {
4074 Log(("Enable VT-x virtual APIC access filtering\n"));
4075 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
4076 AssertRC(rc2);
4077 }
4078 }
4079
4080 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
4081
4082 /*
4083 * If we succeed, resume execution.
4084          * Or, if we fail to interpret the instruction because we couldn't get the guest physical address of the
4085          * page containing the instruction via the guest's page tables (we would invalidate the guest page in the
4086          * host TLB), resume execution anyway; the resulting guest page fault lets the guest handle this weird
4087          * case. See @bugref{6043}.
4088 */
4089 if ( rc == VINF_SUCCESS
4090 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4091 || rc == VERR_PAGE_NOT_PRESENT)
4092 {
4093 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
4094 goto ResumeExecution;
4095 }
4096
4097 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4098 break;
4099 }
4100
4101 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4102 /* Clear VM-exit on IF=1 change. */
4103 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip,
4104 VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
4105 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
4106 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4107 AssertRC(rc2);
4108 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
4109 goto ResumeExecution; /* we check for pending guest interrupts there */
4110
4111 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
4112 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
4113 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
4114 /* Skip instruction and continue directly. */
4115 pCtx->rip += cbInstr;
4116             /* Continue execution. */
4117 goto ResumeExecution;
4118
4119 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4120 {
4121 Log2(("VMX: Cpuid %x\n", pCtx->eax));
4122 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
4123 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4124 if (rc == VINF_SUCCESS)
4125 {
4126 /* Update EIP and continue execution. */
4127 Assert(cbInstr == 2);
4128 pCtx->rip += cbInstr;
4129 goto ResumeExecution;
4130 }
4131 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4132 rc = VINF_EM_RAW_EMULATE_INSTR;
4133 break;
4134 }
4135
4136 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4137 {
4138 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
4139 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
4140 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4141 if (rc == VINF_SUCCESS)
4142 {
4143 /* Update EIP and continue execution. */
4144 Assert(cbInstr == 2);
4145 pCtx->rip += cbInstr;
4146 goto ResumeExecution;
4147 }
4148 rc = VINF_EM_RAW_EMULATE_INSTR;
4149 break;
4150 }
4151
4152 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4153 {
4154 Log2(("VMX: Rdtsc\n"));
4155 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
4156 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4157 if (rc == VINF_SUCCESS)
4158 {
4159 /* Update EIP and continue execution. */
4160 Assert(cbInstr == 2);
4161 pCtx->rip += cbInstr;
4162 goto ResumeExecution;
4163 }
4164 rc = VINF_EM_RAW_EMULATE_INSTR;
4165 break;
4166 }
4167
4168 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4169 {
4170 Log2(("VMX: Rdtscp\n"));
4171 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtscp);
4172 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
4173 if (rc == VINF_SUCCESS)
4174 {
4175 /* Update EIP and continue execution. */
4176 Assert(cbInstr == 3);
4177 pCtx->rip += cbInstr;
4178 goto ResumeExecution;
4179 }
4180 rc = VINF_EM_RAW_EMULATE_INSTR;
4181 break;
4182 }
4183
4184 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4185 {
4186 Log2(("VMX: invlpg\n"));
4187 Assert(!pVM->hwaccm.s.fNestedPaging);
4188
4189 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvlpg);
4190 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
4191 if (rc == VINF_SUCCESS)
4192 {
4193 /* Update EIP and continue execution. */
4194 pCtx->rip += cbInstr;
4195 goto ResumeExecution;
4196 }
4197 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
4198 break;
4199 }
4200
4201 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4202 {
4203 Log2(("VMX: monitor\n"));
4204
4205 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
4206 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4207 if (rc == VINF_SUCCESS)
4208 {
4209 /* Update EIP and continue execution. */
4210 pCtx->rip += cbInstr;
4211 goto ResumeExecution;
4212 }
4213 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4214 break;
4215 }
4216
4217 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4218 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
4219 if ( pVM->hwaccm.s.fTPRPatchingActive
4220 && pCtx->ecx == MSR_K8_LSTAR)
4221 {
4222 Assert(!CPUMIsGuestInLongModeEx(pCtx));
4223 if ((pCtx->eax & 0xff) != u8LastTPR)
4224 {
4225 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
4226
4227 /* Our patch code uses LSTAR for TPR caching. */
4228 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
4229 AssertRC(rc2);
4230 }
4231
4232 /* Skip the instruction and continue. */
4233 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
4234
4235 /* Only resume if successful. */
4236 goto ResumeExecution;
4237 }
4238 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_MSR;
4239 /* no break */
4240 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4241 {
4242 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
4243
4244 /*
4245              * Note: The Intel spec. claims there's a REX version of RDMSR that's slightly different,
4246              * so we play it safe by completely disassembling the instruction.
4247 */
4248 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
4249 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
4250 if (rc == VINF_SUCCESS)
4251 {
4252 /* EIP has been updated already. */
4253 /* Only resume if successful. */
4254 goto ResumeExecution;
4255 }
4256 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n",
4257 (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
4258 break;
4259 }
4260
4261 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4262 {
4263 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4264
4265 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
4266 {
4267 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
4268 {
4269 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
4270 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4271 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4272 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
4273 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
4274 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
4275 {
4276 case 0:
4277 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
4278 break;
4279 case 2:
4280 break;
4281 case 3:
4282 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
4283 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
4284 break;
4285 case 4:
4286 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
4287 break;
4288 case 8:
4289 /* CR8 contains the APIC TPR */
4290 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1
4291 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4292 break;
4293
4294 default:
4295 AssertFailed();
4296 break;
4297 }
4298 break;
4299 }
4300
4301 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
4302 {
4303 Log2(("VMX: mov x, crx\n"));
4304 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4305
4306 Assert( !pVM->hwaccm.s.fNestedPaging
4307 || !CPUMIsGuestInPagedProtectedModeEx(pCtx)
4308 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3);
4309
4310 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
4311 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8
4312 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4313
4314 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4315 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
4316 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
4317 break;
4318 }
4319
4320 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
4321 {
4322 Log2(("VMX: clts\n"));
4323 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
4324 rc = EMInterpretCLTS(pVM, pVCpu);
4325 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
4326 break;
4327 }
4328
4329 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
4330 {
4331 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
4332 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
4333 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
4334 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
4335 break;
4336 }
4337 }
4338
4339 /* Update EIP if no error occurred. */
4340 if (RT_SUCCESS(rc))
4341 pCtx->rip += cbInstr;
4342
4343 if (rc == VINF_SUCCESS)
4344 {
4345 /* Only resume if successful. */
4346 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4347 goto ResumeExecution;
4348 }
4349 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
4350 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4351 break;
4352 }
4353
4354 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4355 {
4356 if ( !DBGFIsStepping(pVCpu)
4357 && !CPUMIsHyperDebugStateActive(pVCpu))
4358 {
4359 /* Disable DRx move intercepts. */
4360 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4361 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4362 AssertRC(rc2);
4363
4364 /* Save the host and load the guest debug state. */
4365 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
4366 AssertRC(rc2);
4367
4368#ifdef LOG_ENABLED
4369 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4370 {
4371 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4372 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4373 }
4374 else
4375 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
4376#endif
4377
4378#ifdef VBOX_WITH_STATISTICS
4379 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
4380 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4381 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4382 else
4383 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4384#endif
4385
4386 goto ResumeExecution;
4387 }
4388
4389 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first
4390 * time and restore DRx registers afterwards */
4391 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4392 {
4393 Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4394 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4395 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4396 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4397 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4398 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4399 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4400 Log2(("DR7=%08x\n", pCtx->dr[7]));
4401 }
4402 else
4403 {
4404 Log2(("VMX: mov x, DRx\n"));
4405 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4406 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4407 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4408 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4409 }
4410 /* Update EIP if no error occurred. */
4411 if (RT_SUCCESS(rc))
4412 pCtx->rip += cbInstr;
4413
4414 if (rc == VINF_SUCCESS)
4415 {
4416 /* Only resume if successful. */
4417 goto ResumeExecution;
4418 }
4419 Assert(rc == VERR_EM_INTERPRETER);
4420 break;
4421 }
4422
4423         /* Note: We'll get a #GP if the I/O instruction isn't allowed (IOPL or TSS bitmap); no need to double-check. */
4424 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4425 {
4426 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4427 uint32_t uPort;
4428 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4429 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4430
4431 /** @todo necessary to make the distinction? */
4432 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4433 uPort = pCtx->edx & 0xffff;
4434 else
4435 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
4436
4437 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */
4438 {
4439 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4440 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4441 break;
4442 }
4443
4444 uint32_t cbSize = g_aIOSize[uIOWidth];
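            /* The I/O width field encodes 0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; 2 is undefined,
             * hence the paranoia check above and the zero entries in the lookup tables. */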
4445 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4446 {
4447 /* ins/outs */
4448 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
4449
4450 /* Disassemble manually to deal with segment prefixes. */
4451 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
4452 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4453 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
4454             if (RT_SUCCESS(rc2))
4455 {
4456 if (fIOWrite)
4457 {
4458 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4459 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
4460 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4461 }
4462 else
4463 {
4464 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4465 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
4466 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4467 }
4468 }
4469 else
4470 rc = VINF_EM_RAW_EMULATE_INSTR;
4471 }
4472 else
4473 {
4474 /* Normal in/out */
4475 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
4476
4477 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4478
4479 if (fIOWrite)
4480 {
4481 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
4482 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4483 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4484 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4485 }
4486 else
4487 {
4488 uint32_t u32Val = 0;
4489
4490 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
4491 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4492 if (IOM_SUCCESS(rc))
4493 {
4494 /* Write back to the EAX register. */
4495 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4496 }
4497 else
4498 if (rc == VINF_IOM_R3_IOPORT_READ)
4499 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4500 }
4501 }
4502
4503 /*
4504          * Handle the I/O return codes.
4505 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4506 */
4507 if (IOM_SUCCESS(rc))
4508 {
4509 /* Update EIP and continue execution. */
4510 pCtx->rip += cbInstr;
4511 if (RT_LIKELY(rc == VINF_SUCCESS))
4512 {
4513 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4514 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4515 {
4516 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
4517 for (unsigned i = 0; i < 4; i++)
4518 {
4519 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
4520
4521 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4522 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4523 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4524 {
4525 uint64_t uDR6;
4526
4527 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4528
4529 uDR6 = ASMGetDR6();
4530
4531 /* Clear all breakpoint status flags and set the one we just hit. */
4532 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4533 uDR6 |= (uint64_t)RT_BIT(i);
4534
4535 /*
4536 * Note: AMD64 Architecture Programmer's Manual 13.1:
4537                              * Bits 15:13 of the DR6 register are never cleared by the processor and must
4538 * be cleared by software after the contents have been read.
4539 */
4540 ASMSetDR6(uDR6);
4541
4542 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
4543 pCtx->dr[7] &= ~X86_DR7_GD;
4544
4545 /* Paranoia. */
4546 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4547 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4548 pCtx->dr[7] |= 0x400; /* must be one */
4549
4550 /* Resync DR7 */
4551 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
4552 AssertRC(rc2);
4553
4554 /* Construct inject info. */
4555 intInfo = X86_XCPT_DB;
4556 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4557 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4558
4559 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4560 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4561 0 /* cbInstr */, 0 /* errCode */);
4562 AssertRC(rc2);
4563
4564 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4565 goto ResumeExecution;
4566 }
4567 }
4568 }
4569 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4570 goto ResumeExecution;
4571 }
4572 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4573 break;
4574 }
4575
4576#ifdef VBOX_STRICT
4577 if (rc == VINF_IOM_R3_IOPORT_READ)
4578 Assert(!fIOWrite);
4579 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4580 Assert(fIOWrite);
4581 else
4582 {
4583 AssertMsg( RT_FAILURE(rc)
4584 || rc == VINF_EM_RAW_EMULATE_INSTR
4585 || rc == VINF_EM_RAW_GUEST_TRAP
4586 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4587 }
4588#endif
4589 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4590 break;
4591 }
4592
4593 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4594 LogFlow(("VMX_EXIT_TPR\n"));
4595 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4596 goto ResumeExecution;
4597
4598 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4599 on the APIC-access page. */
4600 {
4601 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4602 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4603
4604 switch (uAccessType)
4605 {
4606 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4607 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4608 {
4609 RTGCPHYS GCPhys;
4610 PDMApicGetBase(pVM, &GCPhys);
4611 GCPhys &= PAGE_BASE_GC_MASK;
4612 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4613
4614 LogFlow(("Apic access at %RGp\n", GCPhys));
4615 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
4616 CPUMCTX2CORE(pCtx), GCPhys);
4617 if (rc == VINF_SUCCESS)
4618 goto ResumeExecution; /* rip already updated */
4619 break;
4620 }
4621
4622 default:
4623 rc = VINF_EM_RAW_EMULATE_INSTR;
4624 break;
4625 }
4626 break;
4627 }
4628
4629 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4630 if (!TMTimerPollBool(pVM, pVCpu))
4631 goto ResumeExecution;
4632 rc = VINF_EM_RAW_TIMER_PENDING;
4633 break;
4634
4635 default:
4636 /* The rest is handled after syncing the entire CPU state. */
4637 break;
4638 }
4639
4640
4641 /*
4642 * Note: The guest state is not entirely synced back at this stage!
4643 */
4644
4645 /* Investigate why there was a VM-exit. (part 2) */
4646 switch (exitReason)
4647 {
4648 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
4649 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
4650 case VMX_EXIT_EPT_VIOLATION:
4651 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4652 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4653 /* Already handled above. */
4654 break;
4655
4656 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4657 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4658 break;
4659
4660 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4661 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4662 rc = VINF_EM_RAW_INTERRUPT;
4663 AssertFailed(); /* Can't happen. Yet. */
4664 break;
4665
4666 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
4667 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
4668 rc = VINF_EM_RAW_INTERRUPT;
4669 AssertFailed(); /* Can't happen afaik. */
4670 break;
4671
4672 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4673 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4674 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4675 && pVCpu->hwaccm.s.Event.fPending)
4676 {
4677 /* Caused by an injected interrupt. */
4678 pVCpu->hwaccm.s.Event.fPending = false;
4679
4680 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo)));
4681 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo));
4682 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT);
4683 AssertRC(rc2);
4684 }
4685 /* else Exceptions and software interrupts can just be restarted. */
4686 rc = VERR_EM_INTERPRETER;
4687 break;
4688
4689 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4690 /* Check if external interrupts are pending; if so, don't switch back. */
4691 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
4692 pCtx->rip++; /* skip hlt */
4693 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4694 goto ResumeExecution;
4695
4696 rc = VINF_EM_HALT;
4697 break;
4698
4699 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4700 Log2(("VMX: mwait\n"));
4701 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
4702 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4703 if ( rc == VINF_EM_HALT
4704 || rc == VINF_SUCCESS)
4705 {
4706 /* Update EIP and continue execution. */
4707 pCtx->rip += cbInstr;
4708
4709 /* Check if external interrupts are pending; if so, don't switch back. */
4710 if ( rc == VINF_SUCCESS
4711 || ( rc == VINF_EM_HALT
4712 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4713 )
4714 goto ResumeExecution;
4715 }
4716 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4717 break;
4718
4719 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4720 AssertFailed(); /* can't happen. */
4721 rc = VERR_EM_INTERPRETER;
4722 break;
4723
4724 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4725 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4726 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4727 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4728 AssertRC(rc2);
4729 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMTF);
4730#if 0
4731 DBGFDoneStepping(pVCpu);
4732#endif
4733 rc = VINF_EM_DBG_STOP;
4734 break;
4735
4736 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4737 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4738 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4739 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4740 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4741 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4742 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4743 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4744 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4745 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4746 /** @todo inject #UD immediately */
4747 rc = VERR_EM_INTERPRETER;
4748 break;
4749
4750 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4751 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4752 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4753 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4754 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4755 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4756 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4757 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4758 /* already handled above */
4759 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4760 || rc == VINF_EM_RAW_INTERRUPT
4761 || rc == VERR_EM_INTERPRETER
4762 || rc == VINF_EM_RAW_EMULATE_INSTR
4763 || rc == VINF_PGM_SYNC_CR3
4764 || rc == VINF_IOM_R3_IOPORT_READ
4765 || rc == VINF_IOM_R3_IOPORT_WRITE
4766 || rc == VINF_EM_RAW_GUEST_TRAP
4767 || rc == VINF_TRPM_XCPT_DISPATCHED
4768 || rc == VINF_EM_RESCHEDULE_REM,
4769 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4770 break;
4771
4772 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4773 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4774 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4775 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4776 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4777 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4778 on the APIC-access page. */
4779 {
4780 /*
4781 * If we ever decide to emulate these exits here instead of falling back to the interpreter, we must sync the MSRs they could have changed (SYSENTER, FS/GS base).
4782 */
4783 rc = VERR_EM_INTERPRETER;
4784 break;
4785 }
4786
4787 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4788 Assert(rc == VINF_EM_RAW_INTERRUPT);
4789 break;
4790
4791 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4792 {
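/* In strict builds, dump the guest-state fields that most commonly violate the VM-entry checks: control registers, RFLAGS, the segment registers and GDTR/IDTR. */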
4793#ifdef VBOX_STRICT
4794 RTCCUINTREG val2 = 0;
4795
4796 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4797
4798 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2);
4799 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4800
4801 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2);
4802 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4803
4804 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2);
4805 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4806
4807 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2);
4808 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4809
4810 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4811 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4812
4813 VMX_LOG_SELREG(CS, "CS", val2);
4814 VMX_LOG_SELREG(DS, "DS", val2);
4815 VMX_LOG_SELREG(ES, "ES", val2);
4816 VMX_LOG_SELREG(FS, "FS", val2);
4817 VMX_LOG_SELREG(GS, "GS", val2);
4818 VMX_LOG_SELREG(SS, "SS", val2);
4819 VMX_LOG_SELREG(TR, "TR", val2);
4820 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4821
4822 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2);
4823 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4824 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2);
4825 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4826#endif /* VBOX_STRICT */
4827 rc = VERR_VMX_INVALID_GUEST_STATE;
4828 break;
4829 }
4830
4831 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4832 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4833 default:
4834 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4835 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4836 break;
4837
4838 }
4839
4840end:
4841 /* We are now going back to ring-3, so clear the action flag. */
4842 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4843
4844 /*
4845 * Signal changes for the recompiler.
4846 */
4847 CPUMSetChangedFlags(pVCpu,
4848 CPUM_CHANGED_SYSENTER_MSR
4849 | CPUM_CHANGED_LDTR
4850 | CPUM_CHANGED_GDTR
4851 | CPUM_CHANGED_IDTR
4852 | CPUM_CHANGED_TR
4853 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4854
4855 /*
4856 * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time.
4857 */
4858 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4859 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4860 {
4861 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
4862 /* On the next entry we'll only sync the host context. */
4863 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
4864 }
4865 else
4866 {
4867 /* On the next entry we'll sync everything. */
4868 /** @todo we can do better than this */
4869 /* Not in the VINF_PGM_CHANGE_MODE though! */
4870 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
4871 }
4872
4873 /* Translate into a less severe return code */
4874 if (rc == VERR_EM_INTERPRETER)
4875 rc = VINF_EM_RAW_EMULATE_INSTR;
4876 else if (rc == VERR_VMX_INVALID_VMCS_PTR)
4877 {
4878 /* Try to extract more information about what might have gone wrong here. */
4879 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
4880 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS;
4881 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
4882 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4883 }
4884
4885 /* Just set the correct state here instead of trying to catch every goto above. */
4886 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4887
4888#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4889 /* Restore interrupts if we exited after disabling them. */
4890 if (uOldEFlags != ~(RTCCUINTREG)0)
4891 ASMSetFlags(uOldEFlags);
4892#endif
4893
4894 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x);
4895 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
4896 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
4897 Log2(("X"));
4898 return VBOXSTRICTRC_TODO(rc);
4899}
4900
4901
4902/**
4903 * Enters the VT-x session.
4904 *
4905 * @returns VBox status code.
4906 * @param pVM Pointer to the VM.
4907 * @param pVCpu Pointer to the VMCPU.
4908 * @param pCpu Pointer to the CPU info struct.
4909 */
4910VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
4911{
4912 Assert(pVM->hwaccm.s.vmx.fSupported);
4913 NOREF(pCpu);
4914
4915 unsigned cr4 = ASMGetCR4();
4916 if (!(cr4 & X86_CR4_VMXE))
4917 {
4918 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4919 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4920 }
4921
4922 /* Activate the VMCS (VMPTRLD), making it current and active on this host CPU. */
4923 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4924 if (RT_FAILURE(rc))
4925 return rc;
4926
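/* Start with VMLAUNCH (not VMRESUME) on the next world switch; VMRESUME is only valid for a VMCS that has already been launched on this CPU. */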
4927 pVCpu->hwaccm.s.fResumeVM = false;
4928 return VINF_SUCCESS;
4929}
4930
4931
4932/**
4933 * Leaves the VT-x session.
4934 *
4935 * @returns VBox status code.
4936 * @param pVM Pointer to the VM.
4937 * @param pVCpu Pointer to the VMCPU.
4938 * @param pCtx Pointer to the guest's CPU context.
4939 */
4940VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4941{
4942 Assert(pVM->hwaccm.s.vmx.fSupported);
4943
4944#ifdef DEBUG
4945 if (CPUMIsHyperDebugStateActive(pVCpu))
4946 {
4947 CPUMR0LoadHostDebugState(pVM, pVCpu);
4948 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4949 }
4950 else
4951#endif
4952
4953 /*
4954 * Save the guest debug state if necessary.
4955 */
4956 if (CPUMIsGuestDebugStateActive(pVCpu))
4957 {
4958 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4959
4960 /* Enable DRx move intercepts again. */
4961 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4962 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4963 AssertRC(rc);
4964
4965 /* Resync the debug registers the next time. */
4966 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4967 }
4968 else
4969 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4970
4971 /*
4972 * Clear VMCS, marking it inactive, clearing implementation-specific data and writing
4973 * VMCS data back to memory.
4974 */
4975 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4976 AssertRC(rc);
4977
4978 return VINF_SUCCESS;
4979}
4980
4981
4982/**
4983 * Flushes the TLB using EPT.
4984 *
4986 * @param pVM Pointer to the VM.
4987 * @param pVCpu Pointer to the VMCPU.
4988 * @param enmFlush Type of flush.
4989 */
4990static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
4991{
4992 uint64_t descriptor[2];
4993
4994 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
4995 Assert(pVM->hwaccm.s.fNestedPaging);
4996 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
4997 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
4998 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
4999 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.vmx.GCPhysEPTP, rc));
5000}
5001
5002
5003/**
5004 * Flushes the TLB using VPID.
5005 *
5007 * @param pVM Pointer to the VM.
5008 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
5009 * enmFlush).
5010 * @param enmFlush Type of flush.
5011 * @param GCPtr Virtual address of the page to flush (can be 0 depending
5012 * on @a enmFlush).
5013 */
5014static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
5015{
5016 uint64_t descriptor[2];
5017
5018 Assert(pVM->hwaccm.s.vmx.fVPID);
5019 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
5020 {
5021 descriptor[0] = 0;
5022 descriptor[1] = 0;
5023 }
5024 else
5025 {
5026 AssertPtr(pVCpu);
5027 AssertMsg(pVCpu->hwaccm.s.uCurrentASID != 0, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hwaccm.s.uCurrentASID));
5028 AssertMsg(pVCpu->hwaccm.s.uCurrentASID <= UINT16_MAX, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hwaccm.s.uCurrentASID));
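/* INVVPID descriptor layout: VPID in bits 15:0 of the first quadword (remaining bits must be zero), linear address in the second quadword (only used for individual-address flushes). */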
5029 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
5030 descriptor[1] = GCPtr;
5031 }
5032 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
5033 AssertMsg(rc == VINF_SUCCESS,
5034 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hwaccm.s.uCurrentASID : 0, GCPtr, rc));
5035}
5036
5037
5038/**
5039 * Invalidates a guest page by guest virtual address. Only relevant for
5040 * EPT/VPID, otherwise there is nothing really to invalidate.
5041 *
5042 * @returns VBox status code.
5043 * @param pVM Pointer to the VM.
5044 * @param pVCpu Pointer to the VMCPU.
5045 * @param GCVirt Guest virtual address of the page to invalidate.
5046 */
5047VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
5048{
5049 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
5050
5051 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
5052
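/* If a full TLB flush is already pending, the flush done before the next VM-entry covers this page as well. */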
5053 if (!fFlushPending)
5054 {
5055 /*
5056 * We must invalidate the guest TLB entry in either case; we cannot ignore it even in the EPT case.
5057 * See @bugref{6043} and @bugref{6177}.
5058 *
5059 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxSetupTLB*() as this
5060 * function may be called in a loop with individual addresses.
5061 */
5062 if (pVM->hwaccm.s.vmx.fVPID)
5063 {
5064 /* If we can flush just this page do it, otherwise flush as little as possible. */
5065 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
5066 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
5067 else
5068 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5069 }
5070 else if (pVM->hwaccm.s.fNestedPaging)
5071 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5072 }
5073
5074 return VINF_SUCCESS;
5075}
5076
5077
5078/**
5079 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
5080 * otherwise there is nothing really to invalidate.
5081 *
5082 * NOTE: Assumes the current instruction references this physical page through a virtual address!!
5083 *
5084 * @returns VBox status code.
5085 * @param pVM Pointer to the VM.
5086 * @param pVCpu Pointer to the VMCPU.
5087 * @param GCPhys Guest physical address of the page to invalidate.
5088 */
5089VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
5090{
5091 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
5092
5093 /*
5094 * We cannot flush a page by guest-physical address: INVVPID takes only a linear address, and INVEPT
5095 * flushes entire EPT contexts rather than individual addresses. So we just update the force flag here
5096 * and flush before VM-entry in hmR0VmxSetupTLB*(). This function might be called in a loop.
5097 */
5098 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5099 return VINF_SUCCESS;
5100}
5101
5102
5103/**
5104 * Reports world switch errors and dumps some useful debug info.
5105 *
5106 * @param pVM Pointer to the VM.
5107 * @param pVCpu Pointer to the VMCPU.
5108 * @param rc Return code.
5109 * @param pCtx Pointer to the current guest CPU context (not updated).
5110 */
5111static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
5112{
5113 NOREF(pVM);
5114
5115 switch (VBOXSTRICTRC_VAL(rc))
5116 {
5117 case VERR_VMX_INVALID_VMXON_PTR:
5118 AssertFailed();
5119 break;
5120
5121 case VERR_VMX_UNABLE_TO_START_VM:
5122 case VERR_VMX_UNABLE_TO_RESUME_VM:
5123 {
5124 int rc2;
5125 RTCCUINTREG exitReason, instrError;
5126
5127 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
5128 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
5129 AssertRC(rc2);
5130 if (rc2 == VINF_SUCCESS)
5131 {
5132 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason,
5133 (uint32_t)instrError));
5134 Log(("Current stack %08x\n", &rc2));
5135
5136 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
5137 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
5138
5139#ifdef VBOX_STRICT
5140 RTGDTR gdtr;
5141 PCX86DESCHC pDesc;
5142 RTCCUINTREG val;
5143
5144 ASMGetGDTR(&gdtr);
5145
5146 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
5147 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
5148 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
5149 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
5150 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
5151 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
5152 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
5153 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
5154 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
5155 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
5156
5157 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
5158 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
5159 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
5160 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
5161 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
5162 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
5163
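/* Dump the guest RFLAGS and the host selector fields together with their GDT descriptors. */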
5164 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
5165 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
5166 
5167 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
5168 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
5169 if (val < gdtr.cbGdt)
5170 {
5171 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5172 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
5173 }
5174
5175 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
5176 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
5177 if (val < gdtr.cbGdt)
5178 {
5179 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5180 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
5181 }
5182
5183 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
5184 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
5185 if (val < gdtr.cbGdt)
5186 {
5187 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5188 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
5189 }
5190
5191 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
5192 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
5193 if (val < gdtr.cbGdt)
5194 {
5195 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5196 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
5197 }
5198
5199 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
5200 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
5201 if (val < gdtr.cbGdt)
5202 {
5203 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5204 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
5205 }
5206
5207 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
5208 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
5209 if (val < gdtr.cbGdt)
5210 {
5211 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5212 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
5213 }
5214
5215 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
5216 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
5217 if (val < gdtr.cbGdt)
5218 {
5219 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5220 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
5221 }
5222
5223 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
5224 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
5225 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
5226 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
5227 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
5228 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
5229 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
5230 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
5231 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
5232 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
5233 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
5234 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
5235 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
5236 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
5237 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
5238 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
5239# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5240 if (VMX_IS_64BIT_HOST_MODE())
5241 {
5242 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
5243 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
5244 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5245 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5246 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5247 Log(("MSR_K8_KERNEL_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
5248 }
5249# endif
5250#endif /* VBOX_STRICT */
5251 }
5252 break;
5253 }
5254
5255 default:
5256 /* impossible */
5257 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
5258 break;
5259 }
5260}
5261
5262
5263#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5264/**
5265 * Prepares for and executes VMLAUNCH/VMRESUME (64-bit guest mode).
5266 *
5267 * @returns VBox status code.
5268 * @param fResume Whether to use VMLAUNCH or VMRESUME.
5269 * @param pCtx Pointer to the guest CPU context.
5270 * @param pCache Pointer to the VMCS cache.
5271 * @param pVM Pointer to the VM.
5272 * @param pVCpu Pointer to the VMCPU.
5273 */
5274DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5275{
5276 uint32_t aParam[6];
5277 PHMGLOBLCPUINFO pCpu;
5278 RTHCPHYS HCPhysCpuPage;
5279 int rc;
5280
5281 pCpu = HWACCMR0GetCurrentCpu();
5282 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
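/* Physical address of this CPU's VMXON region; the 64-bit switcher code needs it to (re-)enter VMX root mode (param 1 below). */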
5283
5284#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5285 pCache->uPos = 1;
5286 pCache->interPD = PGMGetInterPaeCR3(pVM);
5287 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
5288#endif
5289
5290#ifdef DEBUG
5291 pCache->TestIn.HCPhysCpuPage = 0;
5292 pCache->TestIn.HCPhysVMCS = 0;
5293 pCache->TestIn.pCache = 0;
5294 pCache->TestOut.HCPhysVMCS = 0;
5295 pCache->TestOut.pCache = 0;
5296 pCache->TestOut.pCtx = 0;
5297 pCache->TestOut.eflags = 0;
5298#endif
5299
5300 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5301 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5302 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */
5303 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. */
5304 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
5305 aParam[5] = 0;
5306
5307#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5308 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
5309 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
5310#endif
5311 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
5312
5313#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5314 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
5315 Assert(pCtx->dr[4] == 10);
5316 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
5317#endif
5318
5319#ifdef DEBUG
5320 AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5321 AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hwaccm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5322 pVCpu->hwaccm.s.vmx.HCPhysVMCS));
5323 AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5324 pCache->TestOut.HCPhysVMCS));
5325 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5326 pCache->TestOut.pCache));
5327 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache),
5328 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
5329 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5330 pCache->TestOut.pCtx));
5331 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5332#endif
5333 return rc;
5334}
5335
5336
5337# ifdef VBOX_STRICT
5338static bool hmR0VmxIsValidReadField(uint32_t idxField)
5339{
5340 switch (idxField)
5341 {
5342 case VMX_VMCS64_GUEST_RIP:
5343 case VMX_VMCS64_GUEST_RSP:
5344 case VMX_VMCS_GUEST_RFLAGS:
5345 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
5346 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
5347 case VMX_VMCS64_GUEST_CR0:
5348 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
5349 case VMX_VMCS64_GUEST_CR4:
5350 case VMX_VMCS64_GUEST_DR7:
5351 case VMX_VMCS32_GUEST_SYSENTER_CS:
5352 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5353 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5354 case VMX_VMCS32_GUEST_GDTR_LIMIT:
5355 case VMX_VMCS64_GUEST_GDTR_BASE:
5356 case VMX_VMCS32_GUEST_IDTR_LIMIT:
5357 case VMX_VMCS64_GUEST_IDTR_BASE:
5358 case VMX_VMCS16_GUEST_FIELD_CS:
5359 case VMX_VMCS32_GUEST_CS_LIMIT:
5360 case VMX_VMCS64_GUEST_CS_BASE:
5361 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
5362 case VMX_VMCS16_GUEST_FIELD_DS:
5363 case VMX_VMCS32_GUEST_DS_LIMIT:
5364 case VMX_VMCS64_GUEST_DS_BASE:
5365 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
5366 case VMX_VMCS16_GUEST_FIELD_ES:
5367 case VMX_VMCS32_GUEST_ES_LIMIT:
5368 case VMX_VMCS64_GUEST_ES_BASE:
5369 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
5370 case VMX_VMCS16_GUEST_FIELD_FS:
5371 case VMX_VMCS32_GUEST_FS_LIMIT:
5372 case VMX_VMCS64_GUEST_FS_BASE:
5373 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
5374 case VMX_VMCS16_GUEST_FIELD_GS:
5375 case VMX_VMCS32_GUEST_GS_LIMIT:
5376 case VMX_VMCS64_GUEST_GS_BASE:
5377 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
5378 case VMX_VMCS16_GUEST_FIELD_SS:
5379 case VMX_VMCS32_GUEST_SS_LIMIT:
5380 case VMX_VMCS64_GUEST_SS_BASE:
5381 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
5382 case VMX_VMCS16_GUEST_FIELD_LDTR:
5383 case VMX_VMCS32_GUEST_LDTR_LIMIT:
5384 case VMX_VMCS64_GUEST_LDTR_BASE:
5385 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
5386 case VMX_VMCS16_GUEST_FIELD_TR:
5387 case VMX_VMCS32_GUEST_TR_LIMIT:
5388 case VMX_VMCS64_GUEST_TR_BASE:
5389 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
5390 case VMX_VMCS32_RO_EXIT_REASON:
5391 case VMX_VMCS32_RO_VM_INSTR_ERROR:
5392 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
5393 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
5394 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
5395 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
5396 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5397 case VMX_VMCS32_RO_IDT_INFO:
5398 case VMX_VMCS32_RO_IDT_ERRCODE:
5399 case VMX_VMCS64_GUEST_CR3:
5400 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
5401 return true;
5402 }
5403 return false;
5404}
5405
5406
5407static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5408{
5409 switch (idxField)
5410 {
5411 case VMX_VMCS64_GUEST_LDTR_BASE:
5412 case VMX_VMCS64_GUEST_TR_BASE:
5413 case VMX_VMCS64_GUEST_GDTR_BASE:
5414 case VMX_VMCS64_GUEST_IDTR_BASE:
5415 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5416 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5417 case VMX_VMCS64_GUEST_CR0:
5418 case VMX_VMCS64_GUEST_CR4:
5419 case VMX_VMCS64_GUEST_CR3:
5420 case VMX_VMCS64_GUEST_DR7:
5421 case VMX_VMCS64_GUEST_RIP:
5422 case VMX_VMCS64_GUEST_RSP:
5423 case VMX_VMCS64_GUEST_CS_BASE:
5424 case VMX_VMCS64_GUEST_DS_BASE:
5425 case VMX_VMCS64_GUEST_ES_BASE:
5426 case VMX_VMCS64_GUEST_FS_BASE:
5427 case VMX_VMCS64_GUEST_GS_BASE:
5428 case VMX_VMCS64_GUEST_SS_BASE:
5429 return true;
5430 }
5431 return false;
5432}
5433# endif /* VBOX_STRICT */
5434
5435
5436/**
5437 * Executes the specified handler in 64-bit mode.
5438 *
5439 * @returns VBox status code.
5440 * @param pVM Pointer to the VM.
5441 * @param pVCpu Pointer to the VMCPU.
5442 * @param pCtx Pointer to the guest CPU context.
5443 * @param pfnHandler Pointer to the RC handler function.
5444 * @param cbParam Number of 32-bit parameters in @a paParam.
5445 * @param paParam Array of 32-bit parameters.
5446 */
5447VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
5448 uint32_t *paParam)
5449{
5450 int rc, rc2;
5451 PHMGLOBLCPUINFO pCpu;
5452 RTHCPHYS HCPhysCpuPage;
5453 RTHCUINTREG uOldEFlags;
5454
5455 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5456 Assert(pfnHandler);
5457 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
5458 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
5459
5460#ifdef VBOX_STRICT
5461 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries; i++)
5462 Assert(hmR0VmxIsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
5463
5464 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries; i++)
5465 Assert(hmR0VmxIsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
5466#endif
5467
5468 /* Disable interrupts. */
5469 uOldEFlags = ASMIntDisableFlags();
5470
5471#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5472 RTCPUID idHostCpu = RTMpCpuId();
5473 CPUMR0SetLApic(pVM, idHostCpu);
5474#endif
5475
5476 pCpu = HWACCMR0GetCurrentCpu();
5477 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5478
5479 /* Clear VMCS. Marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */
5480 VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5481
5482 /* Leave VMX Root Mode. */
5483 VMXDisable();
5484
5485 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5486
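/* Set up the hypervisor context for the 32->64 switcher: stack, entry point (the RC handler) and its parameters, pushed in reverse order. */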
5487 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5488 CPUMSetHyperEIP(pVCpu, pfnHandler);
5489 for (int i = (int)cbParam - 1; i >= 0; i--)
5490 CPUMPushHyper(pVCpu, paParam[i]);
5491
5492 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5493
5494 /* Call switcher. */
5495 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5496 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5497
5498 /* Make sure the VMX instructions don't cause #UD faults. */
5499 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5500
5501 /* Enter VMX Root Mode */
5502 rc2 = VMXEnable(HCPhysCpuPage);
5503 if (RT_FAILURE(rc2))
5504 {
5505 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5506 ASMSetFlags(uOldEFlags);
5507 return VERR_VMX_VMXON_FAILED;
5508 }
5509
5510 rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5511 AssertRC(rc2);
5512 Assert(!(ASMGetFlags() & X86_EFL_IF));
5513 ASMSetFlags(uOldEFlags);
5514 return rc;
5515}
5516#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5517
5518
5519#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
5520/**
5521 * Executes VMWRITE.
5522 *
5523 * @returns VBox status code.
5524 * @param pVCpu Pointer to the VMCPU.
5525 * @param idxField VMCS field index.
5526 * @param u64Val 16, 32 or 64-bit value.
5527 */
5528VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5529{
5530 int rc;
5531 switch (idxField)
5532 {
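/* Fields that are architecturally 64 bits wide ('_FULL' fields) have a separate high dword at idxField + 1 and can be written as two 32-bit halves. Natural-width guest fields cannot be written with a 64-bit value from a 32-bit host, so values with the upper half set are queued in the VMCS write cache (VMXWriteCachedVMCSEx) and applied from 64-bit mode. */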
5533 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
5534 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
5535 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
5536 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
5537 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
5538 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
5539 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
5540 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
5541 case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL:
5542 case VMX_VMCS_GUEST_LINK_PTR_FULL:
5543 case VMX_VMCS_GUEST_PDPTR0_FULL:
5544 case VMX_VMCS_GUEST_PDPTR1_FULL:
5545 case VMX_VMCS_GUEST_PDPTR2_FULL:
5546 case VMX_VMCS_GUEST_PDPTR3_FULL:
5547 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
5548 case VMX_VMCS_GUEST_EFER_FULL:
5549 case VMX_VMCS_CTRL_EPTP_FULL:
5550 /* These fields consist of two parts, which are both writable in 32-bit mode. */
5551 rc = VMXWriteVMCS32(idxField, u64Val);
5552 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5553 AssertRC(rc);
5554 return rc;
5555
5556 case VMX_VMCS64_GUEST_LDTR_BASE:
5557 case VMX_VMCS64_GUEST_TR_BASE:
5558 case VMX_VMCS64_GUEST_GDTR_BASE:
5559 case VMX_VMCS64_GUEST_IDTR_BASE:
5560 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5561 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5562 case VMX_VMCS64_GUEST_CR0:
5563 case VMX_VMCS64_GUEST_CR4:
5564 case VMX_VMCS64_GUEST_CR3:
5565 case VMX_VMCS64_GUEST_DR7:
5566 case VMX_VMCS64_GUEST_RIP:
5567 case VMX_VMCS64_GUEST_RSP:
5568 case VMX_VMCS64_GUEST_CS_BASE:
5569 case VMX_VMCS64_GUEST_DS_BASE:
5570 case VMX_VMCS64_GUEST_ES_BASE:
5571 case VMX_VMCS64_GUEST_FS_BASE:
5572 case VMX_VMCS64_GUEST_GS_BASE:
5573 case VMX_VMCS64_GUEST_SS_BASE:
5574 /* Queue a 64-bit value as we can't set it in 32-bit host mode. */
5575 if (u64Val >> 32ULL)
5576 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
5577 else
5578 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
5579
5580 return rc;
5581
5582 default:
5583 AssertMsgFailed(("Unexpected field %x\n", idxField));
5584 return VERR_INVALID_PARAMETER;
5585 }
5586}
5587
5588
5589/**
5590 * Caches VMCS writes for performance reasons (Darwin) and for running 64-bit guests on 32-bit hosts.
5591 *
 * @returns VBox status code.
5592 * @param pVCpu Pointer to the VMCPU.
5593 * @param idxField VMCS field index.
5594 * @param u64Val 16, 32 or 64-bit value.
5595 */
5596VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5597{
5598 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
5599
5600 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5601 ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5602
5603 /* Make sure there are no duplicates. */
5604 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
5605 {
5606 if (pCache->Write.aField[i] == idxField)
5607 {
5608 pCache->Write.aFieldVal[i] = u64Val;
5609 return VINF_SUCCESS;
5610 }
5611 }
5612
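/* Not cached yet; append a new entry. */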
5613 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5614 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5615 pCache->Write.cValidEntries++;
5616 return VINF_SUCCESS;
5617}
5618
5619#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
5620