source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@41327

Last change on this file since 41327 was 41327, checked in by vboxsync, 13 years ago

VMX: Do not force #PF traps unless specifically directed to do so.

1/* $Id: HWVMXR0.cpp 41327 2012-05-16 10:39:24Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2011 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HWACCM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hwaccm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HWACCMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
55#if defined(RT_ARCH_AMD64)
56# define VMX_IS_64BIT_HOST_MODE() (true)
57#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
58# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
59#else
60# define VMX_IS_64BIT_HOST_MODE() (false)
61#endif
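/* Note (editor's gloss, not in the original): VBOX_WITH_HYBRID_32BIT_KERNEL covers hosts
   running a 32-bit kernel on a 64-bit capable CPU (typically a 32-bit darwin kernel); in
   that case the host mode must be determined at runtime via g_fVMXIs64bitHost rather than
   fixed at compile time as in the other two branches. */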
62
63/*******************************************************************************
64* Global Variables *
65*******************************************************************************/
66/* IO operation lookup arrays. */
67static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
68static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
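/* Note: both tables are indexed by the access-size field of the I/O VM-exit qualification
   (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; index 2 is not a valid encoding, hence the 0 entries).
   For example, a 16-bit OUT yields g_aIOSize[1] == 2 and an operand mask of g_aIOOpAnd[1] == 0xffff. */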
69
70#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
71/** See HWACCMR0A.asm. */
72extern "C" uint32_t g_fVMXIs64bitHost;
73#endif
74
75/*******************************************************************************
76* Local Functions *
77*******************************************************************************/
78static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
79static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
80static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
81static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
82static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
83static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
84static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
85static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
86static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
87
88
89static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
90{
91 if (rc == VERR_VMX_GENERIC)
92 {
93 RTCCUINTREG instrError;
94
95 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
96 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
97 }
98 pVM->hwaccm.s.lLastError = rc;
99}
100
101/**
102 * Sets up and activates VT-x on the current CPU
103 *
104 * @returns VBox status code.
105 * @param pCpu CPU info struct
106 * @param pVM The VM to operate on. (can be NULL after a resume!!)
107 * @param pvCpuPage Pointer to the global cpu page.
108 * @param HCPhysCpuPage Physical address of the global cpu page.
109 */
110VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
111{
112 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
113 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
114 NOREF(pCpu);
115
116 if (pVM)
117 {
118 /* Set revision dword at the beginning of the VMXON structure. */
119 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
120 }
121
122 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
123 * (which can have very bad consequences!!!)
124 */
125
126 if (ASMGetCR4() & X86_CR4_VMXE)
127 return VERR_VMX_IN_VMX_ROOT_MODE;
128
129 /* Make sure the VMX instructions don't cause #UD faults. */
130 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
131
132 /* Enter VMX Root Mode. */
133 int rc = VMXEnable(HCPhysCpuPage);
134 if (RT_FAILURE(rc))
135 {
136 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
137 return VERR_VMX_VMXON_FAILED;
138 }
139
140 /*
141 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
142 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
143 * each time while reusing a VPID after hitting the MaxASID limit once.
144 */
145 if ( pVM
146 && pVM->hwaccm.s.vmx.fVPID
147 && (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS))
148 {
149 hmR0VmxFlushVPID(pVM, NULL /* pVCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
150 pCpu->fFlushASIDBeforeUse = false;
151 }
152 else
153 pCpu->fFlushASIDBeforeUse = true;
154
155 return VINF_SUCCESS;
156}
157
158/**
159 * Deactivates VT-x on the current CPU
160 *
161 * @returns VBox status code.
162 * @param pCpu CPU info struct
163 * @param pvCpuPage Pointer to the global cpu page.
164 * @param HCPhysCpuPage Physical address of the global cpu page.
165 */
166VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
167{
168 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
169 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
170 NOREF(pCpu);
171
172 /* If we're somehow not in VMX root mode, then we shouldn't dare leaving it. */
173 if (!(ASMGetCR4() & X86_CR4_VMXE))
174 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
175
176 /* Leave VMX Root Mode. */
177 VMXDisable();
178
179 /* And clear the X86_CR4_VMXE bit. */
180 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
181 return VINF_SUCCESS;
182}
183
184/**
185 * Does Ring-0 per VM VT-x init.
186 *
187 * @returns VBox status code.
188 * @param pVM The VM to operate on.
189 */
190VMMR0DECL(int) VMXR0InitVM(PVM pVM)
191{
192 int rc;
193
194#ifdef LOG_ENABLED
195 SUPR0Printf("VMXR0InitVM %x\n", pVM);
196#endif
197
198 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
199
200 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
201 {
202 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
203 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, PAGE_SIZE, true /* executable R0 mapping */);
204 AssertRC(rc);
205 if (RT_FAILURE(rc))
206 return rc;
207
208 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
209 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
210 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
211 }
212 else
213 {
214 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
215 pVM->hwaccm.s.vmx.pAPIC = 0;
216 pVM->hwaccm.s.vmx.pAPICPhys = 0;
217 }
218
219#ifdef VBOX_WITH_CRASHDUMP_MAGIC
220 {
221 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, PAGE_SIZE, true /* executable R0 mapping */);
222 AssertRC(rc);
223 if (RT_FAILURE(rc))
224 return rc;
225
226 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
227 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
228
229 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
230 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
231 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
232 }
233#endif
234
235 /* Allocate VMCSs for all guest CPUs. */
236 for (VMCPUID i = 0; i < pVM->cCpus; i++)
237 {
238 PVMCPU pVCpu = &pVM->aCpus[i];
239
240 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
241
242 /* Allocate one page for the VM control structure (VMCS). */
243 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVMCS, PAGE_SIZE, true /* executable R0 mapping */);
244 AssertRC(rc);
245 if (RT_FAILURE(rc))
246 return rc;
247
248 pVCpu->hwaccm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVMCS);
249 pVCpu->hwaccm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVMCS, 0);
250 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pvVMCS);
251
252 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
253 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
254
255 /* Allocate one page for the virtual APIC page for TPR caching. */
256 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVAPIC, PAGE_SIZE, true /* executable R0 mapping */);
257 AssertRC(rc);
258 if (RT_FAILURE(rc))
259 return rc;
260
261 pVCpu->hwaccm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVAPIC);
262 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, 0);
263 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pbVAPIC);
264
265 /* Allocate the MSR bitmap if this feature is supported. */
266 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
267 {
268 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, PAGE_SIZE, true /* executable R0 mapping */);
269 AssertRC(rc);
270 if (RT_FAILURE(rc))
271 return rc;
272
273 pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap);
274 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
275 memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
276 }
277
278#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
279 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
280 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, PAGE_SIZE, true /* executable R0 mapping */);
281 AssertRC(rc);
282 if (RT_FAILURE(rc))
283 return rc;
284
285 pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR);
286 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0);
287 memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE);
288
289 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
290 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, PAGE_SIZE, true /* executable R0 mapping */);
291 AssertRC(rc);
292 if (RT_FAILURE(rc))
293 return rc;
294
295 pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR);
296 pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0);
297 memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE);
298#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
299
300 /* Current guest paging mode. */
301 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
302
303#ifdef LOG_ENABLED
304 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pvVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.HCPhysVMCS);
305#endif
306 }
307
308 return VINF_SUCCESS;
309}
310
311/**
312 * Does Ring-0 per VM VT-x termination.
313 *
314 * @returns VBox status code.
315 * @param pVM The VM to operate on.
316 */
317VMMR0DECL(int) VMXR0TermVM(PVM pVM)
318{
319 for (VMCPUID i = 0; i < pVM->cCpus; i++)
320 {
321 PVMCPU pVCpu = &pVM->aCpus[i];
322
323 if (pVCpu->hwaccm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ)
324 {
325 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVMCS, false);
326 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
327 pVCpu->hwaccm.s.vmx.pvVMCS = 0;
328 pVCpu->hwaccm.s.vmx.HCPhysVMCS = 0;
329 }
330 if (pVCpu->hwaccm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ)
331 {
332 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, false);
333 pVCpu->hwaccm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ;
334 pVCpu->hwaccm.s.vmx.pbVAPIC = 0;
335 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = 0;
336 }
337 if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
338 {
339 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false);
340 pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
341 pVCpu->hwaccm.s.vmx.pMSRBitmap = 0;
342 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0;
343 }
344#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
345 if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ)
346 {
347 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false);
348 pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ;
349 pVCpu->hwaccm.s.vmx.pHostMSR = 0;
350 pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0;
351 }
352 if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ)
353 {
354 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false);
355 pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ;
356 pVCpu->hwaccm.s.vmx.pGuestMSR = 0;
357 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0;
358 }
359#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
360 }
361 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
362 {
363 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
364 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
365 pVM->hwaccm.s.vmx.pAPIC = 0;
366 pVM->hwaccm.s.vmx.pAPICPhys = 0;
367 }
368#ifdef VBOX_WITH_CRASHDUMP_MAGIC
369 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
370 {
371 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
372 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
373 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
374 pVM->hwaccm.s.vmx.pScratch = 0;
375 pVM->hwaccm.s.vmx.pScratchPhys = 0;
376 }
377#endif
378 return VINF_SUCCESS;
379}
380
381/**
382 * Sets up VT-x for the specified VM
383 *
384 * @returns VBox status code.
385 * @param pVM The VM to operate on.
386 */
387VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
388{
389 int rc = VINF_SUCCESS;
390 uint32_t val;
391
392 AssertReturn(pVM, VERR_INVALID_PARAMETER);
393
394 /* Initialize these always, see hwaccmR3InitFinalizeR0().*/
395 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NONE;
396 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NONE;
397
398 /* Determine optimal flush type for EPT. */
399 if (pVM->hwaccm.s.fNestedPaging)
400 {
401 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_SINGLE_CONTEXT)
402 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_SINGLE_CONTEXT;
403 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_ALL_CONTEXTS)
404 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_ALL_CONTEXTS;
405 else
406 {
407 /*
408 * Should never really happen. EPT is supported but no suitable flush types are supported.
409 * We cannot ignore EPT at this point as we've already set up Unrestricted Guest execution.
410 */
411 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED;
412 return VERR_VMX_GENERIC;
413 }
414 }
415
416 /* Determine optimal flush type for VPID. */
417 if (pVM->hwaccm.s.vmx.fVPID)
418 {
419 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
420 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_SINGLE_CONTEXT;
421 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS)
422 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_ALL_CONTEXTS;
423 else
424 {
425 /*
426 * Neither SINGLE- nor ALL-context flush types for VPID are supported by the CPU.
427 * We do not handle other flush type combinations, so we ignore the VPID capabilities.
428 */
429 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
430 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
431 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT_RETAIN_GLOBALS)
432 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
433 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED;
434 pVM->hwaccm.s.vmx.fVPID = false;
435 }
436 }
437
438 for (VMCPUID i = 0; i < pVM->cCpus; i++)
439 {
440 PVMCPU pVCpu = &pVM->aCpus[i];
441
442 AssertPtr(pVCpu->hwaccm.s.vmx.pvVMCS);
443
444 /* Set revision dword at the beginning of the VMCS structure. */
445 *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
446
447 /* Clear VM Control Structure. */
448 Log(("HCPhysVMCS = %RHp\n", pVCpu->hwaccm.s.vmx.HCPhysVMCS));
449 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
450 if (RT_FAILURE(rc))
451 goto vmx_end;
452
453 /* Activate the VM Control Structure. */
454 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
455 if (RT_FAILURE(rc))
456 goto vmx_end;
457
458 /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
459 * Set required bits to one and zero according to the MSR capabilities.
460 */
461 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
462 /* External and non-maskable interrupts cause VM-exits. */
463 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
464 /* Enable the preemption timer. */
465 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
466 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
467 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
468
469 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
470 AssertRC(rc);
471
472 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
473 * Set required bits to one and zero according to the MSR capabilities.
474 */
475 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
476 /* Program which events cause VM-exits and which features we want to use. */
477 val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
478 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
479 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
480 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
481 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
482 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
483 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
484
485 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
486 if (!pVM->hwaccm.s.fNestedPaging)
487 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
488 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
489 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
490
491 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error (in combination with some other exit reasons). */
492 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
493 {
494 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
495 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
496 Assert(pVM->hwaccm.s.vmx.pAPIC);
497 }
498 else
499 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
500 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
501
502 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
503 {
504 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
505 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
506 }
507
508 /* We will use the secondary control if it's present. */
509 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
510
511 /* Mask away the bits that the CPU doesn't support */
512 /** @todo make sure they don't conflict with the above requirements. */
513 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
514 pVCpu->hwaccm.s.vmx.proc_ctls = val;
515
516 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
517 AssertRC(rc);
518
519 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
520 {
521 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
522 * Set required bits to one and zero according to the MSR capabilities.
523 */
524 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
525 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
526
527 if (pVM->hwaccm.s.fNestedPaging)
528 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
529
530 if (pVM->hwaccm.s.vmx.fVPID)
531 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
532
533 if (pVM->hwaccm.s.fHasIoApic)
534 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
535
536 if (pVM->hwaccm.s.vmx.fUnrestrictedGuest)
537 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
538
539 /* Mask away the bits that the CPU doesn't support */
540 /** @todo make sure they don't conflict with the above requirements. */
541 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
542 pVCpu->hwaccm.s.vmx.proc_ctls2 = val;
543 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
544 AssertRC(rc);
545 }
546
547 /* VMX_VMCS_CTRL_CR3_TARGET_COUNT
548 * Set required bits to one and zero according to the MSR capabilities.
549 */
550 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
551 AssertRC(rc);
552
553 /* Forward all exceptions except #NM & #PF to the guest.
554 * We always need to check page faults since our shadow page table can be out of sync.
555 * And we always lazily sync the FPU & XMM state.
556 */
557
558 /** @todo Possible optimization:
559 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
560 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
561 * registers ourselves of course.
562 *
563 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
564 */
565
566 /* Don't filter page faults; all of them should cause a switch. */
567 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
568 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
569 AssertRC(rc);
570
571 /* Init TSC offset to zero. */
572 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
573 AssertRC(rc);
574
575 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
576 AssertRC(rc);
577
578 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
579 AssertRC(rc);
580
581 /* Set the MSR bitmap address. */
582 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
583 {
584 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
585
586 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
587 AssertRC(rc);
588
589 /* Allow the guest to directly modify these MSRs; they are restored and saved automatically. */
590 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
591 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
592 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
593 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
594 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
595 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
596 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
597 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
598 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
599 }
600
601#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
602 /* Set the guest & host MSR load/store physical addresses. */
603 Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
604 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
605 AssertRC(rc);
606 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
607 AssertRC(rc);
608
609 Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys);
610 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys);
611 AssertRC(rc);
612#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
613
614 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
615 AssertRC(rc);
616
617 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
618 AssertRC(rc);
619
620 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
621 {
622 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
623 /* Optional */
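/* Note: with a TPR threshold of 0 the TPR-below-threshold exit can never trigger; the
   virtual-APIC page set up below is what lets the guest read CR8/TPR without a VM-exit. */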
624 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
625 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.HCPhysVAPIC);
626
627 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
628 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
629
630 AssertRC(rc);
631 }
632
633 /* Set link pointer to -1. Not currently used. */
634 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
635 AssertRC(rc);
636
637 /* Clear the VM control structure: this marks it inactive, clears implementation-specific data and writes the VMCS data back to memory. */
638 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
639 AssertRC(rc);
640
641 /* Configure the VMCS read cache. */
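/* Note (editor's gloss): the cache below batches VMREADs of frequently used fields right after
   a VM-exit; it is presumably most relevant for the 64-bit-guest-on-32-bit-host switcher, where
   these fields cannot be read lazily from the regular host context later. */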
642 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
643
644 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
645 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
646 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
647 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
648 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
649 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
650 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
651 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
652 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
653 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
654 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
655 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
656 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
657 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
658 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
659 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
660
661 VMX_SETUP_SELREG(ES, pCache);
662 VMX_SETUP_SELREG(SS, pCache);
663 VMX_SETUP_SELREG(CS, pCache);
664 VMX_SETUP_SELREG(DS, pCache);
665 VMX_SETUP_SELREG(FS, pCache);
666 VMX_SETUP_SELREG(GS, pCache);
667 VMX_SETUP_SELREG(LDTR, pCache);
668 VMX_SETUP_SELREG(TR, pCache);
669
670 /* Status code VMCS reads. */
671 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
672 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
673 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
674 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
675 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
676 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
677 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
678 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
679 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
680
681 if (pVM->hwaccm.s.fNestedPaging)
682 {
683 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
684 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
685 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
686 }
687 else
688 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
689 } /* for each VMCPU */
690
691 /* Choose the right TLB setup function. */
692 if (pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID)
693 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBBoth;
694 else if (pVM->hwaccm.s.fNestedPaging)
695 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBEPT;
696 else if (pVM->hwaccm.s.vmx.fVPID)
697 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBVPID;
698 else
699 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBDummy;
700
701vmx_end:
702 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
703 return rc;
704}
705
706/**
707 * Sets the permission bits for the specified MSR
708 *
709 * @param pVCpu The VMCPU to operate on.
710 * @param ulMSR MSR value
711 * @param fRead Reading allowed/disallowed
712 * @param fWrite Writing allowed/disallowed
713 */
714static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
715{
716 unsigned ulBit;
717 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap;
718
719 /* Layout:
720 * 0x000 - 0x3ff - Low MSR read bits
721 * 0x400 - 0x7ff - High MSR read bits
722 * 0x800 - 0xbff - Low MSR write bits
723 * 0xc00 - 0xfff - High MSR write bits
724 */
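/* Illustrative example: MSR_K8_LSTAR (0xC0000082) falls into the second range below, so ulBit
   becomes 0x82 and pMSRBitmap is advanced by 0x400; the read permission is then bit 0x82 of the
   high-MSR read area and the write permission the same bit at +0x800 (i.e. offset 0xC00 within
   the bitmap page). */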
725 if (ulMSR <= 0x00001FFF)
726 {
727 /* Pentium-compatible MSRs */
728 ulBit = ulMSR;
729 }
730 else
731 if ( ulMSR >= 0xC0000000
732 && ulMSR <= 0xC0001FFF)
733 {
734 /* AMD Sixth Generation x86 Processor MSRs */
735 ulBit = (ulMSR - 0xC0000000);
736 pMSRBitmap += 0x400;
737 }
738 else
739 {
740 AssertFailed();
741 return;
742 }
743
744 Assert(ulBit <= 0x1fff);
745 if (fRead)
746 ASMBitClear(pMSRBitmap, ulBit);
747 else
748 ASMBitSet(pMSRBitmap, ulBit);
749
750 if (fWrite)
751 ASMBitClear(pMSRBitmap + 0x800, ulBit);
752 else
753 ASMBitSet(pMSRBitmap + 0x800, ulBit);
754}
755
756
757/**
758 * Injects an event (trap or external interrupt)
759 *
760 * @returns VBox status code. Note that it may return VINF_EM_RESET to
761 * indicate a triple fault when injecting X86_XCPT_DF.
762 *
763 * @param pVM The VM to operate on.
764 * @param pVCpu The VMCPU to operate on.
765 * @param pCtx CPU Context
766 * @param intInfo VMX interrupt info
767 * @param cbInstr Opcode length of faulting instruction
768 * @param errCode Error code (optional)
769 */
770static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
771{
772 int rc;
773 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
774
775#ifdef VBOX_WITH_STATISTICS
776 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
777#endif
778
779#ifdef VBOX_STRICT
780 if (iGate == 0xE)
781 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
782 else
783 if (iGate < 0x20)
784 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode));
785 else
786 {
787 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
788 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
789 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
790 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
791 || pCtx->eflags.u32 & X86_EFL_IF);
792 }
793#endif
794
795 if ( CPUMIsGuestInRealModeEx(pCtx)
796 && pVM->hwaccm.s.vmx.pRealModeTSS)
797 {
798 RTGCPHYS GCPhysHandler;
799 uint16_t offset, ip;
800 RTSEL sel;
801
802 /* Injecting events doesn't work right with real mode emulation.
803 * (#GP if we try to inject external hardware interrupts)
804 * Inject the interrupt or trap directly instead.
805 *
806 * ASSUMES no access handlers for the bits we read or write below (should be safe).
807 */
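/* The code below mirrors what a real-mode INT would do: check the vector against the IDT limit
   (a missing #GP handler escalates to #DF, a missing #DF handler to a triple fault), push FLAGS,
   CS and IP, clear IF/TF/RF/AC, and load CS:IP from the IVT entry. */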
808 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
809
810 /* Check if the interrupt handler is present. */
811 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
812 {
813 Log(("IDT cbIdt violation\n"));
814 if (iGate != X86_XCPT_DF)
815 {
816 uint32_t intInfo2;
817
818 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
819 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
820 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
821 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
822
823 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
824 }
825 Log(("Triple fault -> reset the VM!\n"));
826 return VINF_EM_RESET;
827 }
828 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
829 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
830 || iGate == 4)
831 {
832 ip = pCtx->ip + cbInstr;
833 }
834 else
835 ip = pCtx->ip;
836
837 /* Read the selector:offset pair of the interrupt handler. */
838 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
839 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
840 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
841
842 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
843
844 /* Construct the stack frame. */
845 /** @todo should check stack limit. */
846 pCtx->sp -= 2;
847 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
848 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
849 pCtx->sp -= 2;
850 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
851 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
852 pCtx->sp -= 2;
853 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
854 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
855
856 /* Update the CPU state for executing the handler. */
857 pCtx->rip = offset;
858 pCtx->cs = sel;
859 pCtx->csHid.u64Base = sel << 4;
860 pCtx->eflags.u &= ~(X86_EFL_IF|X86_EFL_TF|X86_EFL_RF|X86_EFL_AC);
861
862 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
863 return VINF_SUCCESS;
864 }
865
866 /* Set event injection state. */
867 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
868
869 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
870 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
871
872 AssertRC(rc);
873 return rc;
874}
875
876
877/**
878 * Checks for pending guest interrupts and injects them
879 *
880 * @returns VBox status code.
881 * @param pVM The VM to operate on.
882 * @param pVCpu The VMCPU to operate on.
883 * @param pCtx CPU Context
884 */
885static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
886{
887 int rc;
888
889 /* Dispatch any pending interrupts. (injected before, but a VM exit occurred prematurely) */
890 if (pVCpu->hwaccm.s.Event.fPending)
891 {
892 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
893 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
894 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
895 AssertRC(rc);
896
897 pVCpu->hwaccm.s.Event.fPending = false;
898 return VINF_SUCCESS;
899 }
900
901 /* If an active trap is already pending, then we must forward it first! */
902 if (!TRPMHasTrap(pVCpu))
903 {
904 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
905 {
906 RTGCUINTPTR intInfo;
907
908 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
909
910 intInfo = X86_XCPT_NMI;
911 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
912 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
913
914 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
915 AssertRC(rc);
916
917 return VINF_SUCCESS;
918 }
919
920 /** @todo SMI interrupts. */
921
922 /* When external interrupts are pending, we should exit the VM when IF is set. */
923 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
924 {
925 if (!(pCtx->eflags.u32 & X86_EFL_IF))
926 {
927 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
928 {
929 LogFlow(("Enable irq window exit!\n"));
930 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
931 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
932 AssertRC(rc);
933 }
934 /* else nothing to do but wait */
935 }
936 else
937 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
938 {
939 uint8_t u8Interrupt;
940
941 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
942 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu, u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
943 if (RT_SUCCESS(rc))
944 {
945 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
946 AssertRC(rc);
947 }
948 else
949 {
950 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
951 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
952 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
953 /* Just continue */
954 }
955 }
956 else
957 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
958 }
959 }
960
961#ifdef VBOX_STRICT
962 if (TRPMHasTrap(pVCpu))
963 {
964 uint8_t u8Vector;
965 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
966 AssertRC(rc);
967 }
968#endif
969
970 if ( (pCtx->eflags.u32 & X86_EFL_IF)
971 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
972 && TRPMHasTrap(pVCpu)
973 )
974 {
975 uint8_t u8Vector;
976 TRPMEVENT enmType;
977 RTGCUINTPTR intInfo;
978 RTGCUINT errCode;
979
980 /* If a new event is pending, then dispatch it now. */
981 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
982 AssertRC(rc);
983 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
984 Assert(enmType != TRPM_SOFTWARE_INT);
985
986 /* Clear the pending trap. */
987 rc = TRPMResetTrap(pVCpu);
988 AssertRC(rc);
989
990 intInfo = u8Vector;
991 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
992
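/* Interruption-info layout (per the VMX spec): bits 7:0 = vector, bits 10:8 = type,
   bit 11 = deliver-error-code, bit 31 = valid. The error code itself is written to a
   separate VMCS field by hmR0VmxInjectEvent. */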
993 if (enmType == TRPM_TRAP)
994 {
995 switch (u8Vector) {
996 case X86_XCPT_DF:
997 case X86_XCPT_TS:
998 case X86_XCPT_NP:
999 case X86_XCPT_SS:
1000 case X86_XCPT_GP:
1001 case X86_XCPT_PF:
1002 case X86_XCPT_AC:
1003 /* Valid error codes. */
1004 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1005 break;
1006 default:
1007 break;
1008 }
1009 if (u8Vector == X86_XCPT_BP || u8Vector == X86_XCPT_OF)
1010 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1011 else
1012 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1013 }
1014 else
1015 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1016
1017 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
1018 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1019 AssertRC(rc);
1020 } /* if (interrupts can be dispatched) */
1021
1022 return VINF_SUCCESS;
1023}
1024
1025/**
1026 * Save the host state
1027 *
1028 * @returns VBox status code.
1029 * @param pVM The VM to operate on.
1030 * @param pVCpu The VMCPU to operate on.
1031 */
1032VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1033{
1034 int rc = VINF_SUCCESS;
1035 NOREF(pVM);
1036
1037 /*
1038 * Host CPU Context
1039 */
1040 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
1041 {
1042 RTIDTR idtr;
1043 RTGDTR gdtr;
1044 RTSEL SelTR;
1045 PCX86DESCHC pDesc;
1046 uintptr_t trBase;
1047 RTSEL cs;
1048 RTSEL ss;
1049 uint64_t cr3;
1050
1051 /* Control registers */
1052 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1053#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1054 if (VMX_IS_64BIT_HOST_MODE())
1055 {
1056 cr3 = hwaccmR0Get64bitCR3();
1057 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1058 }
1059 else
1060#endif
1061 {
1062 cr3 = ASMGetCR3();
1063 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1064 }
1065 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1066 AssertRC(rc);
1067 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1068 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1069 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1070
1071 /* Selector registers. */
1072#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1073 if (VMX_IS_64BIT_HOST_MODE())
1074 {
1075 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1076 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1077 }
1078 else
1079 {
1080 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1081 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1082 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1083 }
1084#else
1085 cs = ASMGetCS();
1086 ss = ASMGetSS();
1087#endif
1088 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1089 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1090 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1091 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1092 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1093 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1094#if HC_ARCH_BITS == 32
1095 if (!VMX_IS_64BIT_HOST_MODE())
1096 {
1097 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1098 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1099 }
1100#endif
1101 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1102 SelTR = ASMGetTR();
1103 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1104 AssertRC(rc);
1105 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1106 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1107 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1108 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1109 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1110 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1111 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1112
1113 /* GDTR & IDTR */
1114#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1115 if (VMX_IS_64BIT_HOST_MODE())
1116 {
1117 X86XDTR64 gdtr64, idtr64;
1118 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
1119 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1120 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1121 AssertRC(rc);
1122 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1123 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1124 gdtr.cbGdt = gdtr64.cb;
1125 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1126 }
1127 else
1128#endif
1129 {
1130 ASMGetGDTR(&gdtr);
1131 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1132 ASMGetIDTR(&idtr);
1133 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1134 AssertRC(rc);
1135 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1136 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1137 }
1138
1139 /* Save the base address of the TR selector. */
1140 if (SelTR > gdtr.cbGdt)
1141 {
1142 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1143 return VERR_VMX_INVALID_HOST_STATE;
1144 }
1145
1146 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1147#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1148 if (VMX_IS_64BIT_HOST_MODE())
1149 {
1150 uint64_t trBase64 = X86DESC64_BASE(*(PX86DESC64)pDesc);
1151 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1152 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1153 AssertRC(rc);
1154 }
1155 else
1156#endif
1157 {
1158#if HC_ARCH_BITS == 64
1159 trBase = X86DESC64_BASE(*pDesc);
1160#else
1161 trBase = X86DESC_BASE(*pDesc);
1162#endif
1163 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1164 AssertRC(rc);
1165 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1166 }
1167
1168 /* FS and GS base. */
1169#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1170 if (VMX_IS_64BIT_HOST_MODE())
1171 {
1172 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1173 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1174 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1175 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1176 }
1177#endif
1178 AssertRC(rc);
1179
1180 /* Sysenter MSRs. */
1181 /** @todo expensive!! */
1182 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1183 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1184#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1185 if (VMX_IS_64BIT_HOST_MODE())
1186 {
1187 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1188 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1189 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1190 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1191 }
1192 else
1193 {
1194 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1195 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1196 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1197 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1198 }
1199#elif HC_ARCH_BITS == 32
1200 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1201 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1202 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1203 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1204#else
1205 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1206 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1207 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1208 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1209#endif
1210 AssertRC(rc);
1211
1212#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1213 /* Store all host MSRs in the VM-Exit load area, so they will be reloaded after the world switch back to the host. */
1214 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR;
1215 unsigned idxMsr = 0;
1216
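/* Note: each entry in the auto load/store areas uses the hardware-defined layout
   (32-bit MSR index, 32 reserved bits, 64-bit value); idxMsr counts the entries and is
   written to VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT once the area has been filled in. */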
1217 /* EFER MSR present? */
1218 if (ASMCpuId_EDX(0x80000001) & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1219 {
1220 if (ASMCpuId_EDX(0x80000001) & X86_CPUID_AMD_FEATURE_EDX_SEP)
1221 {
1222 pMsr->u32IndexMSR = MSR_K6_STAR;
1223 pMsr->u32Reserved = 0;
1224 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1225 pMsr++; idxMsr++;
1226 }
1227
1228 pMsr->u32IndexMSR = MSR_K6_EFER;
1229 pMsr->u32Reserved = 0;
1230# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1231 if (CPUMIsGuestInLongMode(pVCpu))
1232 {
1233 /* Must match the efer value in our 64 bits switcher. */
1234 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1235 }
1236 else
1237# endif
1238 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1239 pMsr++; idxMsr++;
1240 }
1241
1242# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1243 if (VMX_IS_64BIT_HOST_MODE())
1244 {
1245 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1246 pMsr->u32Reserved = 0;
1247 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1248 pMsr++; idxMsr++;
1249 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1250 pMsr->u32Reserved = 0;
1251 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1252 pMsr++; idxMsr++;
1253 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1254 pMsr->u32Reserved = 0;
1255 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1256 pMsr++; idxMsr++;
1257 }
1258# endif
1259 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1260 AssertRC(rc);
1261#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1262
1263 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1264 }
1265 return rc;
1266}
1267
1268/**
1269 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1270 * guest operates in PAE mode.
1271 *
1272 * @returns VINF_SUCCESS or fatal error.
1273 * @param pVCpu The VMCPU to operate on.
1274 * @param pCtx Guest context
1275 */
1276static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1277{
1278 if (CPUMIsGuestInPAEModeEx(pCtx))
1279 {
1280 X86PDPE aPdpes[4];
1281 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1282 AssertRCReturn(rc, rc);
1283
1284 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1285 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1286 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1287 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1288 }
1289 return VINF_SUCCESS;
1290}
1291
1292/**
1293 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1294 * guest operates in PAE mode.
1295 *
1296 * @returns VINF_SUCCESS or fatal error.
1297 * @param pVCpu The VMCPU to operate on.
1298 * @param pCtx Guest context
1299 *
1300 * @remarks Tell PGM about CR3 changes before calling this helper.
1301 */
1302static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1303{
1304 if (CPUMIsGuestInPAEModeEx(pCtx))
1305 {
1306 int rc;
1307 X86PDPE aPdpes[4];
1308 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1309 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1310 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1311 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1312
1313 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1314 AssertRCReturn(rc, rc);
1315 }
1316 return VINF_SUCCESS;
1317}
1318
1319
1320/**
1321 * Update the exception bitmap according to the current CPU state
1322 *
1323 * @param pVM The VM to operate on.
1324 * @param pVCpu The VMCPU to operate on.
1325 * @param pCtx Guest context
1326 */
1327static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1328{
1329 uint32_t u32TrapMask;
1330 Assert(pCtx);
1331
1332 /* Set up a mask for intercepting traps. */
1333 /** @todo Do we really need to always intercept #DB? */
1334 u32TrapMask = RT_BIT(X86_XCPT_DB)
1335 | RT_BIT(X86_XCPT_NM)
1336#ifdef VBOX_ALWAYS_TRAP_PF
1337 | RT_BIT(X86_XCPT_PF)
1338#endif
1339#ifdef VBOX_STRICT
1340 | RT_BIT(X86_XCPT_BP)
1341 | RT_BIT(X86_XCPT_DB)
1342 | RT_BIT(X86_XCPT_DE)
1343 | RT_BIT(X86_XCPT_NM)
1344 | RT_BIT(X86_XCPT_UD)
1345 | RT_BIT(X86_XCPT_NP)
1346 | RT_BIT(X86_XCPT_SS)
1347 | RT_BIT(X86_XCPT_GP)
1348 | RT_BIT(X86_XCPT_MF)
1349#endif
1350 ;
1351
1352 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1353 /* Without nested paging, #PF must be intercepted to implement shadow paging. */
1354 if (!pVM->hwaccm.s.fNestedPaging)
1355 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1356
1357 /* Also catch floating point exceptions if we need to report them to the guest in a different way. */
1358 if (!(pCtx->cr0 & X86_CR0_NE))
1359 {
1360 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1361 }
1362
1363#ifdef VBOX_STRICT
1364 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1365#endif
1366
1367 /* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
1368 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1369 if ( CPUMIsGuestInRealModeEx(pCtx)
1370 && pVM->hwaccm.s.vmx.pRealModeTSS)
1371 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1372 | RT_BIT(X86_XCPT_DB)
1373 | RT_BIT(X86_XCPT_NMI)
1374 | RT_BIT(X86_XCPT_BP)
1375 | RT_BIT(X86_XCPT_OF)
1376 | RT_BIT(X86_XCPT_BR)
1377 | RT_BIT(X86_XCPT_UD)
1378 | RT_BIT(X86_XCPT_DF)
1379 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1380 | RT_BIT(X86_XCPT_TS)
1381 | RT_BIT(X86_XCPT_NP)
1382 | RT_BIT(X86_XCPT_SS)
1383 | RT_BIT(X86_XCPT_GP)
1384 | RT_BIT(X86_XCPT_MF)
1385 | RT_BIT(X86_XCPT_AC)
1386 | RT_BIT(X86_XCPT_MC)
1387 | RT_BIT(X86_XCPT_XF)
1388 ;
1389
1390 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1391 AssertRC(rc);
1392}
1393
1394/**
1395 * Loads a minimal guest state
1396 *
1397 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1398 *
1399 * @param pVM The VM to operate on.
1400 * @param pVCpu The VMCPU to operate on.
1401 * @param pCtx Guest context
1402 */
1403VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1404{
1405 int rc;
1406 X86EFLAGS eflags;
1407
1408 Assert(!(pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_ALL_GUEST));
1409
1410 /* EIP, ESP and EFLAGS */
1411 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1412 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1413 AssertRC(rc);
1414
1415 /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */
1416 eflags = pCtx->eflags;
1417 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1418 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1419
1420 /* Real mode emulation using v86 mode. */
1421 if ( CPUMIsGuestInRealModeEx(pCtx)
1422 && pVM->hwaccm.s.vmx.pRealModeTSS)
1423 {
1424 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1425
1426 eflags.Bits.u1VM = 1;
1427 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1428 }
1429 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1430 AssertRC(rc);
1431}
1432
1433/**
1434 * Loads the guest state
1435 *
1436 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1437 *
1438 * @returns VBox status code.
1439 * @param pVM The VM to operate on.
1440 * @param pVCpu The VMCPU to operate on.
1441 * @param pCtx Guest context
1442 */
1443VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1444{
1445 int rc = VINF_SUCCESS;
1446 RTGCUINTPTR val;
1447
1448 /* VMX_VMCS_CTRL_ENTRY_CONTROLS
1449 * Set required bits to one and zero according to the MSR capabilities.
1450 */
1451 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1452 /* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1453 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1454 /* 64 bits guest mode? */
1455 if (CPUMIsGuestInLongModeEx(pCtx))
1456 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1457 /* else Must be zero when AMD64 is not available. */
1458
1459 /* Mask away the bits that the CPU doesn't support */
1460 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1461 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1462 AssertRC(rc);
1463
1464 /* VMX_VMCS_CTRL_EXIT_CONTROLS
1465 * Set required bits to one and zero according to the MSR capabilities.
1466 */
1467 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1468
1469 /* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1470 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1471
1472#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1473 if (VMX_IS_64BIT_HOST_MODE())
1474 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1475 /* else: Must be zero when AMD64 is not available. */
1476#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1477 if (CPUMIsGuestInLongModeEx(pCtx))
1478 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1479 else
1480 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1481#endif
1482 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1483 /* Don't acknowledge external interrupts on VM-exit. */
1484 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1485 AssertRC(rc);
1486
1487 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1488 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1489 {
1490 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1491 {
1492 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1493 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1494 {
1495 /* Correct weird requirements for switching to protected mode. */
1496 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1497 && enmGuestMode >= PGMMODE_PROTECTED)
1498 {
1499#ifdef VBOX_WITH_REM
1500 /* Flush the recompiler code cache, as it is not unlikely that
1501 * the guest will rewrite code it will later execute in real
1502 * mode (OpenBSD 4.0 is one such example).
1503 */
1504 REMFlushTBs(pVM);
1505#endif
1506
1507 /* DPL of all hidden selector registers must match the current CPL (0). */
1508 pCtx->csHid.Attr.n.u2Dpl = 0;
1509 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1510
1511 pCtx->dsHid.Attr.n.u2Dpl = 0;
1512 pCtx->esHid.Attr.n.u2Dpl = 0;
1513 pCtx->fsHid.Attr.n.u2Dpl = 0;
1514 pCtx->gsHid.Attr.n.u2Dpl = 0;
1515 pCtx->ssHid.Attr.n.u2Dpl = 0;
1516 }
1517 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1518 }
1519 else
1520 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1521 if ( CPUMIsGuestInRealModeEx(pCtx)
1522 && pCtx->csHid.u64Base == 0xffff0000)
1523 {
1524 pCtx->csHid.u64Base = 0xf0000;
1525 pCtx->cs = 0xf000;
1526 }
1527 }
1528
1529 VMX_WRITE_SELREG(ES, es);
1530 AssertRC(rc);
1531
1532 VMX_WRITE_SELREG(CS, cs);
1533 AssertRC(rc);
1534
1535 VMX_WRITE_SELREG(SS, ss);
1536 AssertRC(rc);
1537
1538 VMX_WRITE_SELREG(DS, ds);
1539 AssertRC(rc);
1540
1541 VMX_WRITE_SELREG(FS, fs);
1542 AssertRC(rc);
1543
1544 VMX_WRITE_SELREG(GS, gs);
1545 AssertRC(rc);
1546 }
1547
1548 /* Guest CPU context: LDTR. */
1549 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1550 {
1551 if (pCtx->ldtr == 0)
1552 {
1553 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1554 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1555 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1556 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1557 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1558 }
1559 else
1560 {
1561 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr);
1562 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1563 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1564 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1565 }
1566 AssertRC(rc);
1567 }
1568 /* Guest CPU context: TR. */
1569 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1570 {
1571 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1572 if ( CPUMIsGuestInRealModeEx(pCtx)
1573 && pVM->hwaccm.s.vmx.pRealModeTSS)
1574 {
1575 RTGCPHYS GCPhys;
1576
1577 /* We convert it here every time as pci regions could be reconfigured. */
1578 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1579 AssertRC(rc);
1580
1581 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1582 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1583 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1584
1585 X86DESCATTR attr;
1586
1587 attr.u = 0;
1588 attr.n.u1Present = 1;
1589 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1590 val = attr.u;
1591 }
1592 else
1593 {
1594 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr);
1595 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1596 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->trHid.u64Base);
1597
1598 val = pCtx->trHid.Attr.u;
1599
1600 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
1601 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1602 {
1603 if (val & 0xf)
1604 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1605 else
1606 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1607 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1608 }
1609 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("%#x\n", val));
1610 }
1611 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1612 AssertRC(rc);
1613 }
1614 /* Guest CPU context: GDTR. */
1615 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1616 {
1617 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1618 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1619 AssertRC(rc);
1620 }
1621 /* Guest CPU context: IDTR. */
1622 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1623 {
1624 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1625 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1626 AssertRC(rc);
1627 }
1628
1629 /*
1630 * Sysenter MSRs
1631 */
1632 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
1633 {
1634 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1635 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1636 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1637 AssertRC(rc);
1638 }
1639
1640 /* Control registers */
1641 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1642 {
1643 val = pCtx->cr0;
1644 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1645 Log2(("Guest CR0-shadow %08x\n", val));
1646 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1647 {
1648 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1649 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1650 }
1651 else
1652 {
1653 /** @todo check if we support the old style mess correctly. */
1654 if (!(val & X86_CR0_NE))
1655 Log(("Forcing X86_CR0_NE!!!\n"));
1656
1657 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1658 }
1659 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1660 if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1661 val |= X86_CR0_PE | X86_CR0_PG;
1662
1663 if (pVM->hwaccm.s.fNestedPaging)
1664 {
1665 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1666 {
1667 /* Disable cr3 read/write monitoring as we don't need it for EPT. */
1668 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1669 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1670 }
1671 else
1672 {
1673 /* Reenable cr3 read/write monitoring as our identity mapped page table is active. */
1674 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1675 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1676 }
1677 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1678 AssertRC(rc);
1679 }
1680 else
1681 {
1682 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1683 val |= X86_CR0_WP;
1684 }
1685
1686 /* Always enable caching. */
1687 val &= ~(X86_CR0_CD|X86_CR0_NW);
1688
1689 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1690 Log2(("Guest CR0 %08x\n", val));
1691 /* CR0 flags owned by the host; if the guest attempts to change them, then
1692 * the VM will exit.
1693 */
1694 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1695 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1696 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1697 | X86_CR0_CD /* Bit not restored during VM-exit! */
1698 | X86_CR0_NW /* Bit not restored during VM-exit! */
1699 | X86_CR0_NE;
1700
1701 /* When the guest's FPU state is active, then we no longer care about
1702 * the FPU related bits.
1703 */
1704 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1705 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1706
1707 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1708
1709 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1710 Log2(("Guest CR0-mask %08x\n", val));
1711 AssertRC(rc);
1712 }
1713 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1714 {
1715 /* CR4 */
1716 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1717 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1718 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1719 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1720
1721 if (!pVM->hwaccm.s.fNestedPaging)
1722 {
1723 switch(pVCpu->hwaccm.s.enmShadowMode)
1724 {
1725 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1726 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1727 case PGMMODE_32_BIT: /* 32-bit paging. */
1728 val &= ~X86_CR4_PAE;
1729 break;
1730
1731 case PGMMODE_PAE: /* PAE paging. */
1732 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1733 /** Must use PAE paging as we could use physical memory > 4 GB */
1734 val |= X86_CR4_PAE;
1735 break;
1736
1737 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1738 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1739#ifdef VBOX_ENABLE_64_BITS_GUESTS
1740 break;
1741#else
1742 AssertFailed();
1743 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1744#endif
1745 default: /* shut up gcc */
1746 AssertFailed();
1747 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1748 }
1749 }
1750 else
1751 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1752 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1753 {
1754 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1755 val |= X86_CR4_PSE;
1756 /* Our identity mapping is a 32 bits page directory. */
1757 val &= ~X86_CR4_PAE;
1758 }
1759
1760 /* Turn off VME if we're in emulated real mode. */
1761 if ( CPUMIsGuestInRealModeEx(pCtx)
1762 && pVM->hwaccm.s.vmx.pRealModeTSS)
1763 val &= ~X86_CR4_VME;
1764
1765 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1766 Log2(("Guest CR4 %08x\n", val));
1767 /* CR4 flags owned by the host; if the guest attempts to change them, then
1768 * the VM will exit.
1769 */
1770 val = 0
1771 | X86_CR4_VME
1772 | X86_CR4_PAE
1773 | X86_CR4_PGE
1774 | X86_CR4_PSE
1775 | X86_CR4_VMXE;
1776 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1777
1778 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1779 Log2(("Guest CR4-mask %08x\n", val));
1780 AssertRC(rc);
1781 }
1782
1783#if 0
1784 /* Enable single stepping if requested and CPU supports it. */
1785 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
1786 if (DBGFIsStepping(pVCpu))
1787 {
1788 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
1789 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1790 AssertRC(rc);
1791 }
1792#endif
1793
1794 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1795 {
1796 if (pVM->hwaccm.s.fNestedPaging)
1797 {
1798 Assert(PGMGetHyperCR3(pVCpu));
1799 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
1800
1801 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1802 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
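    /* EPTP layout: bits 2:0 = EPT memory type (6 = write-back), bits 5:3 = page-walk length minus one. */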
1803 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1804 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
1805
1806 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1807 AssertRC(rc);
1808
1809 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1810 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1811 {
1812 RTGCPHYS GCPhys;
1813
1814 /* We convert it here every time as pci regions could be reconfigured. */
1815 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1816 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable));
1817
1818 /* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will
1819 * take care of the translation to host physical addresses.
1820 */
1821 val = GCPhys;
1822 }
1823 else
1824 {
1825 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1826 val = pCtx->cr3;
1827 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
1828 AssertRCReturn(rc, rc);
1829 }
1830 }
1831 else
1832 {
1833 val = PGMGetHyperCR3(pVCpu);
1834 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
1835 }
1836
1837 /* Save our shadow CR3 register. */
1838 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
1839 AssertRC(rc);
1840 }
1841
1842 /* Debug registers. */
1843 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1844 {
1845 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
1846 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
1847
1848 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
1849 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
1850 pCtx->dr[7] |= 0x400; /* must be one */
1851
1852 /* Resync DR7 */
1853 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
1854 AssertRC(rc);
1855
1856#ifdef DEBUG
1857 /* Sync the hypervisor debug state now if any breakpoint is armed. */
1858 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
1859 && !CPUMIsHyperDebugStateActive(pVCpu)
1860 && !DBGFIsStepping(pVCpu))
1861 {
1862 /* Save the host and load the hypervisor debug state. */
1863 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1864 AssertRC(rc);
1865
1866 /* DRx intercepts remain enabled. */
1867
1868 /* Override dr7 with the hypervisor value. */
1869 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
1870 AssertRC(rc);
1871 }
1872 else
1873#endif
1874 /* Sync the debug state now if any breakpoint is armed. */
1875 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
1876 && !CPUMIsGuestDebugStateActive(pVCpu)
1877 && !DBGFIsStepping(pVCpu))
1878 {
1879 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
1880
1881 /* Disable drx move intercepts. */
1882 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
1883 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1884 AssertRC(rc);
1885
1886 /* Save the host and load the guest debug state. */
1887 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1888 AssertRC(rc);
1889 }
1890
1891 /* IA32_DEBUGCTL MSR. */
1892 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
1893 AssertRC(rc);
1894
1895 /** @todo do we really ever need this? */
1896 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
1897 AssertRC(rc);
1898 }
1899
1900 /* 64-bit guest mode? */
1901 if (CPUMIsGuestInLongModeEx(pCtx))
1902 {
1903#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
1904 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1905#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1906 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
1907#else
1908# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1909 if (!pVM->hwaccm.s.fAllow64BitGuests)
1910 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1911# endif
1912 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
1913#endif
1914 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
1915 {
1916 /* Update these as wrmsr might have changed them. */
1917 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base);
1918 AssertRC(rc);
1919 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base);
1920 AssertRC(rc);
1921 }
1922 }
1923 else
1924 {
1925 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
1926 }
1927
1928 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
1929
1930#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1931 /* Store all guest MSRs in the VM-Entry load area, so they will be loaded during the world switch. */
1932 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
1933 unsigned idxMsr = 0;
1934
1935 uint32_t ulEdx;
1936 uint32_t ulTemp;
1937 CPUMGetGuestCpuId(pVCpu, 0x80000001, &ulTemp, &ulTemp, &ulTemp, &ulEdx);
1938 /* EFER MSR present? */
1939 if (ulEdx & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1940 {
1941 pMsr->u32IndexMSR = MSR_K6_EFER;
1942 pMsr->u32Reserved = 0;
1943 pMsr->u64Value = pCtx->msrEFER;
1944 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
1945 if (!CPUMIsGuestInLongModeEx(pCtx))
1946 pMsr->u64Value &= ~(MSR_K6_EFER_LMA|MSR_K6_EFER_LME);
1947 pMsr++; idxMsr++;
1948
1949 if (ulEdx & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE)
1950 {
1951 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1952 pMsr->u32Reserved = 0;
1953 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
1954 pMsr++; idxMsr++;
1955 pMsr->u32IndexMSR = MSR_K6_STAR;
1956 pMsr->u32Reserved = 0;
1957 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
1958 pMsr++; idxMsr++;
1959 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1960 pMsr->u32Reserved = 0;
1961 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
1962 pMsr++; idxMsr++;
1963 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1964 pMsr->u32Reserved = 0;
1965 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
1966 pMsr++; idxMsr++;
1967 }
1968 }
1969 pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr;
1970
1971 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
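    /* The exit MSR-store area uses the same guest MSR page as the entry MSR-load area, so the counts
     * match and VMXR0SaveGuestState can read back any values the guest may have changed. */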
1972 AssertRC(rc);
1973
1974 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
1975 AssertRC(rc);
1976#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1977
1978 bool fOffsettedTsc;
1979 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
1980 {
1981 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
1982
1983 /* Make sure the returned values have sane upper and lower boundaries. */
1984 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
1985
1986 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
1987 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
1988
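    /* The VMX preemption timer counts down at the TSC rate divided by 2^cPreemptTimerShift (the
     * rate ratio is reported in MSR_IA32_VMX_MISC), so scale the TSC delta accordingly. */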
1989 cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift;
1990 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
1991 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
1992 AssertRC(rc);
1993 }
1994 else
1995 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
1996 if (fOffsettedTsc)
1997 {
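        /* With offsetting the guest sees host TSC + u64TSCOffset; only use it if that value cannot
         * appear to move backwards relative to the last TSC value the guest has seen. */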
1998 uint64_t u64CurTSC = ASMReadTSC();
1999 if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
2000 {
2001 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */
2002 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset);
2003 AssertRC(rc);
2004
2005 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2006 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2007 AssertRC(rc);
2008 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
2009 }
2010 else
2011 {
2012 /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */
2013 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGet(pVCpu)));
2014 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2015 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2016 AssertRC(rc);
2017 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
2018 }
2019 }
2020 else
2021 {
2022 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2023 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2024 AssertRC(rc);
2025 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
2026 }
2027
2028 /* Done with the major changes */
2029 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
2030
2031 /* Minimal guest state update (esp, eip, eflags mostly) */
2032 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2033 return rc;
2034}
2035
2036/**
2037 * Syncs back the guest state
2038 *
2039 * @returns VBox status code.
2040 * @param pVM The VM to operate on.
2041 * @param pVCpu The VMCPU to operate on.
2042 * @param pCtx Guest context
2043 */
2044DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2045{
2046 RTGCUINTREG val, valShadow;
2047 RTGCUINTPTR uInterruptState;
2048 int rc;
2049
2050 /* Let's first sync back eip, esp, and eflags. */
2051 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
2052 AssertRC(rc);
2053 pCtx->rip = val;
2054 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
2055 AssertRC(rc);
2056 pCtx->rsp = val;
2057 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2058 AssertRC(rc);
2059 pCtx->eflags.u32 = val;
2060
2061 /* Take care of instruction fusing (sti, mov ss) */
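    /* Interruptibility state: bit 0 = blocking by STI, bit 1 = blocking by MOV SS. */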
2062 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2063 uInterruptState = val;
2064 if (uInterruptState != 0)
2065 {
2066 Assert(uInterruptState <= 2); /* only sti & mov ss */
2067 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2068 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2069 }
2070 else
2071 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2072
2073 /* Control registers. */
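    /* Bits set in the CR0/CR4 guest/host mask are owned by the host and taken from the read shadow;
     * all other bits come from the real guest control register. */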
2074 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2075 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
2076 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
2077 CPUMSetGuestCR0(pVCpu, val);
2078
2079 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2080 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
2081 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
2082 CPUMSetGuestCR4(pVCpu, val);
2083
2084 /* Note: no reason to sync back the CRx registers. They can't be changed by the guest. */
2085 /* Note: only in the nested paging case can CR3 & CR4 be changed by the guest. */
2086 if ( pVM->hwaccm.s.fNestedPaging
2087 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2088 {
2089 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
2090
2091 /* Can be updated behind our back in the nested paging case. */
2092 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2093
2094 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
2095
2096 if (val != pCtx->cr3)
2097 {
2098 CPUMSetGuestCR3(pVCpu, val);
2099 PGMUpdateCR3(pVCpu, val);
2100 }
2101 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2102 AssertRCReturn(rc, rc);
2103 }
2104
2105 /* Sync back DR7 here. */
2106 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
2107 pCtx->dr[7] = val;
2108
2109 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2110 VMX_READ_SELREG(ES, es);
2111 VMX_READ_SELREG(SS, ss);
2112 VMX_READ_SELREG(CS, cs);
2113 VMX_READ_SELREG(DS, ds);
2114 VMX_READ_SELREG(FS, fs);
2115 VMX_READ_SELREG(GS, gs);
2116
2117 /*
2118 * System MSRs
2119 */
2120 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2121 pCtx->SysEnter.cs = val;
2122 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
2123 pCtx->SysEnter.eip = val;
2124 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
2125 pCtx->SysEnter.esp = val;
2126
2127 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2128 VMX_READ_SELREG(LDTR, ldtr);
2129
2130 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2131 pCtx->gdtr.cbGdt = val;
2132 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
2133 pCtx->gdtr.pGdt = val;
2134
2135 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2136 pCtx->idtr.cbIdt = val;
2137 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
2138 pCtx->idtr.pIdt = val;
2139
2140 /* Real mode emulation using v86 mode. */
2141 if ( CPUMIsGuestInRealModeEx(pCtx)
2142 && pVM->hwaccm.s.vmx.pRealModeTSS)
2143 {
2144 /* Hide our emulation flags */
2145 pCtx->eflags.Bits.u1VM = 0;
2146
2147 /* Restore original IOPL setting as we always use 0. */
2148 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2149
2150 /* Force a TR resync every time in case we switch modes. */
2151 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
2152 }
2153 else
2154 {
2155 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2156 VMX_READ_SELREG(TR, tr);
2157 }
2158
2159#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2160 /* Save the possibly changed MSRs that we automatically restore and save during a world switch. */
2161 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++)
2162 {
2163 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2164 pMsr += i;
2165
2166 switch (pMsr->u32IndexMSR)
2167 {
2168 case MSR_K8_LSTAR:
2169 pCtx->msrLSTAR = pMsr->u64Value;
2170 break;
2171 case MSR_K6_STAR:
2172 pCtx->msrSTAR = pMsr->u64Value;
2173 break;
2174 case MSR_K8_SF_MASK:
2175 pCtx->msrSFMASK = pMsr->u64Value;
2176 break;
2177 case MSR_K8_KERNEL_GS_BASE:
2178 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2179 break;
2180 case MSR_K6_EFER:
2181 /* EFER can't be changed without causing a VM-exit. */
2182// Assert(pCtx->msrEFER == pMsr->u64Value);
2183 break;
2184 default:
2185 AssertFailed();
2186 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2187 }
2188 }
2189#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2190 return VINF_SUCCESS;
2191}
2192
2193/**
2194 * Dummy TLB-setup placeholder, used when neither EPT nor VPID is active.
2195 *
2196 * @param pVM The VM to operate on.
2197 * @param pVCpu The VMCPU to operate on.
2198 */
2199static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2200{
2201 NOREF(pVM);
2202 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2203 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2204 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2205 return;
2206}
2207
2208
2209/**
2210 * Setup the tagged TLB for EPT+VPID.
2211 *
2212 * @param pVM The VM to operate on.
2213 * @param pVCpu The VMCPU to operate on.
2214 */
2215static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2216{
2217 PHMGLOBLCPUINFO pCpu;
2218
2219 Assert(pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID);
2220
2221 pCpu = HWACCMR0GetCurrentCpu();
2222
2223 /*
2224 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last
2225 * This can happen both for start & resume due to long jumps back to ring-3.
2226 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2227 * so we cannot reuse the current ASID anymore.
2228 */
2229 bool fNewASID = false;
2230 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2231 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2232 {
2233 pVCpu->hwaccm.s.fForceTLBFlush = true;
2234 fNewASID = true;
2235 }
2236
2237 /*
2238 * Check for explicit TLB shootdowns.
2239 */
2240 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2241 pVCpu->hwaccm.s.fForceTLBFlush = true;
2242
2243 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2244
2245 if (pVCpu->hwaccm.s.fForceTLBFlush)
2246 {
2247 if (fNewASID)
2248 {
2249 ++pCpu->uCurrentASID;
2250 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2251 {
2252 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2253 pCpu->cTLBFlushes++;
2254 pCpu->fFlushASIDBeforeUse = true;
2255 }
2256
2257 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2258 if (pCpu->fFlushASIDBeforeUse)
2259 {
2260 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2261#ifdef VBOX_WITH_STATISTICS
2262 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2263#endif
2264 }
2265 }
2266 else
2267 {
2268 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
2269 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2270 else
2271 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2272
2273#ifdef VBOX_WITH_STATISTICS
2274 /*
2275 * This is not terribly accurate (i.e. we don't have any StatFlushEPT counter). We currently count these
2276 * as ASID flushes too, which is better than including them under StatFlushTLBWorldSwitch.
2277 */
2278 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2279#endif
2280 }
2281
2282 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2283 pVCpu->hwaccm.s.fForceTLBFlush = false;
2284 }
2285 else
2286 {
2287 Assert(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID);
2288
2289 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2290 * not be executed. See hwaccmQueueInvlPage() where it is commented
2291 * out. Support individual entry flushing someday. */
2292 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2293 {
2294 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2295
2296 /*
2297 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2298 * as supported by the CPU.
2299 */
2300 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2301 {
2302 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2303 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2304 }
2305 else
2306 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2307 }
2308 else
2309 {
2310#ifdef VBOX_WITH_STATISTICS
2311 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2312#endif
2313 }
2314 }
2315 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2316 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2317
2318 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes,
2319 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2320 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID,
2321 ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2322 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID,
2323 ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2324
2325 /* Update VMCS with the VPID. */
2326 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2327 AssertRC(rc);
2328}
2329
2330
2331/**
2332 * Setup the tagged TLB for EPT only.
2333 *
2335 * @param pVM The VM to operate on.
2336 * @param pVCpu The VMCPU to operate on.
2337 */
2338static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2339{
2340 PHMGLOBLCPUINFO pCpu;
2341
2342 Assert(pVM->hwaccm.s.fNestedPaging);
2343 Assert(!pVM->hwaccm.s.vmx.fVPID);
2344
2345 /* Deal with tagged TLBs if VPID or EPT is supported. */
2346 pCpu = HWACCMR0GetCurrentCpu();
2347 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
2348 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
2349 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2350 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
2351 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2352 {
2353 /* Force a TLB flush on VM entry. */
2354 pVCpu->hwaccm.s.fForceTLBFlush = true;
2355 }
2356
2357 /* Check for tlb shootdown flushes. */
2358 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2359 pVCpu->hwaccm.s.fForceTLBFlush = true;
2360
2361 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2362
2363 if (pVCpu->hwaccm.s.fForceTLBFlush)
2364 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2365 else
2366 {
2367 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2368 * not be executed. See hwaccmQueueInvlPage() where it is commented
2369 * out. Support individual entry flushing someday. */
2370 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2371 {
2372 /*
2373 * We cannot flush individual entries without VPID support. Flush using EPT.
2374 */
2375 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2376 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2377 }
2378 }
2379 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2380 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2381
2382#ifdef VBOX_WITH_STATISTICS
2383 if (pVCpu->hwaccm.s.fForceTLBFlush)
2384 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2385 else
2386 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2387#endif
2388}
2389
2390
2391/**
2392 * Setup the tagged TLB for VPID.
2393 *
2395 * @param pVM The VM to operate on.
2396 * @param pVCpu The VMCPU to operate on.
2397 */
2398static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2399{
2400 PHMGLOBLCPUINFO pCpu;
2401
2402 Assert(pVM->hwaccm.s.vmx.fVPID);
2403 Assert(!pVM->hwaccm.s.fNestedPaging);
2404
2405 /* Deal with tagged TLBs if VPID or EPT is supported. */
2406 pCpu = HWACCMR0GetCurrentCpu();
2407 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
2408 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
2409 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2410 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
2411 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2412 {
2413 /* Force a TLB flush on VM entry. */
2414 pVCpu->hwaccm.s.fForceTLBFlush = true;
2415 }
2416
2417 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2418
2419 /* Check for tlb shootdown flushes. */
2420 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2421 pVCpu->hwaccm.s.fForceTLBFlush = true;
2422
2423 /* Make sure we flush the TLB when required. */
2424 if (pVCpu->hwaccm.s.fForceTLBFlush)
2425 {
2426 ++pCpu->uCurrentASID;
2427 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2428 {
2429 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2430 pCpu->cTLBFlushes++;
2431 pCpu->fFlushASIDBeforeUse = true;
2432 }
2433 else
2434 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2435
2436 pVCpu->hwaccm.s.fForceTLBFlush = false;
2437 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2438 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2439 if (pCpu->fFlushASIDBeforeUse)
2440 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2441 }
2442 else
2443 {
2444 Assert(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID);
2445
2446 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2447 * not be executed. See hwaccmQueueInvlPage() where it is commented
2448 * out. Support individual entry flushing someday. */
2449 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2450 {
2451 /*
2452 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2453 * as supported by the CPU.
2454 */
2455 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2456 {
2457 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2458 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2459 }
2460 else
2461 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2462 }
2463 }
2464 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2465 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2466
2467 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2468 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2469 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2470
2471 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2472 AssertRC(rc);
2473
2474# ifdef VBOX_WITH_STATISTICS
2475 if (pVCpu->hwaccm.s.fForceTLBFlush)
2476 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2477 else
2478 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2479# endif
2480}
2481
2482
2483/**
2484 * Runs guest code in a VT-x VM.
2485 *
2486 * @returns VBox status code.
2487 * @param pVM The VM to operate on.
2488 * @param pVCpu The VMCPU to operate on.
2489 * @param pCtx Guest context
2490 */
2491VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2492{
2493 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
2494 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1);
2495 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2);
2496
2497 VBOXSTRICTRC rc = VINF_SUCCESS;
2498 int rc2;
2499 RTGCUINTREG val;
2500 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2501 RTGCUINTREG instrError, cbInstr;
2502 RTGCUINTPTR exitQualification = 0;
2503 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2504 RTGCUINTPTR errCode, instrInfo;
2505 bool fSetupTPRCaching = false;
2506 uint64_t u64OldLSTAR = 0;
2507 uint8_t u8LastTPR = 0;
2508 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2509 unsigned cResume = 0;
2510#ifdef VBOX_STRICT
2511 RTCPUID idCpuCheck;
2512 bool fWasInLongMode = false;
2513#endif
2514#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2515 uint64_t u64LastTime = RTTimeMilliTS();
2516#endif
2517
2518 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || (pVCpu->hwaccm.s.vmx.pbVAPIC && pVM->hwaccm.s.vmx.pAPIC));
2519
2520 /* Check if we need to use TPR shadowing. */
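    /* In long mode the guest can access the TPR directly through CR8, so shadowing is always needed there;
     * otherwise it is only needed when a virtual APIC page or TPR patching is usable and an I/O APIC exists. */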
2521 if ( CPUMIsGuestInLongModeEx(pCtx)
2522 || ( ((pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || pVM->hwaccm.s.fTRPPatchingAllowed)
2523 && pVM->hwaccm.s.fHasIoApic)
2524 )
2525 {
2526 fSetupTPRCaching = true;
2527 }
2528
2529 Log2(("\nE"));
2530
2531#ifdef VBOX_STRICT
2532 {
2533 RTCCUINTREG val2;
2534
2535 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2);
2536 AssertRC(rc2);
2537 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2538
2539 /* allowed zero */
2540 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2541 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2542
2543 /* allowed one */
2544 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2545 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2546
2547 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2);
2548 AssertRC(rc2);
2549 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2550
2551 /* Must be set according to the MSR, but can be cleared in case of EPT. */
2552 if (pVM->hwaccm.s.fNestedPaging)
2553 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2554 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2555 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2556
2557 /* allowed zero */
2558 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2559 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2560
2561 /* allowed one */
2562 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2563 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2564
2565 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2);
2566 AssertRC(rc2);
2567 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2568
2569 /* allowed zero */
2570 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
2571 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2572
2573 /* allowed one */
2574 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2575 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2576
2577 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2);
2578 AssertRC(rc2);
2579 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2580
2581 /* allowed zero */
2582 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2583 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2584
2585 /* allowed one */
2586 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2587 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2588 }
2589 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2590#endif /* VBOX_STRICT */
2591
2592#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2593 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2594#endif
2595
2596 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
2597 */
2598ResumeExecution:
2599 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry))
2600 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x);
2601 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2602 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2603 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2604 Assert(!HWACCMR0SuspendPending());
2605 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2606 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2607
2608 /* Safety precaution; looping for too long here can have a very bad effect on the host */
2609 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
2610 {
2611 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2612 rc = VINF_EM_RAW_INTERRUPT;
2613 goto end;
2614 }
2615
2616 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
2617 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2618 {
2619 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2620 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2621 {
2622 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2623 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2624 * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process, which could
2625 * break the guest. Sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
2626 */
2627 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2628 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2629 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2630 AssertRC(rc2);
2631 }
2632 }
2633 else
2634 {
2635 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2636 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2637 AssertRC(rc2);
2638 }
2639
2640#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2641 if (RT_UNLIKELY((cResume & 0xf) == 0))
2642 {
2643 uint64_t u64CurTime = RTTimeMilliTS();
2644
2645 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2646 {
2647 u64LastTime = u64CurTime;
2648 TMTimerPollVoid(pVM, pVCpu);
2649 }
2650 }
2651#endif
2652
2653 /* Check for pending actions that force us to go back to ring 3. */
2654 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2655 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2656 {
2657 /* Check if a sync operation is pending. */
2658 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2659 {
2660 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2661 if (rc != VINF_SUCCESS)
2662 {
2663 AssertRC(VBOXSTRICTRC_VAL(rc));
2664 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2665 goto end;
2666 }
2667 }
2668
2669#ifdef DEBUG
2670 /* Intercept X86_XCPT_DB if stepping is enabled */
2671 if (!DBGFIsStepping(pVCpu))
2672#endif
2673 {
2674 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2675 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2676 {
2677 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2678 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2679 goto end;
2680 }
2681 }
2682
2683 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2684 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
2685 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
2686 {
2687 rc = VINF_EM_PENDING_REQUEST;
2688 goto end;
2689 }
2690
2691 /* Check if a pgm pool flush is in progress. */
2692 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2693 {
2694 rc = VINF_PGM_POOL_FLUSH_PENDING;
2695 goto end;
2696 }
2697
2698 /* Check if DMA work is pending (2nd+ run). */
2699 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
2700 {
2701 rc = VINF_EM_RAW_TO_R3;
2702 goto end;
2703 }
2704 }
2705
2706#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2707 /*
2708 * Exit to ring-3 if preemption or other work is pending.
2709 *
2710 * Interrupts are disabled before the call to make sure we don't miss any interrupt
2711 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
2712 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
2713 *
2714 * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
2715 * shootdowns rely on this.
2716 */
2717 uOldEFlags = ASMIntDisableFlags();
2718 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
2719 {
2720 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
2721 rc = VINF_EM_RAW_INTERRUPT;
2722 goto end;
2723 }
2724 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2725#endif
2726
2727 /* When external interrupts are pending, we should exit the VM when IF is set. */
2728 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
2729 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
2730 if (RT_FAILURE(rc))
2731 goto end;
2732
2733 /** @todo check timers?? */
2734
2735 /* TPR caching using CR8 is only available in 64-bit mode. */
2736 /* Note the 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), but that appears to be missing in Intel CPUs. */
2737 /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!! (no longer true) */
2738 /**
2739 * @todo query and update the TPR only when it could have been changed (mmio access & wrmsr (x2apic))
2740 */
2741 if (fSetupTPRCaching)
2742 {
2743 /* TPR caching in CR8 */
2744 bool fPending;
2745
2746 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
2747 AssertRC(rc2);
2748 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
2749 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = u8LastTPR;
2750
2751 /* Two options here:
2752 * - external interrupt pending, but masked by the TPR value.
2753 * -> a CR8 update that lowers the current TPR value should cause an exit
2754 * - no pending interrupts
2755 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
2756 */
2757 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
2758 AssertRC(VBOXSTRICTRC_VAL(rc));
2759
2760 if (pVM->hwaccm.s.fTPRPatchingActive)
2761 {
2762 Assert(!CPUMIsGuestInLongModeEx(pCtx));
2763 /* Our patch code uses LSTAR for TPR caching. */
2764 pCtx->msrLSTAR = u8LastTPR;
2765
2766 if (fPending)
2767 {
2768 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
2769 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
2770 }
2771 else
2772 {
2773 /* No interrupts are pending, so we don't need to be explicitly notified.
2774 * There are enough world switches for detecting pending interrupts.
2775 */
2776 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
2777 }
2778 }
2779 }
2780
2781#ifdef LOG_ENABLED
2782 if ( pVM->hwaccm.s.fNestedPaging
2783 || pVM->hwaccm.s.vmx.fVPID)
2784 {
2785 PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu();
2786 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2787 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2788 {
2789 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
2790 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
2791 else
2792 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2793 }
2794 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
2795 LogFlow(("Manual TLB flush\n"));
2796 }
2797#endif
2798#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2799 PGMRZDynMapFlushAutoSet(pVCpu);
2800#endif
2801
2802 /*
2803 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
2804 * (until the actual world switch)
2805 */
2806#ifdef VBOX_STRICT
2807 idCpuCheck = RTMpCpuId();
2808#endif
2809#ifdef LOG_ENABLED
2810 VMMR0LogFlushDisable(pVCpu);
2811#endif
2812 /* Save the host state first. */
2813 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
2814 {
2815 rc = VMXR0SaveHostState(pVM, pVCpu);
2816 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2817 {
2818 VMMR0LogFlushEnable(pVCpu);
2819 goto end;
2820 }
2821 }
2822
2823 /* Load the guest state */
2824 if (!pVCpu->hwaccm.s.fContextUseFlags)
2825 {
2826 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2827 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadMinimal);
2828 }
2829 else
2830 {
2831 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
2832 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2833 {
2834 VMMR0LogFlushEnable(pVCpu);
2835 goto end;
2836 }
2837 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadFull);
2838 }
2839
2840#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2841 /* Disable interrupts to make sure a poke will interrupt execution.
2842 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
2843 */
2844 uOldEFlags = ASMIntDisableFlags();
2845 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2846#endif
2847
2848 /* Non-register state Guest Context */
2849 /** @todo change me according to cpu state */
2850 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
2851 AssertRC(rc2);
2852
2853 /* Set TLB flush state as checked until we return from the world switch. */
2854 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
2855 /* Deal with tagged TLB setup and invalidation. */
2856 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
2857
2858 /* Manual save and restore:
2859 * - General purpose registers except RIP, RSP
2860 *
2861 * Trashed:
2862 * - CR2 (we don't care)
2863 * - LDTR (reset to 0)
2864 * - DRx (presumably not changed at all)
2865 * - DR7 (reset to 0x400)
2866 * - EFLAGS (reset to RT_BIT(1); not relevant)
2867 *
2868 */
2869
2870 /* All done! Let's start VM execution. */
2871 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x);
2872 Assert(idCpuCheck == RTMpCpuId());
2873
2874#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2875 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
2876 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
2877#endif
2878
2879 /* Save the current TPR value in the LSTAR msr so our patches can access it. */
2880 if (pVM->hwaccm.s.fTPRPatchingActive)
2881 {
2882 Assert(pVM->hwaccm.s.fTPRPatchingActive);
2883 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
2884 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
2885 }
2886
2887 TMNotifyStartOfExecution(pVCpu);
2888#ifdef VBOX_WITH_KERNEL_USING_XMM
2889 rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM);
2890#else
2891 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
2892#endif
2893 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
2894 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits);
2895 /* Possibly the last TSC value seen by the guest (too high) (only when we're in tsc offset mode). */
2896 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
2897 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
2898
2899 TMNotifyEndOfExecution(pVCpu);
2900 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
2901 Assert(!(ASMGetFlags() & X86_EFL_IF));
2902
2903 /* Restore the host LSTAR msr if the guest could have changed it. */
2904 if (pVM->hwaccm.s.fTPRPatchingActive)
2905 {
2906 Assert(pVM->hwaccm.s.fTPRPatchingActive);
2907 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
2908 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
2909 }
2910
2911 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x);
2912 ASMSetFlags(uOldEFlags);
2913#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2914 uOldEFlags = ~(RTCCUINTREG)0;
2915#endif
2916
2917 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
2918
2919 /* In case we execute a goto ResumeExecution later on. */
2920 pVCpu->hwaccm.s.fResumeVM = true;
2921 pVCpu->hwaccm.s.fForceTLBFlush = false;
2922
2923 /*
2924 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2925 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
2926 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2927 */
2928
2929 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2930 {
2931 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
2932 VMMR0LogFlushEnable(pVCpu);
2933 goto end;
2934 }
2935
2936 /* Success. Query the guest state and figure out what has happened. */
2937
2938 /* Investigate why there was a VM-exit. */
2939 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
2940 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
2941
2942 exitReason &= 0xffff; /* bits 0-15 contain the exit code. */
2943 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
2944 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
2945 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
2946 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
2947 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
2948 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
2949 rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
2950 AssertRC(rc2);
2951
2952 /* Sync back the guest state */
2953 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
2954 AssertRC(rc2);
2955
2956 /* Note! NOW IT'S SAFE FOR LOGGING! */
2957 VMMR0LogFlushEnable(pVCpu);
2958 Log2(("Raw exit reason %08x\n", exitReason));
2959#if ARCH_BITS == 64 /* for the time being */
2960 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
2961#endif
2962
2963 /* Check if an injected event was interrupted prematurely. */
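    /* If the event did not get delivered, remember it so it can be re-injected on the next VM-entry. */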
2964 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
2965 AssertRC(rc2);
2966 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
2967 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2968 /* Ignore 'int xx' as they'll be restarted anyway. */
2969 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
2970 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2971 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2972 {
2973 Assert(!pVCpu->hwaccm.s.Event.fPending);
2974 pVCpu->hwaccm.s.Event.fPending = true;
2975 /* Error code present? */
2976 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
2977 {
2978 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
2979 AssertRC(rc2);
2980 pVCpu->hwaccm.s.Event.errCode = val;
2981 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
2982 }
2983 else
2984 {
2985 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2986 pVCpu->hwaccm.s.Event.errCode = 0;
2987 }
2988 }
2989#ifdef VBOX_STRICT
2990 else
2991 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2992 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2993 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2994 {
2995 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2996 }
2997
2998 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
2999 HWACCMDumpRegs(pVM, pVCpu, pCtx);
3000#endif
3001
3002 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs, (RTGCPTR)pCtx->rip));
3003 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3004 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3005 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3006 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3007
3008 /* Sync back the TPR if it was changed. */
3009 if ( fSetupTPRCaching
3010 && u8LastTPR != pVCpu->hwaccm.s.vmx.pbVAPIC[0x80])
3011 {
3012 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pbVAPIC[0x80]);
3013 AssertRC(rc2);
3014 }
3015
3016#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3017 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3018 exitReason, (uint64_t)exitQualification, pCtx->cs, pCtx->rip, (uint64_t)intInfo);
3019#endif
3020 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x);
3021
3022 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3023 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3024 switch (exitReason)
3025 {
3026 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3027 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3028 {
3029 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3030
3031 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3032 {
3033 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
3034#if 0 //def VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3035 if ( RTThreadPreemptIsPendingTrusty()
3036 && !RTThreadPreemptIsPending(NIL_RTTHREAD))
3037 goto ResumeExecution;
3038#endif
3039 /* External interrupt; leave to allow it to be dispatched again. */
3040 rc = VINF_EM_RAW_INTERRUPT;
3041 break;
3042 }
3043 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3044 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3045 {
3046 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3047 /* External interrupt; leave to allow it to be dispatched again. */
3048 rc = VINF_EM_RAW_INTERRUPT;
3049 break;
3050
3051 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
3052 AssertFailed(); /* can't come here; fails the first check. */
3053 break;
3054
3055 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
3056 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
3057 Assert(vector == 1 || vector == 3 || vector == 4);
3058 /* no break */
3059 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
3060 Log2(("Hardware/software interrupt %d\n", vector));
3061 switch (vector)
3062 {
3063 case X86_XCPT_NM:
3064 {
3065 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3066
3067 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3068 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3069 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3070 if (rc == VINF_SUCCESS)
3071 {
3072 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3073
3074 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
3075
3076 /* Continue execution. */
3077 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3078
3079 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3080 goto ResumeExecution;
3081 }
3082
3083 Log(("Forward #NM fault to the guest\n"));
3084 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
3085 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
3086 AssertRC(rc2);
3087 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3088 goto ResumeExecution;
3089 }
3090
3091 case X86_XCPT_PF: /* Page fault */
3092 {
3093#ifdef VBOX_ALWAYS_TRAP_PF
3094 if (pVM->hwaccm.s.fNestedPaging)
3095 { /* A genuine pagefault.
3096 * Forward the trap to the guest by injecting the exception and resuming execution.
3097 */
3098 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
3099
3100 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3101
3102 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3103
3104 /* Now we must update CR2. */
3105 pCtx->cr2 = exitQualification;
3106 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3107 AssertRC(rc2);
3108
3109 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3110 goto ResumeExecution;
3111 }
3112#endif
3113 Assert(!pVM->hwaccm.s.fNestedPaging);
3114
3115#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
3116 /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
3117 if ( pVM->hwaccm.s.fTRPPatchingAllowed
3118 && pVM->hwaccm.s.pGuestPatchMem
3119 && (exitQualification & 0xfff) == 0x080
3120 && !(errCode & X86_TRAP_PF_P) /* not present */
3121 && CPUMGetGuestCPL(pVCpu, CPUMCTX2CORE(pCtx)) == 0
3122 && !CPUMIsGuestInLongModeEx(pCtx)
3123 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
3124 {
3125 RTGCPHYS GCPhysApicBase, GCPhys;
3126 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3127 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3128
3129 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3130 if ( rc == VINF_SUCCESS
3131 && GCPhys == GCPhysApicBase)
3132 {
3133 /* Only attempt to patch the instruction once. */
3134 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3135 if (!pPatch)
3136 {
3137 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
3138 break;
3139 }
3140 }
3141 }
3142#endif
3143
3144 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3145 /* Exit qualification contains the linear address of the page fault. */
3146 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3147 TRPMSetErrorCode(pVCpu, errCode);
3148 TRPMSetFaultAddress(pVCpu, exitQualification);
3149
3150 /* Shortcut for APIC TPR reads and writes. */
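/* Offset 0x80 within the 4KB xAPIC MMIO page is the TPR register, hence the page-offset check below. */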
3151 if ( (exitQualification & 0xfff) == 0x080
3152 && !(errCode & X86_TRAP_PF_P) /* not present */
3153 && fSetupTPRCaching
3154 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3155 {
3156 RTGCPHYS GCPhysApicBase, GCPhys;
3157 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3158 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3159
3160 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3161 if ( rc == VINF_SUCCESS
3162 && GCPhys == GCPhysApicBase)
3163 {
3164 Log(("Enable VT-x virtual APIC access filtering\n"));
3165 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3166 AssertRC(rc2);
3167 }
3168 }
3169
3170 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3171 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3172 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3173
3174 if (rc == VINF_SUCCESS)
3175 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3176 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification ,errCode));
3177 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
3178
3179 TRPMResetTrap(pVCpu);
3180 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3181 goto ResumeExecution;
3182 }
3183 else
3184 if (rc == VINF_EM_RAW_GUEST_TRAP)
3185 { /* A genuine pagefault.
3186 * Forward the trap to the guest by injecting the exception and resuming execution.
3187 */
3188 Log2(("Forward page fault to the guest\n"));
3189
3190 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3191 /* The error code might have been changed. */
3192 errCode = TRPMGetErrorCode(pVCpu);
3193
3194 TRPMResetTrap(pVCpu);
3195
3196 /* Now we must update CR2. */
3197 pCtx->cr2 = exitQualification;
3198 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3199 AssertRC(rc2);
3200
3201 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3202 goto ResumeExecution;
3203 }
3204#ifdef VBOX_STRICT
3205 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3206 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3207#endif
3208 /* Need to go back to the recompiler to emulate the instruction. */
3209 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPFEM);
3210 TRPMResetTrap(pVCpu);
3211 break;
3212 }
3213
3214 case X86_XCPT_MF: /* Floating point exception. */
3215 {
3216 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
3217 if (!(pCtx->cr0 & X86_CR0_NE))
3218 {
3219 /* old style FPU error reporting needs some extra work. */
3220 /** @todo don't fall back to the recompiler, but do it manually. */
3221 rc = VINF_EM_RAW_EMULATE_INSTR;
3222 break;
3223 }
3224 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
3225 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3226 AssertRC(rc2);
3227
3228 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3229 goto ResumeExecution;
3230 }
3231
3232 case X86_XCPT_DB: /* Debug exception. */
3233 {
3234 uint64_t uDR6;
3235
3236 /* DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3237 *
3238 * Exit qualification bits:
3239 * 3:0 B0-B3 which breakpoint condition was met
3240 * 12:4 Reserved (0)
3241 * 13 BD - debug register access detected
3242 * 14 BS - single step execution or branch taken
3243 * 63:15 Reserved (0)
3244 */
3245 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
3246
3247 /* Note that we don't support guest and host-initiated debugging at the same time. */
3248
3249 uDR6 = X86_DR6_INIT_VAL;
3250 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3251 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3252 if (rc == VINF_EM_RAW_GUEST_TRAP)
3253 {
3254 /* Update DR6 here. */
3255 pCtx->dr[6] = uDR6;
3256
3257 /* Resync DR6 if the debug state is active. */
3258 if (CPUMIsGuestDebugStateActive(pVCpu))
3259 ASMSetDR6(pCtx->dr[6]);
3260
3261 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
3262 pCtx->dr[7] &= ~X86_DR7_GD;
3263
3264 /* Paranoia. */
3265 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3266 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3267 pCtx->dr[7] |= 0x400; /* must be one */
3268
3269 /* Resync DR7 */
3270 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3271 AssertRC(rc2);
3272
3273 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip, exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3274 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3275 AssertRC(rc2);
3276
3277 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3278 goto ResumeExecution;
3279 }
3280 /* Return to ring 3 to deal with the debug exit code. */
3281 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3282 break;
3283 }
3284
3285 case X86_XCPT_BP: /* Breakpoint. */
3286 {
3287 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestBP);
3288 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3289 if (rc == VINF_EM_RAW_GUEST_TRAP)
3290 {
3291 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs, pCtx->rip));
3292 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3293 AssertRC(rc2);
3294 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3295 goto ResumeExecution;
3296 }
3297 if (rc == VINF_SUCCESS)
3298 {
3299 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3300 goto ResumeExecution;
3301 }
3302 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3303 break;
3304 }
3305
3306 case X86_XCPT_GP: /* General protection failure exception.*/
3307 {
3308 uint32_t cbOp;
3309 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3310
3311 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
3312#ifdef VBOX_STRICT
3313 if ( !CPUMIsGuestInRealModeEx(pCtx)
3314 || !pVM->hwaccm.s.vmx.pRealModeTSS)
3315 {
3316 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode));
3317 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3318 AssertRC(rc2);
3319 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3320 goto ResumeExecution;
3321 }
3322#endif
3323 Assert(CPUMIsGuestInRealModeEx(pCtx));
3324
3325 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip));
3326
3327 rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, &cbOp);
3328 if (RT_SUCCESS(rc2))
3329 {
3330 bool fUpdateRIP = true;
3331
3332 rc = VINF_SUCCESS;
3333 Assert(cbOp == pDis->opsize);
3334 switch (pDis->pCurInstr->opcode)
3335 {
3336 case OP_CLI:
3337 pCtx->eflags.Bits.u1IF = 0;
3338 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
3339 break;
3340
3341 case OP_STI:
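/* STI blocks interrupts until after the next instruction; record that both in EM
 * (inhibit-interrupts PC) and in the VMCS guest interruptibility state (STI blocking). */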
3342 pCtx->eflags.Bits.u1IF = 1;
3343 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->opsize);
3344 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3345 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3346 AssertRC(rc2);
3347 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
3348 break;
3349
3350 case OP_HLT:
3351 fUpdateRIP = false;
3352 rc = VINF_EM_HALT;
3353 pCtx->rip += pDis->opsize;
3354 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3355 break;
3356
3357 case OP_POPF:
3358 {
3359 RTGCPTR GCPtrStack;
3360 uint32_t cbParm;
3361 uint32_t uMask;
3362 X86EFLAGS eflags;
3363
3364 if (pDis->prefix & PREFIX_OPSIZE)
3365 {
3366 cbParm = 4;
3367 uMask = 0xffffffff;
3368 }
3369 else
3370 {
3371 cbParm = 2;
3372 uMask = 0xffff;
3373 }
3374
3375 rc2 = SELMToFlatEx(pVCpu, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3376 if (RT_FAILURE(rc2))
3377 {
3378 rc = VERR_EM_INTERPRETER;
3379 break;
3380 }
3381 eflags.u = 0;
3382 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3383 if (RT_FAILURE(rc2))
3384 {
3385 rc = VERR_EM_INTERPRETER;
3386 break;
3387 }
3388 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3389 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (eflags.u & X86_EFL_POPF_BITS & uMask);
3390 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3391 pCtx->eflags.Bits.u1RF = 0;
3392 pCtx->esp += cbParm;
3393 pCtx->esp &= uMask;
3394
3395 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
3396 break;
3397 }
3398
3399 case OP_PUSHF:
3400 {
3401 RTGCPTR GCPtrStack;
3402 uint32_t cbParm;
3403 uint32_t uMask;
3404 X86EFLAGS eflags;
3405
3406 if (pDis->prefix & PREFIX_OPSIZE)
3407 {
3408 cbParm = 4;
3409 uMask = 0xffffffff;
3410 }
3411 else
3412 {
3413 cbParm = 2;
3414 uMask = 0xffff;
3415 }
3416
3417 rc2 = SELMToFlatEx(pVCpu, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0, &GCPtrStack);
3418 if (RT_FAILURE(rc2))
3419 {
3420 rc = VERR_EM_INTERPRETER;
3421 break;
3422 }
3423 eflags = pCtx->eflags;
3424 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3425 eflags.Bits.u1RF = 0;
3426 eflags.Bits.u1VM = 0;
3427
3428 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3429 if (RT_FAILURE(rc2))
3430 {
3431 rc = VERR_EM_INTERPRETER;
3432 break;
3433 }
3434 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3435 pCtx->esp -= cbParm;
3436 pCtx->esp &= uMask;
3437 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
3438 break;
3439 }
3440
3441 case OP_IRET:
3442 {
3443 RTGCPTR GCPtrStack;
3444 uint32_t uMask = 0xffff;
3445 uint16_t aIretFrame[3];
3446
3447 if (pDis->prefix & (PREFIX_OPSIZE | PREFIX_ADDRSIZE))
3448 {
3449 rc = VERR_EM_INTERPRETER;
3450 break;
3451 }
3452
3453 rc2 = SELMToFlatEx(pVCpu, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3454 if (RT_FAILURE(rc2))
3455 {
3456 rc = VERR_EM_INTERPRETER;
3457 break;
3458 }
3459 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3460 if (RT_FAILURE(rc2))
3461 {
3462 rc = VERR_EM_INTERPRETER;
3463 break;
3464 }
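/* A real-mode IRET pops IP, CS and FLAGS as three 16-bit words; unpack them below.
 * The hidden CS base is selector * 16 in real mode. */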
3465 pCtx->ip = aIretFrame[0];
3466 pCtx->cs = aIretFrame[1];
3467 pCtx->csHid.u64Base = pCtx->cs << 4;
3468 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3469 pCtx->sp += sizeof(aIretFrame);
3470
3471 LogFlow(("iret to %04x:%x\n", pCtx->cs, pCtx->ip));
3472 fUpdateRIP = false;
3473 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
3474 break;
3475 }
3476
3477 case OP_INT:
3478 {
3479 uint32_t intInfo2;
3480
3481 LogFlow(("Realmode: INT %x\n", pDis->param1.parval & 0xff));
3482 intInfo2 = pDis->param1.parval & 0xff;
3483 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3484 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3485
3486 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3487 AssertRC(VBOXSTRICTRC_VAL(rc));
3488 fUpdateRIP = false;
3489 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3490 break;
3491 }
3492
3493 case OP_INTO:
3494 {
3495 if (pCtx->eflags.Bits.u1OF)
3496 {
3497 uint32_t intInfo2;
3498
3499 LogFlow(("Realmode: INTO\n"));
3500 intInfo2 = X86_XCPT_OF;
3501 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3502 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3503
3504 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3505 AssertRC(VBOXSTRICTRC_VAL(rc));
3506 fUpdateRIP = false;
3507 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3508 }
3509 break;
3510 }
3511
3512 case OP_INT3:
3513 {
3514 uint32_t intInfo2;
3515
3516 LogFlow(("Realmode: INT 3\n"));
3517 intInfo2 = 3;
3518 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3519 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3520
3521 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3522 AssertRC(VBOXSTRICTRC_VAL(rc));
3523 fUpdateRIP = false;
3524 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3525 break;
3526 }
3527
3528 default:
3529 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
3530 fUpdateRIP = false;
3531 break;
3532 }
3533
3534 if (rc == VINF_SUCCESS)
3535 {
3536 if (fUpdateRIP)
3537 pCtx->rip += cbOp; /* Move on to the next instruction. */
3538
3539 /* lidt, lgdt can end up here. In the future crx changes as well. Just reload the whole context to be done with it. */
3540 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3541
3542 /* Only resume if successful. */
3543 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3544 goto ResumeExecution;
3545 }
3546 }
3547 else
3548 rc = VERR_EM_INTERPRETER;
3549
3550 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
3551 break;
3552 }
3553
3554#ifdef VBOX_STRICT
3555 case X86_XCPT_XF: /* SIMD exception. */
3556 case X86_XCPT_DE: /* Divide error. */
3557 case X86_XCPT_UD: /* Unknown opcode exception. */
3558 case X86_XCPT_SS: /* Stack segment exception. */
3559 case X86_XCPT_NP: /* Segment not present exception. */
3560 {
3561 switch(vector)
3562 {
3563 case X86_XCPT_DE:
3564 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
3565 break;
3566 case X86_XCPT_UD:
3567 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
3568 break;
3569 case X86_XCPT_SS:
3570 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
3571 break;
3572 case X86_XCPT_NP:
3573 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
3574 break;
3575 case X86_XCPT_XF:
3576 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXF);
3577 break;
3578 }
3579
3580 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
3581 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3582 AssertRC(rc2);
3583
3584 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3585 goto ResumeExecution;
3586 }
3587#endif
3588 default:
3589 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXcpUnk);
3590 if ( CPUMIsGuestInRealModeEx(pCtx)
3591 && pVM->hwaccm.s.vmx.pRealModeTSS)
3592 {
3593 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode));
3594 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3595 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
3596
3597 /* Go back to ring 3 in case of a triple fault. */
3598 if ( vector == X86_XCPT_DF
3599 && rc == VINF_EM_RESET)
3600 break;
3601
3602 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3603 goto ResumeExecution;
3604 }
3605 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3606 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3607 break;
3608 } /* switch (vector) */
3609
3610 break;
3611
3612 default:
3613 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3614 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
3615 break;
3616 }
3617
3618 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3619 break;
3620 }
3621
3622 case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
3623 {
3624 RTGCPHYS GCPhys;
3625
3626 Assert(pVM->hwaccm.s.fNestedPaging);
3627
3628 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3629 AssertRC(rc2);
3630 Assert(((exitQualification >> 7) & 3) != 2);
3631
3632 /* Determine the kind of violation. */
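/* Synthesize a #PF-style error code (ID/RW/P bits) from the EPT exit qualification, so the
 * nested-paging handler below can reuse the ordinary page-fault plumbing. */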
3633 errCode = 0;
3634 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
3635 errCode |= X86_TRAP_PF_ID;
3636
3637 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
3638 errCode |= X86_TRAP_PF_RW;
3639
3640 /* If the page is present, then it's a page level protection fault. */
3641 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
3642 {
3643 errCode |= X86_TRAP_PF_P;
3644 }
3645 else
3646 {
3647 /* Shortcut for APIC TPR reads and writes. */
3648 if ( (GCPhys & 0xfff) == 0x080
3649 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3650 && fSetupTPRCaching
3651 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3652 {
3653 RTGCPHYS GCPhysApicBase;
3654 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3655 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3656 if (GCPhys == GCPhysApicBase + 0x80)
3657 {
3658 Log(("Enable VT-x virtual APIC access filtering\n"));
3659 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3660 AssertRC(rc2);
3661 }
3662 }
3663 }
3664 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
3665
3666 /* GCPhys contains the guest physical address of the page fault. */
3667 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3668 TRPMSetErrorCode(pVCpu, errCode);
3669 TRPMSetFaultAddress(pVCpu, GCPhys);
3670
3671 /* Handle the pagefault trap for the nested shadow table. */
3672 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
3673
3674 /*
3675 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
3676 */
3677 if ( rc == VINF_SUCCESS
3678 || rc == VERR_PAGE_TABLE_NOT_PRESENT
3679 || rc == VERR_PAGE_NOT_PRESENT)
3680 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3681 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification , errCode));
3682 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
3683
3684 TRPMResetTrap(pVCpu);
3685 goto ResumeExecution;
3686 }
3687
3688#ifdef VBOX_STRICT
3689 if (rc != VINF_EM_RAW_EMULATE_INSTR)
3690 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3691#endif
3692 /* Need to go back to the recompiler to emulate the instruction. */
3693 TRPMResetTrap(pVCpu);
3694 break;
3695 }
3696
3697 case VMX_EXIT_EPT_MISCONFIG:
3698 {
3699 RTGCPHYS GCPhys;
3700
3701 Assert(pVM->hwaccm.s.fNestedPaging);
3702
3703 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3704 AssertRC(rc2);
3705 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
3706
3707 /* Shortcut for APIC TPR reads and writes. */
3708 if ( (GCPhys & 0xfff) == 0x080
3709 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3710 && fSetupTPRCaching
3711 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3712 {
3713 RTGCPHYS GCPhysApicBase;
3714 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3715 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3716 if (GCPhys == GCPhysApicBase + 0x80)
3717 {
3718 Log(("Enable VT-x virtual APIC access filtering\n"));
3719 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3720 AssertRC(rc2);
3721 }
3722 }
3723
3724 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
3725
3726 /*
3727 * If we succeed, resume execution.
3728 * Or, if fail in interpreting the instruction because we couldn't get the guest physical address
3729 * of the page containing the instruction via the guest's page tables (we would invalidate the guest page
3730 * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this
3731 * weird case. See @bugref{6043}.
3732 */
3733 if ( rc == VINF_SUCCESS
3734 || rc == VERR_PAGE_TABLE_NOT_PRESENT
3735 || rc == VERR_PAGE_NOT_PRESENT)
3736 {
3737 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
3738 goto ResumeExecution;
3739 }
3740
3741 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3742 break;
3743 }
3744
3745 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
3746 /* Clear VM-exit on IF=1 change. */
3747 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
3748 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
3749 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3750 AssertRC(rc2);
3751 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
3752 goto ResumeExecution; /* we check for pending guest interrupts there */
3753
3754 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
3755 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
3756 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
3757 /* Skip instruction and continue directly. */
3758 pCtx->rip += cbInstr;
3759 /* Continue execution. */
3760 goto ResumeExecution;
3761
3762 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
3763 {
3764 Log2(("VMX: Cpuid %x\n", pCtx->eax));
3765 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
3766 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3767 if (rc == VINF_SUCCESS)
3768 {
3769 /* Update EIP and continue execution. */
3770 Assert(cbInstr == 2);
3771 pCtx->rip += cbInstr;
3772 goto ResumeExecution;
3773 }
3774 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
3775 rc = VINF_EM_RAW_EMULATE_INSTR;
3776 break;
3777 }
3778
3779 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
3780 {
3781 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
3782 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
3783 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3784 if (rc == VINF_SUCCESS)
3785 {
3786 /* Update EIP and continue execution. */
3787 Assert(cbInstr == 2);
3788 pCtx->rip += cbInstr;
3789 goto ResumeExecution;
3790 }
3791 rc = VINF_EM_RAW_EMULATE_INSTR;
3792 break;
3793 }
3794
3795 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
3796 {
3797 Log2(("VMX: Rdtsc\n"));
3798 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
3799 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3800 if (rc == VINF_SUCCESS)
3801 {
3802 /* Update EIP and continue execution. */
3803 Assert(cbInstr == 2);
3804 pCtx->rip += cbInstr;
3805 goto ResumeExecution;
3806 }
3807 rc = VINF_EM_RAW_EMULATE_INSTR;
3808 break;
3809 }
3810
3811 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVLPG. */
3812 {
3813 Log2(("VMX: invlpg\n"));
3814 Assert(!pVM->hwaccm.s.fNestedPaging);
3815
3816 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
3817 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
3818 if (rc == VINF_SUCCESS)
3819 {
3820 /* Update EIP and continue execution. */
3821 pCtx->rip += cbInstr;
3822 goto ResumeExecution;
3823 }
3824 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
3825 break;
3826 }
3827
3828 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
3829 {
3830 Log2(("VMX: monitor\n"));
3831
3832 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
3833 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3834 if (rc == VINF_SUCCESS)
3835 {
3836 /* Update EIP and continue execution. */
3837 pCtx->rip += cbInstr;
3838 goto ResumeExecution;
3839 }
3840 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
3841 break;
3842 }
3843
3844 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
3845 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
3846 if ( pVM->hwaccm.s.fTPRPatchingActive
3847 && pCtx->ecx == MSR_K8_LSTAR)
3848 {
3849 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3850 if ((pCtx->eax & 0xff) != u8LastTPR)
3851 {
3852 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
3853
3854 /* Our patch code uses LSTAR for TPR caching. */
3855 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
3856 AssertRC(rc2);
3857 }
3858
3859 /* Skip the instruction and continue. */
3860 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
3861
3862 /* Only resume if successful. */
3863 goto ResumeExecution;
3864 }
3865 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_MSR;
3866 /* no break */
3867 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
3868 {
3869 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
3870
3871 /* Note: the Intel manual claims there's a REX version of RDMSR that's slightly different, so we play it safe by completely disassembling the instruction. */
3872 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
3873 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
3874 if (rc == VINF_SUCCESS)
3875 {
3876 /* EIP has been updated already. */
3877
3878 /* Only resume if successful. */
3879 goto ResumeExecution;
3880 }
3881 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
3882 break;
3883 }
3884
3885 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
3886 {
3887 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3888
3889 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
3890 {
3891 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
3892 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
3893 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3894 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3895 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
3896 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
3897
3898 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
3899 {
3900 case 0:
3901 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
3902 break;
3903 case 2:
3904 break;
3905 case 3:
3906 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
3907 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
3908 break;
3909 case 4:
3910 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
3911 break;
3912 case 8:
3913 /* CR8 contains the APIC TPR */
3914 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3915 break;
3916
3917 default:
3918 AssertFailed();
3919 break;
3920 }
3921 break;
3922
3923 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
3924 Log2(("VMX: mov x, crx\n"));
3925 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3926
3927 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
3928
3929 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
3930 Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3931
3932 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3933 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
3934 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
3935 break;
3936
3937 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
3938 Log2(("VMX: clts\n"));
3939 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
3940 rc = EMInterpretCLTS(pVM, pVCpu);
3941 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3942 break;
3943
3944 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
3945 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
3946 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
3947 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
3948 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3949 break;
3950 }
3951
3952 /* Update EIP if no error occurred. */
3953 if (RT_SUCCESS(rc))
3954 pCtx->rip += cbInstr;
3955
3956 if (rc == VINF_SUCCESS)
3957 {
3958 /* Only resume if successful. */
3959 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3960 goto ResumeExecution;
3961 }
3962 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
3963 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3964 break;
3965 }
3966
3967 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
3968 {
3969 if ( !DBGFIsStepping(pVCpu)
3970 && !CPUMIsHyperDebugStateActive(pVCpu))
3971 {
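/* Lazy DRx switch: give the guest the real debug registers and stop intercepting MOV DRx,
 * so subsequent accesses run without VM-exits until we leave guest mode. */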
3972 /* Disable drx move intercepts. */
3973 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3974 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3975 AssertRC(rc2);
3976
3977 /* Save the host and load the guest debug state. */
3978 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
3979 AssertRC(rc2);
3980
3981#ifdef LOG_ENABLED
3982 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3983 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
3984 else
3985 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
3986#endif
3987
3988#ifdef VBOX_WITH_STATISTICS
3989 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
3990 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3991 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3992 else
3993 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3994#endif
3995
3996 goto ResumeExecution;
3997 }
3998
3999 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
4000 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4001 {
4002 Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4003 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4004 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4005 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4006 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4007 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4008 Log2(("DR7=%08x\n", pCtx->dr[7]));
4009 }
4010 else
4011 {
4012 Log2(("VMX: mov x, drx\n"));
4013 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4014 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4015 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4016 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4017 }
4018 /* Update EIP if no error occurred. */
4019 if (RT_SUCCESS(rc))
4020 pCtx->rip += cbInstr;
4021
4022 if (rc == VINF_SUCCESS)
4023 {
4024 /* Only resume if successful. */
4025 goto ResumeExecution;
4026 }
4027 Assert(rc == VERR_EM_INTERPRETER);
4028 break;
4029 }
4030
4031 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
4032 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4033 {
4034 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4035 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4036 uint32_t uPort;
4037 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4038
4039 /** @todo necessary to make the distinction? */
4040 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4041 {
4042 uPort = pCtx->edx & 0xffff;
4043 }
4044 else
4045 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
4046
4047 /* paranoia */
4048 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4))
4049 {
4050 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4051 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4052 break;
4053 }
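/* VT-x encodes the access width as size minus one: 0, 1 and 3 mean 1-, 2- and 4-byte accesses;
 * encoding 2 is undefined, which is what the check above rejects. */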
4054
4055 uint32_t cbSize = g_aIOSize[uIOWidth];
4056
4057 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4058 {
4059 /* ins/outs */
4060 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
4061
4062 /* Disassemble manually to deal with segment prefixes. */
4063 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
4064 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4065 rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL);
4066 if (RT_SUCCESS(rc2)) /* Check the disassembly result, not rc (which is still VINF_SUCCESS here). */
4067 {
4068 if (fIOWrite)
4069 {
4070 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4071 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
4072 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, pDis->addrmode, cbSize);
4073 }
4074 else
4075 {
4076 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4077 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
4078 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, pDis->addrmode, cbSize);
4079 }
4080 }
4081 else
4082 rc = VINF_EM_RAW_EMULATE_INSTR;
4083 }
4084 else
4085 {
4086 /* normal in/out */
4087 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
4088
4089 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4090
4091 if (fIOWrite)
4092 {
4093 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
4094 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4095 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4096 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4097 }
4098 else
4099 {
4100 uint32_t u32Val = 0;
4101
4102 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
4103 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4104 if (IOM_SUCCESS(rc))
4105 {
4106 /* Write back to the EAX register. */
4107 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4108 }
4109 else
4110 if (rc == VINF_IOM_R3_IOPORT_READ)
4111 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4112 }
4113 }
4114 /*
4115 * Handle the I/O return codes.
4116 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4117 */
4118 if (IOM_SUCCESS(rc))
4119 {
4120 /* Update EIP and continue execution. */
4121 pCtx->rip += cbInstr;
4122 if (RT_LIKELY(rc == VINF_SUCCESS))
4123 {
4124 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4125 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4126 {
4127 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
4128 for (unsigned i = 0; i < 4; i++)
4129 {
4130 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
4131
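/* A breakpoint matches when the accessed port range overlaps DRi, the breakpoint is enabled
 * (locally or globally) and its type is I/O read/write (DR7.RWi == 2, requires CR4.DE). */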
4132 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4133 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4134 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4135 {
4136 uint64_t uDR6;
4137
4138 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4139
4140 uDR6 = ASMGetDR6();
4141
4142 /* Clear all breakpoint status flags and set the one we just hit. */
4143 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4144 uDR6 |= (uint64_t)RT_BIT(i);
4145
4146 /* Note: AMD64 Architecture Programmer's Manual 13.1:
4147 * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared by software after
4148 * the contents have been read.
4149 */
4150 ASMSetDR6(uDR6);
4151
4152 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
4153 pCtx->dr[7] &= ~X86_DR7_GD;
4154
4155 /* Paranoia. */
4156 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4157 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4158 pCtx->dr[7] |= 0x400; /* must be one */
4159
4160 /* Resync DR7 */
4161 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
4162 AssertRC(rc2);
4163
4164 /* Construct inject info. */
4165 intInfo = X86_XCPT_DB;
4166 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4167 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4168
4169 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4170 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
4171 AssertRC(rc2);
4172
4173 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4174 goto ResumeExecution;
4175 }
4176 }
4177 }
4178 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4179 goto ResumeExecution;
4180 }
4181 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4182 break;
4183 }
4184
4185#ifdef VBOX_STRICT
4186 if (rc == VINF_IOM_R3_IOPORT_READ)
4187 Assert(!fIOWrite);
4188 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4189 Assert(fIOWrite);
4190 else
4191 AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4192#endif
4193 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4194 break;
4195 }
4196
4197 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4198 LogFlow(("VMX_EXIT_TPR\n"));
4199 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4200 goto ResumeExecution;
4201
4202 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */
4203 {
4204 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4205 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4206
4207 switch(uAccessType)
4208 {
4209 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4210 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4211 {
4212 RTGCPHYS GCPhys;
4213 PDMApicGetBase(pVM, &GCPhys);
4214 GCPhys &= PAGE_BASE_GC_MASK;
4215 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4216
4217 LogFlow(("Apic access at %RGp\n", GCPhys));
4218 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, CPUMCTX2CORE(pCtx), GCPhys);
4219 if (rc == VINF_SUCCESS)
4220 goto ResumeExecution; /* rip already updated */
4221 break;
4222 }
4223
4224 default:
4225 rc = VINF_EM_RAW_EMULATE_INSTR;
4226 break;
4227 }
4228 break;
4229 }
4230
4231 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4232 if (!TMTimerPollBool(pVM, pVCpu))
4233 goto ResumeExecution;
4234 rc = VINF_EM_RAW_TIMER_PENDING;
4235 break;
4236
4237 default:
4238 /* The rest is handled after syncing the entire CPU state. */
4239 break;
4240 }
4241
4242 /* Note: the guest state isn't entirely synced back at this stage. */
4243
4244 /* Investigate why there was a VM-exit. (part 2) */
4245 switch (exitReason)
4246 {
4247 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
4248 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
4249 case VMX_EXIT_EPT_VIOLATION:
4250 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4251 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4252 /* Already handled above. */
4253 break;
4254
4255 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4256 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4257 break;
4258
4259 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4260 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4261 rc = VINF_EM_RAW_INTERRUPT;
4262 AssertFailed(); /* Can't happen. Yet. */
4263 break;
4264
4265 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
4266 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
4267 rc = VINF_EM_RAW_INTERRUPT;
4268 AssertFailed(); /* Can't happen afaik. */
4269 break;
4270
4271 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4272 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4273 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4274 && pVCpu->hwaccm.s.Event.fPending)
4275 {
4276 /* Caused by an injected interrupt. */
4277 pVCpu->hwaccm.s.Event.fPending = false;
4278
4279 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo)));
4280 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo));
4281 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT);
4282 AssertRC(rc2);
4283 }
4284 /* else Exceptions and software interrupts can just be restarted. */
4285 rc = VERR_EM_INTERPRETER;
4286 break;
4287
4288 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4289 /* Check if external interrupts are pending; if so, don't switch back. */
4290 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
4291 pCtx->rip++; /* Skip the single-byte HLT instruction. */
4292 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4293 goto ResumeExecution;
4294
4295 rc = VINF_EM_HALT;
4296 break;
4297
4298 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4299 Log2(("VMX: mwait\n"));
4300 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
4301 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4302 if ( rc == VINF_EM_HALT
4303 || rc == VINF_SUCCESS)
4304 {
4305 /* Update EIP and continue execution. */
4306 pCtx->rip += cbInstr;
4307
4308 /* Check if external interrupts are pending; if so, don't switch back. */
4309 if ( rc == VINF_SUCCESS
4310 || ( rc == VINF_EM_HALT
4311 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4312 )
4313 goto ResumeExecution;
4314 }
4315 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4316 break;
4317
4318 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4319 AssertFailed(); /* can't happen. */
4320 rc = VERR_EM_INTERPRETER;
4321 break;
4322
4323 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4324 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4325 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4326 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4327 AssertRC(rc2);
4328 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMTF);
4329#if 0
4330 DBGFDoneStepping(pVCpu);
4331#endif
4332 rc = VINF_EM_DBG_STOP;
4333 break;
4334
4335 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4336 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4337 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4338 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4339 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4340 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4341 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4342 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4343 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4344 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4345 /** @todo inject #UD immediately */
4346 rc = VERR_EM_INTERPRETER;
4347 break;
4348
4349 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4350 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4351 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVLPG. */
4352 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4353 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4354 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4355 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4356 /* already handled above */
4357 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4358 || rc == VINF_EM_RAW_INTERRUPT
4359 || rc == VERR_EM_INTERPRETER
4360 || rc == VINF_EM_RAW_EMULATE_INSTR
4361 || rc == VINF_PGM_SYNC_CR3
4362 || rc == VINF_IOM_R3_IOPORT_READ
4363 || rc == VINF_IOM_R3_IOPORT_WRITE
4364 || rc == VINF_EM_RAW_GUEST_TRAP
4365 || rc == VINF_TRPM_XCPT_DISPATCHED
4366 || rc == VINF_EM_RESCHEDULE_REM,
4367 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4368 break;
4369
4370 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4371 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */
4372 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4373 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4374 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4375 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4376 /* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
4377 rc = VERR_EM_INTERPRETER;
4378 break;
4379
4380 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4381 Assert(rc == VINF_EM_RAW_INTERRUPT);
4382 break;
4383
4384 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4385 {
4386#ifdef VBOX_STRICT
4387 RTCCUINTREG val2 = 0;
4388
4389 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4390
4391 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2);
4392 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4393
4394 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2);
4395 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4396
4397 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2);
4398 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4399
4400 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2);
4401 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4402
4403 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4404 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4405
4406 VMX_LOG_SELREG(CS, "CS", val2);
4407 VMX_LOG_SELREG(DS, "DS", val2);
4408 VMX_LOG_SELREG(ES, "ES", val2);
4409 VMX_LOG_SELREG(FS, "FS", val2);
4410 VMX_LOG_SELREG(GS, "GS", val2);
4411 VMX_LOG_SELREG(SS, "SS", val2);
4412 VMX_LOG_SELREG(TR, "TR", val2);
4413 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4414
4415 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2);
4416 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4417 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2);
4418 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4419#endif /* VBOX_STRICT */
4420 rc = VERR_VMX_INVALID_GUEST_STATE;
4421 break;
4422 }
4423
4424 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4425 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4426 default:
4427 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4428 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4429 break;
4430
4431 }
4432end:
4433
4434 /* We're now going back to ring-3, so clear the action flag. */
4435 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4436
4437 /* Signal changes for the recompiler. */
4438 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
4439
4440 /* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
4441 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4442 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4443 {
4444 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
4445 /* On the next entry we'll only sync the host context. */
4446 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
4447 }
4448 else
4449 {
4450 /* On the next entry we'll sync everything. */
4451 /** @todo we can do better than this */
4452 /* Not in the VINF_PGM_CHANGE_MODE though! */
4453 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
4454 }
4455
4456 /* Translate into a less severe return code. */
4457 if (rc == VERR_EM_INTERPRETER)
4458 rc = VINF_EM_RAW_EMULATE_INSTR;
4459 else
4460 /* Try to extract more information about what might have gone wrong here. */
4461 if (rc == VERR_VMX_INVALID_VMCS_PTR)
4462 {
4463 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
4464 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS;
4465 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
4466 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4467 }
4468
4469 /* Just set the correct state here instead of trying to catch every goto above. */
4470 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4471
4472#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4473 /* Restore interrupts if we exited after disabling them. */
4474 if (uOldEFlags != ~(RTCCUINTREG)0)
4475 ASMSetFlags(uOldEFlags);
4476#endif
4477
4478 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x);
4479 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
4480 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
4481 Log2(("X"));
4482 return VBOXSTRICTRC_TODO(rc);
4483}
4484
4485
4486/**
4487 * Enters the VT-x session
4488 *
4489 * @returns VBox status code.
4490 * @param pVM The VM to operate on.
4491 * @param pVCpu The VMCPU to operate on.
4492 * @param pCpu CPU info struct
4493 */
4494VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
4495{
4496 Assert(pVM->hwaccm.s.vmx.fSupported);
4497 NOREF(pCpu);
4498
4499 unsigned cr4 = ASMGetCR4();
4500 if (!(cr4 & X86_CR4_VMXE))
4501 {
4502 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4503 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4504 }
4505
4506 /* Activate the VM Control Structure. */
4507 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4508 if (RT_FAILURE(rc))
4509 return rc;
4510
4511 pVCpu->hwaccm.s.fResumeVM = false;
4512 return VINF_SUCCESS;
4513}
4514
4515
4516/**
4517 * Leaves the VT-x session
4518 *
4519 * @returns VBox status code.
4520 * @param pVM The VM to operate on.
4521 * @param pVCpu The VMCPU to operate on.
4522 * @param pCtx CPU context
4523 */
4524VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4525{
4526 Assert(pVM->hwaccm.s.vmx.fSupported);
4527
4528#ifdef DEBUG
4529 if (CPUMIsHyperDebugStateActive(pVCpu))
4530 {
4531 CPUMR0LoadHostDebugState(pVM, pVCpu);
4532 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4533 }
4534 else
4535#endif
4536 /* Save the guest debug state if necessary. */
4537 if (CPUMIsGuestDebugStateActive(pVCpu))
4538 {
4539 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4540
4541 /* Enable drx move intercepts again. */
4542 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4543 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4544 AssertRC(rc);
4545
4546 /* Resync the debug registers the next time. */
4547 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4548 }
4549 else
4550 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4551
4552 /* Clear the VM control structure: this marks it inactive, clears implementation-specific data and writes the VMCS data back to memory. */
4553 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4554 AssertRC(rc);
4555
4556 return VINF_SUCCESS;
4557}
4558
4559
4560/**
4561 * Flush the TLB using EPT.
4562 *
4564 * @param pVM The VM to operate on.
4565 * @param pVCpu The VM CPU to operate on.
4566 * @param enmFlush Type of flush.
4567 */
4568static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
4569{
4570 uint64_t descriptor[2];
4571
4572 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
4573 Assert(pVM->hwaccm.s.fNestedPaging);
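    /*
     * INVEPT takes a 128-bit descriptor in memory: the EPT pointer (EPTP) in the low 64 bits
     * and a reserved field in the high 64 bits which must be zero.
     */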
4574 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
4575 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
4576 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
4577 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.vmx.GCPhysEPTP, rc));
4578}
4579
4580
4581/**
4582 * Flush the TLB using VPID.
4583 *
4585 * @param pVM The VM to operate on.
4586 * @param pVCpu The VM CPU to operate on (can be NULL depending on @a
4587 * enmFlush).
4588 * @param enmFlush Type of flush.
4589 * @param GCPtr Virtual address of the page to flush (can be 0 depending
4590 * on @a enmFlush).
4591 */
4592static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
4593{
4594#if HC_ARCH_BITS == 32
4595 /* If we get a flush in 64-bit guest mode, force a full TLB flush. INVVPID probably takes only 32-bit addresses. (@todo) */
4596 if ( CPUMIsGuestInLongMode(pVCpu)
4597 && !VMX_IS_64BIT_HOST_MODE())
4598 {
4599 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4600 }
4601 else
4602#endif
4603 {
4604 uint64_t descriptor[2];
4605
4606 Assert(pVM->hwaccm.s.vmx.fVPID);
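        /*
         * INVVPID takes a 128-bit descriptor: the VPID in bits 15:0, reserved bits 63:16 which
         * must be zero, and the linear address in bits 127:64 (used only by the
         * individual-address flush type).
         */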
4607 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
4608 {
4609 descriptor[0] = 0;
4610 descriptor[1] = 0;
4611 }
4612 else
4613 {
4614 AssertPtr(pVCpu);
4615 Assert(pVCpu->hwaccm.s.uCurrentASID != 0);
4616 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
4617 descriptor[1] = GCPtr;
4618 }
4619 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
4620 AssertMsg(rc == VINF_SUCCESS,
4621 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hwaccm.s.uCurrentASID : 0, GCPtr, rc));
4622 }
4623}
4624
4625
4626/**
4627 * Invalidates a guest page by guest virtual address. Only relevant for
4628 * EPT/VPID, otherwise there is nothing really to invalidate.
4629 *
4630 * @returns VBox status code.
4631 * @param pVM The VM to operate on.
4632 * @param pVCpu The VM CPU to operate on.
4633 * @param GCVirt Page to invalidate.
4634 */
4635VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
4636{
4637 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4638
4639 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
4640
4641 if (!fFlushPending)
4642 {
4643 /*
4644 * We must invalidate the guest TLB entry in either case; we cannot ignore it even in the EPT case.
4645 * See @bugref{6043} and @bugref{6177}.
4646 *
4647 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*() as this
4648 * function may be called in a loop with individual addresses.
4649 */
4650 if (pVM->hwaccm.s.vmx.fVPID)
4651 {
4652 /* If we can flush just this page, do it; otherwise flush as little as possible. */
4653 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
4654 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
4655 else
4656 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4657 }
4658 else if (pVM->hwaccm.s.fNestedPaging)
4659 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4660 }
4661
4662 return VINF_SUCCESS;
4663}
4664
4665
4666/**
4667 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
4668 * otherwise there is nothing really to invalidate.
4669 *
4670 * NOTE: Assumes the current instruction references this physical page through a virtual address!
4671 *
4672 * @returns VBox status code.
4673 * @param pVM The VM to operate on.
4674 * @param pVCpu The VM CPU to operate on.
4675 * @param GCPhys Page to invalidate.
4676 */
4677VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
4678{
4679 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
4680
4681 /*
4682 * We cannot flush a page by guest-physical address: INVVPID takes only a linear address,
4683 * while INVEPT flushes only by EPT context, not by individual addresses. We update the force
4684 * flag here and flush before VMENTRY in hmR0VmxSetupTLB*(). This function might be called in a loop.
4685 */
4686 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4687 return VINF_SUCCESS;
4688}
4689
4690
4691/**
4692 * Report world switch error and dump some useful debug info
4693 *
4694 * @param pVM The VM to operate on.
4695 * @param pVCpu The VMCPU to operate on.
4696 * @param rc Return code
4697 * @param pCtx Current CPU context (not updated)
4698 */
4699static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
4700{
4701 NOREF(pVM);
4702
4703 switch (VBOXSTRICTRC_VAL(rc))
4704 {
4705 case VERR_VMX_INVALID_VMXON_PTR:
4706 AssertFailed();
4707 break;
4708
4709 case VERR_VMX_UNABLE_TO_START_VM:
4710 case VERR_VMX_UNABLE_TO_RESUME_VM:
4711 {
4712 int rc2;
4713 RTCCUINTREG exitReason, instrError;
4714
4715 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
4716 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
4717 AssertRC(rc2);
4718 if (rc2 == VINF_SUCCESS)
4719 {
4720 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
4721 Log(("Current stack %p\n", &rc2));
4722
4723 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
4724 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
4725
4726#ifdef VBOX_STRICT
4727 RTGDTR gdtr;
4728 PCX86DESCHC pDesc;
4729 RTCCUINTREG val;
4730
4731 ASMGetGDTR(&gdtr);
4732
4733 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
4734 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
4735 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
4736 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
4737 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
4738 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
4739 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
4740 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
4741 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
4742 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
4743
4744 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
4745 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
4746
4747 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
4748 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
4749
4750 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
4751 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
4752
4753 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
4754 Log(("VMX_VMCS16_HOST_FIELD_CS %08x\n", val));
4755
4756 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
4757 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
4758
4759 if (val < gdtr.cbGdt)
4760 {
4761 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4762 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
4763 }
4764
4765 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
4766 Log(("VMX_VMCS16_HOST_FIELD_DS %08x\n", val));
4767 if (val < gdtr.cbGdt)
4768 {
4769 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4770 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
4771 }
4772
4773 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
4774 Log(("VMX_VMCS16_HOST_FIELD_ES %08x\n", val));
4775 if (val < gdtr.cbGdt)
4776 {
4777 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4778 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
4779 }
4780
4781 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
4782 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
4783 if (val < gdtr.cbGdt)
4784 {
4785 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4786 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
4787 }
4788
4789 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
4790 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
4791 if (val < gdtr.cbGdt)
4792 {
4793 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4794 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
4795 }
4796
4797 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
4798 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
4799 if (val < gdtr.cbGdt)
4800 {
4801 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4802 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
4803 }
4804
4805 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
4806 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
4807 if (val < gdtr.cbGdt)
4808 {
4809 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4810 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
4811 }
4812
4813 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
4814 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
4815
4816 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
4817 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
4818 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
4819 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
4820
4821 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
4822 Log(("VMX_VMCS32_HOST_SYSENTER_CS %08x\n", val));
4823
4824 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
4825 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
4826
4827 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
4828 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
4829
4830 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
4831 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
4832 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
4833 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
4834
4835# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4836 if (VMX_IS_64BIT_HOST_MODE())
4837 {
4838 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
4839 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
4840 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4841 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4842 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4843 }
4844# endif
4845#endif /* VBOX_STRICT */
4846 }
4847 break;
4848 }
4849
4850 default:
4851 /* impossible */
4852 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
4853 break;
4854 }
4855}
4856
4857#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4858
4859/**
4860 * Prepares for and executes VMLAUNCH (64-bit guest mode).
4861 *
4862 * @returns VBox status code.
4863 * @param fResume vmlaunch/vmresume
4864 * @param pCtx Guest context
4865 * @param pCache VMCS cache
4866 * @param pVM The VM to operate on.
4867 * @param pVCpu The VMCPU to operate on.
4868 */
4869DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
4870{
4871 uint32_t aParam[6];
4872 PHMGLOBLCPUINFO pCpu;
4873 RTHCPHYS HCPhysCpuPage;
4874 int rc;
4875
4876 pCpu = HWACCMR0GetCurrentCpu();
4877 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
4878
4879#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4880 pCache->uPos = 1;
4881 pCache->interPD = PGMGetInterPaeCR3(pVM);
4882 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
4883#endif
4884
4885#ifdef DEBUG
4886 pCache->TestIn.HCPhysCpuPage = 0;
4887 pCache->TestIn.HCPhysVMCS = 0;
4888 pCache->TestIn.pCache = 0;
4889 pCache->TestOut.HCPhysVMCS = 0;
4890 pCache->TestOut.pCache = 0;
4891 pCache->TestOut.pCtx = 0;
4892 pCache->TestOut.eflags = 0;
4893#endif
4894
4895 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
4896 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
4897 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */
4898 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. */
4899 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
4900 aParam[5] = 0;
4901
4902#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4903 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
4904 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
4905#endif
4906 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
4907
4908#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4909 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
4910 Assert(pCtx->dr[4] == 10);
4911 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
4912#endif
4913
4914#ifdef DEBUG
4915 AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
4916 AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hwaccm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS, pVCpu->hwaccm.s.vmx.HCPhysVMCS));
4917 AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS, pCache->TestOut.HCPhysVMCS));
4918 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, pCache->TestOut.pCache));
4919 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache), ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
4920 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, pCache->TestOut.pCtx));
4921 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
4922#endif
4923 return rc;
4924}
4925
4926# ifdef VBOX_STRICT
4927
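/**
 * Checks whether the supplied VMCS field index is one the 64-on-32 VMCS read cache is
 * expected to contain (strict builds only).
 *
 * @returns true if the field is valid for the read cache, false otherwise.
 * @param   idxField    VMCS field index.
 */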
4928static bool hmR0VmxIsValidReadField(uint32_t idxField)
4929{
4930 switch(idxField)
4931 {
4932 case VMX_VMCS64_GUEST_RIP:
4933 case VMX_VMCS64_GUEST_RSP:
4934 case VMX_VMCS_GUEST_RFLAGS:
4935 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
4936 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
4937 case VMX_VMCS64_GUEST_CR0:
4938 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
4939 case VMX_VMCS64_GUEST_CR4:
4940 case VMX_VMCS64_GUEST_DR7:
4941 case VMX_VMCS32_GUEST_SYSENTER_CS:
4942 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4943 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4944 case VMX_VMCS32_GUEST_GDTR_LIMIT:
4945 case VMX_VMCS64_GUEST_GDTR_BASE:
4946 case VMX_VMCS32_GUEST_IDTR_LIMIT:
4947 case VMX_VMCS64_GUEST_IDTR_BASE:
4948 case VMX_VMCS16_GUEST_FIELD_CS:
4949 case VMX_VMCS32_GUEST_CS_LIMIT:
4950 case VMX_VMCS64_GUEST_CS_BASE:
4951 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
4952 case VMX_VMCS16_GUEST_FIELD_DS:
4953 case VMX_VMCS32_GUEST_DS_LIMIT:
4954 case VMX_VMCS64_GUEST_DS_BASE:
4955 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
4956 case VMX_VMCS16_GUEST_FIELD_ES:
4957 case VMX_VMCS32_GUEST_ES_LIMIT:
4958 case VMX_VMCS64_GUEST_ES_BASE:
4959 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
4960 case VMX_VMCS16_GUEST_FIELD_FS:
4961 case VMX_VMCS32_GUEST_FS_LIMIT:
4962 case VMX_VMCS64_GUEST_FS_BASE:
4963 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
4964 case VMX_VMCS16_GUEST_FIELD_GS:
4965 case VMX_VMCS32_GUEST_GS_LIMIT:
4966 case VMX_VMCS64_GUEST_GS_BASE:
4967 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
4968 case VMX_VMCS16_GUEST_FIELD_SS:
4969 case VMX_VMCS32_GUEST_SS_LIMIT:
4970 case VMX_VMCS64_GUEST_SS_BASE:
4971 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
4972 case VMX_VMCS16_GUEST_FIELD_LDTR:
4973 case VMX_VMCS32_GUEST_LDTR_LIMIT:
4974 case VMX_VMCS64_GUEST_LDTR_BASE:
4975 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
4976 case VMX_VMCS16_GUEST_FIELD_TR:
4977 case VMX_VMCS32_GUEST_TR_LIMIT:
4978 case VMX_VMCS64_GUEST_TR_BASE:
4979 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
4980 case VMX_VMCS32_RO_EXIT_REASON:
4981 case VMX_VMCS32_RO_VM_INSTR_ERROR:
4982 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
4983 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
4984 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
4985 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
4986 case VMX_VMCS_RO_EXIT_QUALIFICATION:
4987 case VMX_VMCS32_RO_IDT_INFO:
4988 case VMX_VMCS32_RO_IDT_ERRCODE:
4989 case VMX_VMCS64_GUEST_CR3:
4990 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
4991 return true;
4992 }
4993 return false;
4994}
4995
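/**
 * Checks whether the supplied VMCS field index is one the 64-on-32 VMCS write cache is
 * expected to contain (strict builds only).
 *
 * @returns true if the field is valid for the write cache, false otherwise.
 * @param   idxField    VMCS field index.
 */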
4996static bool hmR0VmxIsValidWriteField(uint32_t idxField)
4997{
4998 switch(idxField)
4999 {
5000 case VMX_VMCS64_GUEST_LDTR_BASE:
5001 case VMX_VMCS64_GUEST_TR_BASE:
5002 case VMX_VMCS64_GUEST_GDTR_BASE:
5003 case VMX_VMCS64_GUEST_IDTR_BASE:
5004 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5005 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5006 case VMX_VMCS64_GUEST_CR0:
5007 case VMX_VMCS64_GUEST_CR4:
5008 case VMX_VMCS64_GUEST_CR3:
5009 case VMX_VMCS64_GUEST_DR7:
5010 case VMX_VMCS64_GUEST_RIP:
5011 case VMX_VMCS64_GUEST_RSP:
5012 case VMX_VMCS64_GUEST_CS_BASE:
5013 case VMX_VMCS64_GUEST_DS_BASE:
5014 case VMX_VMCS64_GUEST_ES_BASE:
5015 case VMX_VMCS64_GUEST_FS_BASE:
5016 case VMX_VMCS64_GUEST_GS_BASE:
5017 case VMX_VMCS64_GUEST_SS_BASE:
5018 return true;
5019 }
5020 return false;
5021}
5022
5023# endif /* VBOX_STRICT */
5024
5025/**
5026 * Executes the specified handler in 64-bit mode.
5027 *
5028 * @returns VBox status code.
5029 * @param pVM The VM to operate on.
5030 * @param pVCpu The VMCPU to operate on.
5031 * @param pCtx Guest context
5032 * @param pfnHandler RC handler
5033 * @param cbParam Number of parameters
5034 * @param paParam Array of 32-bit parameters
5035 */
5036VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
5037{
5038 int rc, rc2;
5039 PHMGLOBLCPUINFO pCpu;
5040 RTHCPHYS HCPhysCpuPage;
5041 RTHCUINTREG uOldEFlags;
5042
5043 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5044 Assert(pfnHandler);
5045 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
5046 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
5047
5048#ifdef VBOX_STRICT
5049 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries;i++)
5050 Assert(hmR0VmxIsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
5051
5052 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries;i++)
5053 Assert(hmR0VmxIsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
5054#endif
5055
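    /*
     * A 32-bit host cannot execute the 64-bit handler directly. With interrupts disabled, the
     * sequence below clears the current VMCS, leaves VMX root mode and drops CR4.VMXE, calls
     * the handler via the 32->64 switcher, then restores CR4.VMXE, re-enters VMX root mode and
     * makes our VMCS current again.
     */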
5056 /* Disable interrupts. */
5057 uOldEFlags = ASMIntDisableFlags();
5058
5059 pCpu = HWACCMR0GetCurrentCpu();
5060 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5061
5062 /* Clear the VM control structure: this marks it inactive, clears implementation-specific data and writes the VMCS data back to memory. */
5063 VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5064
5065 /* Leave VMX Root Mode. */
5066 VMXDisable();
5067
5068 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5069
5070 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5071 CPUMSetHyperEIP(pVCpu, pfnHandler);
5072 for (int i=(int)cbParam-1;i>=0;i--)
5073 CPUMPushHyper(pVCpu, paParam[i]);
5074
5075 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5076 /* Call switcher. */
5077 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5078 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5079
5080 /* Make sure the VMX instructions don't cause #UD faults. */
5081 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5082
5083 /* Enter VMX Root Mode */
5084 rc2 = VMXEnable(HCPhysCpuPage);
5085 if (RT_FAILURE(rc2))
5086 {
5087 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5088 ASMSetFlags(uOldEFlags);
5089 return VERR_VMX_VMXON_FAILED;
5090 }
5091
5092 rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5093 AssertRC(rc2);
5094 Assert(!(ASMGetFlags() & X86_EFL_IF));
5095 ASMSetFlags(uOldEFlags);
5096 return rc;
5097}
5098
5099#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5100
5101
5102#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
5103/**
5104 * Executes VMWRITE
5105 *
5106 * @returns VBox status code
5107 * @param pVCpu The VMCPU to operate on.
5108 * @param idxField VMCS index
5109 * @param u64Val 16-, 32- or 64-bit value
5110 */
5111VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5112{
5113 int rc;
5114
5115 switch (idxField)
5116 {
5117 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
5118 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
5119 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
5120 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
5121 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
5122 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
5123 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
5124 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
5125 case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL:
5126 case VMX_VMCS_GUEST_LINK_PTR_FULL:
5127 case VMX_VMCS_GUEST_PDPTR0_FULL:
5128 case VMX_VMCS_GUEST_PDPTR1_FULL:
5129 case VMX_VMCS_GUEST_PDPTR2_FULL:
5130 case VMX_VMCS_GUEST_PDPTR3_FULL:
5131 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
5132 case VMX_VMCS_GUEST_EFER_FULL:
5133 case VMX_VMCS_CTRL_EPTP_FULL:
5134 /* These fields consist of two parts, both of which are writable in 32-bit mode. */
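            /* The high half of a full-width VMCS field has its own encoding, which is the low
               half's encoding plus one; hence the write to idxField + 1 below. */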
5135 rc = VMXWriteVMCS32(idxField, u64Val);
5136 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5137 AssertRC(rc);
5138 return rc;
5139
5140 case VMX_VMCS64_GUEST_LDTR_BASE:
5141 case VMX_VMCS64_GUEST_TR_BASE:
5142 case VMX_VMCS64_GUEST_GDTR_BASE:
5143 case VMX_VMCS64_GUEST_IDTR_BASE:
5144 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5145 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5146 case VMX_VMCS64_GUEST_CR0:
5147 case VMX_VMCS64_GUEST_CR4:
5148 case VMX_VMCS64_GUEST_CR3:
5149 case VMX_VMCS64_GUEST_DR7:
5150 case VMX_VMCS64_GUEST_RIP:
5151 case VMX_VMCS64_GUEST_RSP:
5152 case VMX_VMCS64_GUEST_CS_BASE:
5153 case VMX_VMCS64_GUEST_DS_BASE:
5154 case VMX_VMCS64_GUEST_ES_BASE:
5155 case VMX_VMCS64_GUEST_FS_BASE:
5156 case VMX_VMCS64_GUEST_GS_BASE:
5157 case VMX_VMCS64_GUEST_SS_BASE:
5158 /* Queue a 64-bit value as we can't set it in 32-bit host mode. */
5159 if (u64Val >> 32ULL)
5160 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
5161 else
5162 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
5163
5164 return rc;
5165
5166 default:
5167 AssertMsgFailed(("Unexpected field %x\n", idxField));
5168 return VERR_INVALID_PARAMETER;
5169 }
5170}
5171
5172/**
5173 * Caches VMCS writes for performance reasons (Darwin) and for running 64-bit guests on 32-bit hosts.
5174 *
5175 * @param pVCpu The VMCPU to operate on.
5176 * @param idxField VMCS field
5177 * @param u64Val Value
5178 */
5179VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5180{
5181 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
5182
5183 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5184
5185 /* Make sure there are no duplicates. */
5186 for (unsigned i=0;i<pCache->Write.cValidEntries;i++)
5187 {
5188 if (pCache->Write.aField[i] == idxField)
5189 {
5190 pCache->Write.aFieldVal[i] = u64Val;
5191 return VINF_SUCCESS;
5192 }
5193 }
5194
5195 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5196 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5197 pCache->Write.cValidEntries++;
5198 return VINF_SUCCESS;
5199}
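/* Note: the queued entries are expected to be committed to the VMCS with VMWRITE once the CPU
   is back in 64-bit mode, before the next VM entry; see the VMCSCACHE handling in the 64-bit
   switcher/start-VM code. */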
5200
5201#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
5202