VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp @ 42046

Last change on this file since 42046 was 42046, checked in by vboxsync, 12 years ago

VMM/HWVMXR0: comments.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 217.3 KB
1/* $Id: HWVMXR0.cpp 42046 2012-07-09 07:29:55Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HWACCM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hwaccm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HWACCMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
55#if defined(RT_ARCH_AMD64)
56# define VMX_IS_64BIT_HOST_MODE() (true)
57#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
58# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
59#else
60# define VMX_IS_64BIT_HOST_MODE() (false)
61#endif
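/* Note: the VBOX_WITH_HYBRID_32BIT_KERNEL case presumably covers hosts running a 32-bit kernel
   on a 64-bit capable CPU (e.g. the 32-bit darwin kernel), where 64-bit host mode has to be
   detected at runtime via g_fVMXIs64bitHost instead of at compile time. */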
62
63
64/*******************************************************************************
65* Global Variables *
66*******************************************************************************/
67/* IO operation lookup arrays. */
68static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
69static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
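/* Both tables are indexed by the access-size encoding from the I/O instruction VM-exit
   qualification (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; value 2 is undefined, hence the zero
   entries), yielding the operand size in bytes and the corresponding AND mask. */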
70
71#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
72/** See HWACCMR0A.asm. */
73extern "C" uint32_t g_fVMXIs64bitHost;
74#endif
75
76
77/*******************************************************************************
78* Local Functions *
79*******************************************************************************/
80static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
81static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
82static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
83static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
84static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
85static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
86static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
87static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
88static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
89
90
91/**
92 * Updates the error from the VMCS to HWACCMCPU's lasterror record.
93 *
94 * @param pVM Pointer to the VM.
95 * @param pVCpu Pointer to the VMCPU.
96 * @param rc The error code.
97 */
98static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
99{
100 if (rc == VERR_VMX_GENERIC)
101 {
102 RTCCUINTREG instrError;
103
104 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
105 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
106 }
107 pVM->hwaccm.s.lLastError = rc;
108}
109
110
111/**
112 * Sets up and activates VT-x on the current CPU.
113 *
114 * @returns VBox status code.
115 * @param pCpu Pointer to the CPU info struct.
116 * @param pVM Pointer to the VM. (can be NULL after a resume!!)
117 * @param pvCpuPage Pointer to the global CPU page.
118 * @param HCPhysCpuPage Physical address of the global CPU page.
119 */
120VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
121{
122 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
123 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
124
125 if (pVM)
126 {
127 /* Set revision dword at the beginning of the VMXON structure. */
128 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
129 }
130
131 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
132 * (which can have very bad consequences!!!)
133 */
134
135 if (ASMGetCR4() & X86_CR4_VMXE)
136 return VERR_VMX_IN_VMX_ROOT_MODE;
137
138 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */
139
140 /*
141 * Enter VMX root mode.
142 */
143 int rc = VMXEnable(HCPhysCpuPage);
144 if (RT_FAILURE(rc))
145 {
146 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
147 return VERR_VMX_VMXON_FAILED;
148 }
149
150 /*
151 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
152 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
153 * each time while reusing a VPID after hitting the MaxASID limit once.
154 */
155 if ( pVM
156 && pVM->hwaccm.s.vmx.fVPID
157 && (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS))
158 {
159 hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
160 pCpu->fFlushASIDBeforeUse = false;
161 }
162 else
163 pCpu->fFlushASIDBeforeUse = true;
164
165 /*
166 * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}.
167 */
168 ++pCpu->cTLBFlushes;
169
170 return VINF_SUCCESS;
171}
172
173
174/**
175 * Deactivates VT-x on the current CPU.
176 *
177 * @returns VBox status code.
178 * @param pCpu Pointer to the CPU info struct.
179 * @param pvCpuPage Pointer to the global CPU page.
180 * @param HCPhysCpuPage Physical address of the global CPU page.
181 */
182VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
183{
184 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
185 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
186 NOREF(pCpu);
187
188 /* If we're somehow not in VMX root mode, then we shouldn't dare leave it. */
189 if (!(ASMGetCR4() & X86_CR4_VMXE))
190 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
191
192 /* Leave VMX Root Mode. */
193 VMXDisable();
194
195 /* And clear the X86_CR4_VMXE bit. */
196 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
197 return VINF_SUCCESS;
198}
199
200
201/**
202 * Does Ring-0 per VM VT-x initialization.
203 *
204 * @returns VBox status code.
205 * @param pVM Pointer to the VM.
206 */
207VMMR0DECL(int) VMXR0InitVM(PVM pVM)
208{
209 int rc;
210
211#ifdef LOG_ENABLED
212 SUPR0Printf("VMXR0InitVM %x\n", pVM);
213#endif
214
215 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
216
217 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
218 {
219 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
220 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, PAGE_SIZE, true /* executable R0 mapping */);
221 AssertRC(rc);
222 if (RT_FAILURE(rc))
223 return rc;
224
225 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
226 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
227 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
228 }
229 else
230 {
231 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
232 pVM->hwaccm.s.vmx.pAPIC = 0;
233 pVM->hwaccm.s.vmx.pAPICPhys = 0;
234 }
235
236#ifdef VBOX_WITH_CRASHDUMP_MAGIC
237 {
238 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, PAGE_SIZE, true /* executable R0 mapping */);
239 AssertRC(rc);
240 if (RT_FAILURE(rc))
241 return rc;
242
243 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
244 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
245
246 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
247 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
248 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
249 }
250#endif
251
252 /* Allocate VMCSs for all guest CPUs. */
253 for (VMCPUID i = 0; i < pVM->cCpus; i++)
254 {
255 PVMCPU pVCpu = &pVM->aCpus[i];
256
257 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
258
259 /* Allocate one page for the VM control structure (VMCS). */
260 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVMCS, PAGE_SIZE, true /* executable R0 mapping */);
261 AssertRC(rc);
262 if (RT_FAILURE(rc))
263 return rc;
264
265 pVCpu->hwaccm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVMCS);
266 pVCpu->hwaccm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVMCS, 0);
267 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pvVMCS);
268
269 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
270 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
271
272 /* Allocate one page for the virtual APIC page for TPR caching. */
273 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVAPIC, PAGE_SIZE, true /* executable R0 mapping */);
274 AssertRC(rc);
275 if (RT_FAILURE(rc))
276 return rc;
277
278 pVCpu->hwaccm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVAPIC);
279 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, 0);
280 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pbVAPIC);
281
282 /* Allocate the MSR bitmap if this feature is supported. */
283 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
284 {
285 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, PAGE_SIZE, true /* executable R0 mapping */);
286 AssertRC(rc);
287 if (RT_FAILURE(rc))
288 return rc;
289
290 pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap);
291 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
292 memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
293 }
294
295#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
296 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
297 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, PAGE_SIZE, true /* executable R0 mapping */);
298 AssertRC(rc);
299 if (RT_FAILURE(rc))
300 return rc;
301
302 pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR);
303 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0);
304 memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE);
305
306 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
307 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, PAGE_SIZE, true /* executable R0 mapping */);
308 AssertRC(rc);
309 if (RT_FAILURE(rc))
310 return rc;
311
312 pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR);
313 pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0);
314 memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE);
315#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
316
317 /* Current guest paging mode. */
318 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
319
320#ifdef LOG_ENABLED
321 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pvVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.HCPhysVMCS);
322#endif
323 }
324
325 return VINF_SUCCESS;
326}
327
328
329/**
330 * Does Ring-0 per VM VT-x termination.
331 *
332 * @returns VBox status code.
333 * @param pVM Pointer to the VM.
334 */
335VMMR0DECL(int) VMXR0TermVM(PVM pVM)
336{
337 for (VMCPUID i = 0; i < pVM->cCpus; i++)
338 {
339 PVMCPU pVCpu = &pVM->aCpus[i];
340
341 if (pVCpu->hwaccm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ)
342 {
343 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVMCS, false);
344 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
345 pVCpu->hwaccm.s.vmx.pvVMCS = 0;
346 pVCpu->hwaccm.s.vmx.HCPhysVMCS = 0;
347 }
348 if (pVCpu->hwaccm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ)
349 {
350 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, false);
351 pVCpu->hwaccm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ;
352 pVCpu->hwaccm.s.vmx.pbVAPIC = 0;
353 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = 0;
354 }
355 if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
356 {
357 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false);
358 pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
359 pVCpu->hwaccm.s.vmx.pMSRBitmap = 0;
360 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0;
361 }
362#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
363 if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ)
364 {
365 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false);
366 pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ;
367 pVCpu->hwaccm.s.vmx.pHostMSR = 0;
368 pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0;
369 }
370 if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ)
371 {
372 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false);
373 pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ;
374 pVCpu->hwaccm.s.vmx.pGuestMSR = 0;
375 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0;
376 }
377#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
378 }
379 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
380 {
381 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
382 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
383 pVM->hwaccm.s.vmx.pAPIC = 0;
384 pVM->hwaccm.s.vmx.pAPICPhys = 0;
385 }
386#ifdef VBOX_WITH_CRASHDUMP_MAGIC
387 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
388 {
389 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
390 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
391 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
392 pVM->hwaccm.s.vmx.pScratch = 0;
393 pVM->hwaccm.s.vmx.pScratchPhys = 0;
394 }
395#endif
396 return VINF_SUCCESS;
397}
398
399
400/**
401 * Sets up VT-x for the specified VM.
402 *
403 * @returns VBox status code.
404 * @param pVM Pointer to the VM.
405 */
406VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
407{
408 int rc = VINF_SUCCESS;
409 uint32_t val;
410
411 AssertReturn(pVM, VERR_INVALID_PARAMETER);
412
413 /* Initialize these always, see hwaccmR3InitFinalizeR0(). */
414 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NONE;
415 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NONE;
416
417 /* Determine optimal flush type for EPT. */
418 if (pVM->hwaccm.s.fNestedPaging)
419 {
420 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_SINGLE_CONTEXT)
421 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_SINGLE_CONTEXT;
422 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_ALL_CONTEXTS)
423 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_ALL_CONTEXTS;
424 else
425 {
426 /*
427 * Should never really happen: EPT is supported but no suitable flush types are supported.
428 * We cannot ignore EPT at this point as we've already set up Unrestricted Guest execution.
429 */
430 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED;
431 return VERR_VMX_GENERIC;
432 }
433 }
434
435 /* Determine optimal flush type for VPID. */
436 if (pVM->hwaccm.s.vmx.fVPID)
437 {
438 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
439 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_SINGLE_CONTEXT;
440 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS)
441 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_ALL_CONTEXTS;
442 else
443 {
444 /*
445 * Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU.
446 * We do not handle other flush type combinations, so we ignore the VPID capabilities.
447 */
448 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
449 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
450 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT_RETAIN_GLOBALS)
451 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
452 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED;
453 pVM->hwaccm.s.vmx.fVPID = false;
454 }
455 }
456
457 for (VMCPUID i = 0; i < pVM->cCpus; i++)
458 {
459 PVMCPU pVCpu = &pVM->aCpus[i];
460
461 AssertPtr(pVCpu->hwaccm.s.vmx.pvVMCS);
462
463 /* Set revision dword at the beginning of the VMCS structure. */
464 *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
465
466 /*
467 * Clear and activate the VMCS.
468 */
469 Log(("HCPhysVMCS = %RHp\n", pVCpu->hwaccm.s.vmx.HCPhysVMCS));
470 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
471 if (RT_FAILURE(rc))
472 goto vmx_end;
473
474 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
475 if (RT_FAILURE(rc))
476 goto vmx_end;
477
478 /*
479 * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
480 * Set required bits to one and zero according to the MSR capabilities.
481 */
482 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
483 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */
484 | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */
485
486 /*
487 * Enable the VMX preemption timer.
488 */
489 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
490 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
491 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
492
493 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
494 AssertRC(rc);
495
496 /*
497 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
498 * Set required bits to one and zero according to the MSR capabilities.
499 */
500 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
501 /* Program which events cause VM-exits and which features we want to use. */
502 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
503 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
504 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
505 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
506 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
507 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
508 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside
509 the guest (host thinks the cpu load is high) */
510
511 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
512 if (!pVM->hwaccm.s.fNestedPaging)
513 {
514 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
515 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
516 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
517 }
518
519 /*
520 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch
521 * failure with an invalid control fields error. (combined with some other exit reasons)
522 */
523 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
524 {
525 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
526 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
527 Assert(pVM->hwaccm.s.vmx.pAPIC);
528 }
529 else
530 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
531 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
532
533 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
534 {
535 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
536 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
537 }
538
539 /* We will use the secondary control if it's present. */
540 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
541
542 /* Mask away the bits that the CPU doesn't support */
543 /** @todo make sure they don't conflict with the above requirements. */
544 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
545 pVCpu->hwaccm.s.vmx.proc_ctls = val;
546
547 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
548 AssertRC(rc);
549
550 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
551 {
552 /*
553 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
554 * Set required bits to one and zero according to the MSR capabilities.
555 */
556 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
557 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
558
559 if (pVM->hwaccm.s.fNestedPaging)
560 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
561
562 if (pVM->hwaccm.s.vmx.fVPID)
563 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
564
565 if (pVM->hwaccm.s.fHasIoApic)
566 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
567
568 if (pVM->hwaccm.s.vmx.fUnrestrictedGuest)
569 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
570
571 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
572 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP;
573
574 /* Mask away the bits that the CPU doesn't support */
575 /** @todo make sure they don't conflict with the above requirements. */
576 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
577 pVCpu->hwaccm.s.vmx.proc_ctls2 = val;
578 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
579 AssertRC(rc);
580 }
581
582 /*
583 * VMX_VMCS_CTRL_CR3_TARGET_COUNT
584 * Set required bits to one and zero according to the MSR capabilities.
585 */
586 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
587 AssertRC(rc);
588
589 /*
590 * Forward all exceptions except #NM & #PF to the guest.
591 * We always need to check page faults since our shadow page table can be out of sync.
592 * And we always lazily sync the FPU & XMM state.
593 */
594
595 /** @todo Possible optimization:
596 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
597 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
598 * registers ourselves of course.
599 *
600 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
601 */
602
603 /*
604 * Don't filter page faults, all of them should cause a world switch.
605 */
606 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
607 AssertRC(rc);
608 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
609 AssertRC(rc);
610
611 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
612 AssertRC(rc);
613 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
614 AssertRC(rc);
615 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
616 AssertRC(rc);
617
618 /*
619 * Set the MSR bitmap address.
620 */
621 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
622 {
623 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
624
625 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
626 AssertRC(rc);
627
628 /* Allow the guest to directly modify these MSRs; they are restored and saved automatically. */
629 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
630 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
631 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
632 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
633 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
634 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
635 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
636 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
637 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
638 }
639
640#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
641 /*
642 * Set the guest & host MSR load/store physical addresses.
643 */
644 Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
645 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
646 AssertRC(rc);
647 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
648 AssertRC(rc);
649
650 Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys);
651 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys);
652 AssertRC(rc);
653#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
654
655 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
656 AssertRC(rc);
657 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
658 AssertRC(rc);
659
660 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
661 {
662 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
663 /* Optional */
664 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
665 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.HCPhysVAPIC);
666
667 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
668 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
669
670 AssertRC(rc);
671 }
672
673 /* Set link pointer to -1. Not currently used. */
674 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
675 AssertRC(rc);
676
677 /*
678 * Clear the VMCS, marking it inactive. This clears implementation-specific data and writes the
679 * VMCS data back to memory.
680 */
681 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
682 AssertRC(rc);
683
684 /*
685 * Configure the VMCS read cache.
686 */
687 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
688
689 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
690 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
691 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
692 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
693 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
694 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
695 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
696 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
697 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
698 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
699 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
700 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
701 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
702 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
703 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
704 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
705
706 VMX_SETUP_SELREG(ES, pCache);
707 VMX_SETUP_SELREG(SS, pCache);
708 VMX_SETUP_SELREG(CS, pCache);
709 VMX_SETUP_SELREG(DS, pCache);
710 VMX_SETUP_SELREG(FS, pCache);
711 VMX_SETUP_SELREG(GS, pCache);
712 VMX_SETUP_SELREG(LDTR, pCache);
713 VMX_SETUP_SELREG(TR, pCache);
714
715 /*
716 * Status code VMCS reads.
717 */
718 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
719 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
720 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
721 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
722 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
723 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
724 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
725 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
726 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
727
728 if (pVM->hwaccm.s.fNestedPaging)
729 {
730 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
731 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
732 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
733 }
734 else
735 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
736 } /* for each VMCPU */
737
738 /*
739 * Setup the right TLB function based on CPU capabilities.
740 */
741 if (pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID)
742 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBBoth;
743 else if (pVM->hwaccm.s.fNestedPaging)
744 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBEPT;
745 else if (pVM->hwaccm.s.vmx.fVPID)
746 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBVPID;
747 else
748 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBDummy;
749
750vmx_end:
751 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
752 return rc;
753}
754
755
756/**
757 * Sets the permission bits for the specified MSR.
758 *
759 * @param pVCpu Pointer to the VMCPU.
760 * @param ulMSR The MSR value.
761 * @param fRead Whether reading is allowed.
762 * @param fWrite Whether writing is allowed.
763 */
764static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
765{
766 unsigned ulBit;
767 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap;
768
769 /*
770 * Layout:
771 * 0x000 - 0x3ff - Low MSR read bits
772 * 0x400 - 0x7ff - High MSR read bits
773 * 0x800 - 0xbff - Low MSR write bits
774 * 0xc00 - 0xfff - High MSR write bits
775 */
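/* A set bit in the bitmap causes a VM-exit on the corresponding MSR access; a clear bit lets
   the guest read/write that MSR directly without an exit. */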
776 if (ulMSR <= 0x00001FFF)
777 {
778 /* Pentium-compatible MSRs */
779 ulBit = ulMSR;
780 }
781 else if ( ulMSR >= 0xC0000000
782 && ulMSR <= 0xC0001FFF)
783 {
784 /* AMD Sixth Generation x86 Processor MSRs */
785 ulBit = (ulMSR - 0xC0000000);
786 pMSRBitmap += 0x400;
787 }
788 else
789 {
790 AssertFailed();
791 return;
792 }
793
794 Assert(ulBit <= 0x1fff);
795 if (fRead)
796 ASMBitClear(pMSRBitmap, ulBit);
797 else
798 ASMBitSet(pMSRBitmap, ulBit);
799
800 if (fWrite)
801 ASMBitClear(pMSRBitmap + 0x800, ulBit);
802 else
803 ASMBitSet(pMSRBitmap + 0x800, ulBit);
804}
805
806
807/**
808 * Injects an event (trap or external interrupt).
809 *
810 * @returns VBox status code. Note that it may return VINF_EM_RESET to
811 * indicate a triple fault when injecting X86_XCPT_DF.
812 *
813 * @param pVM Pointer to the VM.
814 * @param pVCpu Pointer to the VMCPU.
815 * @param pCtx Pointer to the guest CPU Context.
816 * @param intInfo VMX interrupt info.
817 * @param cbInstr Opcode length of faulting instruction.
818 * @param errCode Error code (optional).
819 */
820static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
821{
822 int rc;
823 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
824
825#ifdef VBOX_WITH_STATISTICS
826 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
827#endif
828
829#ifdef VBOX_STRICT
830 if (iGate == 0xE)
831 {
832 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate,
833 (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
834 }
835 else if (iGate < 0x20)
836 {
837 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip,
838 errCode));
839 }
840 else
841 {
842 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
843 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
844 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
845 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
846 || pCtx->eflags.u32 & X86_EFL_IF);
847 }
848#endif
849
850 if ( CPUMIsGuestInRealModeEx(pCtx)
851 && pVM->hwaccm.s.vmx.pRealModeTSS)
852 {
853 RTGCPHYS GCPhysHandler;
854 uint16_t offset, ip;
855 RTSEL sel;
856
857 /*
858 * Injecting events doesn't work right with real mode emulation.
859 * (#GP if we try to inject external hardware interrupts)
860 * Inject the interrupt or trap directly instead.
861 *
862 * ASSUMES no access handlers for the bits we read or write below (should be safe).
863 */
864 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
865
866 /*
867 * Check if the interrupt handler is present.
868 */
869 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
870 {
871 Log(("IDT cbIdt violation\n"));
872 if (iGate != X86_XCPT_DF)
873 {
874 uint32_t intInfo2;
875
876 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
877 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
878 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
879 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
880
881 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
882 }
883 Log(("Triple fault -> reset the VM!\n"));
884 return VINF_EM_RESET;
885 }
886 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
887 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
888 || iGate == 4)
889 {
890 ip = pCtx->ip + cbInstr;
891 }
892 else
893 ip = pCtx->ip;
894
895 /*
896 * Read the selector:offset pair of the interrupt handler.
897 */
898 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
899 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
900 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
901
902 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
903
904 /*
905 * Construct the stack frame.
906 */
907 /** @todo Check stack limit. */
908 pCtx->sp -= 2;
909 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->eflags.u));
910 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
911 pCtx->sp -= 2;
912 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->cs.Sel));
913 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
914 pCtx->sp -= 2;
915 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss.Sel, pCtx->sp, ip));
916 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
917
918 /*
919 * Update the CPU state for executing the handler.
920 */
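/* In real mode the hidden CS base is simply the selector shifted left by 4 bits. */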
921 pCtx->rip = offset;
922 pCtx->cs.Sel = sel;
923 pCtx->cs.u64Base = sel << 4;
924 pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
925
926 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
927 return VINF_SUCCESS;
928 }
929
930 /*
931 * Set event injection state.
932 */
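/* VM-entry interruption-information format: bits 7:0 hold the vector, bits 10:8 the event
   type, bit 11 the 'deliver error code' flag and bit 31 the 'valid' flag set below. */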
933 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
934 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
935 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
936
937 AssertRC(rc);
938 return rc;
939}
940
941
942/**
943 * Checks for pending guest interrupts and injects them.
944 *
945 * @returns VBox status code.
946 * @param pVM Pointer to the VM.
947 * @param pVCpu Pointer to the VMCPU.
948 * @param pCtx Pointer to the guest CPU context.
949 */
950static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
951{
952 int rc;
953
954 /*
955 * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely).
956 */
957 if (pVCpu->hwaccm.s.Event.fPending)
958 {
959 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo,
960 pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
961 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
962 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
963 AssertRC(rc);
964
965 pVCpu->hwaccm.s.Event.fPending = false;
966 return VINF_SUCCESS;
967 }
968
969 /*
970 * If an active trap is already pending, we must forward it first!
971 */
972 if (!TRPMHasTrap(pVCpu))
973 {
974 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
975 {
976 RTGCUINTPTR intInfo;
977
978 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
979
980 intInfo = X86_XCPT_NMI;
981 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
982 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
983
984 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
985 AssertRC(rc);
986
987 return VINF_SUCCESS;
988 }
989
990 /** @todo SMI interrupts. */
991
992 /*
993 * When external interrupts are pending, we should exit the VM when IF is set.
994 */
995 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
996 {
997 if (!(pCtx->eflags.u32 & X86_EFL_IF))
998 {
999 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
1000 {
1001 LogFlow(("Enable irq window exit!\n"));
1002 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
1003 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1004 AssertRC(rc);
1005 }
1006 /* else nothing to do but wait */
1007 }
1008 else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1009 {
1010 uint8_t u8Interrupt;
1011
1012 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
1013 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu,
1014 u8Interrupt, u8Interrupt, rc, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
1015 if (RT_SUCCESS(rc))
1016 {
1017 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
1018 AssertRC(rc);
1019 }
1020 else
1021 {
1022 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
1023 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
1024 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
1025 /* Just continue */
1026 }
1027 }
1028 else
1029 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
1030 }
1031 }
1032
1033#ifdef VBOX_STRICT
1034 if (TRPMHasTrap(pVCpu))
1035 {
1036 uint8_t u8Vector;
1037 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
1038 AssertRC(rc);
1039 }
1040#endif
1041
1042 if ( (pCtx->eflags.u32 & X86_EFL_IF)
1043 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1044 && TRPMHasTrap(pVCpu)
1045 )
1046 {
1047 uint8_t u8Vector;
1048 TRPMEVENT enmType;
1049 RTGCUINTPTR intInfo;
1050 RTGCUINT errCode;
1051
1052 /*
1053 * If a new event is pending, dispatch it now.
1054 */
1055 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
1056 AssertRC(rc);
1057 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
1058 Assert(enmType != TRPM_SOFTWARE_INT);
1059
1060 /*
1061 * Clear the pending trap.
1062 */
1063 rc = TRPMResetTrap(pVCpu);
1064 AssertRC(rc);
1065
1066 intInfo = u8Vector;
1067 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1068
1069 if (enmType == TRPM_TRAP)
1070 {
1071 switch (u8Vector)
1072 {
1073 case X86_XCPT_DF:
1074 case X86_XCPT_TS:
1075 case X86_XCPT_NP:
1076 case X86_XCPT_SS:
1077 case X86_XCPT_GP:
1078 case X86_XCPT_PF:
1079 case X86_XCPT_AC:
1080 {
1081 /* Valid error codes. */
1082 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1083 break;
1084 }
1085
1086 default:
1087 break;
1088 }
1089
1090 if ( u8Vector == X86_XCPT_BP
1091 || u8Vector == X86_XCPT_OF)
1092 {
1093 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1094 }
1095 else
1096 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1097 }
1098 else
1099 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1100
1101 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
1102 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1103 AssertRC(rc);
1104 } /* if (interrupts can be dispatched) */
1105
1106 return VINF_SUCCESS;
1107}
1108
1109
1110/**
1111 * Saves the host state into the VMCS.
1112 *
1113 * @returns VBox status code.
1114 * @param pVM Pointer to the VM.
1115 * @param pVCpu Pointer to the VMCPU.
1116 */
1117VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1118{
1119 int rc = VINF_SUCCESS;
1120 NOREF(pVM);
1121
1122 /*
1123 * Host CPU Context.
1124 */
1125 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
1126 {
1127 RTIDTR idtr;
1128 RTGDTR gdtr;
1129 RTSEL SelTR;
1130 PCX86DESCHC pDesc;
1131 uintptr_t trBase;
1132 RTSEL cs;
1133 RTSEL ss;
1134 uint64_t cr3;
1135
1136 /*
1137 * Control registers.
1138 */
1139 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1140 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1141#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1142 if (VMX_IS_64BIT_HOST_MODE())
1143 {
1144 cr3 = hwaccmR0Get64bitCR3();
1145 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1146 }
1147 else
1148#endif
1149 {
1150 cr3 = ASMGetCR3();
1151 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1152 }
1153 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1154 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1155 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1156 AssertRC(rc);
1157
1158 /*
1159 * Selector registers.
1160 */
1161#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1162 if (VMX_IS_64BIT_HOST_MODE())
1163 {
1164 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1165 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1166 }
1167 else
1168 {
1169 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1170 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1171 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1172 }
1173#else
1174 cs = ASMGetCS();
1175 ss = ASMGetSS();
1176#endif
1177 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1178 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1179 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1180 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1181 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1182 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1183#if HC_ARCH_BITS == 32
1184 if (!VMX_IS_64BIT_HOST_MODE())
1185 {
1186 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1187 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1188 }
1189#endif
1190 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1191 SelTR = ASMGetTR();
1192 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1193 AssertRC(rc);
1194 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetCS()));
1195 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1196 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1197 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1198 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1199 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1200 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1201
1202 /*
1203 * GDTR & IDTR.
1204 */
1205#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1206 if (VMX_IS_64BIT_HOST_MODE())
1207 {
1208 X86XDTR64 gdtr64, idtr64;
1209 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
1210 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1211 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1212 AssertRC(rc);
1213 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1214 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1215 gdtr.cbGdt = gdtr64.cb;
1216 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1217 }
1218 else
1219#endif
1220 {
1221 ASMGetGDTR(&gdtr);
1222 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1223 ASMGetIDTR(&idtr);
1224 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1225 AssertRC(rc);
1226 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1227 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1228 }
1229
1230 /*
1231 * Save the base address of the TR selector.
1232 */
1233 if (SelTR > gdtr.cbGdt)
1234 {
1235 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1236 return VERR_VMX_INVALID_HOST_STATE;
1237 }
1238
1239 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
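/* X86_SEL_MASK strips the RPL and TI bits so the selector can be used as a byte offset into
   the GDT when looking up the TR descriptor. */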
1240#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1241 if (VMX_IS_64BIT_HOST_MODE())
1242 {
1243 uint64_t trBase64 = X86DESC64_BASE(*(PX86DESC64)pDesc);
1244 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1245 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1246 AssertRC(rc);
1247 }
1248 else
1249#endif
1250 {
1251#if HC_ARCH_BITS == 64
1252 trBase = X86DESC64_BASE(*pDesc);
1253#else
1254 trBase = X86DESC_BASE(*pDesc);
1255#endif
1256 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1257 AssertRC(rc);
1258 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1259 }
1260
1261 /*
1262 * FS base and GS base.
1263 */
1264#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1265 if (VMX_IS_64BIT_HOST_MODE())
1266 {
1267 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1268 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1269 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1270 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1271 }
1272#endif
1273 AssertRC(rc);
1274
1275 /*
1276 * Sysenter MSRs.
1277 */
1278 /** @todo expensive!! */
1279 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1280 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1281#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1282 if (VMX_IS_64BIT_HOST_MODE())
1283 {
1284 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1285 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1286 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1287 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1288 }
1289 else
1290 {
1291 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1292 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1293 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1294 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1295 }
1296#elif HC_ARCH_BITS == 32
1297 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1298 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1299 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1300 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1301#else
1302 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1303 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1304 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1305 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1306#endif
1307 AssertRC(rc);
1308
1309
1310#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1311 /*
1312 * Store all host MSRs in the VM-Exit load area, so they will be reloaded after
1313 * the world switch back to the host.
1314 */
1315 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR;
1316 unsigned idxMsr = 0;
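/* Each entry below is a VMXMSR {index, reserved, value} triplet; the number of entries filled
   in is written to VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT at the end, so the CPU reloads these
   host MSRs automatically on every VM-exit. */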
1317
1318 /*
1319 * Check if the EFER MSR is present.
1320 */
1321 if (ASMCpuId_EDX(0x80000001) & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
1322 {
1323 if (ASMCpuId_EDX(0x80000001) & X86_CPUID_EXT_FEATURE_EDX_SYSCALL)
1324 {
1325 pMsr->u32IndexMSR = MSR_K6_STAR;
1326 pMsr->u32Reserved = 0;
1327 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1328 pMsr++; idxMsr++;
1329 }
1330
1331 pMsr->u32IndexMSR = MSR_K6_EFER;
1332 pMsr->u32Reserved = 0;
1333# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1334 if (CPUMIsGuestInLongMode(pVCpu))
1335 {
1336 /* Must match the EFER value in our 64 bits switcher. */
1337 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1338 }
1339 else
1340# endif
1341 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1342 pMsr++; idxMsr++;
1343 }
1344
1345# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1346 if (VMX_IS_64BIT_HOST_MODE())
1347 {
1348 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1349 pMsr->u32Reserved = 0;
1350 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1351 pMsr++; idxMsr++;
1352 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1353 pMsr->u32Reserved = 0;
1354 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1355 pMsr++; idxMsr++;
1356 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1357 pMsr->u32Reserved = 0;
1358 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1359 pMsr++; idxMsr++;
1360 }
1361# endif
1362 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1363 AssertRC(rc);
1364#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1365
1366 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1367 }
1368 return rc;
1369}
1370
1371
1372/**
1373 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1374 * guest operates in PAE mode.
1375 *
1376 * @returns VBox status code.
1377 * @param pVCpu Pointer to the VMCPU.
1378 * @param pCtx Pointer to the guest CPU context.
1379 */
1380static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1381{
1382 if (CPUMIsGuestInPAEModeEx(pCtx))
1383 {
1384 X86PDPE aPdpes[4];
1385 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1386 AssertRCReturn(rc, rc);
1387
1388 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1389 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1390 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1391 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1392 }
1393 return VINF_SUCCESS;
1394}
1395
1396
1397/**
1398 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1399 * guest operates in PAE mode.
1400 *
1401 * @returns VBox status code.
1402 * @param pVCpu Pointer to the VM CPU.
1403 * @param pCtx Pointer to the guest CPU context.
1404 *
1405 * @remarks Tell PGM about CR3 changes before calling this helper.
1406 */
1407static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1408{
1409 if (CPUMIsGuestInPAEModeEx(pCtx))
1410 {
1411 int rc;
1412 X86PDPE aPdpes[4];
1413 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1414 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1415 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1416 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1417
1418 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1419 AssertRCReturn(rc, rc);
1420 }
1421 return VINF_SUCCESS;
1422}
1423
1424
1425/**
1426 * Updates the exception bitmap according to the current CPU state.
1427 *
1428 * @param pVM Pointer to the VM.
1429 * @param pVCpu Pointer to the VMCPU.
1430 * @param pCtx Pointer to the guest CPU context.
1431 */
1432static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1433{
1434 uint32_t u32TrapMask;
1435 Assert(pCtx);
1436
1437 /*
1438 * Set up a mask for intercepting traps.
1439 */
1440 /** @todo Do we really need to always intercept #DB? */
1441 u32TrapMask = RT_BIT(X86_XCPT_DB)
1442 | RT_BIT(X86_XCPT_NM)
1443#ifdef VBOX_ALWAYS_TRAP_PF
1444 | RT_BIT(X86_XCPT_PF)
1445#endif
1446#ifdef VBOX_STRICT
1447 | RT_BIT(X86_XCPT_BP)
1448 | RT_BIT(X86_XCPT_DB)
1449 | RT_BIT(X86_XCPT_DE)
1450 | RT_BIT(X86_XCPT_NM)
1451 | RT_BIT(X86_XCPT_UD)
1452 | RT_BIT(X86_XCPT_NP)
1453 | RT_BIT(X86_XCPT_SS)
1454 | RT_BIT(X86_XCPT_GP)
1455 | RT_BIT(X86_XCPT_MF)
1456#endif
1457 ;
1458
1459 /*
1460 * Without nested paging, #PF must be intercepted to implement shadow paging.
1461 */
1462 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1463 if (!pVM->hwaccm.s.fNestedPaging)
1464 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1465
1466 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
1467 if (!(pCtx->cr0 & X86_CR0_NE))
1468 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1469
1470#ifdef VBOX_STRICT
1471 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1472#endif
1473
1474 /*
1475 * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise).
1476 */
1477 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1478 if ( CPUMIsGuestInRealModeEx(pCtx)
1479 && pVM->hwaccm.s.vmx.pRealModeTSS)
1480 {
1481 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1482 | RT_BIT(X86_XCPT_DB)
1483 | RT_BIT(X86_XCPT_NMI)
1484 | RT_BIT(X86_XCPT_BP)
1485 | RT_BIT(X86_XCPT_OF)
1486 | RT_BIT(X86_XCPT_BR)
1487 | RT_BIT(X86_XCPT_UD)
1488 | RT_BIT(X86_XCPT_DF)
1489 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1490 | RT_BIT(X86_XCPT_TS)
1491 | RT_BIT(X86_XCPT_NP)
1492 | RT_BIT(X86_XCPT_SS)
1493 | RT_BIT(X86_XCPT_GP)
1494 | RT_BIT(X86_XCPT_MF)
1495 | RT_BIT(X86_XCPT_AC)
1496 | RT_BIT(X86_XCPT_MC)
1497 | RT_BIT(X86_XCPT_XF)
1498 ;
1499 }
1500
1501 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1502 AssertRC(rc);
1503}
1504
1505
1506/**
1507 * Loads a minimal guest state.
1508 *
1509 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1510 *
1511 * @param pVM Pointer to the VM.
1512 * @param pVCpu Pointer to the VMCPU.
1513 * @param pCtx Pointer to the guest CPU context.
1514 */
1515VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1516{
1517 int rc;
1518 X86EFLAGS eflags;
1519
1520 Assert(!(pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_ALL_GUEST));
1521
1522 /*
1523 * Load EIP, ESP and EFLAGS.
1524 */
1525 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1526 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1527 AssertRC(rc);
1528
1529 /*
1530 * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1.
1531 */
1532 eflags = pCtx->eflags;
1533 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1534 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1535
1536 /*
1537 * Check if we're doing real mode emulation using v86 mode.
1538 */
1539 if ( CPUMIsGuestInRealModeEx(pCtx)
1540 && pVM->hwaccm.s.vmx.pRealModeTSS)
1541 {
1542 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1543
1544 eflags.Bits.u1VM = 1;
1545 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1546 }
1547 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1548 AssertRC(rc);
1549}
1550
1551
1552/**
1553 * Loads the guest state.
1554 *
1555 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1556 *
1557 * @returns VBox status code.
1558 * @param pVM Pointer to the VM.
1559 * @param pVCpu Pointer to the VMCPU.
1560 * @param pCtx Pointer to the guest CPU context.
1561 */
1562VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1563{
1564 int rc = VINF_SUCCESS;
1565 RTGCUINTPTR val;
1566
1567 /*
1568 * VMX_VMCS_CTRL_ENTRY_CONTROLS
1569 * Set required bits to one and zero according to the MSR capabilities.
1570 */
1571 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1572
1573 /*
1574 * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR).
1575 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs
1576 */
1577 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1578
1579 if (CPUMIsGuestInLongModeEx(pCtx))
1580 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1581 /* else Must be zero when AMD64 is not available. */
1582
1583 /*
1584 * Mask away the bits that the CPU doesn't support.
1585 */
1586 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1587 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1588 AssertRC(rc);
1589
1590 /*
1591 * VMX_VMCS_CTRL_EXIT_CONTROLS
1592 * Set required bits to one and zero according to the MSR capabilities.
1593 */
1594 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1595
1596 /*
1597 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR)
1598 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs
1599 */
1600 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1601
1602#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1603 if (VMX_IS_64BIT_HOST_MODE())
1604 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1605 /* else Must be zero when AMD64 is not available. */
1606#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1607 if (CPUMIsGuestInLongModeEx(pCtx))
1608 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1609 else
1610 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1611#endif
1612 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1613
1614 /*
1615 * Don't acknowledge external interrupts on VM-exit.
1616 */
1617 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1618 AssertRC(rc);
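    /*
     * Sketch of the capability-adjustment pattern used for the entry and exit
     * controls above (illustrative formula, not a real VBox helper):
     *     final = (disallowed0 | desired) & allowed1;
     * i.e. start with the bits the CPU insists on being 1, add what we want,
     * then strip anything the CPU does not allow to be 1.
     */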
1619
1620 /*
1621 * Guest CPU context: ES, CS, SS, DS, FS, GS.
1622 */
1623 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1624 {
1625 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1626 {
1627 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1628 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1629 {
1630 /*
1631 * Correct weird requirements for switching to protected mode.
1632 */
1633 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1634 && enmGuestMode >= PGMMODE_PROTECTED)
1635 {
1636#ifdef VBOX_WITH_REM
1637 /*
1638 * Flush the recompiler code cache as it's not unlikely the guest will rewrite code
1639 * it will later execute in real mode (OpenBSD 4.0 is one such example)
1640 */
1641 REMFlushTBs(pVM);
1642#endif
1643
1644 /*
1645 * DPL of all hidden selector registers must match the current CPL (0).
1646 */
1647 pCtx->cs.Attr.n.u2Dpl = 0;
1648 pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1649
1650 pCtx->ds.Attr.n.u2Dpl = 0;
1651 pCtx->es.Attr.n.u2Dpl = 0;
1652 pCtx->fs.Attr.n.u2Dpl = 0;
1653 pCtx->gs.Attr.n.u2Dpl = 0;
1654 pCtx->ss.Attr.n.u2Dpl = 0;
1655 }
1656 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1657 }
1658 else if ( CPUMIsGuestInRealModeEx(pCtx)
1659 && pCtx->cs.u64Base == 0xffff0000)
1660 {
1661 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1662 pCtx->cs.u64Base = 0xf0000;
1663 pCtx->cs.Sel = 0xf000;
1664 }
1665 }
1666
1667 VMX_WRITE_SELREG(ES, es);
1668 AssertRC(rc);
1669
1670 VMX_WRITE_SELREG(CS, cs);
1671 AssertRC(rc);
1672
1673 VMX_WRITE_SELREG(SS, ss);
1674 AssertRC(rc);
1675
1676 VMX_WRITE_SELREG(DS, ds);
1677 AssertRC(rc);
1678
1679 VMX_WRITE_SELREG(FS, fs);
1680 AssertRC(rc);
1681
1682 VMX_WRITE_SELREG(GS, gs);
1683 AssertRC(rc);
1684 }
1685
1686 /*
1687 * Guest CPU context: LDTR.
1688 */
1689 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1690 {
1691 if (pCtx->ldtr.Sel == 0)
1692 {
1693 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1694 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1695 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1696 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1697 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1698 }
1699 else
1700 {
1701 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr.Sel);
1702 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit);
1703 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtr.u64Base);
1704 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtr.Attr.u);
1705 }
1706 AssertRC(rc);
1707 }
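    /*
     * Worked example (illustrative): the 0x82 access-rights value used for a
     * null LDTR above decodes as P=1 (bit 7) with system descriptor type 2
     * (LDT); as noted, plain 0 or just 0x02 makes VMLAUNCH fail.
     */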
1708
1709 /*
1710 * Guest CPU context: TR.
1711 */
1712 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1713 {
1714 /*
1715 * Real mode emulation using v86 mode with CR4.VME (interrupt redirection
1716 * using the int bitmap in the TSS).
1717 */
1718 if ( CPUMIsGuestInRealModeEx(pCtx)
1719 && pVM->hwaccm.s.vmx.pRealModeTSS)
1720 {
1721 RTGCPHYS GCPhys;
1722
1723 /* We convert it here every time as PCI regions could be reconfigured. */
1724 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1725 AssertRC(rc);
1726
1727 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1728 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1729 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1730
1731 X86DESCATTR attr;
1732
1733 attr.u = 0;
1734 attr.n.u1Present = 1;
1735 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1736 val = attr.u;
1737 }
1738 else
1739 {
1740 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr.Sel);
1741 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->tr.u32Limit);
1742 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->tr.u64Base);
1743
1744 val = pCtx->tr.Attr.u;
1745
1746 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
1747 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1748 {
1749 if (val & 0xf)
1750 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1751 else
1752 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1753 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1754 }
1755 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("%#x\n", val));
1756 }
1757 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1758 AssertRC(rc);
1759 }
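    /*
     * Worked example (illustrative): forcing the busy bit above turns an
     * available 386 TSS type (9) into the busy type (0xB), since the busy bit
     * is bit 1 of the 4-bit system descriptor type.
     */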
1760
1761 /*
1762 * Guest CPU context: GDTR.
1763 */
1764 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1765 {
1766 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1767 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1768 AssertRC(rc);
1769 }
1770
1771 /*
1772 * Guest CPU context: IDTR.
1773 */
1774 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1775 {
1776 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1777 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1778 AssertRC(rc);
1779 }
1780
1781 /*
1782 * Sysenter MSRs.
1783 */
1784 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
1785 {
1786 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1787 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1788 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1789 AssertRC(rc);
1790 }
1791
1792 /*
1793 * Guest CPU context: Control registers.
1794 */
1795 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1796 {
1797 val = pCtx->cr0;
1798 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1799 Log2(("Guest CR0-shadow %08x\n", val));
1800 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1801 {
1802 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1803 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1804 }
1805 else
1806 {
1807 /** @todo check if we support the old style mess correctly. */
1808 if (!(val & X86_CR0_NE))
1809 Log(("Forcing X86_CR0_NE!!!\n"));
1810
1811 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1812 }
1813 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1814 if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1815 val |= X86_CR0_PE | X86_CR0_PG;
1816
1817 if (pVM->hwaccm.s.fNestedPaging)
1818 {
1819 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1820 {
1821 /* Disable CR3 read/write monitoring as we don't need it for EPT. */
1822 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1823 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1824 }
1825 else
1826 {
1827 /* Reenable CR3 read/write monitoring as our identity mapped page table is active. */
1828 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1829 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1830 }
1831 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1832 AssertRC(rc);
1833 }
1834 else
1835 {
1836 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1837 val |= X86_CR0_WP;
1838 }
1839
1840 /* Always enable caching. */
1841 val &= ~(X86_CR0_CD|X86_CR0_NW);
1842
1843 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1844 Log2(("Guest CR0 %08x\n", val));
1845
1846 /*
1847 * CR0 flags owned by the host; if the guest attempts to change them, the VM will exit.
1848 */
1849 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1850 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1851 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1852 | X86_CR0_CD /* Bit not restored during VM-exit! */
1853 | X86_CR0_NW /* Bit not restored during VM-exit! */
1854 | X86_CR0_NE;
1855
1856 /*
1857 * When the guest's FPU state is active, then we no longer care about the FPU related bits.
1858 */
1859 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1860 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1861
1862 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1863
1864 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1865 Log2(("Guest CR0-mask %08x\n", val));
1866 AssertRC(rc);
1867 }
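    /*
     * Sketch of what the CR0 guest/host mask above means (illustrative formula):
     * bits set in the mask are host owned, so the value the guest observes is
     * reassembled as
     *     guest CR0 = (read shadow & mask) | (VMCS CR0 & ~mask)
     * which is exactly what VMXR0SaveGuestState does on the way back.
     */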
1868
1869 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1870 {
1871 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1872 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1873 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1874 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1875
1876 if (!pVM->hwaccm.s.fNestedPaging)
1877 {
1878 switch(pVCpu->hwaccm.s.enmShadowMode)
1879 {
1880 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1881 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1882 case PGMMODE_32_BIT: /* 32-bit paging. */
1883 val &= ~X86_CR4_PAE;
1884 break;
1885
1886 case PGMMODE_PAE: /* PAE paging. */
1887 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1888 /** Must use PAE paging as we could use physical memory > 4 GB */
1889 val |= X86_CR4_PAE;
1890 break;
1891
1892 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1893 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1894#ifdef VBOX_ENABLE_64_BITS_GUESTS
1895 break;
1896#else
1897 AssertFailed();
1898 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1899#endif
1900 default: /* shut up gcc */
1901 AssertFailed();
1902 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1903 }
1904 }
1905 else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1906 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1907 {
1908 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1909 val |= X86_CR4_PSE;
1910 /* Our identity mapping is a 32-bit page directory. */
1911 val &= ~X86_CR4_PAE;
1912 }
1913
1914 /*
1915 * Turn off VME if we're in emulated real mode.
1916 */
1917 if ( CPUMIsGuestInRealModeEx(pCtx)
1918 && pVM->hwaccm.s.vmx.pRealModeTSS)
1919 {
1920 val &= ~X86_CR4_VME;
1921 }
1922
1923 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1924 Log2(("Guest CR4 %08x\n", val));
1925
1926 /*
1927 * CR4 flags owned by the host; if the guest attempts to change them, the VM will exit.
1928 */
1929 val = 0
1930 | X86_CR4_VME
1931 | X86_CR4_PAE
1932 | X86_CR4_PGE
1933 | X86_CR4_PSE
1934 | X86_CR4_VMXE;
1935 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1936
1937 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1938 Log2(("Guest CR4-mask %08x\n", val));
1939 AssertRC(rc);
1940 }
1941
1942#if 0
1943 /* Enable single stepping if requested and CPU supports it. */
1944 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
1945 if (DBGFIsStepping(pVCpu))
1946 {
1947 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
1948 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1949 AssertRC(rc);
1950 }
1951#endif
1952
1953 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1954 {
1955 if (pVM->hwaccm.s.fNestedPaging)
1956 {
1957 Assert(PGMGetHyperCR3(pVCpu));
1958 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
1959
1960 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1961 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
1962 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1963 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
1964
1965 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1966 AssertRC(rc);
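            /*
             * Worked example (layout per the EPT pointer format; the address is
             * illustrative): an EPTP built from a PML4 at 0x12345000 with
             * write-back memory type (6) and a 4-level page walk (encoded as 3
             * in bits 5:3) reads 0x12345000 | 6 | (3 << 3) = 0x1234501e.
             */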
1967
1968 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1969 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1970 {
1971 RTGCPHYS GCPhys;
1972
1973 /* We convert it here every time as PCI regions could be reconfigured. */
1974 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1975 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable));
1976
1977 /*
1978 * We use our identity mapping page table here as we need to map guest virtual to
1979 * guest physical addresses; EPT will take care of the translation to host physical addresses.
1980 */
1981 val = GCPhys;
1982 }
1983 else
1984 {
1985 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1986 val = pCtx->cr3;
1987 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
1988 AssertRCReturn(rc, rc);
1989 }
1990 }
1991 else
1992 {
1993 val = PGMGetHyperCR3(pVCpu);
1994 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
1995 }
1996
1997 /* Save our shadow CR3 register. */
1998 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
1999 AssertRC(rc);
2000 }
2001
2002 /*
2003 * Guest CPU context: Debug registers.
2004 */
2005 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
2006 {
2007 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
2008 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
2009
2010 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2011 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2012 pCtx->dr[7] |= 0x400; /* must be one */
2013
2014 /* Resync DR7 */
2015 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
2016 AssertRC(rc);
2017
2018#ifdef DEBUG
2019 /* Sync the hypervisor debug state now if any breakpoint is armed. */
2020 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
2021 && !CPUMIsHyperDebugStateActive(pVCpu)
2022 && !DBGFIsStepping(pVCpu))
2023 {
2024 /* Save the host and load the hypervisor debug state. */
2025 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2026 AssertRC(rc);
2027
2028 /* DRx intercepts remain enabled. */
2029
2030 /* Override dr7 with the hypervisor value. */
2031 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
2032 AssertRC(rc);
2033 }
2034 else
2035#endif
2036 /* Sync the debug state now if any breakpoint is armed. */
2037 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
2038 && !CPUMIsGuestDebugStateActive(pVCpu)
2039 && !DBGFIsStepping(pVCpu))
2040 {
2041 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
2042
2043 /* Disable DRx move intercepts. */
2044 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2045 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2046 AssertRC(rc);
2047
2048 /* Save the host and load the guest debug state. */
2049 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2050 AssertRC(rc);
2051 }
2052
2053 /* IA32_DEBUGCTL MSR. */
2054 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
2055 AssertRC(rc);
2056
2057 /** @todo do we really ever need this? */
2058 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
2059 AssertRC(rc);
2060 }
2061
2062 /*
2063 * 64-bit guest mode.
2064 */
2065 if (CPUMIsGuestInLongModeEx(pCtx))
2066 {
2067#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
2068 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2069#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2070 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
2071#else
2072# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2073 if (!pVM->hwaccm.s.fAllow64BitGuests)
2074 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2075# endif
2076 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
2077#endif
2078 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
2079 {
2080 /* Update these as wrmsr might have changed them. */
2081 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fs.u64Base);
2082 AssertRC(rc);
2083 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gs.u64Base);
2084 AssertRC(rc);
2085 }
2086 }
2087 else
2088 {
2089 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
2090 }
2091
2092 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
2093
2094#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2095 /* Store all guest MSRs in the VM-Entry load area, so they will be loaded during the world switch. */
2096 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2097 unsigned idxMsr = 0;
2098
2099 uint32_t ulEdx;
2100 uint32_t ulTemp;
2101 CPUMGetGuestCpuId(pVCpu, 0x80000001, &ulTemp, &ulTemp, &ulTemp, &ulEdx);
2102 /* EFER MSR present? */
2103 if (ulEdx & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2104 {
2105 pMsr->u32IndexMSR = MSR_K6_EFER;
2106 pMsr->u32Reserved = 0;
2107 pMsr->u64Value = pCtx->msrEFER;
2108 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
2109 if (!CPUMIsGuestInLongModeEx(pCtx))
2110 pMsr->u64Value &= ~(MSR_K6_EFER_LMA | MSR_K6_EFER_LME);
2111 pMsr++; idxMsr++;
2112
2113 if (ulEdx & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)
2114 {
2115 pMsr->u32IndexMSR = MSR_K8_LSTAR;
2116 pMsr->u32Reserved = 0;
2117 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
2118 pMsr++; idxMsr++;
2119 pMsr->u32IndexMSR = MSR_K6_STAR;
2120 pMsr->u32Reserved = 0;
2121 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
2122 pMsr++; idxMsr++;
2123 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
2124 pMsr->u32Reserved = 0;
2125 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
2126 pMsr++; idxMsr++;
2127 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
2128 pMsr->u32Reserved = 0;
2129 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
2130 pMsr++; idxMsr++;
2131 }
2132 }
2133 pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr;
2134
2135 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
2136 AssertRC(rc);
2137
2138 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
2139 AssertRC(rc);
2140#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2141
2142 bool fOffsettedTsc;
2143 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
2144 {
2145 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
2146
2147 /* Make sure the returned values have sane upper and lower boundaries. */
2148 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
2149
2150 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
2151 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
2152
2153 cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift;
2154 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
2155 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
2156 AssertRC(rc);
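        /*
         * Worked example (numbers are illustrative): on a 2 GHz TSC with a
         * preemption-timer shift of 5, the deadline above is clamped to the
         * range [2e9/2048, 2e9/64] = [~976k, ~31.3M] TSC ticks and the
         * programmed timer value is the clamped deadline >> 5.
         */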
2157 }
2158 else
2159 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
2160
2161 if (fOffsettedTsc)
2162 {
2163 uint64_t u64CurTSC = ASMReadTSC();
2164 if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
2165 {
2166 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
2167 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset);
2168 AssertRC(rc);
2169
2170 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2171 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2172 AssertRC(rc);
2173 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
2174 }
2175 else
2176 {
2177 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
2178 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
2179 pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset,
2180 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset,
2181 TMCpuTickGet(pVCpu)));
2182 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2183 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2184 AssertRC(rc);
2185 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
2186 }
2187 }
2188 else
2189 {
2190 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2191 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2192 AssertRC(rc);
2193 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
2194 }
2195
2196 /* Done with the major changes */
2197 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
2198
2199 /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */
2200 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2201 return rc;
2202}
2203
2204
2205/**
2206 * Syncs back the guest state from VMCS.
2207 *
2208 * @returns VBox status code.
2209 * @param pVM Pointer to the VM.
2210 * @param pVCpu Pointer to the VMCPU.
2211 * @param pCtx Pointer to the guest CPU context.
2212 */
2213DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2214{
2215 RTGCUINTREG val, valShadow;
2216 RTGCUINTPTR uInterruptState;
2217 int rc;
2218
2219 /* First sync back EIP, ESP, and EFLAGS. */
2220 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
2221 AssertRC(rc);
2222 pCtx->rip = val;
2223 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
2224 AssertRC(rc);
2225 pCtx->rsp = val;
2226 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2227 AssertRC(rc);
2228 pCtx->eflags.u32 = val;
2229
2230 /* Take care of instruction fusing (sti, mov ss) */
2231 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2232 uInterruptState = val;
2233 if (uInterruptState != 0)
2234 {
2235 Assert(uInterruptState <= 2); /* only sti & mov ss */
2236 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2237 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2238 }
2239 else
2240 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
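    /*
     * For reference (per the VMX interruptibility-state encoding): value 1 means
     * blocking by STI and value 2 means blocking by MOV SS, which is why the
     * assertion above only accepts values up to 2 here.
     */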
2241
2242 /* Control registers. */
2243 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2244 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
2245 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
2246 CPUMSetGuestCR0(pVCpu, val);
2247
2248 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2249 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
2250 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
2251 CPUMSetGuestCR4(pVCpu, val);
2252
2253 /*
2254 * No reason to sync back the CRx registers; they can't be changed by the guest except in
2255 * the nested paging case, where CR3 & CR4 can be changed by the guest.
2256 */
2257 if ( pVM->hwaccm.s.fNestedPaging
2258 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2259 {
2260 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
2261
2262 /* Can be updated behind our back in the nested paging case. */
2263 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2264
2265 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
2266
2267 if (val != pCtx->cr3)
2268 {
2269 CPUMSetGuestCR3(pVCpu, val);
2270 PGMUpdateCR3(pVCpu, val);
2271 }
2272 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2273 AssertRCReturn(rc, rc);
2274 }
2275
2276 /* Sync back DR7. */
2277 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
2278 pCtx->dr[7] = val;
2279
2280 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2281 VMX_READ_SELREG(ES, es);
2282 VMX_READ_SELREG(SS, ss);
2283 VMX_READ_SELREG(CS, cs);
2284 VMX_READ_SELREG(DS, ds);
2285 VMX_READ_SELREG(FS, fs);
2286 VMX_READ_SELREG(GS, gs);
2287
2288 /* System MSRs */
2289 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2290 pCtx->SysEnter.cs = val;
2291 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
2292 pCtx->SysEnter.eip = val;
2293 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
2294 pCtx->SysEnter.esp = val;
2295
2296 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2297 VMX_READ_SELREG(LDTR, ldtr);
2298
2299 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2300 pCtx->gdtr.cbGdt = val;
2301 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
2302 pCtx->gdtr.pGdt = val;
2303
2304 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2305 pCtx->idtr.cbIdt = val;
2306 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
2307 pCtx->idtr.pIdt = val;
2308
2309 /* Real mode emulation using v86 mode. */
2310 if ( CPUMIsGuestInRealModeEx(pCtx)
2311 && pVM->hwaccm.s.vmx.pRealModeTSS)
2312 {
2313 /* Hide our emulation flags */
2314 pCtx->eflags.Bits.u1VM = 0;
2315
2316 /* Restore original IOPL setting as we always use 0. */
2317 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2318
2319 /* Force a TR resync every time in case we switch modes. */
2320 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
2321 }
2322 else
2323 {
2324 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2325 VMX_READ_SELREG(TR, tr);
2326 }
2327
2328#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2329 /* Save the possibly changed MSRs that we automatically restore and save during a world switch. */
2330 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++)
2331 {
2332 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2333 pMsr += i;
2334
2335 switch (pMsr->u32IndexMSR)
2336 {
2337 case MSR_K8_LSTAR:
2338 pCtx->msrLSTAR = pMsr->u64Value;
2339 break;
2340 case MSR_K6_STAR:
2341 pCtx->msrSTAR = pMsr->u64Value;
2342 break;
2343 case MSR_K8_SF_MASK:
2344 pCtx->msrSFMASK = pMsr->u64Value;
2345 break;
2346 case MSR_K8_KERNEL_GS_BASE:
2347 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2348 break;
2349 case MSR_K6_EFER:
2350 /* EFER can't be changed without causing a VM-exit. */
2351 /* Assert(pCtx->msrEFER == pMsr->u64Value); */
2352 break;
2353 default:
2354 AssertFailed();
2355 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2356 }
2357 }
2358#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2359 return VINF_SUCCESS;
2360}
2361
2362
2363/**
2364 * Dummy placeholder for TLB flush handling before VM-entry. Used in the case
2365 * where neither EPT nor VPID is supported by the CPU.
2366 *
2367 * @param pVM Pointer to the VM.
2368 * @param pVCpu Pointer to the VMCPU.
2369 */
2370static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2371{
2372 NOREF(pVM);
2373 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2374 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2375 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2376 return;
2377}
2378
2379
2380/**
2381 * Setup the tagged TLB for EPT+VPID.
2382 *
2383 * @param pVM Pointer to the VM.
2384 * @param pVCpu Pointer to the VMCPU.
2385 */
2386static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2387{
2388 PHMGLOBLCPUINFO pCpu;
2389
2390 Assert(pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID);
2391
2392 pCpu = HWACCMR0GetCurrentCpu();
2393
2394 /*
2395 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2396 * This can happen both for start & resume due to long jumps back to ring-3.
2397 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2398 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2399 */
2400 bool fNewASID = false;
2401 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2402 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2403 {
2404 pVCpu->hwaccm.s.fForceTLBFlush = true;
2405 fNewASID = true;
2406 }
2407
2408 /*
2409 * Check for explicit TLB shootdowns.
2410 */
2411 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2412 pVCpu->hwaccm.s.fForceTLBFlush = true;
2413
2414 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2415
2416 if (pVCpu->hwaccm.s.fForceTLBFlush)
2417 {
2418 if (fNewASID)
2419 {
2420 ++pCpu->uCurrentASID;
2421 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2422 {
2423 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2424 pCpu->cTLBFlushes++;
2425 pCpu->fFlushASIDBeforeUse = true;
2426 }
2427
2428 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2429 if (pCpu->fFlushASIDBeforeUse)
2430 {
2431 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2432#ifdef VBOX_WITH_STATISTICS
2433 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2434#endif
2435 }
2436 }
2437 else
2438 {
2439 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
2440 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2441 else
2442 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2443
2444#ifdef VBOX_WITH_STATISTICS
2445 /*
2446 * This is not terribly accurate (i.e. we don't have any StatFlushEPT counter). We currently count these
2447 * as ASID flushes too, better than including them under StatFlushTLBWorldSwitch.
2448 */
2449 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2450#endif
2451 }
2452
2453 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2454 pVCpu->hwaccm.s.fForceTLBFlush = false;
2455 }
2456 else
2457 {
2458 AssertMsg(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID,
2459 ("hwaccm->uCurrentASID=%lu hwaccm->cTLBFlushes=%lu cpu->uCurrentASID=%lu cpu->cTLBFlushes=%lu\n",
2460 pVCpu->hwaccm.s.uCurrentASID, pVCpu->hwaccm.s.cTLBFlushes,
2461 pCpu->uCurrentASID, pCpu->cTLBFlushes));
2462
2463 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2464 * not be executed. See hwaccmQueueInvlPage() where it is commented
2465 * out. Support individual entry flushing someday. */
2466 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2467 {
2468 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2469
2470 /*
2471 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2472 * as supported by the CPU.
2473 */
2474 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2475 {
2476 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2477 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2478 }
2479 else
2480 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2481 }
2482 else
2483 {
2484#ifdef VBOX_WITH_STATISTICS
2485 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2486#endif
2487 }
2488 }
2489 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2490 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2491
2492 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes,
2493 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2494 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID,
2495 ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2496 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID,
2497 ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2498
2499 /* Update VMCS with the VPID. */
2500 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2501 AssertRC(rc);
2502}
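
/*
 * Illustrative sketch of the per-host-CPU ASID rollover used by the tagged TLB
 * setup above (the helper below is an assumption for illustration, not a real
 * VBox function): when the counter reaches the CPU's maximum, restart at 1
 * (ASID 0 belongs to the host), bump the flush generation and remember that
 * the recycled ASID must be flushed before first use.
 */
#if 0 /* sketch */
static uint32_t vmxSketchNextAsid(uint32_t *puCurrentAsid, uint32_t cMaxAsids,
                                  uint32_t *pcFlushes, bool *pfFlushBeforeUse)
{
    if (++*puCurrentAsid >= cMaxAsids)
    {
        *puCurrentAsid    = 1;      /* Start over; ASID 0 is reserved for the host. */
        *pcFlushes       += 1;      /* New flush generation. */
        *pfFlushBeforeUse = true;   /* The reused ASID may still be tagged in the TLB. */
    }
    return *puCurrentAsid;
}
#endif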
2503
2504
2505/**
2506 * Setup the tagged TLB for EPT only.
2507 *
2508 * @returns VBox status code.
2509 * @param pVM Pointer to the VM.
2510 * @param pVCpu Pointer to the VMCPU.
2511 */
2512static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2513{
2514 PHMGLOBLCPUINFO pCpu;
2515
2516 Assert(pVM->hwaccm.s.fNestedPaging);
2517 Assert(!pVM->hwaccm.s.vmx.fVPID);
2518
2519 pCpu = HWACCMR0GetCurrentCpu();
2520
2521 /*
2522 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2523 * This can happen both for start & resume due to long jumps back to ring-3.
2524 * A change in the TLB flush count implies the host CPU is online after a suspend/resume.
2525 */
2526 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2527 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2528 {
2529 pVCpu->hwaccm.s.fForceTLBFlush = true;
2530 }
2531
2532 /*
2533 * Check for explicit TLB shootdown flushes.
2534 */
2535 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2536 pVCpu->hwaccm.s.fForceTLBFlush = true;
2537
2538 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2539 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2540
2541 if (pVCpu->hwaccm.s.fForceTLBFlush)
2542 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2543 else
2544 {
2545 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2546 * not be executed. See hwaccmQueueInvlPage() where it is commented
2547 * out. Support individual entry flushing someday. */
2548 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2549 {
2550 /*
2551 * We cannot flush individual entries without VPID support. Flush using EPT.
2552 */
2553 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2554 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2555 }
2556 }
2557 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2558 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2559
2560#ifdef VBOX_WITH_STATISTICS
2561 if (pVCpu->hwaccm.s.fForceTLBFlush)
2562 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2563 else
2564 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2565#endif
2566}
2567
2568
2569/**
2570 * Setup the tagged TLB for VPID.
2571 *
2572 * @returns VBox status code.
2573 * @param pVM Pointer to the VM.
2574 * @param pVCpu Pointer to the VMCPU.
2575 */
2576static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2577{
2578 PHMGLOBLCPUINFO pCpu;
2579
2580 Assert(pVM->hwaccm.s.vmx.fVPID);
2581 Assert(!pVM->hwaccm.s.fNestedPaging);
2582
2583 pCpu = HWACCMR0GetCurrentCpu();
2584
2585 /*
2586 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2587 * This can happen both for start & resume due to long jumps back to ring-3.
2588 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2589 * or the host CPU is online after a suspend/resume, so we cannot reuse the current ASID anymore.
2590 */
2591 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2592 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2593 {
2594 /* Force a TLB flush on VM entry. */
2595 pVCpu->hwaccm.s.fForceTLBFlush = true;
2596 }
2597
2598 /*
2599 * Check for explicit TLB shootdown flushes.
2600 */
2601 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2602 pVCpu->hwaccm.s.fForceTLBFlush = true;
2603
2604 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2605
2606 if (pVCpu->hwaccm.s.fForceTLBFlush)
2607 {
2608 ++pCpu->uCurrentASID;
2609 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2610 {
2611 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2612 pCpu->cTLBFlushes++;
2613 pCpu->fFlushASIDBeforeUse = true;
2614 }
2615 else
2616 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2617
2618 pVCpu->hwaccm.s.fForceTLBFlush = false;
2619 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2620 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2621 if (pCpu->fFlushASIDBeforeUse)
2622 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2623 }
2624 else
2625 {
2626 AssertMsg(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID,
2627 ("hwaccm->uCurrentASID=%lu hwaccm->cTLBFlushes=%lu cpu->uCurrentASID=%lu cpu->cTLBFlushes=%lu\n",
2628 pVCpu->hwaccm.s.uCurrentASID, pVCpu->hwaccm.s.cTLBFlushes,
2629 pCpu->uCurrentASID, pCpu->cTLBFlushes));
2630
2631 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2632 * not be executed. See hwaccmQueueInvlPage() where it is commented
2633 * out. Support individual entry flushing someday. */
2634 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2635 {
2636 /*
2637 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2638 * as supported by the CPU.
2639 */
2640 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2641 {
2642 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2643 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2644 }
2645 else
2646 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2647 }
2648 }
2649 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2650 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2651
2652 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2653 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2654 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2655
2656 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2657 AssertRC(rc);
2658
2659# ifdef VBOX_WITH_STATISTICS
2660 if (pVCpu->hwaccm.s.fForceTLBFlush)
2661 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2662 else
2663 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2664# endif
2665}
2666
2667
2668/**
2669 * Runs guest code in a VT-x VM.
2670 *
2671 * @returns VBox status code.
2672 * @param pVM Pointer to the VM.
2673 * @param pVCpu Pointer to the VMCPU.
2674 * @param pCtx Pointer to the guest CPU context.
2675 */
2676VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2677{
2678 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
2679 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1);
2680 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2);
2681
2682 VBOXSTRICTRC rc = VINF_SUCCESS;
2683 int rc2;
2684 RTGCUINTREG val;
2685 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2686 RTGCUINTREG instrError, cbInstr;
2687 RTGCUINTPTR exitQualification = 0;
2688 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2689 RTGCUINTPTR errCode, instrInfo;
2690 bool fSetupTPRCaching = false;
2691 uint64_t u64OldLSTAR = 0;
2692 uint8_t u8LastTPR = 0;
2693 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2694 unsigned cResume = 0;
2695#ifdef VBOX_STRICT
2696 RTCPUID idCpuCheck;
2697 bool fWasInLongMode = false;
2698#endif
2699#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2700 uint64_t u64LastTime = RTTimeMilliTS();
2701#endif
2702
2703 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || (pVCpu->hwaccm.s.vmx.pbVAPIC && pVM->hwaccm.s.vmx.pAPIC));
2704
2705 /*
2706 * Check if we need to use TPR shadowing.
2707 */
2708 if ( CPUMIsGuestInLongModeEx(pCtx)
2709 || ( ((pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || pVM->hwaccm.s.fTRPPatchingAllowed)
2710 && pVM->hwaccm.s.fHasIoApic)
2711 )
2712 {
2713 fSetupTPRCaching = true;
2714 }
2715
2716 Log2(("\nE"));
2717
2718#ifdef VBOX_STRICT
2719 {
2720 RTCCUINTREG val2;
2721
2722 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2);
2723 AssertRC(rc2);
2724 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2725
2726 /* allowed zero */
2727 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2728 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2729
2730 /* allowed one */
2731 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2732 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2733
2734 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2);
2735 AssertRC(rc2);
2736 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2737
2738 /*
2739 * Must be set according to the MSR, but can be cleared if nested paging is used.
2740 */
2741 if (pVM->hwaccm.s.fNestedPaging)
2742 {
2743 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2744 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2745 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2746 }
2747
2748 /* allowed zero */
2749 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2750 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2751
2752 /* allowed one */
2753 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2754 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2755
2756 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2);
2757 AssertRC(rc2);
2758 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2759
2760 /* allowed zero */
2761 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
2762 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2763
2764 /* allowed one */
2765 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2766 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2767
2768 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2);
2769 AssertRC(rc2);
2770 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2771
2772 /* allowed zero */
2773 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2774 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2775
2776 /* allowed one */
2777 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2778 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2779 }
2780 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2781#endif /* VBOX_STRICT */
2782
2783#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2784 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2785#endif
2786
2787 /*
2788 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
2789 */
2790ResumeExecution:
2791 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry))
2792 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x);
2793 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2794 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2795 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2796 Assert(!HWACCMR0SuspendPending());
2797 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2798 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2799
2800 /*
2801 * Safety precaution; looping for too long here can have a very bad effect on the host.
2802 */
2803 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
2804 {
2805 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2806 rc = VINF_EM_RAW_INTERRUPT;
2807 goto end;
2808 }
2809
2810 /*
2811 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
2812 */
2813 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2814 {
2815 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2816 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2817 {
2818 /*
2819 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2820 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2821 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
2822 * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
2823 */
2824 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2825 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2826 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2827 AssertRC(rc2);
2828 }
2829 }
2830 else
2831 {
2832 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2833 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2834 AssertRC(rc2);
2835 }
2836
2837#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2838 if (RT_UNLIKELY((cResume & 0xf) == 0))
2839 {
2840 uint64_t u64CurTime = RTTimeMilliTS();
2841
2842 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2843 {
2844 u64LastTime = u64CurTime;
2845 TMTimerPollVoid(pVM, pVCpu);
2846 }
2847 }
2848#endif
2849
2850 /*
2851 * Check for pending actions that force us to go back to ring-3.
2852 */
2853 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2854 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2855 {
2856 /* Check if a sync operation is pending. */
2857 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2858 {
2859 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2860 if (rc != VINF_SUCCESS)
2861 {
2862 AssertRC(VBOXSTRICTRC_VAL(rc));
2863 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2864 goto end;
2865 }
2866 }
2867
2868#ifdef DEBUG
2869 /* Intercept X86_XCPT_DB if stepping is enabled */
2870 if (!DBGFIsStepping(pVCpu))
2871#endif
2872 {
2873 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2874 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2875 {
2876 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2877 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2878 goto end;
2879 }
2880 }
2881
2882 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2883 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
2884 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
2885 {
2886 rc = VINF_EM_PENDING_REQUEST;
2887 goto end;
2888 }
2889
2890 /* Check if a pgm pool flush is in progress. */
2891 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2892 {
2893 rc = VINF_PGM_POOL_FLUSH_PENDING;
2894 goto end;
2895 }
2896
2897 /* Check if DMA work is pending (2nd+ run). */
2898 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
2899 {
2900 rc = VINF_EM_RAW_TO_R3;
2901 goto end;
2902 }
2903 }
2904
2905#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2906 /*
2907 * Exit to ring-3 if preemption/work is pending.
2908 *
2909 * Interrupts are disabled before the call to make sure we don't miss any interrupt
2910 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
2911 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
2912 *
2913 * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
2914 * shootdowns rely on this.
2915 */
2916 uOldEFlags = ASMIntDisableFlags();
2917 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
2918 {
2919 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
2920 rc = VINF_EM_RAW_INTERRUPT;
2921 goto end;
2922 }
2923 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2924#endif
2925
2926 /*
2927 * When external interrupts are pending, we should exit the VM when IF is set.
2928 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!
2929 */
2930 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
2931 if (RT_FAILURE(rc))
2932 goto end;
2933
2934 /** @todo check timers?? */
2935
2936 /*
2937 * TPR caching using CR8 is only available in 64-bit mode.
2938 * Note: AMD has a 32-bit exception to this (X86_CPUID_AMD_FEATURE_ECX_CR8L), but it appears to be missing on Intel CPUs.
2939 * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (no longer true).
2940 */
2941 /** @todo query and update the TPR only when it could have been changed (mmio
2942 * access & wrmsr (x2apic)). */
2943 if (fSetupTPRCaching)
2944 {
2945 /* TPR caching in CR8 */
2946 bool fPending;
2947
2948 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
2949 AssertRC(rc2);
2950 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
2951 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = u8LastTPR;
2952
2953 /*
2954 * Two options here:
2955 * - external interrupt pending, but masked by the TPR value.
2956 * -> a CR8 update that lowers the current TPR value should cause an exit
2957 * - no pending interrupts
2958 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
2959 */
2960 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
2961 AssertRC(VBOXSTRICTRC_VAL(rc));
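        /*
         * Worked example (illustrative): with a pending interrupt and a guest
         * task priority of 0x60 in the virtual-APIC page, the threshold above
         * becomes 0x60 >> 4 = 6, so a guest CR8 write that drops the priority
         * below 6 triggers a TPR-below-threshold exit and lets us reevaluate
         * the pending interrupt.
         */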
2962
2963 if (pVM->hwaccm.s.fTPRPatchingActive)
2964 {
2965 Assert(!CPUMIsGuestInLongModeEx(pCtx));
2966 /* Our patch code uses LSTAR for TPR caching. */
2967 pCtx->msrLSTAR = u8LastTPR;
2968
2969 if (fPending)
2970 {
2971 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
2972 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
2973 }
2974 else
2975 {
2976 /*
2977 * No interrupts are pending, so we don't need to be explicitly notified.
2978 * There are enough world switches for detecting pending interrupts.
2979 */
2980 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
2981 }
2982 }
2983 }
2984
2985#ifdef LOG_ENABLED
2986 if ( pVM->hwaccm.s.fNestedPaging
2987 || pVM->hwaccm.s.vmx.fVPID)
2988 {
2989 PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu();
2990 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2991 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2992 {
2993 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
2994 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
2995 else
2996 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2997 }
2998 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
2999 LogFlow(("Manual TLB flush\n"));
3000 }
3001#endif
3002#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3003 PGMRZDynMapFlushAutoSet(pVCpu);
3004#endif
3005
3006 /*
3007 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3!
3008 * (until the actual world switch)
3009 */
3010#ifdef VBOX_STRICT
3011 idCpuCheck = RTMpCpuId();
3012#endif
3013#ifdef LOG_ENABLED
3014 VMMR0LogFlushDisable(pVCpu);
3015#endif
3016
3017 /*
3018 * Save the host state first.
3019 */
3020 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
3021 {
3022 rc = VMXR0SaveHostState(pVM, pVCpu);
3023 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3024 {
3025 VMMR0LogFlushEnable(pVCpu);
3026 goto end;
3027 }
3028 }
3029
3030 /*
3031 * Load the guest state.
3032 */
3033 if (!pVCpu->hwaccm.s.fContextUseFlags)
3034 {
3035 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
3036 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadMinimal);
3037 }
3038 else
3039 {
3040 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
3041 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3042 {
3043 VMMR0LogFlushEnable(pVCpu);
3044 goto end;
3045 }
3046 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadFull);
3047 }
3048
3049#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3050 /*
3051 * Disable interrupts to make sure a poke will interrupt execution.
3052 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
3053 */
3054 uOldEFlags = ASMIntDisableFlags();
3055 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3056#endif
3057
3058 /* Non-register state Guest Context */
3059 /** @todo change me according to cpu state */
3060 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
3061 AssertRC(rc2);
3062
3063 /* Set TLB flush state as checked until we return from the world switch. */
3064 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
3065 /* Deal with tagged TLB setup and invalidation. */
3066 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
3067
3068 /*
3069 * Manual save and restore:
3070 * - General purpose registers except RIP, RSP
3071 *
3072 * Trashed:
3073 * - CR2 (we don't care)
3074 * - LDTR (reset to 0)
3075 * - DRx (presumably not changed at all)
3076 * - DR7 (reset to 0x400)
3077 * - EFLAGS (reset to RT_BIT(1); not relevant)
3078 */
3079
3080 /* All done! Let's start VM execution. */
3081 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x);
3082 Assert(idCpuCheck == RTMpCpuId());
3083
3084#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3085 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
3086 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
3087#endif
3088
3089 /*
3090 * Save the current TPR value in the LSTAR MSR so our patches can access it.
3091 */
3092 if (pVM->hwaccm.s.fTPRPatchingActive)
3093 {
3094 Assert(pVM->hwaccm.s.fTPRPatchingActive);
3095 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3096 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
3097 }
3098
3099 TMNotifyStartOfExecution(pVCpu);
3100#ifdef VBOX_WITH_KERNEL_USING_XMM
3101 rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM);
3102#else
3103 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
3104#endif
3105 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
3106 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits);
3107 /* Possibly the last TSC value seen by the guest (too high) (only when we're in TSC offset mode). */
3108 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3109 {
3110 TMCpuTickSetLastSeen(pVCpu,
3111 ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
3112 }
3113
3114 TMNotifyEndOfExecution(pVCpu);
3115 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
3116 Assert(!(ASMGetFlags() & X86_EFL_IF));
3117
3118 /*
3119 * Restore the host LSTAR MSR if the guest could have changed it.
3120 */
3121 if (pVM->hwaccm.s.fTPRPatchingActive)
3122 {
3123 Assert(pVM->hwaccm.s.fTPRPatchingActive);
3124 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3125 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
3126 }
3127
3128 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x);
3129 ASMSetFlags(uOldEFlags);
3130#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3131 uOldEFlags = ~(RTCCUINTREG)0;
3132#endif
3133
3134 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
3135
3136 /* In case we execute a goto ResumeExecution later on. */
3137 pVCpu->hwaccm.s.fResumeVM = true;
3138 pVCpu->hwaccm.s.fForceTLBFlush = false;
3139
3140 /*
3141 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3142 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
3143 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3144 */
3145
3146 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3147 {
3148 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
3149 VMMR0LogFlushEnable(pVCpu);
3150 goto end;
3151 }
3152
3153 /* Success. Query the guest state and figure out what has happened. */
3154
3155 /* Investigate why there was a VM-exit. */
3156 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3157 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
3158
3159 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
3160 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3161 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
3162 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
3163 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
3164 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
3165 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
3166 rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
3167 AssertRC(rc2);
3168
3169 /*
3170 * Sync back the guest state.
3171 */
3172 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
3173 AssertRC(rc2);
3174
3175 /* Note! NOW IT'S SAFE FOR LOGGING! */
3176 VMMR0LogFlushEnable(pVCpu);
3177 Log2(("Raw exit reason %08x\n", exitReason));
3178#if ARCH_BITS == 64 /* for the time being */
3179 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
3180#endif
3181
3182 /*
3183 * Check if an injected event was interrupted prematurely.
3184 */
3185 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
3186 AssertRC(rc2);
3187 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
3188 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
3189 /* Ignore 'int xx' as they'll be restarted anyway. */
3190 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
3191 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3192 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3193 {
3194 Assert(!pVCpu->hwaccm.s.Event.fPending);
3195 pVCpu->hwaccm.s.Event.fPending = true;
3196 /* Error code present? */
3197 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
3198 {
3199 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
3200 AssertRC(rc2);
3201 pVCpu->hwaccm.s.Event.errCode = val;
3202 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n",
3203 pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
3204 }
3205 else
3206 {
3207 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo,
3208 (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3209 pVCpu->hwaccm.s.Event.errCode = 0;
3210 }
3211 }
3212#ifdef VBOX_STRICT
3213 else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
3214             /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3215 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3216 {
3217 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n",
3218 pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3219 }
3220
3221 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
3222 HWACCMDumpRegs(pVM, pVCpu, pCtx);
3223#endif
3224
3225 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3226 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3227 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3228 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3229 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3230
3231 /*
3232 * Sync back the TPR if it was changed.
3233 */
3234 if ( fSetupTPRCaching
3235 && u8LastTPR != pVCpu->hwaccm.s.vmx.pbVAPIC[0x80])
3236 {
3237 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pbVAPIC[0x80]);
3238 AssertRC(rc2);
3239 }
3240
3241#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3242 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3243 exitReason, (uint64_t)exitQualification, pCtx->cs.Sel, pCtx->rip, (uint64_t)intInfo);
3244#endif
3245 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x);
3246
3247 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3248 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3249 switch (exitReason)
3250 {
3251 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3252 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3253 {
3254 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3255
3256 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3257 {
3258 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
3259#if 0 //def VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3260 if ( RTThreadPreemptIsPendingTrusty()
3261 && !RTThreadPreemptIsPending(NIL_RTTHREAD))
3262 goto ResumeExecution;
3263#endif
3264 /* External interrupt; leave to allow it to be dispatched again. */
3265 rc = VINF_EM_RAW_INTERRUPT;
3266 break;
3267 }
3268 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3269 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3270 {
3271 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3272 /* External interrupt; leave to allow it to be dispatched again. */
3273 rc = VINF_EM_RAW_INTERRUPT;
3274 break;
3275
3276 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
3277 AssertFailed(); /* can't come here; fails the first check. */
3278 break;
3279
3280 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
3281 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
3282 Assert(vector == 1 || vector == 3 || vector == 4);
3283 /* no break */
3284 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
3285 Log2(("Hardware/software interrupt %d\n", vector));
3286 switch (vector)
3287 {
3288 case X86_XCPT_NM:
3289 {
3290 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3291
3292 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3293 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3294 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3295 if (rc == VINF_SUCCESS)
3296 {
3297 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3298
3299 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
3300
3301 /* Continue execution. */
3302 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3303
3304 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3305 goto ResumeExecution;
3306 }
3307
3308 Log(("Forward #NM fault to the guest\n"));
3309 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
3310 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3311 cbInstr, 0);
3312 AssertRC(rc2);
3313 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3314 goto ResumeExecution;
3315 }
3316
3317 case X86_XCPT_PF: /* Page fault */
3318 {
3319#ifdef VBOX_ALWAYS_TRAP_PF
3320 if (pVM->hwaccm.s.fNestedPaging)
3321 {
3322 /*
3323                  * A genuine page fault. Forward the trap to the guest by injecting the exception and resuming execution.
3324 */
3325 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification,
3326 errCode, (RTGCPTR)pCtx->rsp));
3327
3328 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3329
3330 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3331
3332 /* Now we must update CR2. */
3333 pCtx->cr2 = exitQualification;
3334 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3335 cbInstr, errCode);
3336 AssertRC(rc2);
3337
3338 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3339 goto ResumeExecution;
3340 }
3341#else
3342 Assert(!pVM->hwaccm.s.fNestedPaging);
3343#endif
3344
3345#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
3346             /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
3347 if ( pVM->hwaccm.s.fTRPPatchingAllowed
3348 && pVM->hwaccm.s.pGuestPatchMem
3349 && (exitQualification & 0xfff) == 0x080
3350 && !(errCode & X86_TRAP_PF_P) /* not present */
3351 && CPUMGetGuestCPL(pVCpu) == 0
3352 && !CPUMIsGuestInLongModeEx(pCtx)
3353 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
3354 {
3355 RTGCPHYS GCPhysApicBase, GCPhys;
3356 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3357 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3358
3359 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3360 if ( rc == VINF_SUCCESS
3361 && GCPhys == GCPhysApicBase)
3362 {
3363 /* Only attempt to patch the instruction once. */
3364 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3365 if (!pPatch)
3366 {
3367 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
3368 break;
3369 }
3370 }
3371 }
3372#endif
3373
3374 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3375 /* Exit qualification contains the linear address of the page fault. */
3376 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3377 TRPMSetErrorCode(pVCpu, errCode);
3378 TRPMSetFaultAddress(pVCpu, exitQualification);
3379
3380 /* Shortcut for APIC TPR reads and writes. */
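            /* Offset 0x80 within the 4K xAPIC MMIO page is the TPR register, which is why the low 12 bits of the
               faulting address are compared against 0x080 below. */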
3381 if ( (exitQualification & 0xfff) == 0x080
3382 && !(errCode & X86_TRAP_PF_P) /* not present */
3383 && fSetupTPRCaching
3384 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3385 {
3386 RTGCPHYS GCPhysApicBase, GCPhys;
3387                 PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
3388 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3389
3390 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3391 if ( rc == VINF_SUCCESS
3392 && GCPhys == GCPhysApicBase)
3393 {
3394 Log(("Enable VT-x virtual APIC access filtering\n"));
3395 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3396 AssertRC(rc2);
3397 }
3398 }
3399
3400 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3401 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3402 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3403
3404 if (rc == VINF_SUCCESS)
3405 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3406                 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3407 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
3408
3409 TRPMResetTrap(pVCpu);
3410 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3411 goto ResumeExecution;
3412 }
3413 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3414 {
3415 /*
3416                  * A genuine page fault. Forward the trap to the guest by injecting the exception and resuming execution.
3417 */
3418 Log2(("Forward page fault to the guest\n"));
3419
3420 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3421 /* The error code might have been changed. */
3422 errCode = TRPMGetErrorCode(pVCpu);
3423
3424 TRPMResetTrap(pVCpu);
3425
3426 /* Now we must update CR2. */
3427 pCtx->cr2 = exitQualification;
3428 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3429 cbInstr, errCode);
3430 AssertRC(rc2);
3431
3432 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3433 goto ResumeExecution;
3434 }
3435#ifdef VBOX_STRICT
3436 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3437 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3438#endif
3439 /* Need to go back to the recompiler to emulate the instruction. */
3440 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPFEM);
3441 TRPMResetTrap(pVCpu);
3442 break;
3443 }
3444
3445 case X86_XCPT_MF: /* Floating point exception. */
3446 {
3447 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
3448 if (!(pCtx->cr0 & X86_CR0_NE))
3449 {
3450 /* old style FPU error reporting needs some extra work. */
3451 /** @todo don't fall back to the recompiler, but do it manually. */
3452 rc = VINF_EM_RAW_EMULATE_INSTR;
3453 break;
3454 }
3455 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3456 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3457 cbInstr, errCode);
3458 AssertRC(rc2);
3459
3460 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3461 goto ResumeExecution;
3462 }
3463
3464 case X86_XCPT_DB: /* Debug exception. */
3465 {
3466 uint64_t uDR6;
3467
3468 /*
3469 * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3470 *
3471 * Exit qualification bits:
3472 * 3:0 B0-B3 which breakpoint condition was met
3473 * 12:4 Reserved (0)
3474 * 13 BD - debug register access detected
3475 * 14 BS - single step execution or branch taken
3476 * 63:15 Reserved (0)
3477 */
3478 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
3479
3480 /* Note that we don't support guest and host-initiated debugging at the same time. */
3481
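                /* Start from the architectural DR6 reset value (0xFFFF0FF0) so the reserved bits read as ones, then
                   merge in the B0-B3/BD/BS status bits reported in the exit qualification. */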
3482 uDR6 = X86_DR6_INIT_VAL;
3483 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3484 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3485 if (rc == VINF_EM_RAW_GUEST_TRAP)
3486 {
3487 /* Update DR6 here. */
3488 pCtx->dr[6] = uDR6;
3489
3490 /* Resync DR6 if the debug state is active. */
3491 if (CPUMIsGuestDebugStateActive(pVCpu))
3492 ASMSetDR6(pCtx->dr[6]);
3493
3494 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
3495 pCtx->dr[7] &= ~X86_DR7_GD;
3496
3497 /* Paranoia. */
3498 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3499 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3500 pCtx->dr[7] |= 0x400; /* must be one */
3501
3502 /* Resync DR7 */
3503 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3504 AssertRC(rc2);
3505
3506 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip,
3507 exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3508 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3509 cbInstr, errCode);
3510 AssertRC(rc2);
3511
3512 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3513 goto ResumeExecution;
3514 }
3515 /* Return to ring 3 to deal with the debug exit code. */
3516 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3517 break;
3518 }
3519
3520 case X86_XCPT_BP: /* Breakpoint. */
3521 {
3522 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestBP);
3523 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3524 if (rc == VINF_EM_RAW_GUEST_TRAP)
3525 {
3526 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs.Sel, pCtx->rip));
3527 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3528 cbInstr, errCode);
3529 AssertRC(rc2);
3530 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3531 goto ResumeExecution;
3532 }
3533 if (rc == VINF_SUCCESS)
3534 {
3535 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3536 goto ResumeExecution;
3537 }
3538 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3539 break;
3540 }
3541
3542 case X86_XCPT_GP: /* General protection failure exception. */
3543 {
3544 uint32_t cbOp;
3545 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3546
3547 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
3548#ifdef VBOX_STRICT
3549 if ( !CPUMIsGuestInRealModeEx(pCtx)
3550 || !pVM->hwaccm.s.vmx.pRealModeTSS)
3551 {
3552 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, errCode));
3553 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3554 cbInstr, errCode);
3555 AssertRC(rc2);
3556 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3557 goto ResumeExecution;
3558 }
3559#endif
3560 Assert(CPUMIsGuestInRealModeEx(pCtx));
3561
3562 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3563
3564 rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, &cbOp);
3565 if (RT_SUCCESS(rc2))
3566 {
3567 bool fUpdateRIP = true;
3568
3569 rc = VINF_SUCCESS;
3570 Assert(cbOp == pDis->cbInstr);
3571 switch (pDis->pCurInstr->uOpcode)
3572 {
3573 case OP_CLI:
3574 pCtx->eflags.Bits.u1IF = 0;
3575 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
3576 break;
3577
3578 case OP_STI:
3579 pCtx->eflags.Bits.u1IF = 1;
3580 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->cbInstr);
3581 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3582 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE,
3583 VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3584 AssertRC(rc2);
3585 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
3586 break;
3587
3588 case OP_HLT:
3589 fUpdateRIP = false;
3590 rc = VINF_EM_HALT;
3591 pCtx->rip += pDis->cbInstr;
3592 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3593 break;
3594
3595 case OP_POPF:
3596 {
3597 RTGCPTR GCPtrStack;
3598 uint32_t cbParm;
3599 uint32_t uMask;
3600 X86EFLAGS eflags;
3601
3602 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3603 {
3604 cbParm = 4;
3605 uMask = 0xffffffff;
3606 }
3607 else
3608 {
3609 cbParm = 2;
3610 uMask = 0xffff;
3611 }
3612
3613 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3614 if (RT_FAILURE(rc2))
3615 {
3616 rc = VERR_EM_INTERPRETER;
3617 break;
3618 }
3619 eflags.u = 0;
3620 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3621 if (RT_FAILURE(rc2))
3622 {
3623 rc = VERR_EM_INTERPRETER;
3624 break;
3625 }
3626 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
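                            /* X86_EFL_POPF_BITS is the set of flags POPF is permitted to modify; all other EFLAGS
                               bits are preserved by the masking below. */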
3627 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (eflags.u & X86_EFL_POPF_BITS & uMask);
3628 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3629 pCtx->eflags.Bits.u1RF = 0;
3630 pCtx->esp += cbParm;
3631 pCtx->esp &= uMask;
3632
3633 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
3634 break;
3635 }
3636
3637 case OP_PUSHF:
3638 {
3639 RTGCPTR GCPtrStack;
3640 uint32_t cbParm;
3641 uint32_t uMask;
3642 X86EFLAGS eflags;
3643
3644 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3645 {
3646 cbParm = 4;
3647 uMask = 0xffffffff;
3648 }
3649 else
3650 {
3651 cbParm = 2;
3652 uMask = 0xffff;
3653 }
3654
3655 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0,
3656 &GCPtrStack);
3657 if (RT_FAILURE(rc2))
3658 {
3659 rc = VERR_EM_INTERPRETER;
3660 break;
3661 }
3662 eflags = pCtx->eflags;
3663 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3664 eflags.Bits.u1RF = 0;
3665 eflags.Bits.u1VM = 0;
3666
3667 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3668 if (RT_FAILURE(rc2))
3669 {
3670 rc = VERR_EM_INTERPRETER;
3671 break;
3672 }
3673 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3674 pCtx->esp -= cbParm;
3675 pCtx->esp &= uMask;
3676 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
3677 break;
3678 }
3679
3680 case OP_IRET:
3681 {
3682 RTGCPTR GCPtrStack;
3683 uint32_t uMask = 0xffff;
3684 uint16_t aIretFrame[3];
3685
3686 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
3687 {
3688 rc = VERR_EM_INTERPRETER;
3689 break;
3690 }
3691
3692 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3693 if (RT_FAILURE(rc2))
3694 {
3695 rc = VERR_EM_INTERPRETER;
3696 break;
3697 }
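                            /* The real-mode IRET frame on the stack consists of three 16-bit words: IP, CS and FLAGS. */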
3698 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3699 if (RT_FAILURE(rc2))
3700 {
3701 rc = VERR_EM_INTERPRETER;
3702 break;
3703 }
3704 pCtx->ip = aIretFrame[0];
3705 pCtx->cs.Sel = aIretFrame[1];
3706 pCtx->cs.ValidSel = aIretFrame[1];
3707 pCtx->cs.u64Base = (uint32_t)pCtx->cs.Sel << 4;
3708 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3709 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3710 pCtx->sp += sizeof(aIretFrame);
3711
3712 LogFlow(("iret to %04x:%x\n", pCtx->cs.Sel, pCtx->ip));
3713 fUpdateRIP = false;
3714 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
3715 break;
3716 }
3717
3718 case OP_INT:
3719 {
3720 uint32_t intInfo2;
3721
3722 LogFlow(("Realmode: INT %x\n", pDis->Param1.uValue & 0xff));
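                            /* Assemble the VM-entry interruption info: vector in bits 7:0, type in bits 10:8 and the
                               valid flag in bit 31. */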
3723 intInfo2 = pDis->Param1.uValue & 0xff;
3724 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3725 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3726
3727 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3728 AssertRC(VBOXSTRICTRC_VAL(rc));
3729 fUpdateRIP = false;
3730 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3731 break;
3732 }
3733
3734 case OP_INTO:
3735 {
3736 if (pCtx->eflags.Bits.u1OF)
3737 {
3738 uint32_t intInfo2;
3739
3740 LogFlow(("Realmode: INTO\n"));
3741 intInfo2 = X86_XCPT_OF;
3742 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3743 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3744
3745 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3746 AssertRC(VBOXSTRICTRC_VAL(rc));
3747 fUpdateRIP = false;
3748 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3749 }
3750 break;
3751 }
3752
3753 case OP_INT3:
3754 {
3755 uint32_t intInfo2;
3756
3757 LogFlow(("Realmode: INT 3\n"));
3758 intInfo2 = 3;
3759 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3760 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3761
3762 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3763 AssertRC(VBOXSTRICTRC_VAL(rc));
3764 fUpdateRIP = false;
3765 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3766 break;
3767 }
3768
3769 default:
3770 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
3771 fUpdateRIP = false;
3772 break;
3773 }
3774
3775 if (rc == VINF_SUCCESS)
3776 {
3777 if (fUpdateRIP)
3778 pCtx->rip += cbOp; /* Move on to the next instruction. */
3779
3780 /*
3781 * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the
3782 * whole context to be done with it.
3783 */
3784 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3785
3786 /* Only resume if successful. */
3787 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3788 goto ResumeExecution;
3789 }
3790 }
3791 else
3792 rc = VERR_EM_INTERPRETER;
3793
3794 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
3795 break;
3796 }
3797
3798#ifdef VBOX_STRICT
3799 case X86_XCPT_XF: /* SIMD exception. */
3800 case X86_XCPT_DE: /* Divide error. */
3801 case X86_XCPT_UD: /* Unknown opcode exception. */
3802 case X86_XCPT_SS: /* Stack segment exception. */
3803 case X86_XCPT_NP: /* Segment not present exception. */
3804 {
3805 switch(vector)
3806 {
3807 case X86_XCPT_DE:
3808 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
3809 break;
3810 case X86_XCPT_UD:
3811 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
3812 break;
3813 case X86_XCPT_SS:
3814 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
3815 break;
3816 case X86_XCPT_NP:
3817 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
3818 break;
3819 case X86_XCPT_XF:
3820 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXF);
3821 break;
3822 }
3823
3824 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3825 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3826 cbInstr, errCode);
3827 AssertRC(rc2);
3828
3829 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3830 goto ResumeExecution;
3831 }
3832#endif
3833 default:
3834 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXcpUnk);
3835 if ( CPUMIsGuestInRealModeEx(pCtx)
3836 && pVM->hwaccm.s.vmx.pRealModeTSS)
3837 {
3838 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs.Sel, pCtx->eip, errCode));
3839 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3840 cbInstr, errCode);
3841 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
3842
3843 /* Go back to ring-3 in case of a triple fault. */
3844 if ( vector == X86_XCPT_DF
3845 && rc == VINF_EM_RESET)
3846 {
3847 break;
3848 }
3849
3850 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3851 goto ResumeExecution;
3852 }
3853 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3854 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3855 break;
3856 } /* switch (vector) */
3857
3858 break;
3859
3860 default:
3861 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3862 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
3863 break;
3864 }
3865
3866 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3867 break;
3868 }
3869
3870 /*
3871     * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed
3872 * by the configuration of the EPT paging structures.
3873 */
3874 case VMX_EXIT_EPT_VIOLATION:
3875 {
3876 RTGCPHYS GCPhys;
3877
3878 Assert(pVM->hwaccm.s.fNestedPaging);
3879
3880 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3881 AssertRC(rc2);
3882 Assert(((exitQualification >> 7) & 3) != 2);
3883
3884 /* Determine the kind of violation. */
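        /* Synthesize an x86 #PF-style error code (ID/RW/P bits) from the EPT exit qualification so the common
           PGM page-fault path below can be reused. */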
3885 errCode = 0;
3886 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
3887 errCode |= X86_TRAP_PF_ID;
3888
3889 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
3890 errCode |= X86_TRAP_PF_RW;
3891
3892 /* If the page is present, then it's a page level protection fault. */
3893 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
3894 errCode |= X86_TRAP_PF_P;
3895 else
3896 {
3897 /* Shortcut for APIC TPR reads and writes. */
3898 if ( (GCPhys & 0xfff) == 0x080
3899 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3900 && fSetupTPRCaching
3901 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3902 {
3903 RTGCPHYS GCPhysApicBase;
3904                 PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
3905 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3906 if (GCPhys == GCPhysApicBase + 0x80)
3907 {
3908 Log(("Enable VT-x virtual APIC access filtering\n"));
3909 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3910 AssertRC(rc2);
3911 }
3912 }
3913 }
3914 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
3915
3916 /* GCPhys contains the guest physical address of the page fault. */
3917 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3918 TRPMSetErrorCode(pVCpu, errCode);
3919 TRPMSetFaultAddress(pVCpu, GCPhys);
3920
3921         /* Handle the page fault trap for the nested shadow table. */
3922 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
3923
3924 /*
3925 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
3926 */
3927 if ( rc == VINF_SUCCESS
3928 || rc == VERR_PAGE_TABLE_NOT_PRESENT
3929 || rc == VERR_PAGE_NOT_PRESENT)
3930 {
3931 /* We've successfully synced our shadow pages, so let's just continue execution. */
3932             Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3933 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
3934
3935 TRPMResetTrap(pVCpu);
3936 goto ResumeExecution;
3937 }
3938
3939#ifdef VBOX_STRICT
3940 if (rc != VINF_EM_RAW_EMULATE_INSTR)
3941 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3942#endif
3943 /* Need to go back to the recompiler to emulate the instruction. */
3944 TRPMResetTrap(pVCpu);
3945 break;
3946 }
3947
3948 case VMX_EXIT_EPT_MISCONFIG:
3949 {
3950 RTGCPHYS GCPhys;
3951
3952 Assert(pVM->hwaccm.s.fNestedPaging);
3953
3954 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3955 AssertRC(rc2);
3956 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
3957
3958 /* Shortcut for APIC TPR reads and writes. */
3959 if ( (GCPhys & 0xfff) == 0x080
3960 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3961 && fSetupTPRCaching
3962 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3963 {
3964 RTGCPHYS GCPhysApicBase;
3965             PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
3966 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3967 if (GCPhys == GCPhysApicBase + 0x80)
3968 {
3969 Log(("Enable VT-x virtual APIC access filtering\n"));
3970 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3971 AssertRC(rc2);
3972 }
3973 }
3974
3975 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
3976
3977 /*
3978 * If we succeed, resume execution.
3979          * Or, if we fail to interpret the instruction because we couldn't get the guest physical address of the
3980          * page containing the instruction via the guest's page tables (we would invalidate the guest page in the
3981          * host TLB), resume execution anyway; this causes a guest page fault and lets the guest handle this
3982 * weird case. See @bugref{6043}.
3983 */
3984 if ( rc == VINF_SUCCESS
3985 || rc == VERR_PAGE_TABLE_NOT_PRESENT
3986 || rc == VERR_PAGE_NOT_PRESENT)
3987 {
3988 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
3989 goto ResumeExecution;
3990 }
3991
3992 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3993 break;
3994 }
3995
3996 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
3997 /* Clear VM-exit on IF=1 change. */
3998 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip,
3999 VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
4000 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
4001 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4002 AssertRC(rc2);
4003 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
4004 goto ResumeExecution; /* we check for pending guest interrupts there */
4005
4006 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
4007 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
4008 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
4009 /* Skip instruction and continue directly. */
4010 pCtx->rip += cbInstr;
4011             /* Continue execution. */
4012 goto ResumeExecution;
4013
4014 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4015 {
4016 Log2(("VMX: Cpuid %x\n", pCtx->eax));
4017 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
4018 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4019 if (rc == VINF_SUCCESS)
4020 {
4021 /* Update EIP and continue execution. */
4022 Assert(cbInstr == 2);
4023 pCtx->rip += cbInstr;
4024 goto ResumeExecution;
4025 }
4026 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4027 rc = VINF_EM_RAW_EMULATE_INSTR;
4028 break;
4029 }
4030
4031 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4032 {
4033 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
4034 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
4035 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4036 if (rc == VINF_SUCCESS)
4037 {
4038 /* Update EIP and continue execution. */
4039 Assert(cbInstr == 2);
4040 pCtx->rip += cbInstr;
4041 goto ResumeExecution;
4042 }
4043 rc = VINF_EM_RAW_EMULATE_INSTR;
4044 break;
4045 }
4046
4047 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4048 {
4049 Log2(("VMX: Rdtsc\n"));
4050 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
4051 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4052 if (rc == VINF_SUCCESS)
4053 {
4054 /* Update EIP and continue execution. */
4055 Assert(cbInstr == 2);
4056 pCtx->rip += cbInstr;
4057 goto ResumeExecution;
4058 }
4059 rc = VINF_EM_RAW_EMULATE_INSTR;
4060 break;
4061 }
4062
4063 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4064 {
4065 Log2(("VMX: Rdtscp\n"));
4066 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtscp);
4067 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
4068 if (rc == VINF_SUCCESS)
4069 {
4070 /* Update EIP and continue execution. */
4071 Assert(cbInstr == 3);
4072 pCtx->rip += cbInstr;
4073 goto ResumeExecution;
4074 }
4075 rc = VINF_EM_RAW_EMULATE_INSTR;
4076 break;
4077 }
4078
4079 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVPG. */
4080 {
4081 Log2(("VMX: invlpg\n"));
4082 Assert(!pVM->hwaccm.s.fNestedPaging);
4083
4084 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
4085 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
4086 if (rc == VINF_SUCCESS)
4087 {
4088 /* Update EIP and continue execution. */
4089 pCtx->rip += cbInstr;
4090 goto ResumeExecution;
4091 }
4092 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
4093 break;
4094 }
4095
4096 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4097 {
4098 Log2(("VMX: monitor\n"));
4099
4100 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
4101 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4102 if (rc == VINF_SUCCESS)
4103 {
4104 /* Update EIP and continue execution. */
4105 pCtx->rip += cbInstr;
4106 goto ResumeExecution;
4107 }
4108 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4109 break;
4110 }
4111
4112 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4113 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
4114 if ( pVM->hwaccm.s.fTPRPatchingActive
4115 && pCtx->ecx == MSR_K8_LSTAR)
4116 {
4117 Assert(!CPUMIsGuestInLongModeEx(pCtx));
4118 if ((pCtx->eax & 0xff) != u8LastTPR)
4119 {
4120 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
4121
4122 /* Our patch code uses LSTAR for TPR caching. */
4123 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
4124 AssertRC(rc2);
4125 }
4126
4127 /* Skip the instruction and continue. */
4128 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
4129
4130 /* Only resume if successful. */
4131 goto ResumeExecution;
4132 }
4133 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_MSR;
4134 /* no break */
4135 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4136 {
4137 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
4138
4139 /*
4140          * Note: The Intel spec. claims there's a REX version of RDMSR that's slightly different,
4141 * so we play safe by completely disassembling the instruction.
4142 */
4143 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
4144 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
4145 if (rc == VINF_SUCCESS)
4146 {
4147 /* EIP has been updated already. */
4148 /* Only resume if successful. */
4149 goto ResumeExecution;
4150 }
4151 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n",
4152 (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
4153 break;
4154 }
4155
4156 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4157 {
4158 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4159
4160 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
4161 {
4162 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
4163 {
4164 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
4165 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4166 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4167 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
4168 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
4169 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
4170 {
4171 case 0:
4172 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
4173 break;
4174 case 2:
4175 break;
4176 case 3:
4177 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
4178 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
4179 break;
4180 case 4:
4181 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
4182 break;
4183 case 8:
4184 /* CR8 contains the APIC TPR */
4185 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1
4186 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4187 break;
4188
4189 default:
4190 AssertFailed();
4191 break;
4192 }
4193 break;
4194 }
4195
4196 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
4197 {
4198 Log2(("VMX: mov x, crx\n"));
4199 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4200
4201 Assert( !pVM->hwaccm.s.fNestedPaging
4202 || !CPUMIsGuestInPagedProtectedModeEx(pCtx)
4203 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3);
4204
4205 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
4206 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8
4207 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4208
4209 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4210 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
4211 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
4212 break;
4213 }
4214
4215 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
4216 {
4217 Log2(("VMX: clts\n"));
4218 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
4219 rc = EMInterpretCLTS(pVM, pVCpu);
4220 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
4221 break;
4222 }
4223
4224 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
4225 {
4226 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
4227 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
4228 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
4229 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
4230 break;
4231 }
4232 }
4233
4234 /* Update EIP if no error occurred. */
4235 if (RT_SUCCESS(rc))
4236 pCtx->rip += cbInstr;
4237
4238 if (rc == VINF_SUCCESS)
4239 {
4240 /* Only resume if successful. */
4241 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4242 goto ResumeExecution;
4243 }
4244 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
4245 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4246 break;
4247 }
4248
4249 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4250 {
4251 if ( !DBGFIsStepping(pVCpu)
4252 && !CPUMIsHyperDebugStateActive(pVCpu))
4253 {
4254 /* Disable DRx move intercepts. */
4255 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4256 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4257 AssertRC(rc2);
4258
4259 /* Save the host and load the guest debug state. */
4260 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
4261 AssertRC(rc2);
4262
4263#ifdef LOG_ENABLED
4264 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4265 {
4266 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4267 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4268 }
4269 else
4270 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
4271#endif
4272
4273#ifdef VBOX_WITH_STATISTICS
4274 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
4275 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4276 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4277 else
4278 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4279#endif
4280
4281 goto ResumeExecution;
4282 }
4283
4284 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first
4285 * time and restore DRx registers afterwards */
4286 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4287 {
4288 Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4289 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4290 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4291 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4292 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4293 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4294 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4295 Log2(("DR7=%08x\n", pCtx->dr[7]));
4296 }
4297 else
4298 {
4299 Log2(("VMX: mov x, DRx\n"));
4300 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4301 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4302 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4303 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4304 }
4305 /* Update EIP if no error occurred. */
4306 if (RT_SUCCESS(rc))
4307 pCtx->rip += cbInstr;
4308
4309 if (rc == VINF_SUCCESS)
4310 {
4311 /* Only resume if successful. */
4312 goto ResumeExecution;
4313 }
4314 Assert(rc == VERR_EM_INTERPRETER);
4315 break;
4316 }
4317
4318 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
4319 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4320 {
4321 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4322 uint32_t uPort;
4323 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4324 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4325
4326 /** @todo necessary to make the distinction? */
4327 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4328 uPort = pCtx->edx & 0xffff;
4329 else
4330 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
4331
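            /* The exit qualification encodes the access width as 0 (1 byte), 1 (2 bytes) or 3 (4 bytes); any other
               value is invalid. */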
4332 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */
4333 {
4334 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4335 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4336 break;
4337 }
4338
4339 uint32_t cbSize = g_aIOSize[uIOWidth];
4340 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4341 {
4342 /* ins/outs */
4343 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
4344
4345 /* Disassemble manually to deal with segment prefixes. */
4346 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
4347 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4348 rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL);
4349                 if (RT_SUCCESS(rc2))
4350 {
4351 if (fIOWrite)
4352 {
4353 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4354 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
4355 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4356 }
4357 else
4358 {
4359 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4360 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
4361 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4362 }
4363 }
4364 else
4365 rc = VINF_EM_RAW_EMULATE_INSTR;
4366 }
4367 else
4368 {
4369 /* Normal in/out */
4370 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
4371
4372 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4373
4374 if (fIOWrite)
4375 {
4376 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
4377 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4378 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4379 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4380 }
4381 else
4382 {
4383 uint32_t u32Val = 0;
4384
4385 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
4386 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4387 if (IOM_SUCCESS(rc))
4388 {
4389 /* Write back to the EAX register. */
4390 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4391 }
4392 else
4393 if (rc == VINF_IOM_R3_IOPORT_READ)
4394 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4395 }
4396 }
4397
4398 /*
4399              * Handle the I/O return codes.
4400 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4401 */
4402 if (IOM_SUCCESS(rc))
4403 {
4404 /* Update EIP and continue execution. */
4405 pCtx->rip += cbInstr;
4406 if (RT_LIKELY(rc == VINF_SUCCESS))
4407 {
4408 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4409 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4410 {
4411 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
4412 for (unsigned i = 0; i < 4; i++)
4413 {
4414 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
4415
4416 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4417 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4418 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4419 {
4420 uint64_t uDR6;
4421
4422 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4423
4424 uDR6 = ASMGetDR6();
4425
4426 /* Clear all breakpoint status flags and set the one we just hit. */
4427 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4428 uDR6 |= (uint64_t)RT_BIT(i);
4429
4430 /*
4431 * Note: AMD64 Architecture Programmer's Manual 13.1:
4432                              * Bits 15:13 of the DR6 register are never cleared by the processor and must
4433 * be cleared by software after the contents have been read.
4434 */
4435 ASMSetDR6(uDR6);
4436
4437 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
4438 pCtx->dr[7] &= ~X86_DR7_GD;
4439
4440 /* Paranoia. */
4441 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4442 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4443 pCtx->dr[7] |= 0x400; /* must be one */
4444
4445 /* Resync DR7 */
4446 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
4447 AssertRC(rc2);
4448
4449 /* Construct inject info. */
4450 intInfo = X86_XCPT_DB;
4451 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4452 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4453
4454 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4455 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4456 0 /* cbInstr */, 0 /* errCode */);
4457 AssertRC(rc2);
4458
4459 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4460 goto ResumeExecution;
4461 }
4462 }
4463 }
4464 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4465 goto ResumeExecution;
4466 }
4467 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4468 break;
4469 }
4470
4471#ifdef VBOX_STRICT
4472 if (rc == VINF_IOM_R3_IOPORT_READ)
4473 Assert(!fIOWrite);
4474 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4475 Assert(fIOWrite);
4476 else
4477 {
4478 AssertMsg( RT_FAILURE(rc)
4479 || rc == VINF_EM_RAW_EMULATE_INSTR
4480 || rc == VINF_EM_RAW_GUEST_TRAP
4481 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4482 }
4483#endif
4484 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4485 break;
4486 }
4487
4488 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4489 LogFlow(("VMX_EXIT_TPR\n"));
4490 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4491 goto ResumeExecution;
4492
4493 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4494 on the APIC-access page. */
4495 {
4496 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4497 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4498
4499 switch(uAccessType)
4500 {
4501 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4502 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4503 {
4504 RTGCPHYS GCPhys;
4505 PDMApicGetBase(pVM, &GCPhys);
4506 GCPhys &= PAGE_BASE_GC_MASK;
4507 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4508
4509 LogFlow(("Apic access at %RGp\n", GCPhys));
4510 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
4511 CPUMCTX2CORE(pCtx), GCPhys);
4512 if (rc == VINF_SUCCESS)
4513 goto ResumeExecution; /* rip already updated */
4514 break;
4515 }
4516
4517 default:
4518 rc = VINF_EM_RAW_EMULATE_INSTR;
4519 break;
4520 }
4521 break;
4522 }
4523
4524 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4525 if (!TMTimerPollBool(pVM, pVCpu))
4526 goto ResumeExecution;
4527 rc = VINF_EM_RAW_TIMER_PENDING;
4528 break;
4529
4530 default:
4531 /* The rest is handled after syncing the entire CPU state. */
4532 break;
4533 }
4534
4535
4536 /*
4537 * Note: The guest state is not entirely synced back at this stage!
4538 */
4539
4540 /* Investigate why there was a VM-exit. (part 2) */
4541 switch (exitReason)
4542 {
4543 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
4544 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
4545 case VMX_EXIT_EPT_VIOLATION:
4546 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4547 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4548 /* Already handled above. */
4549 break;
4550
4551 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4552 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4553 break;
4554
4555 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4556 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4557 rc = VINF_EM_RAW_INTERRUPT;
4558 AssertFailed(); /* Can't happen. Yet. */
4559 break;
4560
4561 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
4562 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
4563 rc = VINF_EM_RAW_INTERRUPT;
4564 AssertFailed(); /* Can't happen afaik. */
4565 break;
4566
4567 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4568 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4569 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4570 && pVCpu->hwaccm.s.Event.fPending)
4571 {
4572 /* Caused by an injected interrupt. */
4573 pVCpu->hwaccm.s.Event.fPending = false;
4574
4575 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo)));
4576 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo));
4577 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT);
4578 AssertRC(rc2);
4579 }
4580 /* else Exceptions and software interrupts can just be restarted. */
4581 rc = VERR_EM_INTERPRETER;
4582 break;
4583
4584 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4585 /* Check if external interrupts are pending; if so, don't switch back. */
4586 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
4587 pCtx->rip++; /* skip hlt */
4588 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4589 goto ResumeExecution;
4590
4591 rc = VINF_EM_HALT;
4592 break;
4593
4594 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4595 Log2(("VMX: mwait\n"));
4596 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
4597 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4598 if ( rc == VINF_EM_HALT
4599 || rc == VINF_SUCCESS)
4600 {
4601 /* Update EIP and continue execution. */
4602 pCtx->rip += cbInstr;
4603
4604 /* Check if external interrupts are pending; if so, don't switch back. */
4605 if ( rc == VINF_SUCCESS
4606 || ( rc == VINF_EM_HALT
4607 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4608 )
4609 goto ResumeExecution;
4610 }
4611 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4612 break;
4613
4614 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4615 AssertFailed(); /* can't happen. */
4616 rc = VERR_EM_INTERPRETER;
4617 break;
4618
4619 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4620 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4621 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4622 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4623 AssertRC(rc2);
4624 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMTF);
4625#if 0
4626 DBGFDoneStepping(pVCpu);
4627#endif
4628 rc = VINF_EM_DBG_STOP;
4629 break;
4630
4631 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4632 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4633 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4634 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4635 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4636 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4637 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4638 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4639 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4640 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4641 /** @todo inject #UD immediately */
4642 rc = VERR_EM_INTERPRETER;
4643 break;
4644
4645 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4646 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4647 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVPG. */
4648 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4649 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4650 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4651 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4652 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4653 /* already handled above */
4654 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4655 || rc == VINF_EM_RAW_INTERRUPT
4656 || rc == VERR_EM_INTERPRETER
4657 || rc == VINF_EM_RAW_EMULATE_INSTR
4658 || rc == VINF_PGM_SYNC_CR3
4659 || rc == VINF_IOM_R3_IOPORT_READ
4660 || rc == VINF_IOM_R3_IOPORT_WRITE
4661 || rc == VINF_EM_RAW_GUEST_TRAP
4662 || rc == VINF_TRPM_XCPT_DISPATCHED
4663 || rc == VINF_EM_RESCHEDULE_REM,
4664 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4665 break;
4666
4667 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4668 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4669 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4670 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4671 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4672 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4673 on the APIC-access page. */
4674 {
4675 /*
4676 * If we decided to emulate them here, then we must sync the MSRs that could have been changed (sysenter, FS/GS base)
4677 */
4678 rc = VERR_EM_INTERPRETER;
4679 break;
4680 }
4681
4682 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4683 Assert(rc == VINF_EM_RAW_INTERRUPT);
4684 break;
4685
4686 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4687 {
4688#ifdef VBOX_STRICT
4689 RTCCUINTREG val2 = 0;
4690
4691 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4692
4693 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2);
4694 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4695
4696 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2);
4697 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4698
4699 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2);
4700 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4701
4702 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2);
4703 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4704
4705 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4706 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4707
4708 VMX_LOG_SELREG(CS, "CS", val2);
4709 VMX_LOG_SELREG(DS, "DS", val2);
4710 VMX_LOG_SELREG(ES, "ES", val2);
4711 VMX_LOG_SELREG(FS, "FS", val2);
4712 VMX_LOG_SELREG(GS, "GS", val2);
4713 VMX_LOG_SELREG(SS, "SS", val2);
4714 VMX_LOG_SELREG(TR, "TR", val2);
4715 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4716
4717 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2);
4718 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4719 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2);
4720 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4721#endif /* VBOX_STRICT */
4722 rc = VERR_VMX_INVALID_GUEST_STATE;
4723 break;
4724 }
4725
4726 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4727 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4728 default:
4729 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4730 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4731 break;
4732
4733 }
4734
4735end:
4736     /* We are now going back to ring-3, so clear the action flag. */
4737 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4738
4739 /*
4740 * Signal changes for the recompiler.
4741 */
4742 CPUMSetChangedFlags(pVCpu,
4743 CPUM_CHANGED_SYSENTER_MSR
4744 | CPUM_CHANGED_LDTR
4745 | CPUM_CHANGED_GDTR
4746 | CPUM_CHANGED_IDTR
4747 | CPUM_CHANGED_TR
4748 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4749
4750 /*
4751 * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time.
4752 */
4753 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4754 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4755 {
4756 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
4757 /* On the next entry we'll only sync the host context. */
4758 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
4759 }
4760 else
4761 {
4762 /* On the next entry we'll sync everything. */
4763 /** @todo we can do better than this */
4764 /* Not in the VINF_PGM_CHANGE_MODE though! */
4765 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
4766 }
4767
4768 /* Translate into a less severe return code */
4769 if (rc == VERR_EM_INTERPRETER)
4770 rc = VINF_EM_RAW_EMULATE_INSTR;
4771 else if (rc == VERR_VMX_INVALID_VMCS_PTR)
4772 {
4773 /* Try to extract more information about what might have gone wrong here. */
4774 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
4775 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS;
4776 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
4777 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4778 }
4779
4780 /* Just set the correct state here instead of trying to catch every goto above. */
4781 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4782
4783#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4784 /* Restore interrupts if we exited after disabling them. */
4785 if (uOldEFlags != ~(RTCCUINTREG)0)
4786 ASMSetFlags(uOldEFlags);
4787#endif
4788
4789 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x);
4790 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
4791 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
4792 Log2(("X"));
4793 return VBOXSTRICTRC_TODO(rc);
4794}
4795
4796
4797/**
4798 * Enters the VT-x session.
4799 *
4800 * @returns VBox status code.
4801 * @param pVM Pointer to the VM.
4802 * @param pVCpu Pointer to the VMCPU.
4803 * @param pCpu Pointer to the CPU info struct.
4804 */
4805VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
4806{
4807 Assert(pVM->hwaccm.s.vmx.fSupported);
4808 NOREF(pCpu);
4809
4810 unsigned cr4 = ASMGetCR4();
4811 if (!(cr4 & X86_CR4_VMXE))
4812 {
4813 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4814 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4815 }
4816
4817 /* Activate the VMCS. */
4818 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4819 if (RT_FAILURE(rc))
4820 return rc;
4821
4822 pVCpu->hwaccm.s.fResumeVM = false;
4823 return VINF_SUCCESS;
4824}
4825
4826
4827/**
4828 * Leaves the VT-x session.
4829 *
4830 * @returns VBox status code.
4831 * @param pVM Pointer to the VM.
4832 * @param pVCpu Pointer to the VMCPU.
4833 * @param pCtx Pointer to the guest's CPU context.
4834 */
4835VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4836{
4837 Assert(pVM->hwaccm.s.vmx.fSupported);
4838
4839#ifdef DEBUG
4840 if (CPUMIsHyperDebugStateActive(pVCpu))
4841 {
4842 CPUMR0LoadHostDebugState(pVM, pVCpu);
4843 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4844 }
4845 else
4846#endif
4847
4848 /*
4849 * Save the guest debug state if necessary.
4850 */
4851 if (CPUMIsGuestDebugStateActive(pVCpu))
4852 {
4853 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4854
4855 /* Enable DRx move intercepts again. */
4856 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4857 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4858 AssertRC(rc);
4859
4860 /* Resync the debug registers the next time. */
4861 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4862 }
4863 else
4864 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4865
4866 /*
4867 * Clear VMCS, marking it inactive, clearing implementation-specific data and writing
4868 * VMCS data back to memory.
4869 */
4870 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4871 AssertRC(rc);
4872
4873 return VINF_SUCCESS;
4874}
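/*
 * Illustrative sketch (kept out of the build with #if 0; control flow simplified): VMXR0Enter
 * and VMXR0Leave bracket guest execution on a host CPU. Enter activates the per-VCPU VMCS,
 * Leave saves any pending debug state and clears the VMCS so it can later be made active elsewhere.
 */
#if 0
rc = VMXR0Enter(pVM, pVCpu, pCpu);
if (RT_SUCCESS(rc))
{
    /* ... run guest code and handle VM-exits ... */
    rc = VMXR0Leave(pVM, pVCpu, pCtx);
}
#endif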
4875
4876
4877/**
4878 * Flushes the TLB using EPT.
4879 *
4881 * @param pVM Pointer to the VM.
4882 * @param pVCpu Pointer to the VMCPU.
4883 * @param enmFlush Type of flush.
4884 */
4885static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
4886{
4887 uint64_t descriptor[2];
4888
4889 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
4890 Assert(pVM->hwaccm.s.fNestedPaging);
4891 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
4892 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
4893 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
4894 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.vmx.GCPhysEPTP, rc));
4895}
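/*
 * For reference (Intel SDM, INVEPT): the 128-bit descriptor used above is laid out as
 * bits 63:0 = EPT pointer and bits 127:64 = reserved (must be zero), which is exactly what
 * the descriptor[0]/descriptor[1] assignments encode.
 */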
4896
4897
4898/**
4899 * Flushes the TLB using VPID.
4900 *
4902 * @param pVM Pointer to the VM.
4903 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
4904 * enmFlush).
4905 * @param enmFlush Type of flush.
4906 * @param GCPtr Virtual address of the page to flush (can be 0 depending
4907 * on @a enmFlush).
4908 */
4909static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
4910{
4911#if HC_ARCH_BITS == 32
4912 /*
4913 * If we get a flush in 64-bit guest mode, then force a full TLB flush. invvpid probably takes only 32-bit addresses.
4914 */
4915 if ( CPUMIsGuestInLongMode(pVCpu)
4916 && !VMX_IS_64BIT_HOST_MODE())
4917 {
4918 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4919 }
4920 else
4921#endif
4922 {
4923 uint64_t descriptor[2];
4924
4925 Assert(pVM->hwaccm.s.vmx.fVPID);
4926 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
4927 {
4928 descriptor[0] = 0;
4929 descriptor[1] = 0;
4930 }
4931 else
4932 {
4933 AssertPtr(pVCpu);
4934 Assert(pVCpu->hwaccm.s.uCurrentASID != 0);
4935 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
4936 descriptor[1] = GCPtr;
4937 }
4938 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
4939 AssertMsg(rc == VINF_SUCCESS,
4940 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hwaccm.s.uCurrentASID : 0, GCPtr, rc));
4941 }
4942}
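/*
 * For reference (Intel SDM, INVVPID): the 128-bit descriptor used above carries the VPID in
 * bits 15:0, reserved-zero bits in 63:16 and the linear address in bits 127:64; only the
 * individual-address flush type actually consumes the linear address, so descriptor[1] may
 * simply carry GCPtr or zero.
 */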
4943
4944
4945/**
4946 * Invalidates a guest page by guest virtual address. Only relevant for
4947 * EPT/VPID, otherwise there is nothing really to invalidate.
4948 *
4949 * @returns VBox status code.
4950 * @param pVM Pointer to the VM.
4951 * @param pVCpu Pointer to the VMCPU.
4952 * @param GCVirt Guest virtual address of the page to invalidate.
4953 */
4954VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
4955{
4956 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4957
4958 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
4959
4960 if (!fFlushPending)
4961 {
4962 /*
4963 * We must invalidate the guest TLB entry in either case; we cannot ignore it even in the EPT case.
4964 * See @bugref{6043} and @bugref{6177}.
4965 *
4966 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*() as this
4967 * function may be called in a loop with individual addresses.
4968 */
4969 if (pVM->hwaccm.s.vmx.fVPID)
4970 {
4971 /* If we can flush just this page do it, otherwise flush as little as possible. */
4972 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
4973 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
4974 else
4975 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4976 }
4977 else if (pVM->hwaccm.s.fNestedPaging)
4978 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4979 }
4980
4981 return VINF_SUCCESS;
4982}
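/*
 * Illustrative sketch (kept out of the build with #if 0; the helper name is hypothetical):
 * invalidating a multi-page range just calls VMXR0InvalidatePage once per page. Each call either
 * issues an individual INVVPID or merely sets VMCPU_FF_TLB_FLUSH, so the expensive flushing is
 * batched until the next VM-entry in hmR0VmxSetupTLB*().
 */
#if 0
static void exampleInvalidateRange(PVM pVM, PVMCPU pVCpu, RTGCPTR GCPtrStart, size_t cPages)
{
    for (size_t i = 0; i < cPages; i++)
        VMXR0InvalidatePage(pVM, pVCpu, GCPtrStart + (i << PAGE_SHIFT));
}
#endif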
4983
4984
4985/**
4986 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
4987 * otherwise there is nothing really to invalidate.
4988 *
4989 * NOTE: Assumes the current instruction references this physical page through a virtual address!
4990 *
4991 * @returns VBox status code.
4992 * @param pVM Pointer to the VM.
4993 * @param pVCpu Pointer to the VMCPU.
4994 * @param GCPhys Guest physical address of the page to invalidate.
4995 */
4996VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
4997{
4998 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
4999
5000 /*
5001 * We cannot flush a page by guest-physical address: invvpid takes only a linear address,
5002 * while invept flushes whole EPT contexts rather than individual addresses. We update the force flag here
5003 * and flush before VMENTRY in hmR0VmxSetupTLB*(). This function might be called in a loop.
5004 */
5005 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5006 return VINF_SUCCESS;
5007}
5008
5009
5010/**
5011 * Report world switch error and dump some useful debug info.
5012 *
5013 * @param pVM Pointer to the VM.
5014 * @param pVCpu Pointer to the VMCPU.
5015 * @param rc Return code.
5016 * @param pCtx Pointer to the current guest CPU context (not updated).
5017 */
5018static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
5019{
5020 NOREF(pVM);
5021
5022 switch (VBOXSTRICTRC_VAL(rc))
5023 {
5024 case VERR_VMX_INVALID_VMXON_PTR:
5025 AssertFailed();
5026 break;
5027
5028 case VERR_VMX_UNABLE_TO_START_VM:
5029 case VERR_VMX_UNABLE_TO_RESUME_VM:
5030 {
5031 int rc2;
5032 RTCCUINTREG exitReason, instrError;
5033
5034 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
5035 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
5036 AssertRC(rc2);
5037 if (rc2 == VINF_SUCCESS)
5038 {
5039 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason,
5040 (uint32_t)instrError));
5041 Log(("Current stack %08x\n", &rc2));
5042
5043 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
5044 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
5045
5046#ifdef VBOX_STRICT
5047 RTGDTR gdtr;
5048 PCX86DESCHC pDesc;
5049 RTCCUINTREG val;
5050
5051 ASMGetGDTR(&gdtr);
5052
5053 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
5054 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
5055 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
5056 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
5057 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
5058 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
5059 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
5060 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
5061 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
5062 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
5063
5064 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
5065 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
5066 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
5067 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
5068 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
5069 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
5070
5071 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
5072 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
5073 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
5074 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
5075
5076 if (val < gdtr.cbGdt)
5077 {
5078 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5079 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
5080 }
5081
5082 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
5083 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
5084 if (val < gdtr.cbGdt)
5085 {
5086 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5087 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
5088 }
5089
5090 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
5091 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
5092 if (val < gdtr.cbGdt)
5093 {
5094 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5095 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
5096 }
5097
5098 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
5099 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
5100 if (val < gdtr.cbGdt)
5101 {
5102 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5103 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
5104 }
5105
5106 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
5107 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
5108 if (val < gdtr.cbGdt)
5109 {
5110 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5111 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
5112 }
5113
5114 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
5115 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
5116 if (val < gdtr.cbGdt)
5117 {
5118 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5119 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
5120 }
5121
5122 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
5123 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
5124 if (val < gdtr.cbGdt)
5125 {
5126 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5127 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
5128 }
5129
5130 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
5131 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
5132 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
5133 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
5134 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
5135 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
5136 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
5137 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
5138 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
5139 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
5140 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
5141 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
5142 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
5143 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
5144 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
5145 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
5146# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5147 if (VMX_IS_64BIT_HOST_MODE())
5148 {
5149 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
5150 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
5151 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5152 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5153 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5154 }
5155# endif
5156#endif /* VBOX_STRICT */
5157 }
5158 break;
5159 }
5160
5161 default:
5162 /* impossible */
5163 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
5164 break;
5165 }
5166}
5167
5168
5169#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5170/**
5171 * Prepares for and executes VMLAUNCH (64-bit guest mode).
5172 *
5173 * @returns VBox status code.
5174 * @param fResume Whether to vmlaunch/vmresume.
5175 * @param pCtx Pointer to the guest CPU context.
5176 * @param pCache Pointer to the VMCS cache.
5177 * @param pVM Pointer to the VM.
5178 * @param pVCpu Pointer to the VMCPU.
5179 */
5180DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5181{
5182 uint32_t aParam[6];
5183 PHMGLOBLCPUINFO pCpu;
5184 RTHCPHYS HCPhysCpuPage;
5185 int rc;
5186
5187 pCpu = HWACCMR0GetCurrentCpu();
5188 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5189
5190#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5191 pCache->uPos = 1;
5192 pCache->interPD = PGMGetInterPaeCR3(pVM);
5193 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
5194#endif
5195
5196#ifdef DEBUG
5197 pCache->TestIn.HCPhysCpuPage= 0;
5198 pCache->TestIn.HCPhysVMCS = 0;
5199 pCache->TestIn.pCache = 0;
5200 pCache->TestOut.HCPhysVMCS = 0;
5201 pCache->TestOut.pCache = 0;
5202 pCache->TestOut.pCtx = 0;
5203 pCache->TestOut.eflags = 0;
5204#endif
5205
5206 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5207 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5208 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */
5209 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. */
5210 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
5211 aParam[5] = 0;
5212
5213#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5214 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
5215 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
5216#endif
5217 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
5218
5219#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5220 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
5221 Assert(pCtx->dr[4] == 10);
5222 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
5223#endif
5224
5225#ifdef DEBUG
5226 AssertMsg(pCache->TestIn.HCPhysCpuPage== HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5227 AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hwaccm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5228 pVCpu->hwaccm.s.vmx.HCPhysVMCS));
5229 AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5230 pCache->TestOut.HCPhysVMCS));
5231 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5232 pCache->TestOut.pCache));
5233 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache),
5234 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
5235 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5236 pCache->TestOut.pCtx));
5237 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5238#endif
5239 return rc;
5240}
5241
5242
5243# ifdef VBOX_STRICT
5244static bool hmR0VmxIsValidReadField(uint32_t idxField)
5245{
5246 switch (idxField)
5247 {
5248 case VMX_VMCS64_GUEST_RIP:
5249 case VMX_VMCS64_GUEST_RSP:
5250 case VMX_VMCS_GUEST_RFLAGS:
5251 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
5252 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
5253 case VMX_VMCS64_GUEST_CR0:
5254 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
5255 case VMX_VMCS64_GUEST_CR4:
5256 case VMX_VMCS64_GUEST_DR7:
5257 case VMX_VMCS32_GUEST_SYSENTER_CS:
5258 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5259 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5260 case VMX_VMCS32_GUEST_GDTR_LIMIT:
5261 case VMX_VMCS64_GUEST_GDTR_BASE:
5262 case VMX_VMCS32_GUEST_IDTR_LIMIT:
5263 case VMX_VMCS64_GUEST_IDTR_BASE:
5264 case VMX_VMCS16_GUEST_FIELD_CS:
5265 case VMX_VMCS32_GUEST_CS_LIMIT:
5266 case VMX_VMCS64_GUEST_CS_BASE:
5267 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
5268 case VMX_VMCS16_GUEST_FIELD_DS:
5269 case VMX_VMCS32_GUEST_DS_LIMIT:
5270 case VMX_VMCS64_GUEST_DS_BASE:
5271 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
5272 case VMX_VMCS16_GUEST_FIELD_ES:
5273 case VMX_VMCS32_GUEST_ES_LIMIT:
5274 case VMX_VMCS64_GUEST_ES_BASE:
5275 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
5276 case VMX_VMCS16_GUEST_FIELD_FS:
5277 case VMX_VMCS32_GUEST_FS_LIMIT:
5278 case VMX_VMCS64_GUEST_FS_BASE:
5279 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
5280 case VMX_VMCS16_GUEST_FIELD_GS:
5281 case VMX_VMCS32_GUEST_GS_LIMIT:
5282 case VMX_VMCS64_GUEST_GS_BASE:
5283 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
5284 case VMX_VMCS16_GUEST_FIELD_SS:
5285 case VMX_VMCS32_GUEST_SS_LIMIT:
5286 case VMX_VMCS64_GUEST_SS_BASE:
5287 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
5288 case VMX_VMCS16_GUEST_FIELD_LDTR:
5289 case VMX_VMCS32_GUEST_LDTR_LIMIT:
5290 case VMX_VMCS64_GUEST_LDTR_BASE:
5291 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
5292 case VMX_VMCS16_GUEST_FIELD_TR:
5293 case VMX_VMCS32_GUEST_TR_LIMIT:
5294 case VMX_VMCS64_GUEST_TR_BASE:
5295 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
5296 case VMX_VMCS32_RO_EXIT_REASON:
5297 case VMX_VMCS32_RO_VM_INSTR_ERROR:
5298 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
5299 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
5300 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
5301 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
5302 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5303 case VMX_VMCS32_RO_IDT_INFO:
5304 case VMX_VMCS32_RO_IDT_ERRCODE:
5305 case VMX_VMCS64_GUEST_CR3:
5306 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
5307 return true;
5308 }
5309 return false;
5310}
5311
5312
5313static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5314{
5315 switch (idxField)
5316 {
5317 case VMX_VMCS64_GUEST_LDTR_BASE:
5318 case VMX_VMCS64_GUEST_TR_BASE:
5319 case VMX_VMCS64_GUEST_GDTR_BASE:
5320 case VMX_VMCS64_GUEST_IDTR_BASE:
5321 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5322 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5323 case VMX_VMCS64_GUEST_CR0:
5324 case VMX_VMCS64_GUEST_CR4:
5325 case VMX_VMCS64_GUEST_CR3:
5326 case VMX_VMCS64_GUEST_DR7:
5327 case VMX_VMCS64_GUEST_RIP:
5328 case VMX_VMCS64_GUEST_RSP:
5329 case VMX_VMCS64_GUEST_CS_BASE:
5330 case VMX_VMCS64_GUEST_DS_BASE:
5331 case VMX_VMCS64_GUEST_ES_BASE:
5332 case VMX_VMCS64_GUEST_FS_BASE:
5333 case VMX_VMCS64_GUEST_GS_BASE:
5334 case VMX_VMCS64_GUEST_SS_BASE:
5335 return true;
5336 }
5337 return false;
5338}
5339# endif /* VBOX_STRICT */
5340
5341
5342/**
5343 * Executes the specified handler in 64-bit mode.
5344 *
5345 * @returns VBox status code.
5346 * @param pVM Pointer to the VM.
5347 * @param pVCpu Pointer to the VMCPU.
5348 * @param pCtx Pointer to the guest CPU context.
5349 * @param pfnHandler Pointer to the RC handler function.
5350 * @param cbParam Number of parameters.
5351 * @param paParam Array of 32-bit parameters.
5352 */
5353VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
5354 uint32_t *paParam)
5355{
5356 int rc, rc2;
5357 PHMGLOBLCPUINFO pCpu;
5358 RTHCPHYS HCPhysCpuPage;
5359 RTHCUINTREG uOldEFlags;
5360
5361 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5362 Assert(pfnHandler);
5363 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
5364 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
5365
5366#ifdef VBOX_STRICT
5367 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries; i++)
5368 Assert(hmR0VmxIsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
5369
5370 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries; i++)
5371 Assert(hmR0VmxIsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
5372#endif
5373
5374 /* Disable interrupts. */
5375 uOldEFlags = ASMIntDisableFlags();
5376
5377#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5378 RTCPUID idHostCpu = RTMpCpuId();
5379 CPUMR0SetLApic(pVM, idHostCpu);
5380#endif
5381
5382 pCpu = HWACCMR0GetCurrentCpu();
5383 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5384
5385 /* Clear the VMCS, marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */
5386 VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5387
5388 /* Leave VMX Root Mode. */
5389 VMXDisable();
5390
5391 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5392
5393 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5394 CPUMSetHyperEIP(pVCpu, pfnHandler);
5395 for (int i = (int)cbParam - 1; i >= 0; i--)
5396 CPUMPushHyper(pVCpu, paParam[i]);
5397
5398 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5399
5400 /* Call switcher. */
5401 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5402 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5403
5404 /* Make sure the VMX instructions don't cause #UD faults. */
5405 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5406
5407 /* Enter VMX Root Mode */
5408 rc2 = VMXEnable(HCPhysCpuPage);
5409 if (RT_FAILURE(rc2))
5410 {
5411 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5412 ASMSetFlags(uOldEFlags);
5413 return VERR_VMX_VMXON_FAILED;
5414 }
5415
5416 rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5417 AssertRC(rc2);
5418 Assert(!(ASMGetFlags() & X86_EFL_IF));
5419 ASMSetFlags(uOldEFlags);
5420 return rc;
5421}
5422#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5423
5424
5425#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
5426/**
5427 * Executes VMWRITE.
5428 *
5429 * @returns VBox status code
5430 * @param pVCpu Pointer to the VMCPU.
5431 * @param idxField VMCS field index.
5432 * @param u64Val 16, 32 or 64-bit value.
5433 */
5434VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5435{
5436 int rc;
5437 switch (idxField)
5438 {
5439 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
5440 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
5441 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
5442 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
5443 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
5444 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
5445 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
5446 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
5447 case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL:
5448 case VMX_VMCS_GUEST_LINK_PTR_FULL:
5449 case VMX_VMCS_GUEST_PDPTR0_FULL:
5450 case VMX_VMCS_GUEST_PDPTR1_FULL:
5451 case VMX_VMCS_GUEST_PDPTR2_FULL:
5452 case VMX_VMCS_GUEST_PDPTR3_FULL:
5453 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
5454 case VMX_VMCS_GUEST_EFER_FULL:
5455 case VMX_VMCS_CTRL_EPTP_FULL:
5456 /* These fields consist of two parts, both of which are writable in 32-bit mode. */
5457 rc = VMXWriteVMCS32(idxField, u64Val);
5458 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5459 AssertRC(rc);
5460 return rc;
5461
5462 case VMX_VMCS64_GUEST_LDTR_BASE:
5463 case VMX_VMCS64_GUEST_TR_BASE:
5464 case VMX_VMCS64_GUEST_GDTR_BASE:
5465 case VMX_VMCS64_GUEST_IDTR_BASE:
5466 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5467 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5468 case VMX_VMCS64_GUEST_CR0:
5469 case VMX_VMCS64_GUEST_CR4:
5470 case VMX_VMCS64_GUEST_CR3:
5471 case VMX_VMCS64_GUEST_DR7:
5472 case VMX_VMCS64_GUEST_RIP:
5473 case VMX_VMCS64_GUEST_RSP:
5474 case VMX_VMCS64_GUEST_CS_BASE:
5475 case VMX_VMCS64_GUEST_DS_BASE:
5476 case VMX_VMCS64_GUEST_ES_BASE:
5477 case VMX_VMCS64_GUEST_FS_BASE:
5478 case VMX_VMCS64_GUEST_GS_BASE:
5479 case VMX_VMCS64_GUEST_SS_BASE:
5480 /* Queue a 64-bit value as we can't set it in 32-bit host mode. */
5481 if (u64Val >> 32ULL)
5482 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
5483 else
5484 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
5485
5486 return rc;
5487
5488 default:
5489 AssertMsgFailed(("Unexpected field %x\n", idxField));
5490 return VERR_INVALID_PARAMETER;
5491 }
5492}
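/*
 * Illustrative usage sketch (kept out of the build with #if 0): on a 32-bit host a guest
 * FS base above 4GB cannot be written with VMXWriteVMCS32, so VMXWriteVMCS64Ex queues it in
 * the VMCS write cache; a value that fits in 32 bits is written to the VMCS directly.
 */
#if 0
rc = VMXWriteVMCS64Ex(pVCpu, VMX_VMCS64_GUEST_FS_BASE, UINT64_C(0xffffe00000000000)); /* high bits set -> cached */
rc = VMXWriteVMCS64Ex(pVCpu, VMX_VMCS64_GUEST_CS_BASE, UINT64_C(0x0000000000010000)); /* fits in 32 bits -> direct */
#endif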
5493
5494
5495/**
5496 * Caches VMCS writes for performance reasons (Darwin) and for running 64-bit guests on 32-bit hosts.
5497 *
5498 * @param pVCpu Pointer to the VMCPU.
5499 * @param idxField VMCS field index.
5500 * @param u64Val 16, 32 or 64-bit value.
5501 */
5502VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5503{
5504 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
5505
5506 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5507 ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5508
5509 /* Make sure there are no duplicates. */
5510 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
5511 {
5512 if (pCache->Write.aField[i] == idxField)
5513 {
5514 pCache->Write.aFieldVal[i] = u64Val;
5515 return VINF_SUCCESS;
5516 }
5517 }
5518
5519 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5520 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5521 pCache->Write.cValidEntries++;
5522 return VINF_SUCCESS;
5523}
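/*
 * Illustrative sketch (kept out of the build with #if 0): cached writes to the same VMCS
 * field coalesce into a single entry holding the latest value, so the cache never grows
 * beyond one slot per distinct field.
 */
#if 0
VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_GS_BASE, UINT64_C(0x0000000100000000));
VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_GS_BASE, UINT64_C(0x0000000200000000)); /* overwrites the entry above */
#endif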
5524
5525#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
5526