VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@41728

Last change on this file since 41728 was 41728, checked in by vboxsync, 13 years ago

DIS: register macro name adjustments - part two.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 215.6 KB
1/* $Id: HWVMXR0.cpp 41728 2012-06-14 23:04:57Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HWACCM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hwaccm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HWACCMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
55#if defined(RT_ARCH_AMD64)
56# define VMX_IS_64BIT_HOST_MODE() (true)
57#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
58# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
59#else
60# define VMX_IS_64BIT_HOST_MODE() (false)
61#endif
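/* Note: on AMD64 builds the host is unconditionally 64-bit; with the hybrid 32-bit
   kernel the decision is made at runtime via g_fVMXIs64bitHost (set by the assembly
   code in HWACCMR0A.asm); on plain 32-bit hosts the macro is constant false. */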
62
63
64/*******************************************************************************
65* Global Variables *
66*******************************************************************************/
67/* IO operation lookup arrays. */
68static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
69static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
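/* Note: these tables are indexed by the access-size field of the I/O-instruction VM-exit
   qualification (0 = byte, 1 = word, 3 = dword; 2 is not a valid encoding, hence the
   zero entries). */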
70
71#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
72/** See HWACCMR0A.asm. */
73extern "C" uint32_t g_fVMXIs64bitHost;
74#endif
75
76
77/*******************************************************************************
78* Local Functions *
79*******************************************************************************/
80static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
81static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
82static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
83static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
84static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
85static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
86static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
87static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
88static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
89
90
91/**
92 * Updates the error from the VMCS into HWACCMCPU's lasterror record.
93 *
94 * @param pVM Pointer to the VM.
95 * @param pVCpu Pointer to the VMCPU.
96 * @param rc The error code.
97 */
98static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
99{
100 if (rc == VERR_VMX_GENERIC)
101 {
102 RTCCUINTREG instrError;
103
104 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
105 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
106 }
107 pVM->hwaccm.s.lLastError = rc;
108}
109
110
111/**
112 * Sets up and activates VT-x on the current CPU.
113 *
114 * @returns VBox status code.
115 * @param pCpu Pointer to the CPU info struct.
116 * @param pVM Pointer to the VM. (can be NULL after a resume!!)
117 * @param pvCpuPage Pointer to the global CPU page.
118 * @param HCPhysCpuPage Physical address of the global CPU page.
119 */
120VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
121{
122 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
123 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
124
125 if (pVM)
126 {
127 /* Set revision dword at the beginning of the VMXON structure. */
128 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
129 }
130
131 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
132 * (which can have very bad consequences!!!)
133 */
134
135 if (ASMGetCR4() & X86_CR4_VMXE)
136 return VERR_VMX_IN_VMX_ROOT_MODE;
137
138 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */
139
140 /*
141 * Enter VM root mode.
142 */
143 int rc = VMXEnable(HCPhysCpuPage);
144 if (RT_FAILURE(rc))
145 {
146 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
147 return VERR_VMX_VMXON_FAILED;
148 }
149
150 /*
151 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
152 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
153 * each time while reusing a VPID after hitting the MaxASID limit once.
154 */
155 if ( pVM
156 && pVM->hwaccm.s.vmx.fVPID
157 && (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS))
158 {
159 hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
160 pCpu->fFlushASIDBeforeUse = false;
161 }
162 else
163 pCpu->fFlushASIDBeforeUse = true;
164
165 return VINF_SUCCESS;
166}
167
168
169/**
170 * Deactivates VT-x on the current CPU.
171 *
172 * @returns VBox status code.
173 * @param pCpu Pointer to the CPU info struct.
174 * @param pvCpuPage Pointer to the global CPU page.
175 * @param HCPhysCpuPage Physical address of the global CPU page.
176 */
177VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
178{
179 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
180 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
181 NOREF(pCpu);
182
183 /* If we're somehow not in VMX root mode, then we shouldn't dare leaving it. */
184 if (!(ASMGetCR4() & X86_CR4_VMXE))
185 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
186
187 /* Leave VMX Root Mode. */
188 VMXDisable();
189
190 /* And clear the X86_CR4_VMXE bit. */
191 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
192 return VINF_SUCCESS;
193}
194
195
196/**
197 * Does Ring-0 per VM VT-x initialization.
198 *
199 * @returns VBox status code.
200 * @param pVM Pointer to the VM.
201 */
202VMMR0DECL(int) VMXR0InitVM(PVM pVM)
203{
204 int rc;
205
206#ifdef LOG_ENABLED
207 SUPR0Printf("VMXR0InitVM %x\n", pVM);
208#endif
209
210 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
211
212 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
213 {
214 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
215 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, PAGE_SIZE, true /* executable R0 mapping */);
216 AssertRC(rc);
217 if (RT_FAILURE(rc))
218 return rc;
219
220 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
221 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
222 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
223 }
224 else
225 {
226 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
227 pVM->hwaccm.s.vmx.pAPIC = 0;
228 pVM->hwaccm.s.vmx.pAPICPhys = 0;
229 }
230
231#ifdef VBOX_WITH_CRASHDUMP_MAGIC
232 {
233 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, PAGE_SIZE, true /* executable R0 mapping */);
234 AssertRC(rc);
235 if (RT_FAILURE(rc))
236 return rc;
237
238 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
239 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
240
241 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
242 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
243 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
244 }
245#endif
246
247 /* Allocate VMCSs for all guest CPUs. */
248 for (VMCPUID i = 0; i < pVM->cCpus; i++)
249 {
250 PVMCPU pVCpu = &pVM->aCpus[i];
251
252 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
253
254 /* Allocate one page for the VM control structure (VMCS). */
255 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVMCS, PAGE_SIZE, true /* executable R0 mapping */);
256 AssertRC(rc);
257 if (RT_FAILURE(rc))
258 return rc;
259
260 pVCpu->hwaccm.s.vmx.pvVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVMCS);
261 pVCpu->hwaccm.s.vmx.HCPhysVMCS = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVMCS, 0);
262 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pvVMCS);
263
264 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
265 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
266
267 /* Allocate one page for the virtual APIC page for TPR caching. */
268 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.hMemObjVAPIC, PAGE_SIZE, true /* executable R0 mapping */);
269 AssertRC(rc);
270 if (RT_FAILURE(rc))
271 return rc;
272
273 pVCpu->hwaccm.s.vmx.pbVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.hMemObjVAPIC);
274 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, 0);
275 ASMMemZeroPage(pVCpu->hwaccm.s.vmx.pbVAPIC);
276
277 /* Allocate the MSR bitmap if this feature is supported. */
278 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
279 {
280 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, PAGE_SIZE, true /* executable R0 mapping */);
281 AssertRC(rc);
282 if (RT_FAILURE(rc))
283 return rc;
284
285 pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap);
286 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
287 memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
288 }
289
290#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
291 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
292 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, PAGE_SIZE, true /* executable R0 mapping */);
293 AssertRC(rc);
294 if (RT_FAILURE(rc))
295 return rc;
296
297 pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR);
298 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0);
299 memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE);
300
301 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
302 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, PAGE_SIZE, true /* executable R0 mapping */);
303 AssertRC(rc);
304 if (RT_FAILURE(rc))
305 return rc;
306
307 pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR);
308 pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0);
309 memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE);
310#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
311
312 /* Current guest paging mode. */
313 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
314
315#ifdef LOG_ENABLED
316 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pvVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.HCPhysVMCS);
317#endif
318 }
319
320 return VINF_SUCCESS;
321}
322
323
324/**
325 * Does Ring-0 per VM VT-x termination.
326 *
327 * @returns VBox status code.
328 * @param pVM Pointer to the VM.
329 */
330VMMR0DECL(int) VMXR0TermVM(PVM pVM)
331{
332 for (VMCPUID i = 0; i < pVM->cCpus; i++)
333 {
334 PVMCPU pVCpu = &pVM->aCpus[i];
335
336 if (pVCpu->hwaccm.s.vmx.hMemObjVMCS != NIL_RTR0MEMOBJ)
337 {
338 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVMCS, false);
339 pVCpu->hwaccm.s.vmx.hMemObjVMCS = NIL_RTR0MEMOBJ;
340 pVCpu->hwaccm.s.vmx.pvVMCS = 0;
341 pVCpu->hwaccm.s.vmx.HCPhysVMCS = 0;
342 }
343 if (pVCpu->hwaccm.s.vmx.hMemObjVAPIC != NIL_RTR0MEMOBJ)
344 {
345 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.hMemObjVAPIC, false);
346 pVCpu->hwaccm.s.vmx.hMemObjVAPIC = NIL_RTR0MEMOBJ;
347 pVCpu->hwaccm.s.vmx.pbVAPIC = 0;
348 pVCpu->hwaccm.s.vmx.HCPhysVAPIC = 0;
349 }
350 if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
351 {
352 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false);
353 pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
354 pVCpu->hwaccm.s.vmx.pMSRBitmap = 0;
355 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0;
356 }
357#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
358 if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ)
359 {
360 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false);
361 pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ;
362 pVCpu->hwaccm.s.vmx.pHostMSR = 0;
363 pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0;
364 }
365 if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ)
366 {
367 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false);
368 pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ;
369 pVCpu->hwaccm.s.vmx.pGuestMSR = 0;
370 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0;
371 }
372#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
373 }
374 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
375 {
376 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
377 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
378 pVM->hwaccm.s.vmx.pAPIC = 0;
379 pVM->hwaccm.s.vmx.pAPICPhys = 0;
380 }
381#ifdef VBOX_WITH_CRASHDUMP_MAGIC
382 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
383 {
384 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
385 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
386 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
387 pVM->hwaccm.s.vmx.pScratch = 0;
388 pVM->hwaccm.s.vmx.pScratchPhys = 0;
389 }
390#endif
391 return VINF_SUCCESS;
392}
393
394
395/**
396 * Sets up VT-x for the specified VM.
397 *
398 * @returns VBox status code.
399 * @param pVM Pointer to the VM.
400 */
401VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
402{
403 int rc = VINF_SUCCESS;
404 uint32_t val;
405
406 AssertReturn(pVM, VERR_INVALID_PARAMETER);
407
408 /* Initialize these always, see hwaccmR3InitFinalizeR0(). */
409 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NONE;
410 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NONE;
411
412 /* Determine optimal flush type for EPT. */
413 if (pVM->hwaccm.s.fNestedPaging)
414 {
415 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_SINGLE_CONTEXT)
416 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_SINGLE_CONTEXT;
417 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_ALL_CONTEXTS)
418 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_ALL_CONTEXTS;
419 else
420 {
421 /*
422 * Should never really happen: EPT is supported but no suitable flush types are supported.
423 * We cannot ignore EPT at this point as we've already set up Unrestricted Guest execution.
424 */
425 pVM->hwaccm.s.vmx.enmFlushEPT = VMX_FLUSH_EPT_NOT_SUPPORTED;
426 return VERR_VMX_GENERIC;
427 }
428 }
429
430 /* Determine optimal flush type for VPID. */
431 if (pVM->hwaccm.s.vmx.fVPID)
432 {
433 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
434 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_SINGLE_CONTEXT;
435 else if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_ALL_CONTEXTS)
436 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_ALL_CONTEXTS;
437 else
438 {
439 /*
440 * Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU.
441 * We do not handle other flush type combinations, so ignore the VPID capability.
442 */
443 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
444 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
445 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT_RETAIN_GLOBALS)
446 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
447 pVM->hwaccm.s.vmx.enmFlushVPID = VMX_FLUSH_VPID_NOT_SUPPORTED;
448 pVM->hwaccm.s.vmx.fVPID = false;
449 }
450 }
451
452 for (VMCPUID i = 0; i < pVM->cCpus; i++)
453 {
454 PVMCPU pVCpu = &pVM->aCpus[i];
455
456 AssertPtr(pVCpu->hwaccm.s.vmx.pvVMCS);
457
458 /* Set revision dword at the beginning of the VMCS structure. */
459 *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
460
461 /*
462 * Clear and activate the VMCS.
463 */
464 Log(("HCPhysVMCS = %RHp\n", pVCpu->hwaccm.s.vmx.HCPhysVMCS));
465 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
466 if (RT_FAILURE(rc))
467 goto vmx_end;
468
469 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
470 if (RT_FAILURE(rc))
471 goto vmx_end;
472
473 /*
474 * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
475 * Set required bits to one and zero according to the MSR capabilities.
476 */
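 /* Note: each VMX control capability MSR yields two masks: bits that must be 1
    (kept here as 'disallowed0') and bits that may be 1 ('allowed1'). The pattern
    used below and for the other control fields is therefore: start from disallowed0,
    OR in the features we want, then AND with allowed1 so no unsupported bit is set. */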
477 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
478 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */
479 | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */
480
481 /*
482 * Enable the VMX preemption timer.
483 */
484 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
485 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
486 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
487
488 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
489 AssertRC(rc);
490
491 /*
492 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
493 * Set required bits to one and zero according to the MSR capabilities.
494 */
495 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
496 /* Program which events cause VM-exits and which features we want to use. */
497 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
498 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
499 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
500 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
501 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
502 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
503 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside
504 the guest (host thinks the cpu load is high) */
505
506 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
507 if (!pVM->hwaccm.s.fNestedPaging)
508 {
509 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
510 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
511 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
512 }
513
514 /*
515 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch
516 * failure with an invalid control fields error. (combined with some other exit reasons)
517 */
518 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
519 {
520 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
521 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
522 Assert(pVM->hwaccm.s.vmx.pAPIC);
523 }
524 else
525 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
526 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
527
528 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
529 {
530 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
531 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
532 }
533
534 /* We will use the secondary control if it's present. */
535 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
536
537 /* Mask away the bits that the CPU doesn't support */
538 /** @todo make sure they don't conflict with the above requirements. */
539 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
540 pVCpu->hwaccm.s.vmx.proc_ctls = val;
541
542 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
543 AssertRC(rc);
544
545 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
546 {
547 /*
548 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
549 * Set required bits to one and zero according to the MSR capabilities.
550 */
551 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
552 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
553
554 if (pVM->hwaccm.s.fNestedPaging)
555 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
556
557 if (pVM->hwaccm.s.vmx.fVPID)
558 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
559
560 if (pVM->hwaccm.s.fHasIoApic)
561 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
562
563 if (pVM->hwaccm.s.vmx.fUnrestrictedGuest)
564 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
565
566 /* Mask away the bits that the CPU doesn't support */
567 /** @todo make sure they don't conflict with the above requirements. */
568 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
569 pVCpu->hwaccm.s.vmx.proc_ctls2 = val;
570 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
571 AssertRC(rc);
572 }
573
574 /*
575 * VMX_VMCS_CTRL_CR3_TARGET_COUNT
576 * Set required bits to one and zero according to the MSR capabilities.
577 */
578 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
579 AssertRC(rc);
580
581 /*
582 * Forward all exceptions except #NM & #PF to the guest.
583 * We always need to check page faults since our shadow page tables can be out of sync.
584 * And we always lazily sync the FPU & XMM state.
585 */
586
587 /** @todo Possible optimization:
588 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
589 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
590 * registers ourselves of course.
591 *
592 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
593 */
594
595 /*
596 * Don't filter page faults, all of them should cause a world switch.
597 */
598 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
599 AssertRC(rc);
600 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
601 AssertRC(rc);
602
603 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
604 AssertRC(rc);
605 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
606 AssertRC(rc);
607 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
608 AssertRC(rc);
609
610 /*
611 * Set the MSR bitmap address.
612 */
613 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
614 {
615 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
616
617 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
618 AssertRC(rc);
619
620 /* Allow the guest to directly modify these MSRs; they are restored and saved automatically. */
621 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
622 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
623 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
624 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
625 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
626 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
627 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
628 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
629 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
630 }
631
632#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
633 /*
634 * Set the guest & host MSR load/store physical addresses.
635 */
636 Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
637 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
638 AssertRC(rc);
639 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
640 AssertRC(rc);
641
642 Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys);
643 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys);
644 AssertRC(rc);
645#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
646
647 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
648 AssertRC(rc);
649 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
650 AssertRC(rc);
651
652 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
653 {
654 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
655 /* Optional */
656 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
657 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.HCPhysVAPIC);
658
659 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
660 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
661
662 AssertRC(rc);
663 }
664
665 /* Set link pointer to -1. Not currently used. */
666 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
667 AssertRC(rc);
668
669 /*
670 * Clear the VMCS, marking it inactive. This clears implementation-specific data and
671 * writes the VMCS data back to memory.
672 */
673 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
674 AssertRC(rc);
675
676 /*
677 * Configure the VMCS read cache.
678 */
679 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
680
681 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
682 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
683 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
684 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
685 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
686 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
687 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
688 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
689 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
690 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
691 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
692 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
693 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
694 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
695 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
696 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
697
698 VMX_SETUP_SELREG(ES, pCache);
699 VMX_SETUP_SELREG(SS, pCache);
700 VMX_SETUP_SELREG(CS, pCache);
701 VMX_SETUP_SELREG(DS, pCache);
702 VMX_SETUP_SELREG(FS, pCache);
703 VMX_SETUP_SELREG(GS, pCache);
704 VMX_SETUP_SELREG(LDTR, pCache);
705 VMX_SETUP_SELREG(TR, pCache);
706
707 /*
708 * Status code VMCS reads.
709 */
710 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
711 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
712 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
713 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
714 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
715 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
716 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
717 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
718 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
719
720 if (pVM->hwaccm.s.fNestedPaging)
721 {
722 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
723 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
724 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
725 }
726 else
727 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
728 } /* for each VMCPU */
729
730 /*
731 * Set up the right TLB function based on CPU capabilities.
732 */
733 if (pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID)
734 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBBoth;
735 else if (pVM->hwaccm.s.fNestedPaging)
736 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBEPT;
737 else if (pVM->hwaccm.s.vmx.fVPID)
738 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBVPID;
739 else
740 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = hmR0VmxSetupTLBDummy;
741
742vmx_end:
743 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
744 return rc;
745}
746
747
748/**
749 * Sets the permission bits for the specified MSR.
750 *
751 * @param pVCpu Pointer to the VMCPU.
752 * @param ulMSR The MSR index.
753 * @param fRead Whether reading is allowed.
754 * @param fWrite Whether writing is allowed.
755 */
756static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
757{
758 unsigned ulBit;
759 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap;
760
761 /*
762 * Layout:
763 * 0x000 - 0x3ff - Low MSR read bits
764 * 0x400 - 0x7ff - High MSR read bits
765 * 0x800 - 0xbff - Low MSR write bits
766 * 0xc00 - 0xfff - High MSR write bits
767 */
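 /* Worked example: MSR_K8_LSTAR (0xC0000082) falls into the high range, so ulBit
    becomes 0x82 and pMSRBitmap is advanced by 0x400; the MSR's read-permission bit
    thus lives in the 0x400-0x7ff region and its write bit at +0x800, i.e. 0xc00-0xfff. */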
768 if (ulMSR <= 0x00001FFF)
769 {
770 /* Pentium-compatible MSRs */
771 ulBit = ulMSR;
772 }
773 else if ( ulMSR >= 0xC0000000
774 && ulMSR <= 0xC0001FFF)
775 {
776 /* AMD Sixth Generation x86 Processor MSRs */
777 ulBit = (ulMSR - 0xC0000000);
778 pMSRBitmap += 0x400;
779 }
780 else
781 {
782 AssertFailed();
783 return;
784 }
785
786 Assert(ulBit <= 0x1fff);
787 if (fRead)
788 ASMBitClear(pMSRBitmap, ulBit);
789 else
790 ASMBitSet(pMSRBitmap, ulBit);
791
792 if (fWrite)
793 ASMBitClear(pMSRBitmap + 0x800, ulBit);
794 else
795 ASMBitSet(pMSRBitmap + 0x800, ulBit);
796}
797
798
799/**
800 * Injects an event (trap or external interrupt).
801 *
802 * @returns VBox status code. Note that it may return VINF_EM_RESET to
803 * indicate a triple fault when injecting X86_XCPT_DF.
804 *
805 * @param pVM Pointer to the VM.
806 * @param pVCpu Pointer to the VMCPU.
807 * @param pCtx Pointer to the guest CPU Context.
808 * @param intInfo VMX interrupt info.
809 * @param cbInstr Opcode length of faulting instruction.
810 * @param errCode Error code (optional).
811 */
812static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
813{
814 int rc;
815 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
816
817#ifdef VBOX_WITH_STATISTICS
818 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
819#endif
820
821#ifdef VBOX_STRICT
822 if (iGate == 0xE)
823 {
824 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate,
825 (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
826 }
827 else if (iGate < 0x20)
828 {
829 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip,
830 errCode));
831 }
832 else
833 {
834 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
835 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
836 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
837 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
838 || pCtx->eflags.u32 & X86_EFL_IF);
839 }
840#endif
841
842 if ( CPUMIsGuestInRealModeEx(pCtx)
843 && pVM->hwaccm.s.vmx.pRealModeTSS)
844 {
845 RTGCPHYS GCPhysHandler;
846 uint16_t offset, ip;
847 RTSEL sel;
848
849 /*
850 * Injecting events doesn't work right with real mode emulation.
851 * (#GP if we try to inject external hardware interrupts)
852 * Inject the interrupt or trap directly instead.
853 *
854 * ASSUMES no access handlers for the bits we read or write below (should be safe).
855 */
856 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
857
858 /*
859 * Check if the interrupt handler is present.
860 */
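 /* Each real-mode IVT entry is 4 bytes (a 16-bit offset followed by a 16-bit segment),
    so vector iGate occupies bytes iGate*4 .. iGate*4+3 and must fit within the IDT limit. */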
861 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
862 {
863 Log(("IDT cbIdt violation\n"));
864 if (iGate != X86_XCPT_DF)
865 {
866 uint32_t intInfo2;
867
868 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
869 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
870 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
871 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
872
873 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
874 }
875 Log(("Triple fault -> reset the VM!\n"));
876 return VINF_EM_RESET;
877 }
878 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
879 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
880 || iGate == 4)
881 {
882 ip = pCtx->ip + cbInstr;
883 }
884 else
885 ip = pCtx->ip;
886
887 /*
888 * Read the selector:offset pair of the interrupt handler.
889 */
890 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
891 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
892 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
893
894 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
895
896 /*
897 * Construct the stack frame.
898 */
899 /** @todo Check stack limit. */
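 /* The three pushes below build a real-mode interrupt frame: FLAGS at sp+4, CS at sp+2
    and the return IP at sp, which is exactly what the handler's IRET expects. */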
900 pCtx->sp -= 2;
901 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
902 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
903 pCtx->sp -= 2;
904 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
905 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
906 pCtx->sp -= 2;
907 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
908 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
909
910 /*
911 * Update the CPU state for executing the handler.
912 */
913 pCtx->rip = offset;
914 pCtx->cs = sel;
915 pCtx->csHid.u64Base = sel << 4;
916 pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
917
918 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
919 return VINF_SUCCESS;
920 }
921
922 /*
923 * Set event injection state.
924 */
925 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
926 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
927 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
928
929 AssertRC(rc);
930 return rc;
931}
932
933
934/**
935 * Checks for pending guest interrupts and injects them.
936 *
937 * @returns VBox status code.
938 * @param pVM Pointer to the VM.
939 * @param pVCpu Pointer to the VMCPU.
940 * @param pCtx Pointer to the guest CPU context.
941 */
942static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
943{
944 int rc;
945
946 /*
947 * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely).
948 */
949 if (pVCpu->hwaccm.s.Event.fPending)
950 {
951 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo,
952 pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
953 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
954 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
955 AssertRC(rc);
956
957 pVCpu->hwaccm.s.Event.fPending = false;
958 return VINF_SUCCESS;
959 }
960
961 /*
962 * If an active trap is already pending, we must forward it first!
963 */
964 if (!TRPMHasTrap(pVCpu))
965 {
966 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
967 {
968 RTGCUINTPTR intInfo;
969
970 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
971
972 intInfo = X86_XCPT_NMI;
973 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
974 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
975
976 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
977 AssertRC(rc);
978
979 return VINF_SUCCESS;
980 }
981
982 /** @todo SMI interrupts. */
983
984 /*
985 * When external interrupts are pending, we should exit the VM when IF is set.
986 */
987 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
988 {
989 if (!(pCtx->eflags.u32 & X86_EFL_IF))
990 {
991 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
992 {
993 LogFlow(("Enable irq window exit!\n"));
994 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
995 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
996 AssertRC(rc);
997 }
998 /* else nothing to do but wait */
999 }
1000 else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1001 {
1002 uint8_t u8Interrupt;
1003
1004 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
1005 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu,
1006 u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
1007 if (RT_SUCCESS(rc))
1008 {
1009 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
1010 AssertRC(rc);
1011 }
1012 else
1013 {
1014 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
1015 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
1016 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
1017 /* Just continue */
1018 }
1019 }
1020 else
1021 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
1022 }
1023 }
1024
1025#ifdef VBOX_STRICT
1026 if (TRPMHasTrap(pVCpu))
1027 {
1028 uint8_t u8Vector;
1029 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
1030 AssertRC(rc);
1031 }
1032#endif
1033
1034 if ( (pCtx->eflags.u32 & X86_EFL_IF)
1035 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1036 && TRPMHasTrap(pVCpu)
1037 )
1038 {
1039 uint8_t u8Vector;
1040 TRPMEVENT enmType;
1041 RTGCUINTPTR intInfo;
1042 RTGCUINT errCode;
1043
1044 /*
1045 * If a new event is pending, dispatch it now.
1046 */
1047 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
1048 AssertRC(rc);
1049 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
1050 Assert(enmType != TRPM_SOFTWARE_INT);
1051
1052 /*
1053 * Clear the pending trap.
1054 */
1055 rc = TRPMResetTrap(pVCpu);
1056 AssertRC(rc);
1057
1058 intInfo = u8Vector;
1059 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1060
1061 if (enmType == TRPM_TRAP)
1062 {
1063 switch (u8Vector)
1064 {
1065 case X86_XCPT_DF:
1066 case X86_XCPT_TS:
1067 case X86_XCPT_NP:
1068 case X86_XCPT_SS:
1069 case X86_XCPT_GP:
1070 case X86_XCPT_PF:
1071 case X86_XCPT_AC:
1072 {
1073 /* Valid error codes. */
1074 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1075 break;
1076 }
1077
1078 default:
1079 break;
1080 }
1081
1082 if ( u8Vector == X86_XCPT_BP
1083 || u8Vector == X86_XCPT_OF)
1084 {
1085 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1086 }
1087 else
1088 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1089 }
1090 else
1091 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1092
1093 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
1094 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1095 AssertRC(rc);
1096 } /* if (interrupts can be dispatched) */
1097
1098 return VINF_SUCCESS;
1099}
1100
1101
1102/**
1103 * Save the host state into the VMCS.
1104 *
1105 * @returns VBox status code.
1106 * @param pVM Pointer to the VM.
1107 * @param pVCpu Pointer to the VMCPU.
1108 */
1109VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1110{
1111 int rc = VINF_SUCCESS;
1112 NOREF(pVM);
1113
1114 /*
1115 * Host CPU Context.
1116 */
1117 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
1118 {
1119 RTIDTR idtr;
1120 RTGDTR gdtr;
1121 RTSEL SelTR;
1122 PCX86DESCHC pDesc;
1123 uintptr_t trBase;
1124 RTSEL cs;
1125 RTSEL ss;
1126 uint64_t cr3;
1127
1128 /*
1129 * Control registers.
1130 */
1131 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1132 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1133#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1134 if (VMX_IS_64BIT_HOST_MODE())
1135 {
1136 cr3 = hwaccmR0Get64bitCR3();
1137 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1138 }
1139 else
1140#endif
1141 {
1142 cr3 = ASMGetCR3();
1143 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1144 }
1145 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1146 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1147 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1148 AssertRC(rc);
1149
1150 /*
1151 * Selector registers.
1152 */
1153#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1154 if (VMX_IS_64BIT_HOST_MODE())
1155 {
1156 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1157 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1158 }
1159 else
1160 {
1161 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1162 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1163 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1164 }
1165#else
1166 cs = ASMGetCS();
1167 ss = ASMGetSS();
1168#endif
1169 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1170 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1171 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1172 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1173 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1174 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1175#if HC_ARCH_BITS == 32
1176 if (!VMX_IS_64BIT_HOST_MODE())
1177 {
1178 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1179 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1180 }
1181#endif
1182 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1183 SelTR = ASMGetTR();
1184 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1185 AssertRC(rc);
1186 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1187 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1188 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1189 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1190 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1191 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1192 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1193
1194 /*
1195 * GDTR & IDTR.
1196 */
1197#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1198 if (VMX_IS_64BIT_HOST_MODE())
1199 {
1200 X86XDTR64 gdtr64, idtr64;
1201 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
1202 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1203 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1204 AssertRC(rc);
1205 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1206 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1207 gdtr.cbGdt = gdtr64.cb;
1208 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1209 }
1210 else
1211#endif
1212 {
1213 ASMGetGDTR(&gdtr);
1214 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1215 ASMGetIDTR(&idtr);
1216 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1217 AssertRC(rc);
1218 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1219 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1220 }
1221
1222 /*
1223 * Save the base address of the TR selector.
1224 */
1225 if (SelTR > gdtr.cbGdt)
1226 {
1227 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1228 return VERR_VMX_INVALID_HOST_STATE;
1229 }
1230
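 /* X86_SEL_MASK strips the RPL and TI bits, leaving the descriptor's byte offset into the GDT. */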
1231 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1232#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1233 if (VMX_IS_64BIT_HOST_MODE())
1234 {
1235 uint64_t trBase64 = X86DESC64_BASE(*(PX86DESC64)pDesc);
1236 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1237 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1238 AssertRC(rc);
1239 }
1240 else
1241#endif
1242 {
1243#if HC_ARCH_BITS == 64
1244 trBase = X86DESC64_BASE(*pDesc);
1245#else
1246 trBase = X86DESC_BASE(*pDesc);
1247#endif
1248 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1249 AssertRC(rc);
1250 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1251 }
1252
1253 /*
1254 * FS base and GS base.
1255 */
1256#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1257 if (VMX_IS_64BIT_HOST_MODE())
1258 {
1259 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1260 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1261 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1262 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1263 }
1264#endif
1265 AssertRC(rc);
1266
1267 /*
1268 * Sysenter MSRs.
1269 */
1270 /** @todo expensive!! */
1271 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1272 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1273#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1274 if (VMX_IS_64BIT_HOST_MODE())
1275 {
1276 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1277 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1278 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1279 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1280 }
1281 else
1282 {
1283 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1284 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1285 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1286 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1287 }
1288#elif HC_ARCH_BITS == 32
1289 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1290 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1291 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1292 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1293#else
1294 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1295 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1296 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1297 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1298#endif
1299 AssertRC(rc);
1300
1301
1302#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1303 /*
1304 * Store all host MSRs in the VM-Exit load area, so they will be reloaded after
1305 * the world switch back to the host.
1306 */
1307 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR;
1308 unsigned idxMsr = 0;
1309
1310 /*
1311 * Check if the EFER MSR is present.
1312 */
1313 if (ASMCpuId_EDX(0x80000001) & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1314 {
1315 if (ASMCpuId_EDX(0x80000001) & X86_CPUID_AMD_FEATURE_EDX_SEP)
1316 {
1317 pMsr->u32IndexMSR = MSR_K6_STAR;
1318 pMsr->u32Reserved = 0;
1319 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1320 pMsr++; idxMsr++;
1321 }
1322
1323 pMsr->u32IndexMSR = MSR_K6_EFER;
1324 pMsr->u32Reserved = 0;
1325# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1326 if (CPUMIsGuestInLongMode(pVCpu))
1327 {
1328 /* Must match the EFER value in our 64 bits switcher. */
1329 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1330 }
1331 else
1332# endif
1333 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1334 pMsr++; idxMsr++;
1335 }
1336
1337# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1338 if (VMX_IS_64BIT_HOST_MODE())
1339 {
1340 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1341 pMsr->u32Reserved = 0;
1342 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1343 pMsr++; idxMsr++;
1344 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1345 pMsr->u32Reserved = 0;
1346 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1347 pMsr++; idxMsr++;
1348 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1349 pMsr->u32Reserved = 0;
1350 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1351 pMsr++; idxMsr++;
1352 }
1353# endif
1354 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1355 AssertRC(rc);
1356#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1357
1358 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1359 }
1360 return rc;
1361}
1362
1363
1364/**
1365 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1366 * guest operates in PAE mode.
1367 *
1368 * @returns VBox status code.
1369 * @param pVCpu Pointer to the VMCPU.
1370 * @param pCtx Pointer to the guest CPU context.
1371 */
1372static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1373{
1374 if (CPUMIsGuestInPAEModeEx(pCtx))
1375 {
1376 X86PDPE aPdpes[4];
1377 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1378 AssertRCReturn(rc, rc);
1379
1380 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1381 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1382 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1383 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1384 }
1385 return VINF_SUCCESS;
1386}
1387
1388
1389/**
1390 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1391 * guest operates in PAE mode.
1392 *
1393 * @returns VBox status code.
1394 * @param pVCpu Pointer to the VMCPU.
1395 * @param pCtx Pointer to the guest CPU context.
1396 *
1397 * @remarks Tell PGM about CR3 changes before calling this helper.
1398 */
1399static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1400{
1401 if (CPUMIsGuestInPAEModeEx(pCtx))
1402 {
1403 int rc;
1404 X86PDPE aPdpes[4];
1405 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1406 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1407 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1408 rc = VMXReadVMCS64(VMX_VMCS_GUEST_PDPTR3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1409
1410 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1411 AssertRCReturn(rc, rc);
1412 }
1413 return VINF_SUCCESS;
1414}
1415
1416
1417/**
1418 * Update the exception bitmap according to the current CPU state.
1419 *
1420 * @param pVM Pointer to the VM.
1421 * @param pVCpu Pointer to the VMCPU.
1422 * @param pCtx Pointer to the guest CPU context.
1423 */
1424static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1425{
1426 uint32_t u32TrapMask;
1427 Assert(pCtx);
1428
1429 /*
1430 * Set up a mask for intercepting traps.
1431 */
1432 /** @todo Do we really need to always intercept #DB? */
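 /* Note: #NM is always intercepted because the FPU & XMM state is synced lazily (see the
    remarks in VMXR0SetupVM); without nested paging #PF is added below to keep the shadow
    page tables in sync. */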
1433 u32TrapMask = RT_BIT(X86_XCPT_DB)
1434 | RT_BIT(X86_XCPT_NM)
1435#ifdef VBOX_ALWAYS_TRAP_PF
1436 | RT_BIT(X86_XCPT_PF)
1437#endif
1438#ifdef VBOX_STRICT
1439 | RT_BIT(X86_XCPT_BP)
1440 | RT_BIT(X86_XCPT_DB)
1441 | RT_BIT(X86_XCPT_DE)
1442 | RT_BIT(X86_XCPT_NM)
1443 | RT_BIT(X86_XCPT_UD)
1444 | RT_BIT(X86_XCPT_NP)
1445 | RT_BIT(X86_XCPT_SS)
1446 | RT_BIT(X86_XCPT_GP)
1447 | RT_BIT(X86_XCPT_MF)
1448#endif
1449 ;
1450
1451 /*
1452 * Without nested paging, #PF must be intercepted to implement shadow paging.
1453 */
1454 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1455 if (!pVM->hwaccm.s.fNestedPaging)
1456 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1457
1458 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
1459 if (!(pCtx->cr0 & X86_CR0_NE))
1460 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1461
1462#ifdef VBOX_STRICT
1463 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1464#endif
1465
1466 /*
1467 * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise).
1468 */
1469 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1470 if ( CPUMIsGuestInRealModeEx(pCtx)
1471 && pVM->hwaccm.s.vmx.pRealModeTSS)
1472 {
1473 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1474 | RT_BIT(X86_XCPT_DB)
1475 | RT_BIT(X86_XCPT_NMI)
1476 | RT_BIT(X86_XCPT_BP)
1477 | RT_BIT(X86_XCPT_OF)
1478 | RT_BIT(X86_XCPT_BR)
1479 | RT_BIT(X86_XCPT_UD)
1480 | RT_BIT(X86_XCPT_DF)
1481 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1482 | RT_BIT(X86_XCPT_TS)
1483 | RT_BIT(X86_XCPT_NP)
1484 | RT_BIT(X86_XCPT_SS)
1485 | RT_BIT(X86_XCPT_GP)
1486 | RT_BIT(X86_XCPT_MF)
1487 | RT_BIT(X86_XCPT_AC)
1488 | RT_BIT(X86_XCPT_MC)
1489 | RT_BIT(X86_XCPT_XF)
1490 ;
1491 }
1492
1493 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1494 AssertRC(rc);
1495}
1496
1497
1498/**
1499 * Loads a minimal guest state.
1500 *
1501 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1502 *
1503 * @param pVM Pointer to the VM.
1504 * @param pVCpu Pointer to the VMCPU.
1505 * @param pCtx Pointer to the guest CPU context.
1506 */
1507VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1508{
1509 int rc;
1510 X86EFLAGS eflags;
1511
1512 Assert(!(pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_ALL_GUEST));
1513
1514 /*
1515 * Load EIP, ESP and EFLAGS.
1516 */
1517 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1518 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1519 AssertRC(rc);
1520
1521 /*
1522 * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1.
1523 */
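 /* VMX_EFLAGS_RESERVED_0 and VMX_EFLAGS_RESERVED_1 encode exactly these constraints;
    the AND/OR below forces the reserved bits into the state VT-x expects. */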
1524 eflags = pCtx->eflags;
1525 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1526 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1527
1528 /*
1529 * Check if real mode emulation using v86 mode.
1530 */
1531 if ( CPUMIsGuestInRealModeEx(pCtx)
1532 && pVM->hwaccm.s.vmx.pRealModeTSS)
1533 {
1534 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1535
1536 eflags.Bits.u1VM = 1;
1537 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1538 }
1539 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1540 AssertRC(rc);
1541}
1542
1543
1544/**
1545 * Loads the guest state.
1546 *
1547 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1548 *
1549 * @returns VBox status code.
1550 * @param pVM Pointer to the VM.
1551 * @param pVCpu Pointer to the VMCPU.
1552 * @param pCtx Pointer to the guest CPU context.
1553 */
1554VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1555{
1556 int rc = VINF_SUCCESS;
1557 RTGCUINTPTR val;
1558
1559 /*
1560 * VMX_VMCS_CTRL_ENTRY_CONTROLS
1561 * Set required bits to one and zero according to the MSR capabilities.
1562 */
1563 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1564
1565 /*
1566 * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR).
1567 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs
1568 */
1569 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1570
1571 if (CPUMIsGuestInLongModeEx(pCtx))
1572 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1573 /* else Must be zero when AMD64 is not available. */
1574
1575 /*
1576 * Mask away the bits that the CPU doesn't support.
1577 */
1578 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1579 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1580 AssertRC(rc);
1581
1582 /*
1583 * VMX_VMCS_CTRL_EXIT_CONTROLS
1584 * Set required bits to one and zero according to the MSR capabilities.
1585 */
1586 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1587
1588 /*
1589 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR)
1590 * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs
1591 */
1592 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1593
1594#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1595 if (VMX_IS_64BIT_HOST_MODE())
1596 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1597 /* else Must be zero when AMD64 is not available. */
1598#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1599 if (CPUMIsGuestInLongModeEx(pCtx))
1600 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1601 else
1602 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1603#endif
1604 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1605
1606 /*
1607 * Don't acknowledge external interrupts on VM-exit.
1608 */
1609 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1610 AssertRC(rc);
1611
1612 /*
1613 * Guest CPU context: ES, CS, SS, DS, FS, GS.
1614 */
1615 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1616 {
1617 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1618 {
1619 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1620 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1621 {
1622 /*
1623 * Correct weird requirements for switching to protected mode.
1624 */
1625 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1626 && enmGuestMode >= PGMMODE_PROTECTED)
1627 {
1628#ifdef VBOX_WITH_REM
1629 /*
1630 * Flush the recompiler code cache as it's not unlikely the guest will rewrite code
1631 * it will later execute in real mode (OpenBSD 4.0 is one such example)
1632 */
1633 REMFlushTBs(pVM);
1634#endif
1635
1636 /*
1637 * DPL of all hidden selector registers must match the current CPL (0).
1638 */
1639 pCtx->csHid.Attr.n.u2Dpl = 0;
1640 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1641
1642 pCtx->dsHid.Attr.n.u2Dpl = 0;
1643 pCtx->esHid.Attr.n.u2Dpl = 0;
1644 pCtx->fsHid.Attr.n.u2Dpl = 0;
1645 pCtx->gsHid.Attr.n.u2Dpl = 0;
1646 pCtx->ssHid.Attr.n.u2Dpl = 0;
1647 }
1648 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1649 }
1650 else if ( CPUMIsGuestInRealModeEx(pCtx)
1651 && pCtx->csHid.u64Base == 0xffff0000)
1652 {
1653 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1654 pCtx->csHid.u64Base = 0xf0000;
1655 pCtx->cs = 0xf000;
1656 }
1657 }
1658
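        /* VMX_WRITE_SELREG presumably performs the four VMCS writes for one segment register
           (selector, limit, base and access rights), applying the real-mode emulation fixups,
           and updates 'rc' with the combined status. */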
1659 VMX_WRITE_SELREG(ES, es);
1660 AssertRC(rc);
1661
1662 VMX_WRITE_SELREG(CS, cs);
1663 AssertRC(rc);
1664
1665 VMX_WRITE_SELREG(SS, ss);
1666 AssertRC(rc);
1667
1668 VMX_WRITE_SELREG(DS, ds);
1669 AssertRC(rc);
1670
1671 VMX_WRITE_SELREG(FS, fs);
1672 AssertRC(rc);
1673
1674 VMX_WRITE_SELREG(GS, gs);
1675 AssertRC(rc);
1676 }
1677
1678 /*
1679 * Guest CPU context: LDTR.
1680 */
1681 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1682 {
1683 if (pCtx->ldtr == 0)
1684 {
1685 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1686 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1687 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1688 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1689 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1690 }
1691 else
1692 {
1693 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr);
1694 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1695 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1696 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1697 }
1698 AssertRC(rc);
1699 }
1700
1701 /*
1702 * Guest CPU context: TR.
1703 */
1704 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1705 {
1706 /*
1707 * Real mode emulation using v86 mode with CR4.VME (interrupt redirection
1708 * using the int bitmap in the TSS).
1709 */
1710 if ( CPUMIsGuestInRealModeEx(pCtx)
1711 && pVM->hwaccm.s.vmx.pRealModeTSS)
1712 {
1713 RTGCPHYS GCPhys;
1714
1715 /* We convert it here every time as PCI regions could be reconfigured. */
1716 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1717 AssertRC(rc);
1718
1719 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1720 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1721 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1722
1723 X86DESCATTR attr;
1724
1725 attr.u = 0;
1726 attr.n.u1Present = 1;
1727 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1728 val = attr.u;
1729 }
1730 else
1731 {
1732 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr);
1733 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1734 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->trHid.u64Base);
1735
1736 val = pCtx->trHid.Attr.u;
1737
1738 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
1739 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1740 {
1741 if (val & 0xf)
1742 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1743 else
1744 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1745 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1746 }
1747 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("%#x\n", val));
1748 }
1749 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1750 AssertRC(rc);
1751 }
1752
1753 /*
1754 * Guest CPU context: GDTR.
1755 */
1756 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1757 {
1758 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1759 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1760 AssertRC(rc);
1761 }
1762
1763 /*
1764 * Guest CPU context: IDTR.
1765 */
1766 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1767 {
1768 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1769 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1770 AssertRC(rc);
1771 }
1772
1773 /*
1774 * Sysenter MSRs.
1775 */
1776 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
1777 {
1778 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1779 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1780 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1781 AssertRC(rc);
1782 }
1783
1784 /*
1785 * Guest CPU context: Control registers.
1786 */
1787 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1788 {
1789 val = pCtx->cr0;
1790 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1791 Log2(("Guest CR0-shadow %08x\n", val));
1792 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1793 {
1794 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1795 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1796 }
1797 else
1798 {
1799 /** @todo check if we support the old style mess correctly. */
1800 if (!(val & X86_CR0_NE))
1801 Log(("Forcing X86_CR0_NE!!!\n"));
1802
1803 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1804 }
1805 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1806 if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1807 val |= X86_CR0_PE | X86_CR0_PG;
1808
1809 if (pVM->hwaccm.s.fNestedPaging)
1810 {
1811 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1812 {
1813 /* Disable CR3 read/write monitoring as we don't need it for EPT. */
1814 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1815 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1816 }
1817 else
1818 {
1819 /* Reenable CR3 read/write monitoring as our identity mapped page table is active. */
1820 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1821 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1822 }
1823 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1824 AssertRC(rc);
1825 }
1826 else
1827 {
1828 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1829 val |= X86_CR0_WP;
1830 }
1831
1832 /* Always enable caching. */
1833 val &= ~(X86_CR0_CD|X86_CR0_NW);
1834
1835 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1836 Log2(("Guest CR0 %08x\n", val));
1837
1838 /*
1839         * CR0 flags owned by the host; if the guest attempts to change them, the VM will exit.
1840 */
1841 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1842 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1843 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1844 | X86_CR0_CD /* Bit not restored during VM-exit! */
1845 | X86_CR0_NW /* Bit not restored during VM-exit! */
1846 | X86_CR0_NE;
1847
1848 /*
1849         * When the guest's FPU state is active, we no longer care about the FPU-related bits.
1850 */
1851 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1852 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
1853
1854 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1855
1856 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1857 Log2(("Guest CR0-mask %08x\n", val));
1858 AssertRC(rc);
1859 }
1860
1861 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1862 {
1863 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1864 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1865 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1866 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1867
1868 if (!pVM->hwaccm.s.fNestedPaging)
1869 {
1870 switch(pVCpu->hwaccm.s.enmShadowMode)
1871 {
1872 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1873 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1874 case PGMMODE_32_BIT: /* 32-bit paging. */
1875 val &= ~X86_CR4_PAE;
1876 break;
1877
1878 case PGMMODE_PAE: /* PAE paging. */
1879 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1880 /** Must use PAE paging as we could use physical memory > 4 GB */
1881 val |= X86_CR4_PAE;
1882 break;
1883
1884 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1885 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1886#ifdef VBOX_ENABLE_64_BITS_GUESTS
1887 break;
1888#else
1889 AssertFailed();
1890 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1891#endif
1892 default: /* shut up gcc */
1893 AssertFailed();
1894 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1895 }
1896 }
1897 else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1898 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1899 {
1900 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1901 val |= X86_CR4_PSE;
1902        /* Our identity mapping is a 32-bit page directory. */
1903 val &= ~X86_CR4_PAE;
1904 }
1905
1906 /*
1907 * Turn off VME if we're in emulated real mode.
1908 */
1909 if ( CPUMIsGuestInRealModeEx(pCtx)
1910 && pVM->hwaccm.s.vmx.pRealModeTSS)
1911 {
1912 val &= ~X86_CR4_VME;
1913 }
1914
1915 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1916 Log2(("Guest CR4 %08x\n", val));
1917
1918 /*
1919         * CR4 flags owned by the host; if the guest attempts to change them, the VM will exit.
1920 */
1921 val = 0
1922 | X86_CR4_VME
1923 | X86_CR4_PAE
1924 | X86_CR4_PGE
1925 | X86_CR4_PSE
1926 | X86_CR4_VMXE;
1927 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1928
1929 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1930 Log2(("Guest CR4-mask %08x\n", val));
1931 AssertRC(rc);
1932 }
1933
1934#if 0
1935 /* Enable single stepping if requested and CPU supports it. */
1936 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
1937 if (DBGFIsStepping(pVCpu))
1938 {
1939 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
1940 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1941 AssertRC(rc);
1942 }
1943#endif
1944
1945 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1946 {
1947 if (pVM->hwaccm.s.fNestedPaging)
1948 {
1949 Assert(PGMGetHyperCR3(pVCpu));
1950 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
1951
1952 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1953 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
1954 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1955 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
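            /* EPTP format: bits 2:0 hold the EPT memory type (6 = write-back) and bits 5:3 the
               page-walk length minus one, so these flags are simply ORed into the page-aligned
               address of the EPT PML4 table. */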
1956
1957 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1958 AssertRC(rc);
1959
1960 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1961 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1962 {
1963 RTGCPHYS GCPhys;
1964
1965 /* We convert it here every time as PCI regions could be reconfigured. */
1966 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1967 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable));
1968
1969 /*
1970 * We use our identity mapping page table here as we need to map guest virtual to
1971 * guest physical addresses; EPT will take care of the translation to host physical addresses.
1972 */
1973 val = GCPhys;
1974 }
1975 else
1976 {
1977 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1978 val = pCtx->cr3;
1979 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
1980 AssertRCReturn(rc, rc);
1981 }
1982 }
1983 else
1984 {
1985 val = PGMGetHyperCR3(pVCpu);
1986 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
1987 }
1988
1989 /* Save our shadow CR3 register. */
1990 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
1991 AssertRC(rc);
1992 }
1993
1994 /*
1995 * Guest CPU context: Debug registers.
1996 */
1997 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1998 {
1999 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
2000 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
2001
2002 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2003 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2004 pCtx->dr[7] |= 0x400; /* must be one */
2005
2006 /* Resync DR7 */
2007 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
2008 AssertRC(rc);
2009
2010#ifdef DEBUG
2011 /* Sync the hypervisor debug state now if any breakpoint is armed. */
2012 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
2013 && !CPUMIsHyperDebugStateActive(pVCpu)
2014 && !DBGFIsStepping(pVCpu))
2015 {
2016 /* Save the host and load the hypervisor debug state. */
2017 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2018 AssertRC(rc);
2019
2020 /* DRx intercepts remain enabled. */
2021
2022 /* Override dr7 with the hypervisor value. */
2023 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
2024 AssertRC(rc);
2025 }
2026 else
2027#endif
2028 /* Sync the debug state now if any breakpoint is armed. */
2029 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
2030 && !CPUMIsGuestDebugStateActive(pVCpu)
2031 && !DBGFIsStepping(pVCpu))
2032 {
2033 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
2034
2035 /* Disable DRx move intercepts. */
2036 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2037 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2038 AssertRC(rc);
2039
2040 /* Save the host and load the guest debug state. */
2041 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2042 AssertRC(rc);
2043 }
2044
2045 /* IA32_DEBUGCTL MSR. */
2046 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
2047 AssertRC(rc);
2048
2049 /** @todo do we really ever need this? */
2050 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
2051 AssertRC(rc);
2052 }
2053
2054 /*
2055 * 64-bit guest mode.
2056 */
2057 if (CPUMIsGuestInLongModeEx(pCtx))
2058 {
2059#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
2060 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2061#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2062 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
2063#else
2064# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2065 if (!pVM->hwaccm.s.fAllow64BitGuests)
2066 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2067# endif
2068 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
2069#endif
2070 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_MSR)
2071 {
2072 /* Update these as wrmsr might have changed them. */
2073 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base);
2074 AssertRC(rc);
2075 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base);
2076 AssertRC(rc);
2077 }
2078 }
2079 else
2080 {
2081 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
2082 }
2083
2084 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
2085
2086#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2087 /* Store all guest MSRs in the VM-Entry load area, so they will be loaded during the world switch. */
2088 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2089 unsigned idxMsr = 0;
2090
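    /* Each entry in this area is a 16-byte VMXMSR record (32-bit MSR index, 32 reserved bits,
       64-bit value). Presumably the same guest MSR page backs both the VM-entry load and the
       VM-exit store lists, which is why both counts are set to idxMsr further down. */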
2091 uint32_t ulEdx;
2092 uint32_t ulTemp;
2093 CPUMGetGuestCpuId(pVCpu, 0x80000001, &ulTemp, &ulTemp, &ulTemp, &ulEdx);
2094 /* EFER MSR present? */
2095 if (ulEdx & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
2096 {
2097 pMsr->u32IndexMSR = MSR_K6_EFER;
2098 pMsr->u32Reserved = 0;
2099 pMsr->u64Value = pCtx->msrEFER;
2100 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
2101 if (!CPUMIsGuestInLongModeEx(pCtx))
2102 pMsr->u64Value &= ~(MSR_K6_EFER_LMA|MSR_K6_EFER_LME);
2103 pMsr++; idxMsr++;
2104
2105 if (ulEdx & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE)
2106 {
2107 pMsr->u32IndexMSR = MSR_K8_LSTAR;
2108 pMsr->u32Reserved = 0;
2109 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
2110 pMsr++; idxMsr++;
2111 pMsr->u32IndexMSR = MSR_K6_STAR;
2112 pMsr->u32Reserved = 0;
2113 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
2114 pMsr++; idxMsr++;
2115 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
2116 pMsr->u32Reserved = 0;
2117 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
2118 pMsr++; idxMsr++;
2119 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
2120 pMsr->u32Reserved = 0;
2121 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
2122 pMsr++; idxMsr++;
2123 }
2124 }
2125 pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr;
2126
2127 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
2128 AssertRC(rc);
2129
2130 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
2131 AssertRC(rc);
2132#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2133
2134 bool fOffsettedTsc;
2135 if (pVM->hwaccm.s.vmx.fUsePreemptTimer)
2136 {
2137 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
2138
2139 /* Make sure the returned values have sane upper and lower boundaries. */
2140 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
2141
2142 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
2143 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
2144
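        /* The VMX preemption timer counts down at the TSC rate divided by 2^cPreemptTimerShift
           (the shift presumably comes from IA32_VMX_MISC), so convert the TSC-unit deadline
           before clipping it to the 32-bit VMCS field. */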
2145 cTicksToDeadline >>= pVM->hwaccm.s.vmx.cPreemptTimerShift;
2146 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
2147 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
2148 AssertRC(rc);
2149 }
2150 else
2151 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset);
2152
2153 if (fOffsettedTsc)
2154 {
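        /* With TSC offsetting the guest sees host TSC + u64TSCOffset on RDTSC, so only use the
           offset if that sum won't fall below the last TSC value the guest has already seen;
           otherwise fall back to intercepting RDTSC (the else branch below). */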
2155 uint64_t u64CurTSC = ASMReadTSC();
2156 if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
2157 {
2158 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */
2159 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset);
2160 AssertRC(rc);
2161
2162 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2163 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2164 AssertRC(rc);
2165 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
2166 }
2167 else
2168 {
2169 /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */
2170 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
2171 pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset,
2172 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset,
2173 TMCpuTickGet(pVCpu)));
2174 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2175 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2176 AssertRC(rc);
2177 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
2178 }
2179 }
2180 else
2181 {
2182 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2183 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2184 AssertRC(rc);
2185 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
2186 }
2187
2188 /* Done with the major changes */
2189 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
2190
2191 /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */
2192 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2193 return rc;
2194}
2195
2196
2197/**
2198 * Syncs back the guest state from VMCS.
2199 *
2200 * @returns VBox status code.
2201 * @param pVM Pointer to the VM.
2202 * @param pVCpu Pointer to the VMCPU.
2203 * @param pCtx Pointer the guest CPU context.
2204 */
2205DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2206{
2207 RTGCUINTREG val, valShadow;
2208 RTGCUINTPTR uInterruptState;
2209 int rc;
2210
2211 /* First sync back EIP, ESP, and EFLAGS. */
2212 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
2213 AssertRC(rc);
2214 pCtx->rip = val;
2215 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
2216 AssertRC(rc);
2217 pCtx->rsp = val;
2218 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2219 AssertRC(rc);
2220 pCtx->eflags.u32 = val;
2221
2222 /* Take care of instruction fusing (sti, mov ss) */
2223 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2224 uInterruptState = val;
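    /* Interruptibility-state: bit 0 (value 1) = blocking by STI, bit 1 (value 2) = blocking by
       MOV SS/POP SS; hence the assertion below that only these two shadow types show up here. */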
2225 if (uInterruptState != 0)
2226 {
2227 Assert(uInterruptState <= 2); /* only sti & mov ss */
2228 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2229 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2230 }
2231 else
2232 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2233
2234 /* Control registers. */
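    /* Host-owned bits (set in cr0_mask/cr4_mask) are reported from the read shadow, i.e. what the
       guest believes it wrote; guest-owned bits are taken straight from the VMCS guest copy. */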
2235 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2236 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
2237 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
2238 CPUMSetGuestCR0(pVCpu, val);
2239
2240 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2241 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
2242 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
2243 CPUMSetGuestCR4(pVCpu, val);
2244
2245 /*
2246 * No reason to sync back the CRx registers. They can't be changed by the guest unless in
2247 * the nested paging case where CR3 & CR4 can be changed by the guest.
2248 */
2249 if ( pVM->hwaccm.s.fNestedPaging
2250 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2251 {
2252 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
2253
2254 /* Can be updated behind our back in the nested paging case. */
2255 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2256
2257 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
2258
2259 if (val != pCtx->cr3)
2260 {
2261 CPUMSetGuestCR3(pVCpu, val);
2262 PGMUpdateCR3(pVCpu, val);
2263 }
2264 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2265 AssertRCReturn(rc, rc);
2266 }
2267
2268 /* Sync back DR7. */
2269 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
2270 pCtx->dr[7] = val;
2271
2272 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2273 VMX_READ_SELREG(ES, es);
2274 VMX_READ_SELREG(SS, ss);
2275 VMX_READ_SELREG(CS, cs);
2276 VMX_READ_SELREG(DS, ds);
2277 VMX_READ_SELREG(FS, fs);
2278 VMX_READ_SELREG(GS, gs);
2279
2280 /* System MSRs */
2281 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2282 pCtx->SysEnter.cs = val;
2283 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
2284 pCtx->SysEnter.eip = val;
2285 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
2286 pCtx->SysEnter.esp = val;
2287
2288 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2289 VMX_READ_SELREG(LDTR, ldtr);
2290
2291 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2292 pCtx->gdtr.cbGdt = val;
2293 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
2294 pCtx->gdtr.pGdt = val;
2295
2296 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2297 pCtx->idtr.cbIdt = val;
2298 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
2299 pCtx->idtr.pIdt = val;
2300
2301 /* Real mode emulation using v86 mode. */
2302 if ( CPUMIsGuestInRealModeEx(pCtx)
2303 && pVM->hwaccm.s.vmx.pRealModeTSS)
2304 {
2305 /* Hide our emulation flags */
2306 pCtx->eflags.Bits.u1VM = 0;
2307
2308 /* Restore original IOPL setting as we always use 0. */
2309 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2310
2311 /* Force a TR resync every time in case we switch modes. */
2312 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
2313 }
2314 else
2315 {
2316 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2317 VMX_READ_SELREG(TR, tr);
2318 }
2319
2320#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2321 /* Save the possibly changed MSRs that we automatically restore and save during a world switch. */
2322 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++)
2323 {
2324 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2325 pMsr += i;
2326
2327 switch (pMsr->u32IndexMSR)
2328 {
2329 case MSR_K8_LSTAR:
2330 pCtx->msrLSTAR = pMsr->u64Value;
2331 break;
2332 case MSR_K6_STAR:
2333 pCtx->msrSTAR = pMsr->u64Value;
2334 break;
2335 case MSR_K8_SF_MASK:
2336 pCtx->msrSFMASK = pMsr->u64Value;
2337 break;
2338 case MSR_K8_KERNEL_GS_BASE:
2339 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2340 break;
2341 case MSR_K6_EFER:
2342 /* EFER can't be changed without causing a VM-exit. */
2343 /* Assert(pCtx->msrEFER == pMsr->u64Value); */
2344 break;
2345 default:
2346 AssertFailed();
2347 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2348 }
2349 }
2350#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2351 return VINF_SUCCESS;
2352}
2353
2354
2355/**
2356 * Dummy placeholder for TLB flush handling before VM-entry. Used in the case
2357 * where neither EPT nor VPID is supported by the CPU.
2358 *
2359 * @param pVM Pointer to the VM.
2360 * @param pVCpu Pointer to the VMCPU.
2361 */
2362static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2363{
2364 NOREF(pVM);
2365 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2366 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2367 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2368 return;
2369}
2370
2371
2372/**
2373 * Setup the tagged TLB for EPT+VPID.
2374 *
2375 * @param pVM Pointer to the VM.
2376 * @param pVCpu Pointer to the VMCPU.
2377 */
2378static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2379{
2380 PHMGLOBLCPUINFO pCpu;
2381
2382 Assert(pVM->hwaccm.s.fNestedPaging && pVM->hwaccm.s.vmx.fVPID);
2383
2384 pCpu = HWACCMR0GetCurrentCpu();
2385
2386 /*
2387     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2388 * This can happen both for start & resume due to long jumps back to ring-3.
2389 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2390 * so we cannot reuse the current ASID anymore.
2391 */
2392 bool fNewASID = false;
2393 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2394 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2395 {
2396 pVCpu->hwaccm.s.fForceTLBFlush = true;
2397 fNewASID = true;
2398 }
2399
2400 /*
2401 * Check for explicit TLB shootdowns.
2402 */
2403 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2404 pVCpu->hwaccm.s.fForceTLBFlush = true;
2405
2406 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2407
2408 if (pVCpu->hwaccm.s.fForceTLBFlush)
2409 {
2410 if (fNewASID)
2411 {
2412 ++pCpu->uCurrentASID;
2413 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2414 {
2415 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2416 pCpu->cTLBFlushes++;
2417 pCpu->fFlushASIDBeforeUse = true;
2418 }
2419
2420 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2421 if (pCpu->fFlushASIDBeforeUse)
2422 {
2423 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2424#ifdef VBOX_WITH_STATISTICS
2425 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2426#endif
2427 }
2428 }
2429 else
2430 {
2431 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_SINGLE_CONTEXT)
2432 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2433 else
2434 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2435
2436#ifdef VBOX_WITH_STATISTICS
2437 /*
2438 * This is not terribly accurate (i.e. we don't have any StatFlushEPT counter). We currently count these
2439 * as ASID flushes too, better than including them under StatFlushTLBWorldSwitch.
2440 */
2441 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2442#endif
2443 }
2444
2445 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2446 pVCpu->hwaccm.s.fForceTLBFlush = false;
2447 }
2448 else
2449 {
2450 Assert(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID);
2451
2452 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2453 * not be executed. See hwaccmQueueInvlPage() where it is commented
2454 * out. Support individual entry flushing someday. */
2455 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2456 {
2457 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2458
2459 /*
2460 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2461 * as supported by the CPU.
2462 */
2463 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2464 {
2465 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2466 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2467 }
2468 else
2469 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2470 }
2471 else
2472 {
2473#ifdef VBOX_WITH_STATISTICS
2474 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2475#endif
2476 }
2477 }
2478 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2479 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2480
2481 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes,
2482 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2483 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID,
2484 ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2485 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID,
2486 ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2487
2488 /* Update VMCS with the VPID. */
2489 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2490 AssertRC(rc);
2491}
2492
2493
2494/**
2495 * Setup the tagged TLB for EPT only.
2496 *
2497 * @returns VBox status code.
2498 * @param pVM Pointer to the VM.
2499 * @param pVCpu Pointer to the VMCPU.
2500 */
2501static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2502{
2503 PHMGLOBLCPUINFO pCpu;
2504
2505 Assert(pVM->hwaccm.s.fNestedPaging);
2506 Assert(!pVM->hwaccm.s.vmx.fVPID);
2507
2508 pCpu = HWACCMR0GetCurrentCpu();
2509
2510 /*
2511     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2512     * This can happen both for start & resume due to long jumps back to ring-3.
2513     * The TLB flush count shouldn't really change in this EPT-only case.
2514 */
2515 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2516 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2517 {
2518 pVCpu->hwaccm.s.fForceTLBFlush = true;
2519 }
2520
2521 /*
2522 * Check for explicit TLB shootdown flushes.
2523 */
2524 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2525 pVCpu->hwaccm.s.fForceTLBFlush = true;
2526
2527 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2528
2529 if (pVCpu->hwaccm.s.fForceTLBFlush)
2530 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2531 else
2532 {
2533 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2534 * not be executed. See hwaccmQueueInvlPage() where it is commented
2535 * out. Support individual entry flushing someday. */
2536 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2537 {
2538 /*
2539 * We cannot flush individual entries without VPID support. Flush using EPT.
2540 */
2541 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2542 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushEPT);
2543 }
2544 }
2545     pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2546 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2547
2548#ifdef VBOX_WITH_STATISTICS
2549 if (pVCpu->hwaccm.s.fForceTLBFlush)
2550 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2551 else
2552 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2553#endif
2554}
2555
2556
2557/**
2558 * Setup the tagged TLB for VPID.
2559 *
2560 * @returns VBox status code.
2561 * @param pVM Pointer to the VM.
2562 * @param pVCpu Pointer to the VMCPU.
2563 */
2564static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2565{
2566 PHMGLOBLCPUINFO pCpu;
2567
2568 Assert(pVM->hwaccm.s.vmx.fVPID);
2569 Assert(!pVM->hwaccm.s.fNestedPaging);
2570
2571 pCpu = HWACCMR0GetCurrentCpu();
2572
2573 /*
2574     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2575 * This can happen both for start & resume due to long jumps back to ring-3.
2576 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
2577 * so we cannot reuse the current ASID anymore.
2578 */
2579 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2580 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2581 {
2582 /* Force a TLB flush on VM entry. */
2583 pVCpu->hwaccm.s.fForceTLBFlush = true;
2584 }
2585
2586 /*
2587 * Check for explicit TLB shootdown flushes.
2588 */
2589 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2590 pVCpu->hwaccm.s.fForceTLBFlush = true;
2591
2592 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2593
2594 if (pVCpu->hwaccm.s.fForceTLBFlush)
2595 {
2596 ++pCpu->uCurrentASID;
2597 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
2598 {
2599 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2600 pCpu->cTLBFlushes++;
2601 pCpu->fFlushASIDBeforeUse = true;
2602 }
2603 else
2604 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2605
2606 pVCpu->hwaccm.s.fForceTLBFlush = false;
2607 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2608 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2609 if (pCpu->fFlushASIDBeforeUse)
2610 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2611 }
2612 else
2613 {
2614 Assert(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID);
2615
2616 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2617 * not be executed. See hwaccmQueueInvlPage() where it is commented
2618 * out. Support individual entry flushing someday. */
2619 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2620 {
2621 /*
2622 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2623 * as supported by the CPU.
2624 */
2625 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
2626 {
2627 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2628 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2629 }
2630 else
2631 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushVPID, 0 /* GCPtr */);
2632 }
2633 }
2634 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2635 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2636
2637 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2638 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2639 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2640
2641 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2642 AssertRC(rc);
2643
2644# ifdef VBOX_WITH_STATISTICS
2645 if (pVCpu->hwaccm.s.fForceTLBFlush)
2646 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2647 else
2648 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2649# endif
2650}
2651
2652
2653/**
2654 * Runs guest code in a VT-x VM.
2655 *
2656 * @returns VBox status code.
2657 * @param pVM Pointer to the VM.
2658 * @param pVCpu Pointer to the VMCPU.
2659 * @param pCtx Pointer to the guest CPU context.
2660 */
2661VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2662{
2663 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
2664 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1);
2665 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2);
2666
2667 VBOXSTRICTRC rc = VINF_SUCCESS;
2668 int rc2;
2669 RTGCUINTREG val;
2670 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2671 RTGCUINTREG instrError, cbInstr;
2672 RTGCUINTPTR exitQualification = 0;
2673 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2674 RTGCUINTPTR errCode, instrInfo;
2675 bool fSetupTPRCaching = false;
2676 uint64_t u64OldLSTAR = 0;
2677 uint8_t u8LastTPR = 0;
2678 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2679 unsigned cResume = 0;
2680#ifdef VBOX_STRICT
2681 RTCPUID idCpuCheck;
2682 bool fWasInLongMode = false;
2683#endif
2684#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2685 uint64_t u64LastTime = RTTimeMilliTS();
2686#endif
2687
2688 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || (pVCpu->hwaccm.s.vmx.pbVAPIC && pVM->hwaccm.s.vmx.pAPIC));
2689
2690 /*
2691 * Check if we need to use TPR shadowing.
2692 */
2693 if ( CPUMIsGuestInLongModeEx(pCtx)
2694 || ( ((pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || pVM->hwaccm.s.fTRPPatchingAllowed)
2695 && pVM->hwaccm.s.fHasIoApic)
2696 )
2697 {
2698 fSetupTPRCaching = true;
2699 }
2700
2701 Log2(("\nE"));
2702
2703#ifdef VBOX_STRICT
2704 {
2705 RTCCUINTREG val2;
2706
2707 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2);
2708 AssertRC(rc2);
2709 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2710
2711 /* allowed zero */
2712 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2713 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2714
2715 /* allowed one */
2716 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2717 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2718
2719 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2);
2720 AssertRC(rc2);
2721 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2722
2723 /*
2724 * Must be set according to the MSR, but can be cleared if nested paging is used.
2725 */
2726 if (pVM->hwaccm.s.fNestedPaging)
2727 {
2728 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2729 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2730 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2731 }
2732
2733 /* allowed zero */
2734 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2735 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2736
2737 /* allowed one */
2738 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2739 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2740
2741 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2);
2742 AssertRC(rc2);
2743 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2744
2745 /* allowed zero */
2746 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
2747 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2748
2749 /* allowed one */
2750 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2751 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2752
2753 rc2 = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2);
2754 AssertRC(rc2);
2755 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2756
2757 /* allowed zero */
2758 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2759 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2760
2761 /* allowed one */
2762 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2763 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2764 }
2765 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2766#endif /* VBOX_STRICT */
2767
2768#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2769 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2770#endif
2771
2772 /*
2773 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
2774 */
2775ResumeExecution:
2776 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry))
2777 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x);
2778 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2779 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2780 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2781 Assert(!HWACCMR0SuspendPending());
2782 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2783 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2784
2785 /*
2786 * Safety precaution; looping for too long here can have a very bad effect on the host.
2787 */
2788 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
2789 {
2790 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2791 rc = VINF_EM_RAW_INTERRUPT;
2792 goto end;
2793 }
2794
2795 /*
2796 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
2797 */
2798 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2799 {
2800 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2801 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2802 {
2803 /*
2804 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2805 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2806 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
2807 * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
2808 */
2809 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2810 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2811 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2812 AssertRC(rc2);
2813 }
2814 }
2815 else
2816 {
2817 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2818 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2819 AssertRC(rc2);
2820 }
2821
2822#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2823 if (RT_UNLIKELY((cResume & 0xf) == 0))
2824 {
2825 uint64_t u64CurTime = RTTimeMilliTS();
2826
2827 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2828 {
2829 u64LastTime = u64CurTime;
2830 TMTimerPollVoid(pVM, pVCpu);
2831 }
2832 }
2833#endif
2834
2835 /*
2836 * Check for pending actions that force us to go back to ring-3.
2837 */
2838 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2839 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2840 {
2841 /* Check if a sync operation is pending. */
2842 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2843 {
2844 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2845 if (rc != VINF_SUCCESS)
2846 {
2847 AssertRC(VBOXSTRICTRC_VAL(rc));
2848 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
2849 goto end;
2850 }
2851 }
2852
2853#ifdef DEBUG
2854 /* Intercept X86_XCPT_DB if stepping is enabled */
2855 if (!DBGFIsStepping(pVCpu))
2856#endif
2857 {
2858 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2859 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2860 {
2861 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2862 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2863 goto end;
2864 }
2865 }
2866
2867 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2868 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
2869 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
2870 {
2871 rc = VINF_EM_PENDING_REQUEST;
2872 goto end;
2873 }
2874
2875 /* Check if a pgm pool flush is in progress. */
2876 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2877 {
2878 rc = VINF_PGM_POOL_FLUSH_PENDING;
2879 goto end;
2880 }
2881
2882 /* Check if DMA work is pending (2nd+ run). */
2883 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
2884 {
2885 rc = VINF_EM_RAW_TO_R3;
2886 goto end;
2887 }
2888 }
2889
2890#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2891 /*
2892     * Exit to ring-3 if preemption or other work is pending.
2893 *
2894 * Interrupts are disabled before the call to make sure we don't miss any interrupt
2895 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
2896 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
2897 *
2898     * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
2899 * shootdowns rely on this.
2900 */
2901 uOldEFlags = ASMIntDisableFlags();
2902 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
2903 {
2904 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
2905 rc = VINF_EM_RAW_INTERRUPT;
2906 goto end;
2907 }
2908 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2909#endif
2910
2911 /*
2912     * When external interrupts are pending, we should exit the VM when IF is set.
2913 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!
2914 */
2915 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
2916 if (RT_FAILURE(rc))
2917 goto end;
2918
2919 /** @todo check timers?? */
2920
2921 /*
2922 * TPR caching using CR8 is only available in 64-bit mode.
2923     * Note: AMD has a 32-bit exception (X86_CPUID_AMD_FEATURE_ECX_CR8L), but it appears to be missing on Intel CPUs.
2924     * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (No longer true.)
2925 */
2926     /** @todo query and update the TPR only when it could have been changed (mmio
2927      *        access & wrmsr (x2apic)). */
2928 if (fSetupTPRCaching)
2929 {
2930 /* TPR caching in CR8 */
2931 bool fPending;
2932
2933 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
2934 AssertRC(rc2);
2935 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
2936 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = u8LastTPR;
2937
2938 /*
2939 * Two options here:
2940 * - external interrupt pending, but masked by the TPR value.
2941         *    -> a CR8 update that lowers the current TPR value should cause an exit
2942         * - no pending interrupts
2943         *    -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
2944 */
2945 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
2946 AssertRC(VBOXSTRICTRC_VAL(rc));
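        /* Example: with a pending interrupt and u8LastTPR = 0x50 the threshold becomes 5, so the
           CPU exits as soon as the guest drops bits 7-4 of its TPR below 5 and the previously
           masked interrupt can be delivered. */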
2947
2948 if (pVM->hwaccm.s.fTPRPatchingActive)
2949 {
2950 Assert(!CPUMIsGuestInLongModeEx(pCtx));
2951 /* Our patch code uses LSTAR for TPR caching. */
2952 pCtx->msrLSTAR = u8LastTPR;
2953
2954 if (fPending)
2955 {
2956 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
2957 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
2958 }
2959 else
2960 {
2961 /*
2962                 * No interrupts are pending, so we don't need to be explicitly notified.
2963 * There are enough world switches for detecting pending interrupts.
2964 */
2965 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
2966 }
2967 }
2968 }
2969
2970#ifdef LOG_ENABLED
2971 if ( pVM->hwaccm.s.fNestedPaging
2972 || pVM->hwaccm.s.vmx.fVPID)
2973 {
2974 PHMGLOBLCPUINFO pCpu = HWACCMR0GetCurrentCpu();
2975 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2976 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2977 {
2978 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
2979 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
2980 else
2981 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2982 }
2983 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
2984 LogFlow(("Manual TLB flush\n"));
2985 }
2986#endif
2987#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2988 PGMRZDynMapFlushAutoSet(pVCpu);
2989#endif
2990
2991 /*
2992 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3!
2993 * (until the actual world switch)
2994 */
2995#ifdef VBOX_STRICT
2996 idCpuCheck = RTMpCpuId();
2997#endif
2998#ifdef LOG_ENABLED
2999 VMMR0LogFlushDisable(pVCpu);
3000#endif
3001
3002 /*
3003 * Save the host state first.
3004 */
3005 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
3006 {
3007 rc = VMXR0SaveHostState(pVM, pVCpu);
3008 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3009 {
3010 VMMR0LogFlushEnable(pVCpu);
3011 goto end;
3012 }
3013 }
3014
3015 /*
3016 * Load the guest state.
3017 */
3018 if (!pVCpu->hwaccm.s.fContextUseFlags)
3019 {
3020 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
3021 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadMinimal);
3022 }
3023 else
3024 {
3025 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
3026 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3027 {
3028 VMMR0LogFlushEnable(pVCpu);
3029 goto end;
3030 }
3031 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatLoadFull);
3032 }
3033
3034#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3035 /*
3036 * Disable interrupts to make sure a poke will interrupt execution.
3037 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
3038 */
3039 uOldEFlags = ASMIntDisableFlags();
3040 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3041#endif
3042
3043 /* Non-register state Guest Context */
3044 /** @todo change me according to cpu state */
3045 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
3046 AssertRC(rc2);
3047
3048 /* Set TLB flush state as checked until we return from the world switch. */
3049 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
3050 /* Deal with tagged TLB setup and invalidation. */
3051 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
3052
3053 /*
3054 * Manual save and restore:
3055 * - General purpose registers except RIP, RSP
3056 *
3057 * Trashed:
3058 * - CR2 (we don't care)
3059 * - LDTR (reset to 0)
3060 * - DRx (presumably not changed at all)
3061 * - DR7 (reset to 0x400)
3062 * - EFLAGS (reset to RT_BIT(1); not relevant)
3063 */
3064
3065 /* All done! Let's start VM execution. */
3066 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x);
3067 Assert(idCpuCheck == RTMpCpuId());
3068
3069#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3070 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
3071 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
3072#endif
3073
3074 /*
3075 * Save the current TPR value in the LSTAR MSR so our patches can access it.
3076 */
3077 if (pVM->hwaccm.s.fTPRPatchingActive)
3078 {
3079 Assert(pVM->hwaccm.s.fTPRPatchingActive);
3080 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3081 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
3082 }
3083
3084 TMNotifyStartOfExecution(pVCpu);
3085#ifdef VBOX_WITH_KERNEL_USING_XMM
3086 rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM);
3087#else
3088 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
3089#endif
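    /* The XMM wrapper presumably saves and restores the XMM registers around the low-level
       switcher because the host kernel expects them to be preserved; otherwise the assembly
       pfnStartVM stub is called directly. */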
3090 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
3091 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits);
3092 /* Possibly the last TSC value seen by the guest (too high) (only when we're in TSC offset mode). */
3093 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3094 {
3095 TMCpuTickSetLastSeen(pVCpu,
3096 ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
3097 }
3098
3099 TMNotifyEndOfExecution(pVCpu);
3100 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
3101 Assert(!(ASMGetFlags() & X86_EFL_IF));
3102
3103 /*
3104 * Restore the host LSTAR MSR if the guest could have changed it.
3105 */
3106 if (pVM->hwaccm.s.fTPRPatchingActive)
3107 {
3108 Assert(pVM->hwaccm.s.fTPRPatchingActive);
3109 pVCpu->hwaccm.s.vmx.pbVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3110 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
3111 }
3112
3113 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x);
3114 ASMSetFlags(uOldEFlags);
3115#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3116 uOldEFlags = ~(RTCCUINTREG)0;
3117#endif
3118
3119 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
3120
3121 /* In case we execute a goto ResumeExecution later on. */
3122 pVCpu->hwaccm.s.fResumeVM = true;
3123 pVCpu->hwaccm.s.fForceTLBFlush = false;
3124
3125 /*
3126 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3127 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
3128 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3129 */
3130
3131 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3132 {
3133 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
3134 VMMR0LogFlushEnable(pVCpu);
3135 goto end;
3136 }
3137
3138 /* Success. Query the guest state and figure out what has happened. */
3139
3140 /* Investigate why there was a VM-exit. */
3141 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3142 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
3143
3144     exitReason &= 0xffff;   /* bits 0-15 contain the exit code. */
3145 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3146 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
3147 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
3148 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
3149 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
3150 rc2 |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
3151 rc2 |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
3152 AssertRC(rc2);
3153
3154 /*
3155 * Sync back the guest state.
3156 */
3157 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
3158 AssertRC(rc2);
3159
3160 /* Note! NOW IT'S SAFE FOR LOGGING! */
3161 VMMR0LogFlushEnable(pVCpu);
3162 Log2(("Raw exit reason %08x\n", exitReason));
3163#if ARCH_BITS == 64 /* for the time being */
3164 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
3165#endif
3166
3167 /*
3168 * Check if an injected event was interrupted prematurely.
3169 */
3170 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
3171 AssertRC(rc2);
3172 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
3173 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
3174 /* Ignore 'int xx' as they'll be restarted anyway. */
3175 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
3176 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3177 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3178 {
3179 Assert(!pVCpu->hwaccm.s.Event.fPending);
3180 pVCpu->hwaccm.s.Event.fPending = true;
3181 /* Error code present? */
3182 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
3183 {
3184 rc2 = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
3185 AssertRC(rc2);
3186 pVCpu->hwaccm.s.Event.errCode = val;
3187 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n",
3188 pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
3189 }
3190 else
3191 {
3192 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo,
3193 (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3194 pVCpu->hwaccm.s.Event.errCode = 0;
3195 }
3196 }
3197#ifdef VBOX_STRICT
3198 else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
3199              /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3200 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
3201 {
3202 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n",
3203 pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3204 }
3205
3206 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
3207 HWACCMDumpRegs(pVM, pVCpu, pCtx);
3208#endif
3209
3210 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs, (RTGCPTR)pCtx->rip));
3211 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3212 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3213 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3214 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3215
3216 /*
3217 * Sync back the TPR if it was changed.
3218 */
3219 if ( fSetupTPRCaching
3220 && u8LastTPR != pVCpu->hwaccm.s.vmx.pbVAPIC[0x80])
3221 {
3222 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pbVAPIC[0x80]);
3223 AssertRC(rc2);
3224 }
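    /*
     * Illustrative sketch only (not part of the build): with TPR caching set up, offset 0x80 of
     * the virtual-APIC page shadows the task-priority register, so the check above amounts to
     * comparing that byte with the value cached before VM-entry and pushing any change back to
     * the PDM APIC. The names used below are the ones already in scope here:
     *
     *     uint8_t const u8GuestTpr = pVCpu->hwaccm.s.vmx.pbVAPIC[0x80];
     *     if (u8GuestTpr != u8LastTPR)
     *         PDMApicSetTPR(pVCpu, u8GuestTpr);
     */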
3225
3226#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3227 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3228 exitReason, (uint64_t)exitQualification, pCtx->cs, pCtx->rip, (uint64_t)intInfo);
3229#endif
3230 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x);
3231
3232 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3233 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3234 switch (exitReason)
3235 {
3236 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3237 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3238 {
3239 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3240
3241 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3242 {
3243 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
3244#if 0 //def VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3245 if ( RTThreadPreemptIsPendingTrusty()
3246 && !RTThreadPreemptIsPending(NIL_RTTHREAD))
3247 goto ResumeExecution;
3248#endif
3249 /* External interrupt; leave to allow it to be dispatched again. */
3250 rc = VINF_EM_RAW_INTERRUPT;
3251 break;
3252 }
3253 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3254 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3255 {
3256 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3257 /* External interrupt; leave to allow it to be dispatched again. */
3258 rc = VINF_EM_RAW_INTERRUPT;
3259 break;
3260
3261 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
3262 AssertFailed(); /* can't come here; fails the first check. */
3263 break;
3264
3265 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
3266 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
3267 Assert(vector == 1 || vector == 3 || vector == 4);
3268 /* no break */
3269 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
3270 Log2(("Hardware/software interrupt %d\n", vector));
3271 switch (vector)
3272 {
3273 case X86_XCPT_NM:
3274 {
3275 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3276
3277 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3278 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3279 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3280 if (rc == VINF_SUCCESS)
3281 {
3282 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3283
3284 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
3285
3286 /* Continue execution. */
3287 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3288
3289 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3290 goto ResumeExecution;
3291 }
3292
3293 Log(("Forward #NM fault to the guest\n"));
3294 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
3295 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3296 cbInstr, 0);
3297 AssertRC(rc2);
3298 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3299 goto ResumeExecution;
3300 }
3301
3302 case X86_XCPT_PF: /* Page fault */
3303 {
3304#ifdef VBOX_ALWAYS_TRAP_PF
3305 if (pVM->hwaccm.s.fNestedPaging)
3306 {
3307 /*
3308 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3309 */
3310 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification,
3311 errCode, (RTGCPTR)pCtx->rsp));
3312
3313 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3314
3315 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3316
3317 /* Now we must update CR2. */
3318 pCtx->cr2 = exitQualification;
3319 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3320 cbInstr, errCode);
3321 AssertRC(rc2);
3322
3323 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3324 goto ResumeExecution;
3325 }
3326#else
3327 Assert(!pVM->hwaccm.s.fNestedPaging);
3328#endif
3329
3330#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
3331             /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
3332 if ( pVM->hwaccm.s.fTRPPatchingAllowed
3333 && pVM->hwaccm.s.pGuestPatchMem
3334 && (exitQualification & 0xfff) == 0x080
3335 && !(errCode & X86_TRAP_PF_P) /* not present */
3336 && CPUMGetGuestCPL(pVCpu, CPUMCTX2CORE(pCtx)) == 0
3337 && !CPUMIsGuestInLongModeEx(pCtx)
3338 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
3339 {
3340 RTGCPHYS GCPhysApicBase, GCPhys;
3341 PDMApicGetBase(pVM, &GCPhysApicBase); /** @todo cache this */
3342 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3343
3344 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3345 if ( rc == VINF_SUCCESS
3346 && GCPhys == GCPhysApicBase)
3347 {
3348 /* Only attempt to patch the instruction once. */
3349 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3350 if (!pPatch)
3351 {
3352 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
3353 break;
3354 }
3355 }
3356 }
3357#endif
3358
3359 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3360 /* Exit qualification contains the linear address of the page fault. */
3361 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3362 TRPMSetErrorCode(pVCpu, errCode);
3363 TRPMSetFaultAddress(pVCpu, exitQualification);
3364
3365 /* Shortcut for APIC TPR reads and writes. */
3366 if ( (exitQualification & 0xfff) == 0x080
3367 && !(errCode & X86_TRAP_PF_P) /* not present */
3368 && fSetupTPRCaching
3369 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3370 {
3371 RTGCPHYS GCPhysApicBase, GCPhys;
3372                     PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
3373 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3374
3375 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3376 if ( rc == VINF_SUCCESS
3377 && GCPhys == GCPhysApicBase)
3378 {
3379 Log(("Enable VT-x virtual APIC access filtering\n"));
3380 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3381 AssertRC(rc2);
3382 }
3383 }
3384
3385 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3386 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3387 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3388
3389 if (rc == VINF_SUCCESS)
3390 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3391                 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3392 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
3393
3394 TRPMResetTrap(pVCpu);
3395 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3396 goto ResumeExecution;
3397 }
3398 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3399 {
3400 /*
3401 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3402 */
3403 Log2(("Forward page fault to the guest\n"));
3404
3405 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
3406 /* The error code might have been changed. */
3407 errCode = TRPMGetErrorCode(pVCpu);
3408
3409 TRPMResetTrap(pVCpu);
3410
3411 /* Now we must update CR2. */
3412 pCtx->cr2 = exitQualification;
3413 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3414 cbInstr, errCode);
3415 AssertRC(rc2);
3416
3417 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3418 goto ResumeExecution;
3419 }
3420#ifdef VBOX_STRICT
3421 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3422 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3423#endif
3424 /* Need to go back to the recompiler to emulate the instruction. */
3425 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPFEM);
3426 TRPMResetTrap(pVCpu);
3427 break;
3428 }
3429
3430 case X86_XCPT_MF: /* Floating point exception. */
3431 {
3432 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
3433 if (!(pCtx->cr0 & X86_CR0_NE))
3434 {
3435 /* old style FPU error reporting needs some extra work. */
3436 /** @todo don't fall back to the recompiler, but do it manually. */
3437 rc = VINF_EM_RAW_EMULATE_INSTR;
3438 break;
3439 }
3440 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
3441 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3442 cbInstr, errCode);
3443 AssertRC(rc2);
3444
3445 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3446 goto ResumeExecution;
3447 }
3448
3449 case X86_XCPT_DB: /* Debug exception. */
3450 {
3451 uint64_t uDR6;
3452
3453 /*
3454 * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3455 *
3456 * Exit qualification bits:
3457 * 3:0 B0-B3 which breakpoint condition was met
3458 * 12:4 Reserved (0)
3459 * 13 BD - debug register access detected
3460 * 14 BS - single step execution or branch taken
3461 * 63:15 Reserved (0)
3462 */
3463 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
3464
3465 /* Note that we don't support guest and host-initiated debugging at the same time. */
3466
3467 uDR6 = X86_DR6_INIT_VAL;
3468 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3469 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3470 if (rc == VINF_EM_RAW_GUEST_TRAP)
3471 {
3472 /* Update DR6 here. */
3473 pCtx->dr[6] = uDR6;
3474
3475 /* Resync DR6 if the debug state is active. */
3476 if (CPUMIsGuestDebugStateActive(pVCpu))
3477 ASMSetDR6(pCtx->dr[6]);
3478
3479 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
3480 pCtx->dr[7] &= ~X86_DR7_GD;
3481
3482 /* Paranoia. */
3483 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3484 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3485 pCtx->dr[7] |= 0x400; /* must be one */
3486
3487 /* Resync DR7 */
3488 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3489 AssertRC(rc2);
3490
3491 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip,
3492 exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3493 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3494 cbInstr, errCode);
3495 AssertRC(rc2);
3496
3497 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3498 goto ResumeExecution;
3499 }
3500 /* Return to ring 3 to deal with the debug exit code. */
3501 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3502 break;
3503 }
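                    /*
                     * Illustrative sketch only (not part of the build): given the exit-qualification
                     * layout documented in the #DB case above, deriving the guest DR6 value could be
                     * factored into a helper along these lines (the helper name is hypothetical, the
                     * constants are the ones already used above):
                     *
                     *     static uint64_t hmR0VmxSketchDr6FromExitQual(uint64_t uExitQual)
                     *     {
                     *         uint64_t uDr6 = X86_DR6_INIT_VAL;
                     *         uDr6 |= uExitQual & (  X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3
                     *                              | X86_DR6_BD | X86_DR6_BS);
                     *         return uDr6;
                     *     }
                     */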
3504
3505 case X86_XCPT_BP: /* Breakpoint. */
3506 {
3507 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestBP);
3508 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3509 if (rc == VINF_EM_RAW_GUEST_TRAP)
3510 {
3511 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs, pCtx->rip));
3512 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3513 cbInstr, errCode);
3514 AssertRC(rc2);
3515 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3516 goto ResumeExecution;
3517 }
3518 if (rc == VINF_SUCCESS)
3519 {
3520 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3521 goto ResumeExecution;
3522 }
3523 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3524 break;
3525 }
3526
3527 case X86_XCPT_GP: /* General protection failure exception. */
3528 {
3529 uint32_t cbOp;
3530 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3531
3532 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
3533#ifdef VBOX_STRICT
3534 if ( !CPUMIsGuestInRealModeEx(pCtx)
3535 || !pVM->hwaccm.s.vmx.pRealModeTSS)
3536 {
3537 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode));
3538 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3539 cbInstr, errCode);
3540 AssertRC(rc2);
3541 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3542 goto ResumeExecution;
3543 }
3544#endif
3545 Assert(CPUMIsGuestInRealModeEx(pCtx));
3546
3547 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip));
3548
3549 rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, &cbOp);
3550 if (RT_SUCCESS(rc2))
3551 {
3552 bool fUpdateRIP = true;
3553
3554 rc = VINF_SUCCESS;
3555 Assert(cbOp == pDis->opsize);
3556 switch (pDis->pCurInstr->opcode)
3557 {
3558 case OP_CLI:
3559 pCtx->eflags.Bits.u1IF = 0;
3560 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
3561 break;
3562
3563 case OP_STI:
3564 pCtx->eflags.Bits.u1IF = 1;
3565 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->opsize);
3566 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3567 rc2 = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE,
3568 VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3569 AssertRC(rc2);
3570 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
3571 break;
3572
3573 case OP_HLT:
3574 fUpdateRIP = false;
3575 rc = VINF_EM_HALT;
3576 pCtx->rip += pDis->opsize;
3577 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3578 break;
3579
3580 case OP_POPF:
3581 {
3582 RTGCPTR GCPtrStack;
3583 uint32_t cbParm;
3584 uint32_t uMask;
3585 X86EFLAGS eflags;
3586
3587 if (pDis->prefix & DISPREFIX_OPSIZE)
3588 {
3589 cbParm = 4;
3590 uMask = 0xffffffff;
3591 }
3592 else
3593 {
3594 cbParm = 2;
3595 uMask = 0xffff;
3596 }
3597
3598 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3599 if (RT_FAILURE(rc2))
3600 {
3601 rc = VERR_EM_INTERPRETER;
3602 break;
3603 }
3604 eflags.u = 0;
3605 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3606 if (RT_FAILURE(rc2))
3607 {
3608 rc = VERR_EM_INTERPRETER;
3609 break;
3610 }
3611 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3612 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (eflags.u & X86_EFL_POPF_BITS & uMask);
3613 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3614 pCtx->eflags.Bits.u1RF = 0;
3615 pCtx->esp += cbParm;
3616 pCtx->esp &= uMask;
3617
3618 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
3619 break;
3620 }
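                            /*
                             * Illustrative sketch only (not part of the build): in the POPF emulation above,
                             * only the flag bits that POPF may modify (X86_EFL_POPF_BITS) and that fit the
                             * operand size (uMask) are taken from the popped value; all other bits keep
                             * their current value:
                             *
                             *     pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
                             *                    | (eflags.u       &  (X86_EFL_POPF_BITS & uMask));
                             */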
3621
3622 case OP_PUSHF:
3623 {
3624 RTGCPTR GCPtrStack;
3625 uint32_t cbParm;
3626 uint32_t uMask;
3627 X86EFLAGS eflags;
3628
3629 if (pDis->prefix & DISPREFIX_OPSIZE)
3630 {
3631 cbParm = 4;
3632 uMask = 0xffffffff;
3633 }
3634 else
3635 {
3636 cbParm = 2;
3637 uMask = 0xffff;
3638 }
3639
3640 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0,
3641 &GCPtrStack);
3642 if (RT_FAILURE(rc2))
3643 {
3644 rc = VERR_EM_INTERPRETER;
3645 break;
3646 }
3647 eflags = pCtx->eflags;
3648 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3649 eflags.Bits.u1RF = 0;
3650 eflags.Bits.u1VM = 0;
3651
3652 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3653 if (RT_FAILURE(rc2))
3654 {
3655 rc = VERR_EM_INTERPRETER;
3656 break;
3657 }
3658 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3659 pCtx->esp -= cbParm;
3660 pCtx->esp &= uMask;
3661 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
3662 break;
3663 }
3664
3665 case OP_IRET:
3666 {
3667 RTGCPTR GCPtrStack;
3668 uint32_t uMask = 0xffff;
3669 uint16_t aIretFrame[3];
3670
3671 if (pDis->prefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
3672 {
3673 rc = VERR_EM_INTERPRETER;
3674 break;
3675 }
3676
3677 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3678 if (RT_FAILURE(rc2))
3679 {
3680 rc = VERR_EM_INTERPRETER;
3681 break;
3682 }
3683 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3684 if (RT_FAILURE(rc2))
3685 {
3686 rc = VERR_EM_INTERPRETER;
3687 break;
3688 }
3689 pCtx->ip = aIretFrame[0];
3690 pCtx->cs = aIretFrame[1];
3691 pCtx->csHid.u64Base = pCtx->cs << 4;
3692 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3693 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3694 pCtx->sp += sizeof(aIretFrame);
3695
3696 LogFlow(("iret to %04x:%x\n", pCtx->cs, pCtx->ip));
3697 fUpdateRIP = false;
3698 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
3699 break;
3700 }
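                            /*
                             * Illustrative sketch only (not part of the build): the 16-bit IRET emulation
                             * above assumes the classic real-mode frame of three words popped from SS:SP,
                             * lowest address first. A hypothetical struct view of aIretFrame:
                             *
                             *     struct IRETFRAME16
                             *     {
                             *         uint16_t uIp;      // aIretFrame[0] -> new IP
                             *         uint16_t uCs;      // aIretFrame[1] -> new CS; base = selector << 4 in real mode
                             *         uint16_t uFlags;   // aIretFrame[2] -> merged into EFLAGS via X86_EFL_POPF_BITS
                             *     };
                             */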
3701
3702 case OP_INT:
3703 {
3704 uint32_t intInfo2;
3705
3706 LogFlow(("Realmode: INT %x\n", pDis->param1.parval & 0xff));
3707 intInfo2 = pDis->param1.parval & 0xff;
3708 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3709 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3710
3711 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3712 AssertRC(VBOXSTRICTRC_VAL(rc));
3713 fUpdateRIP = false;
3714 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3715 break;
3716 }
3717
3718 case OP_INTO:
3719 {
3720 if (pCtx->eflags.Bits.u1OF)
3721 {
3722 uint32_t intInfo2;
3723
3724 LogFlow(("Realmode: INTO\n"));
3725 intInfo2 = X86_XCPT_OF;
3726 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3727 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3728
3729 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3730 AssertRC(VBOXSTRICTRC_VAL(rc));
3731 fUpdateRIP = false;
3732 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3733 }
3734 break;
3735 }
3736
3737 case OP_INT3:
3738 {
3739 uint32_t intInfo2;
3740
3741 LogFlow(("Realmode: INT 3\n"));
3742 intInfo2 = 3;
3743 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3744 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3745
3746 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3747 AssertRC(VBOXSTRICTRC_VAL(rc));
3748 fUpdateRIP = false;
3749 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3750 break;
3751 }
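                            /*
                             * Illustrative sketch only (not part of the build): the INT, INTO and INT3 cases
                             * above all assemble the VM-entry interruption information the same way - vector,
                             * software-interrupt type and valid bit. A hypothetical helper:
                             *
                             *     static uint32_t hmR0VmxSketchMakeSwIntInfo(uint8_t uVector)
                             *     {
                             *         uint32_t uIntInfo = uVector;
                             *         uIntInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
                             *         uIntInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
                             *         return uIntInfo;
                             *     }
                             */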
3752
3753 default:
3754 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
3755 fUpdateRIP = false;
3756 break;
3757 }
3758
3759 if (rc == VINF_SUCCESS)
3760 {
3761 if (fUpdateRIP)
3762 pCtx->rip += cbOp; /* Move on to the next instruction. */
3763
3764 /*
3765 * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the
3766 * whole context to be done with it.
3767 */
3768 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3769
3770 /* Only resume if successful. */
3771 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3772 goto ResumeExecution;
3773 }
3774 }
3775 else
3776 rc = VERR_EM_INTERPRETER;
3777
3778 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
3779 break;
3780 }
3781
3782#ifdef VBOX_STRICT
3783 case X86_XCPT_XF: /* SIMD exception. */
3784 case X86_XCPT_DE: /* Divide error. */
3785 case X86_XCPT_UD: /* Unknown opcode exception. */
3786 case X86_XCPT_SS: /* Stack segment exception. */
3787 case X86_XCPT_NP: /* Segment not present exception. */
3788 {
3789 switch(vector)
3790 {
3791 case X86_XCPT_DE:
3792 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
3793 break;
3794 case X86_XCPT_UD:
3795 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
3796 break;
3797 case X86_XCPT_SS:
3798 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
3799 break;
3800 case X86_XCPT_NP:
3801 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
3802 break;
3803 case X86_XCPT_XF:
3804 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXF);
3805 break;
3806 }
3807
3808 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
3809 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3810 cbInstr, errCode);
3811 AssertRC(rc2);
3812
3813 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3814 goto ResumeExecution;
3815 }
3816#endif
3817 default:
3818 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestXcpUnk);
3819 if ( CPUMIsGuestInRealModeEx(pCtx)
3820 && pVM->hwaccm.s.vmx.pRealModeTSS)
3821 {
3822 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode));
3823 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3824 cbInstr, errCode);
3825 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
3826
3827 /* Go back to ring-3 in case of a triple fault. */
3828 if ( vector == X86_XCPT_DF
3829 && rc == VINF_EM_RESET)
3830 {
3831 break;
3832 }
3833
3834 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3835 goto ResumeExecution;
3836 }
3837 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3838 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3839 break;
3840 } /* switch (vector) */
3841
3842 break;
3843
3844 default:
3845 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3846 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
3847 break;
3848 }
3849
3850 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3851 break;
3852 }
3853
3854 /*
3855      * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed
3856 * by the configuration of the EPT paging structures.
3857 */
3858 case VMX_EXIT_EPT_VIOLATION:
3859 {
3860 RTGCPHYS GCPhys;
3861
3862 Assert(pVM->hwaccm.s.fNestedPaging);
3863
3864 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3865 AssertRC(rc2);
3866 Assert(((exitQualification >> 7) & 3) != 2);
3867
3868 /* Determine the kind of violation. */
3869 errCode = 0;
3870 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
3871 errCode |= X86_TRAP_PF_ID;
3872
3873 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
3874 errCode |= X86_TRAP_PF_RW;
3875
3876 /* If the page is present, then it's a page level protection fault. */
3877 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
3878 errCode |= X86_TRAP_PF_P;
3879 else
3880 {
3881 /* Shortcut for APIC TPR reads and writes. */
3882 if ( (GCPhys & 0xfff) == 0x080
3883 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3884 && fSetupTPRCaching
3885 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3886 {
3887 RTGCPHYS GCPhysApicBase;
3888                 PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
3889 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3890 if (GCPhys == GCPhysApicBase + 0x80)
3891 {
3892 Log(("Enable VT-x virtual APIC access filtering\n"));
3893 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3894 AssertRC(rc2);
3895 }
3896 }
3897 }
3898 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
3899
3900 /* GCPhys contains the guest physical address of the page fault. */
3901 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3902 TRPMSetErrorCode(pVCpu, errCode);
3903 TRPMSetFaultAddress(pVCpu, GCPhys);
3904
3905 /* Handle the pagefault trap for the nested shadow table. */
3906 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
3907
3908 /*
3909 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
3910 */
3911 if ( rc == VINF_SUCCESS
3912 || rc == VERR_PAGE_TABLE_NOT_PRESENT
3913 || rc == VERR_PAGE_NOT_PRESENT)
3914 {
3915 /* We've successfully synced our shadow pages, so let's just continue execution. */
3916             Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3917 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
3918
3919 TRPMResetTrap(pVCpu);
3920 goto ResumeExecution;
3921 }
3922
3923#ifdef VBOX_STRICT
3924 if (rc != VINF_EM_RAW_EMULATE_INSTR)
3925 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3926#endif
3927 /* Need to go back to the recompiler to emulate the instruction. */
3928 TRPMResetTrap(pVCpu);
3929 break;
3930 }
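        /*
         * Illustrative sketch only (not part of the build): the EPT-violation handling above folds
         * the exit qualification into an x86 #PF-style error code before handing it to PGM. Under
         * the same assumptions the mapping is:
         *
         *     uint32_t uErr = 0;
         *     if (uExitQual & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)     uErr |= X86_TRAP_PF_ID;  // instruction fetch
         *     if (uExitQual & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)      uErr |= X86_TRAP_PF_RW;  // write access
         *     if (uExitQual & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)   uErr |= X86_TRAP_PF_P;   // protection fault on a present page
         */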
3931
3932 case VMX_EXIT_EPT_MISCONFIG:
3933 {
3934 RTGCPHYS GCPhys;
3935
3936 Assert(pVM->hwaccm.s.fNestedPaging);
3937
3938 rc2 = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3939 AssertRC(rc2);
3940 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
3941
3942 /* Shortcut for APIC TPR reads and writes. */
3943 if ( (GCPhys & 0xfff) == 0x080
3944 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3945 && fSetupTPRCaching
3946 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3947 {
3948 RTGCPHYS GCPhysApicBase;
3949                 PDMApicGetBase(pVM, &GCPhysApicBase);   /** @todo cache this */
3950 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3951 if (GCPhys == GCPhysApicBase + 0x80)
3952 {
3953 Log(("Enable VT-x virtual APIC access filtering\n"));
3954 rc2 = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3955 AssertRC(rc2);
3956 }
3957 }
3958
3959 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
3960
3961 /*
3962 * If we succeed, resume execution.
3963          * Or, if we failed to interpret the instruction because we couldn't get the guest physical address
3964          * of the page containing the instruction via the guest's page tables (we would have invalidated the
3965          * guest page in the host TLB), resume execution anyway; that causes a guest page fault and lets the
3966          * guest handle this weird case. See @bugref{6043}.
3967 */
3968 if ( rc == VINF_SUCCESS
3969 || rc == VERR_PAGE_TABLE_NOT_PRESENT
3970 || rc == VERR_PAGE_NOT_PRESENT)
3971 {
3972 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
3973 goto ResumeExecution;
3974 }
3975
3976 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3977 break;
3978 }
3979
3980 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
3981 /* Clear VM-exit on IF=1 change. */
3982 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip,
3983 VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
3984 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
3985 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3986 AssertRC(rc2);
3987 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
3988 goto ResumeExecution; /* we check for pending guest interrupts there */
3989
3990 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
3991 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
3992 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
3993 /* Skip instruction and continue directly. */
3994 pCtx->rip += cbInstr;
3995             /* Continue execution. */
3996 goto ResumeExecution;
3997
3998 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
3999 {
4000 Log2(("VMX: Cpuid %x\n", pCtx->eax));
4001 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
4002 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4003 if (rc == VINF_SUCCESS)
4004 {
4005 /* Update EIP and continue execution. */
4006 Assert(cbInstr == 2);
4007 pCtx->rip += cbInstr;
4008 goto ResumeExecution;
4009 }
4010 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4011 rc = VINF_EM_RAW_EMULATE_INSTR;
4012 break;
4013 }
4014
4015 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4016 {
4017 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
4018 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
4019 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4020 if (rc == VINF_SUCCESS)
4021 {
4022 /* Update EIP and continue execution. */
4023 Assert(cbInstr == 2);
4024 pCtx->rip += cbInstr;
4025 goto ResumeExecution;
4026 }
4027 rc = VINF_EM_RAW_EMULATE_INSTR;
4028 break;
4029 }
4030
4031 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4032 {
4033 Log2(("VMX: Rdtsc\n"));
4034 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
4035 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4036 if (rc == VINF_SUCCESS)
4037 {
4038 /* Update EIP and continue execution. */
4039 Assert(cbInstr == 2);
4040 pCtx->rip += cbInstr;
4041 goto ResumeExecution;
4042 }
4043 rc = VINF_EM_RAW_EMULATE_INSTR;
4044 break;
4045 }
4046
4047         case VMX_EXIT_INVPG:                /* 14 Guest software attempted to execute INVLPG. */
4048 {
4049 Log2(("VMX: invlpg\n"));
4050 Assert(!pVM->hwaccm.s.fNestedPaging);
4051
4052 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
4053 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
4054 if (rc == VINF_SUCCESS)
4055 {
4056 /* Update EIP and continue execution. */
4057 pCtx->rip += cbInstr;
4058 goto ResumeExecution;
4059 }
4060 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
4061 break;
4062 }
4063
4064 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4065 {
4066 Log2(("VMX: monitor\n"));
4067
4068 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
4069 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4070 if (rc == VINF_SUCCESS)
4071 {
4072 /* Update EIP and continue execution. */
4073 pCtx->rip += cbInstr;
4074 goto ResumeExecution;
4075 }
4076 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4077 break;
4078 }
4079
4080 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4081 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
4082 if ( pVM->hwaccm.s.fTPRPatchingActive
4083 && pCtx->ecx == MSR_K8_LSTAR)
4084 {
4085 Assert(!CPUMIsGuestInLongModeEx(pCtx));
4086 if ((pCtx->eax & 0xff) != u8LastTPR)
4087 {
4088 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
4089
4090 /* Our patch code uses LSTAR for TPR caching. */
4091 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
4092 AssertRC(rc2);
4093 }
4094
4095 /* Skip the instruction and continue. */
4096 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
4097
4098 /* Only resume if successful. */
4099 goto ResumeExecution;
4100 }
4101 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_MSR;
4102 /* no break */
4103 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4104 {
4105 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
4106
4107 /*
4108              * Note: The Intel spec. claims there's a REX version of RDMSR that's slightly different,
4109              * so we play it safe by completely disassembling the instruction.
4110 */
4111 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
4112 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
4113 if (rc == VINF_SUCCESS)
4114 {
4115 /* EIP has been updated already. */
4116 /* Only resume if successful. */
4117 goto ResumeExecution;
4118 }
4119 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n",
4120 (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
4121 break;
4122 }
4123
4124 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4125 {
4126 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4127
4128 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
4129 {
4130 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
4131 {
4132 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
4133 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4134 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4135 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
4136 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
4137 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
4138 {
4139 case 0:
4140 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
4141 break;
4142 case 2:
4143 break;
4144 case 3:
4145 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
4146 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
4147 break;
4148 case 4:
4149 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
4150 break;
4151 case 8:
4152 /* CR8 contains the APIC TPR */
4153 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1
4154 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4155 break;
4156
4157 default:
4158 AssertFailed();
4159 break;
4160 }
4161 break;
4162 }
4163
4164 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
4165 {
4166 Log2(("VMX: mov x, crx\n"));
4167 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4168
4169 Assert( !pVM->hwaccm.s.fNestedPaging
4170 || !CPUMIsGuestInPagedProtectedModeEx(pCtx)
4171 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3);
4172
4173 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
4174 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8
4175 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4176
4177 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4178 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
4179 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
4180 break;
4181 }
4182
4183 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
4184 {
4185 Log2(("VMX: clts\n"));
4186 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
4187 rc = EMInterpretCLTS(pVM, pVCpu);
4188 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
4189 break;
4190 }
4191
4192 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
4193 {
4194 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
4195 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
4196 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
4197 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
4198 break;
4199 }
4200 }
4201
4202 /* Update EIP if no error occurred. */
4203 if (RT_SUCCESS(rc))
4204 pCtx->rip += cbInstr;
4205
4206 if (rc == VINF_SUCCESS)
4207 {
4208 /* Only resume if successful. */
4209 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4210 goto ResumeExecution;
4211 }
4212 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
4213 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
4214 break;
4215 }
4216
4217 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4218 {
4219 if ( !DBGFIsStepping(pVCpu)
4220 && !CPUMIsHyperDebugStateActive(pVCpu))
4221 {
4222 /* Disable DRx move intercepts. */
4223 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4224 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4225 AssertRC(rc2);
4226
4227 /* Save the host and load the guest debug state. */
4228 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
4229 AssertRC(rc2);
4230
4231#ifdef LOG_ENABLED
4232 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4233 {
4234 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4235 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4236 }
4237 else
4238 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
4239#endif
4240
4241#ifdef VBOX_WITH_STATISTICS
4242 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
4243 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4244 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4245 else
4246 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4247#endif
4248
4249 goto ResumeExecution;
4250 }
4251
4252 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first
4253 * time and restore DRx registers afterwards */
4254 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4255 {
4256 Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4257 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4258 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
4259 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4260 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4261 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4262 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4263 Log2(("DR7=%08x\n", pCtx->dr[7]));
4264 }
4265 else
4266 {
4267 Log2(("VMX: mov x, DRx\n"));
4268 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
4269 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4270 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4271 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4272 }
4273 /* Update EIP if no error occurred. */
4274 if (RT_SUCCESS(rc))
4275 pCtx->rip += cbInstr;
4276
4277 if (rc == VINF_SUCCESS)
4278 {
4279 /* Only resume if successful. */
4280 goto ResumeExecution;
4281 }
4282 Assert(rc == VERR_EM_INTERPRETER);
4283 break;
4284 }
4285
4286 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
4287 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4288 {
4289 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4290 uint32_t uPort;
4291 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4292 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4293
4294 /** @todo necessary to make the distinction? */
4295 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4296 uPort = pCtx->edx & 0xffff;
4297 else
4298 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
4299
4300 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */
4301 {
4302 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4303 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4304 break;
4305 }
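            /*
             * Illustrative sketch only (not part of the build): the access-size field of the I/O exit
             * qualification is an encoding rather than a byte count (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes;
             * 2 is not used), which is why g_aIOSize/g_aIOOpAnd have a zero hole at index 2 and why the
             * paranoia check above rejects uIOWidth == 2:
             *
             *     uint32_t const cbAccess = g_aIOSize[uIOWidth];    // 1, 2 or 4 bytes
             *     uint32_t const uMask    = g_aIOOpAnd[uIOWidth];   // 0xff, 0xffff or 0xffffffff
             */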
4306
4307 uint32_t cbSize = g_aIOSize[uIOWidth];
4308 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4309 {
4310 /* ins/outs */
4311 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
4312
4313 /* Disassemble manually to deal with segment prefixes. */
4314 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
4315 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4316 rc2 = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL);
4317                 if (RT_SUCCESS(rc2))
4318 {
4319 if (fIOWrite)
4320 {
4321 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4322 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
4323 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, (DISCPUMODE)pDis->addrmode, cbSize);
4324 }
4325 else
4326 {
4327 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4328 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
4329 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, (DISCPUMODE)pDis->addrmode, cbSize);
4330 }
4331 }
4332 else
4333 rc = VINF_EM_RAW_EMULATE_INSTR;
4334 }
4335 else
4336 {
4337 /* Normal in/out */
4338 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
4339
4340 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4341
4342 if (fIOWrite)
4343 {
4344 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
4345 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4346 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4347 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4348 }
4349 else
4350 {
4351 uint32_t u32Val = 0;
4352
4353 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
4354 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4355 if (IOM_SUCCESS(rc))
4356 {
4357 /* Write back to the EAX register. */
4358 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4359 }
4360 else
4361 if (rc == VINF_IOM_R3_IOPORT_READ)
4362 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4363 }
4364 }
4365
4366 /*
4367              * Handle the I/O return codes.
4368 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4369 */
4370 if (IOM_SUCCESS(rc))
4371 {
4372 /* Update EIP and continue execution. */
4373 pCtx->rip += cbInstr;
4374 if (RT_LIKELY(rc == VINF_SUCCESS))
4375 {
4376 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4377 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4378 {
4379 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
4380 for (unsigned i = 0; i < 4; i++)
4381 {
4382 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
4383
4384 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4385 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4386 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4387 {
4388 uint64_t uDR6;
4389
4390 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4391
4392 uDR6 = ASMGetDR6();
4393
4394 /* Clear all breakpoint status flags and set the one we just hit. */
4395 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4396 uDR6 |= (uint64_t)RT_BIT(i);
4397
4398 /*
4399 * Note: AMD64 Architecture Programmer's Manual 13.1:
4400                              * Bits 15:13 of the DR6 register are never cleared by the processor and must
4401 * be cleared by software after the contents have been read.
4402 */
4403 ASMSetDR6(uDR6);
4404
4405 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
4406 pCtx->dr[7] &= ~X86_DR7_GD;
4407
4408 /* Paranoia. */
4409 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4410 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4411 pCtx->dr[7] |= 0x400; /* must be one */
4412
4413 /* Resync DR7 */
4414 rc2 = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
4415 AssertRC(rc2);
4416
4417 /* Construct inject info. */
4418 intInfo = X86_XCPT_DB;
4419 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4420 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4421
4422 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4423 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4424 0 /* cbInstr */, 0 /* errCode */);
4425 AssertRC(rc2);
4426
4427 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4428 goto ResumeExecution;
4429 }
4430 }
4431 }
4432 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4433 goto ResumeExecution;
4434 }
4435 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4436 break;
4437 }
4438
4439#ifdef VBOX_STRICT
4440 if (rc == VINF_IOM_R3_IOPORT_READ)
4441 Assert(!fIOWrite);
4442 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4443 Assert(fIOWrite);
4444 else
4445 {
4446 AssertMsg( RT_FAILURE(rc)
4447 || rc == VINF_EM_RAW_EMULATE_INSTR
4448 || rc == VINF_EM_RAW_GUEST_TRAP
4449 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4450 }
4451#endif
4452 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
4453 break;
4454 }
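        /*
         * Illustrative sketch only (not part of the build): the I/O breakpoint scan in the port I/O case
         * above checks, for each debug register i, that (a) the port lies inside the range covered by DRi
         * and its DR7 length field, (b) the breakpoint is enabled locally or globally, and (c) its R/W
         * field selects I/O breakpoints. Roughly:
         *
         *     bool const fHit =    uPort >= pCtx->dr[i]
         *                       && uPort <  pCtx->dr[i] + g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)]
         *                       && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
         *                       && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO);
         */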
4455
4456 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4457 LogFlow(("VMX_EXIT_TPR\n"));
4458 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4459 goto ResumeExecution;
4460
4461 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4462 on the APIC-access page. */
4463 {
4464 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4465 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4466
4467 switch(uAccessType)
4468 {
4469 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4470 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4471 {
4472 RTGCPHYS GCPhys;
4473 PDMApicGetBase(pVM, &GCPhys);
4474 GCPhys &= PAGE_BASE_GC_MASK;
4475 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4476
4477 LogFlow(("Apic access at %RGp\n", GCPhys));
4478 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
4479 CPUMCTX2CORE(pCtx), GCPhys);
4480 if (rc == VINF_SUCCESS)
4481 goto ResumeExecution; /* rip already updated */
4482 break;
4483 }
4484
4485 default:
4486 rc = VINF_EM_RAW_EMULATE_INSTR;
4487 break;
4488 }
4489 break;
4490 }
4491
4492 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4493 if (!TMTimerPollBool(pVM, pVCpu))
4494 goto ResumeExecution;
4495 rc = VINF_EM_RAW_TIMER_PENDING;
4496 break;
4497
4498 default:
4499 /* The rest is handled after syncing the entire CPU state. */
4500 break;
4501 }
4502
4503
4504 /*
4505 * Note: The guest state is not entirely synced back at this stage!
4506 */
4507
4508 /* Investigate why there was a VM-exit. (part 2) */
4509 switch (exitReason)
4510 {
4511 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
4512 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
4513 case VMX_EXIT_EPT_VIOLATION:
4514 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4515 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4516 /* Already handled above. */
4517 break;
4518
4519 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4520 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4521 break;
4522
4523 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4524 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4525 rc = VINF_EM_RAW_INTERRUPT;
4526 AssertFailed(); /* Can't happen. Yet. */
4527 break;
4528
4529 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
4530 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
4531 rc = VINF_EM_RAW_INTERRUPT;
4532 AssertFailed(); /* Can't happen afaik. */
4533 break;
4534
4535 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4536 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4537 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4538 && pVCpu->hwaccm.s.Event.fPending)
4539 {
4540 /* Caused by an injected interrupt. */
4541 pVCpu->hwaccm.s.Event.fPending = false;
4542
4543 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo)));
4544 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo));
4545 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT);
4546 AssertRC(rc2);
4547 }
4548 /* else Exceptions and software interrupts can just be restarted. */
4549 rc = VERR_EM_INTERPRETER;
4550 break;
4551
4552 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4553 /* Check if external interrupts are pending; if so, don't switch back. */
4554 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
4555 pCtx->rip++; /* skip hlt */
4556 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4557 goto ResumeExecution;
4558
4559 rc = VINF_EM_HALT;
4560 break;
4561
4562 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4563 Log2(("VMX: mwait\n"));
4564 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
4565 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4566 if ( rc == VINF_EM_HALT
4567 || rc == VINF_SUCCESS)
4568 {
4569 /* Update EIP and continue execution. */
4570 pCtx->rip += cbInstr;
4571
4572 /* Check if external interrupts are pending; if so, don't switch back. */
4573 if ( rc == VINF_SUCCESS
4574 || ( rc == VINF_EM_HALT
4575 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4576 )
4577 goto ResumeExecution;
4578 }
4579 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4580 break;
4581
4582 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4583 AssertFailed(); /* can't happen. */
4584 rc = VERR_EM_INTERPRETER;
4585 break;
4586
4587 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4588 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4589 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4590 rc2 = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4591 AssertRC(rc2);
4592 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMTF);
4593#if 0
4594 DBGFDoneStepping(pVCpu);
4595#endif
4596 rc = VINF_EM_DBG_STOP;
4597 break;
4598
4599 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4600 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4601 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4602 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4603 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4604 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4605 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4606 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4607 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4608 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4609 /** @todo inject #UD immediately */
4610 rc = VERR_EM_INTERPRETER;
4611 break;
4612
4613 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4614 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4615         case VMX_EXIT_INVPG:                /* 14 Guest software attempted to execute INVLPG. */
4616 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4617 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4618 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4619 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4620 /* already handled above */
4621 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4622 || rc == VINF_EM_RAW_INTERRUPT
4623 || rc == VERR_EM_INTERPRETER
4624 || rc == VINF_EM_RAW_EMULATE_INSTR
4625 || rc == VINF_PGM_SYNC_CR3
4626 || rc == VINF_IOM_R3_IOPORT_READ
4627 || rc == VINF_IOM_R3_IOPORT_WRITE
4628 || rc == VINF_EM_RAW_GUEST_TRAP
4629 || rc == VINF_TRPM_XCPT_DISPATCHED
4630 || rc == VINF_EM_RESCHEDULE_REM,
4631 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4632 break;
4633
4634 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4635 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4636 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4637 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4638 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4639 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4640 on the APIC-access page. */
4641 {
4642 /*
4643 * If we decided to emulate them here, then we must sync the MSRs that could have been changed (sysenter, FS/GS base)
4644 */
4645 rc = VERR_EM_INTERPRETER;
4646 break;
4647 }
4648
4649 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4650 Assert(rc == VINF_EM_RAW_INTERRUPT);
4651 break;
4652
4653 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4654 {
4655#ifdef VBOX_STRICT
4656 RTCCUINTREG val2 = 0;
4657
4658 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4659
4660 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2);
4661 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4662
4663 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2);
4664 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4665
4666 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2);
4667 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4668
4669 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2);
4670 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4671
4672 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4673 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4674
4675 VMX_LOG_SELREG(CS, "CS", val2);
4676 VMX_LOG_SELREG(DS, "DS", val2);
4677 VMX_LOG_SELREG(ES, "ES", val2);
4678 VMX_LOG_SELREG(FS, "FS", val2);
4679 VMX_LOG_SELREG(GS, "GS", val2);
4680 VMX_LOG_SELREG(SS, "SS", val2);
4681 VMX_LOG_SELREG(TR, "TR", val2);
4682 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4683
4684 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2);
4685 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4686 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2);
4687 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4688#endif /* VBOX_STRICT */
4689 rc = VERR_VMX_INVALID_GUEST_STATE;
4690 break;
4691 }
4692
4693 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4694 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4695 default:
4696 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4697 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4698 break;
4699
4700 }
4701
4702end:
4703    /* We're now going back to ring-3, so clear the action flag. */
4704 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
4705
4706 /*
4707 * Signal changes for the recompiler.
4708 */
4709 CPUMSetChangedFlags(pVCpu,
4710 CPUM_CHANGED_SYSENTER_MSR
4711 | CPUM_CHANGED_LDTR
4712 | CPUM_CHANGED_GDTR
4713 | CPUM_CHANGED_IDTR
4714 | CPUM_CHANGED_TR
4715 | CPUM_CHANGED_HIDDEN_SEL_REGS);
4716
4717 /*
4718 * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time.
4719 */
4720 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4721 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4722 {
4723 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
4724 /* On the next entry we'll only sync the host context. */
4725 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
4726 }
4727 else
4728 {
4729 /* On the next entry we'll sync everything. */
4730 /** @todo we can do better than this */
4731 /* Not in the VINF_PGM_CHANGE_MODE though! */
4732 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
4733 }
4734
4735 /* Translate into a less severe return code */
4736 if (rc == VERR_EM_INTERPRETER)
4737 rc = VINF_EM_RAW_EMULATE_INSTR;
4738 else if (rc == VERR_VMX_INVALID_VMCS_PTR)
4739 {
4740 /* Try to extract more information about what might have gone wrong here. */
4741 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
4742 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pvVMCS;
4743 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
4744 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4745 }
4746
4747 /* Just set the correct state here instead of trying to catch every goto above. */
4748 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4749
4750#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4751 /* Restore interrupts if we exited after disabling them. */
4752 if (uOldEFlags != ~(RTCCUINTREG)0)
4753 ASMSetFlags(uOldEFlags);
4754#endif
4755
4756 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x);
4757 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
4758 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
4759 Log2(("X"));
4760 return VBOXSTRICTRC_TODO(rc);
4761}
4762
4763
4764/**
4765 * Enters the VT-x session.
4766 *
4767 * @returns VBox status code.
4768 * @param pVM Pointer to the VM.
4769 * @param pVCpu Pointer to the VMCPU.
4770 * @param pCpu Pointer to the CPU info struct.
4771 */
4772VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
4773{
4774 Assert(pVM->hwaccm.s.vmx.fSupported);
4775 NOREF(pCpu);
4776
4777 unsigned cr4 = ASMGetCR4();
4778 if (!(cr4 & X86_CR4_VMXE))
4779 {
4780 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4781 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4782 }
4783
4784 /* Activate the VMCS. */
4785 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4786 if (RT_FAILURE(rc))
4787 return rc;
4788
4789 pVCpu->hwaccm.s.fResumeVM = false;
4790 return VINF_SUCCESS;
4791}
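/*
 * Note: CR4.VMXE is expected to have been set (and VMXON executed) by the per-CPU HM init path
 * before we get here, which is why a cleared VMXE bit is treated as an error above.
 * VMXActivateVMCS performs VMPTRLD, making this VCPU's VMCS current and active on this host CPU,
 * and fResumeVM = false means the next world switch should use VMLAUNCH rather than VMRESUME.
 */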
4792
4793
4794/**
4795 * Leaves the VT-x session.
4796 *
4797 * @returns VBox status code.
4798 * @param pVM Pointer to the VM.
4799 * @param pVCpu Pointer to the VMCPU.
4800 * @param pCtx        Pointer to the guest's CPU context.
4801 */
4802VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4803{
4804 Assert(pVM->hwaccm.s.vmx.fSupported);
4805
4806#ifdef DEBUG
4807 if (CPUMIsHyperDebugStateActive(pVCpu))
4808 {
4809 CPUMR0LoadHostDebugState(pVM, pVCpu);
4810 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4811 }
4812 else
4813#endif
4814
4815 /*
4816 * Save the guest debug state if necessary.
4817 */
4818 if (CPUMIsGuestDebugStateActive(pVCpu))
4819 {
4820 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4821
4822 /* Enable DRx move intercepts again. */
4823 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4824 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4825 AssertRC(rc);
4826
4827 /* Resync the debug registers the next time. */
4828 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4829 }
4830 else
4831 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4832
4833 /*
4834 * Clear VMCS, marking it inactive, clearing implementation-specific data and writing
4835 * VMCS data back to memory.
4836 */
4837 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
4838 AssertRC(rc);
4839
4840 return VINF_SUCCESS;
4841}
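/*
 * Note: VMCLEAR sets the VMCS launch state back to "clear" (so the next entry must use VMLAUNCH)
 * and flushes any processor-cached VMCS data to memory, which is what allows the VMCS to be
 * activated on a different host CPU afterwards.
 *
 * Rough sketch of how the generic HM code is expected to pair these calls (simplified, error
 * handling omitted):
 *
 *     VMXR0Enter(pVM, pVCpu, pCpu);
 *     rc = VMXR0RunGuestCode(pVM, pVCpu, pCtx);
 *     VMXR0Leave(pVM, pVCpu, pCtx);
 */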
4842
4843
4844/**
4845 * Flush the TLB using EPT.
4846 *
4847 *
4848 * @param pVM Pointer to the VM.
4849 * @param pVCpu Pointer to the VMCPU.
4850 * @param enmFlush Type of flush.
4851 */
4852static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
4853{
4854 uint64_t descriptor[2];
4855
4856 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
4857 Assert(pVM->hwaccm.s.fNestedPaging);
4858 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
4859 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
4860 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
4861 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.vmx.GCPhysEPTP, rc));
4862}
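/*
 * The INVEPT descriptor is a 128-bit memory operand: the first quadword holds the EPT pointer
 * (relevant for a single-context flush) and the second quadword is reserved and must be zero,
 * which is what the code above sets up.
 */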
4863
4864
4865/**
4866 * Flush the TLB using VPID.
4867 *
4868 *
4869 * @param pVM Pointer to the VM.
4870 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
4871 * enmFlush).
4872 * @param enmFlush Type of flush.
4873 * @param GCPtr Virtual address of the page to flush (can be 0 depending
4874 * on @a enmFlush).
4875 */
4876static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
4877{
4878#if HC_ARCH_BITS == 32
4879 /*
4880 * If we get a flush in 64-bit guest mode, then force a full TLB flush. invvpid probably takes only 32-bit addresses.
4881 */
4882 if ( CPUMIsGuestInLongMode(pVCpu)
4883 && !VMX_IS_64BIT_HOST_MODE())
4884 {
4885 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4886 }
4887 else
4888#endif
4889 {
4890 uint64_t descriptor[2];
4891
4892 Assert(pVM->hwaccm.s.vmx.fVPID);
4893 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
4894 {
4895 descriptor[0] = 0;
4896 descriptor[1] = 0;
4897 }
4898 else
4899 {
4900 AssertPtr(pVCpu);
4901 Assert(pVCpu->hwaccm.s.uCurrentASID != 0);
4902 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
4903 descriptor[1] = GCPtr;
4904 }
4905 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
4906 AssertMsg(rc == VINF_SUCCESS,
4907 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hwaccm.s.uCurrentASID : 0, GCPtr, rc));
4908 }
4909}
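/*
 * The INVVPID descriptor is likewise a 128-bit memory operand: bits 15:0 of the first quadword
 * hold the VPID (zero for an all-contexts flush, as above), the remaining bits of that quadword
 * are reserved, and the second quadword holds the linear address, which is only consulted for an
 * individual-address flush.
 */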
4910
4911
4912/**
4913 * Invalidates a guest page by guest virtual address. Only relevant for
4914 * EPT/VPID, otherwise there is nothing really to invalidate.
4915 *
4916 * @returns VBox status code.
4917 * @param pVM Pointer to the VM.
4918 * @param pVCpu Pointer to the VMCPU.
4919 * @param GCVirt Guest virtual address of the page to invalidate.
4920 */
4921VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
4922{
4923 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4924
4925 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
4926
4927 if (!fFlushPending)
4928 {
4929 /*
4930         * We must invalidate the guest TLB entry in either case; we cannot ignore it even in the EPT case.
4931         * See @bugref{6043} and @bugref{6177}.
4932         *
4933         * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*() as this
4934         * function may be called in a loop with individual addresses.
4935 */
4936 if (pVM->hwaccm.s.vmx.fVPID)
4937 {
4938 /* If we can flush just this page do it, otherwise flush as little as possible. */
4939 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV_ADDR)
4940 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
4941 else
4942 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4943 }
4944 else if (pVM->hwaccm.s.fNestedPaging)
4945 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4946 }
4947
4948 return VINF_SUCCESS;
4949}
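/*
 * Deferring via VMCPU_FF_TLB_FLUSH works because the flag is consumed just before the next VM
 * entry by the hmR0VmxSetupTLB*() routines declared at the top of this file, which then issue
 * the appropriate invept/invvpid for the whole context.
 */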
4950
4951
4952/**
4953 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
4954 * otherwise there is nothing really to invalidate.
4955 *
4956 * NOTE: Assumes the current instruction references this physical page through a virtual address!
4957 *
4958 * @returns VBox status code.
4959 * @param pVM Pointer to the VM.
4960 * @param pVCpu       Pointer to the VMCPU.
4961 * @param GCPhys Guest physical address of the page to invalidate.
4962 */
4963VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
4964{
4965 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
4966
4967 /*
4968 * We cannot flush a page by guest-physical address. invvpid takes only a linear address
4969 * while invept only flushes by EPT not individual addresses. We update the force flag here
4970 * and flush before VMENTRY in hmR0VmxSetupTLB*(). This function might be called in a loop.
4971 */
4972 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4973 return VINF_SUCCESS;
4974}
4975
4976
4977/**
4978 * Report world switch error and dump some useful debug info.
4979 *
4980 * @param pVM Pointer to the VM.
4981 * @param pVCpu Pointer to the VMCPU.
4982 * @param rc Return code.
4983 * @param pCtx Pointer to the current guest CPU context (not updated).
4984 */
4985static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
4986{
4987 NOREF(pVM);
4988
4989 switch (VBOXSTRICTRC_VAL(rc))
4990 {
4991 case VERR_VMX_INVALID_VMXON_PTR:
4992 AssertFailed();
4993 break;
4994
4995 case VERR_VMX_UNABLE_TO_START_VM:
4996 case VERR_VMX_UNABLE_TO_RESUME_VM:
4997 {
4998 int rc2;
4999 RTCCUINTREG exitReason, instrError;
5000
5001 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
5002 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
5003 AssertRC(rc2);
5004 if (rc2 == VINF_SUCCESS)
5005 {
5006 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason,
5007 (uint32_t)instrError));
5008                Log(("Current stack %p\n", &rc2));
5009
5010 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
5011 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
5012
5013#ifdef VBOX_STRICT
5014 RTGDTR gdtr;
5015 PCX86DESCHC pDesc;
5016 RTCCUINTREG val;
5017
5018 ASMGetGDTR(&gdtr);
5019
5020 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
5021 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
5022 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
5023 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
5024 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
5025 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
5026 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
5027 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
5028 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
5029 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
5030
5031 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
5032 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
5033 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
5034 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
5035 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
5036 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
5037
5038 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
5039 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
5040 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
5041 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
5042
5043 if (val < gdtr.cbGdt)
5044 {
5045 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5046 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
5047 }
5048
5049 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
5050 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
5051 if (val < gdtr.cbGdt)
5052 {
5053 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5054 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
5055 }
5056
5057 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
5058 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
5059 if (val < gdtr.cbGdt)
5060 {
5061 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5062 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
5063 }
5064
5065 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
5066 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
5067 if (val < gdtr.cbGdt)
5068 {
5069 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5070 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
5071 }
5072
5073 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
5074 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
5075 if (val < gdtr.cbGdt)
5076 {
5077 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5078 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
5079 }
5080
5081 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
5082 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
5083 if (val < gdtr.cbGdt)
5084 {
5085 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5086 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
5087 }
5088
5089 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
5090 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
5091 if (val < gdtr.cbGdt)
5092 {
5093 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5094 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
5095 }
5096
5097 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
5098 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
5099 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
5100 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
5101 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
5102 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
5103 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
5104 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
5105 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
5106 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
5107 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
5108 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
5109 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
5110 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
5111 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
5112 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
5113# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5114 if (VMX_IS_64BIT_HOST_MODE())
5115 {
5116 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
5117 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
5118 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5119 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5120 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5121 }
5122# endif
5123#endif /* VBOX_STRICT */
5124 }
5125 break;
5126 }
5127
5128 default:
5129 /* impossible */
5130 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
5131 break;
5132 }
5133}
5134
5135
5136#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5137/**
5138 * Prepares for and executes VMLAUNCH (64-bit guest mode).
5139 *
5140 * @returns VBox status code.
5141 * @param fResume     Whether to use vmlaunch or vmresume.
5142 * @param pCtx Pointer to the guest CPU context.
5143 * @param pCache Pointer to the VMCS cache.
5144 * @param pVM Pointer to the VM.
5145 * @param pVCpu Pointer to the VMCPU.
5146 */
5147DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5148{
5149 uint32_t aParam[6];
5150 PHMGLOBLCPUINFO pCpu;
5151 RTHCPHYS HCPhysCpuPage;
5152 int rc;
5153
5154 pCpu = HWACCMR0GetCurrentCpu();
5155 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5156
5157#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5158 pCache->uPos = 1;
5159 pCache->interPD = PGMGetInterPaeCR3(pVM);
5160 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
5161#endif
5162
5163#ifdef DEBUG
5164 pCache->TestIn.HCPhysCpuPage= 0;
5165 pCache->TestIn.HCPhysVMCS = 0;
5166 pCache->TestIn.pCache = 0;
5167 pCache->TestOut.HCPhysVMCS = 0;
5168 pCache->TestOut.pCache = 0;
5169 pCache->TestOut.pCtx = 0;
5170 pCache->TestOut.eflags = 0;
5171#endif
5172
5173 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5174 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5175 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS); /* Param 2: VMCS physical address - Lo. */
5176 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.HCPhysVMCS >> 32); /* Param 2: VMCS physical address - Hi. */
5177 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
5178 aParam[5] = 0;
5179
5180#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5181 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
5182 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
5183#endif
5184 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
5185
5186#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5187 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
5188 Assert(pCtx->dr[4] == 10);
5189 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
5190#endif
5191
5192#ifdef DEBUG
5193 AssertMsg(pCache->TestIn.HCPhysCpuPage== HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5194 AssertMsg(pCache->TestIn.HCPhysVMCS == pVCpu->hwaccm.s.vmx.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5195 pVCpu->hwaccm.s.vmx.HCPhysVMCS));
5196 AssertMsg(pCache->TestIn.HCPhysVMCS == pCache->TestOut.HCPhysVMCS, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVMCS,
5197 pCache->TestOut.HCPhysVMCS));
5198 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5199 pCache->TestOut.pCache));
5200 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache),
5201 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
5202 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5203 pCache->TestOut.pCtx));
5204 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5205#endif
5206 return rc;
5207}
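/*
 * Note: the 64-bit physical addresses are split into lo/hi 32-bit halves above because the
 * parameters travel through the 32-bit hypervisor stack (see CPUMPushHyper in
 * VMXR0Execute64BitsHandler below); the 64-bit start-VM handler presumably reassembles them.
 */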
5208
5209
5210# ifdef VBOX_STRICT
5211static bool hmR0VmxIsValidReadField(uint32_t idxField)
5212{
5213 switch (idxField)
5214 {
5215 case VMX_VMCS64_GUEST_RIP:
5216 case VMX_VMCS64_GUEST_RSP:
5217 case VMX_VMCS_GUEST_RFLAGS:
5218 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
5219 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
5220 case VMX_VMCS64_GUEST_CR0:
5221 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
5222 case VMX_VMCS64_GUEST_CR4:
5223 case VMX_VMCS64_GUEST_DR7:
5224 case VMX_VMCS32_GUEST_SYSENTER_CS:
5225 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5226 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5227 case VMX_VMCS32_GUEST_GDTR_LIMIT:
5228 case VMX_VMCS64_GUEST_GDTR_BASE:
5229 case VMX_VMCS32_GUEST_IDTR_LIMIT:
5230 case VMX_VMCS64_GUEST_IDTR_BASE:
5231 case VMX_VMCS16_GUEST_FIELD_CS:
5232 case VMX_VMCS32_GUEST_CS_LIMIT:
5233 case VMX_VMCS64_GUEST_CS_BASE:
5234 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
5235 case VMX_VMCS16_GUEST_FIELD_DS:
5236 case VMX_VMCS32_GUEST_DS_LIMIT:
5237 case VMX_VMCS64_GUEST_DS_BASE:
5238 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
5239 case VMX_VMCS16_GUEST_FIELD_ES:
5240 case VMX_VMCS32_GUEST_ES_LIMIT:
5241 case VMX_VMCS64_GUEST_ES_BASE:
5242 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
5243 case VMX_VMCS16_GUEST_FIELD_FS:
5244 case VMX_VMCS32_GUEST_FS_LIMIT:
5245 case VMX_VMCS64_GUEST_FS_BASE:
5246 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
5247 case VMX_VMCS16_GUEST_FIELD_GS:
5248 case VMX_VMCS32_GUEST_GS_LIMIT:
5249 case VMX_VMCS64_GUEST_GS_BASE:
5250 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
5251 case VMX_VMCS16_GUEST_FIELD_SS:
5252 case VMX_VMCS32_GUEST_SS_LIMIT:
5253 case VMX_VMCS64_GUEST_SS_BASE:
5254 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
5255 case VMX_VMCS16_GUEST_FIELD_LDTR:
5256 case VMX_VMCS32_GUEST_LDTR_LIMIT:
5257 case VMX_VMCS64_GUEST_LDTR_BASE:
5258 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
5259 case VMX_VMCS16_GUEST_FIELD_TR:
5260 case VMX_VMCS32_GUEST_TR_LIMIT:
5261 case VMX_VMCS64_GUEST_TR_BASE:
5262 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
5263 case VMX_VMCS32_RO_EXIT_REASON:
5264 case VMX_VMCS32_RO_VM_INSTR_ERROR:
5265 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
5266 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
5267 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
5268 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
5269 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5270 case VMX_VMCS32_RO_IDT_INFO:
5271 case VMX_VMCS32_RO_IDT_ERRCODE:
5272 case VMX_VMCS64_GUEST_CR3:
5273 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
5274 return true;
5275 }
5276 return false;
5277}
5278
5279
5280static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5281{
5282 switch (idxField)
5283 {
5284 case VMX_VMCS64_GUEST_LDTR_BASE:
5285 case VMX_VMCS64_GUEST_TR_BASE:
5286 case VMX_VMCS64_GUEST_GDTR_BASE:
5287 case VMX_VMCS64_GUEST_IDTR_BASE:
5288 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5289 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5290 case VMX_VMCS64_GUEST_CR0:
5291 case VMX_VMCS64_GUEST_CR4:
5292 case VMX_VMCS64_GUEST_CR3:
5293 case VMX_VMCS64_GUEST_DR7:
5294 case VMX_VMCS64_GUEST_RIP:
5295 case VMX_VMCS64_GUEST_RSP:
5296 case VMX_VMCS64_GUEST_CS_BASE:
5297 case VMX_VMCS64_GUEST_DS_BASE:
5298 case VMX_VMCS64_GUEST_ES_BASE:
5299 case VMX_VMCS64_GUEST_FS_BASE:
5300 case VMX_VMCS64_GUEST_GS_BASE:
5301 case VMX_VMCS64_GUEST_SS_BASE:
5302 return true;
5303 }
5304 return false;
5305}
5306# endif /* VBOX_STRICT */
5307
5308
5309/**
5310 * Executes the specified handler in 64-bit mode.
5311 *
5312 * @returns VBox status code.
5313 * @param pVM Pointer to the VM.
5314 * @param pVCpu Pointer to the VMCPU.
5315 * @param pCtx Pointer to the guest CPU context.
5316 * @param pfnHandler Pointer to the RC handler function.
5317 * @param cbParam Number of parameters.
5318 * @param paParam Array of 32-bit parameters.
5319 */
5320VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
5321 uint32_t *paParam)
5322{
5323 int rc, rc2;
5324 PHMGLOBLCPUINFO pCpu;
5325 RTHCPHYS HCPhysCpuPage;
5326 RTHCUINTREG uOldEFlags;
5327
5328 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5329 Assert(pfnHandler);
5330 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
5331 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
5332
5333#ifdef VBOX_STRICT
5334    for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries; i++)
5335 Assert(hmR0VmxIsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
5336
5337    for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries; i++)
5338 Assert(hmR0VmxIsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
5339#endif
5340
5341 /* Disable interrupts. */
5342 uOldEFlags = ASMIntDisableFlags();
5343
5344#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5345 RTCPUID idHostCpu = RTMpCpuId();
5346 CPUMR0SetLApic(pVM, idHostCpu);
5347#endif
5348
5349 pCpu = HWACCMR0GetCurrentCpu();
5350 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5351
5352    /* Clear the VMCS, marking it inactive, clearing implementation-specific data and writing the VMCS data back to memory. */
5353 VMXClearVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5354
5355 /* Leave VMX Root Mode. */
5356 VMXDisable();
5357
5358 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5359
5360 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5361 CPUMSetHyperEIP(pVCpu, pfnHandler);
5362    for (int i = (int)cbParam - 1; i >= 0; i--)
5363 CPUMPushHyper(pVCpu, paParam[i]);
5364
5365 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5366
5367 /* Call switcher. */
5368 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5369 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
5370
5371 /* Make sure the VMX instructions don't cause #UD faults. */
5372 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5373
5374 /* Enter VMX Root Mode */
5375 rc2 = VMXEnable(HCPhysCpuPage);
5376 if (RT_FAILURE(rc2))
5377 {
5378 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5379 ASMSetFlags(uOldEFlags);
5380 return VERR_VMX_VMXON_FAILED;
5381 }
5382
5383 rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.HCPhysVMCS);
5384 AssertRC(rc2);
5385 Assert(!(ASMGetFlags() & X86_EFL_IF));
5386 ASMSetFlags(uOldEFlags);
5387 return rc;
5388}
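/*
 * Summary of the dance above: the current VMCS is VMCLEARed and VMX root mode is left
 * (VMXOFF plus clearing CR4.VMXE) so the 32->64 switcher can run outside VMX operation;
 * afterwards CR4.VMXE is restored, VMXON re-enters root mode and VMPTRLD re-activates the
 * VMCS before interrupts are re-enabled.
 */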
5389#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5390
5391
5392#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
5393/**
5394 * Executes VMWRITE.
5395 *
5396 * @returns VBox status code
5397 * @param pVCpu Pointer to the VMCPU.
5398 * @param idxField VMCS field index.
5399 * @param u64Val      16-, 32- or 64-bit value.
5400 */
5401VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5402{
5403 int rc;
5404 switch (idxField)
5405 {
5406 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
5407 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
5408 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
5409 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
5410 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
5411 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
5412 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
5413 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
5414 case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL:
5415 case VMX_VMCS_GUEST_LINK_PTR_FULL:
5416 case VMX_VMCS_GUEST_PDPTR0_FULL:
5417 case VMX_VMCS_GUEST_PDPTR1_FULL:
5418 case VMX_VMCS_GUEST_PDPTR2_FULL:
5419 case VMX_VMCS_GUEST_PDPTR3_FULL:
5420 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
5421 case VMX_VMCS_GUEST_EFER_FULL:
5422 case VMX_VMCS_CTRL_EPTP_FULL:
5423            /* These fields consist of two parts, both of which are writable in 32-bit mode. */
5424 rc = VMXWriteVMCS32(idxField, u64Val);
5425 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5426 AssertRC(rc);
5427 return rc;
5428
5429 case VMX_VMCS64_GUEST_LDTR_BASE:
5430 case VMX_VMCS64_GUEST_TR_BASE:
5431 case VMX_VMCS64_GUEST_GDTR_BASE:
5432 case VMX_VMCS64_GUEST_IDTR_BASE:
5433 case VMX_VMCS64_GUEST_SYSENTER_EIP:
5434 case VMX_VMCS64_GUEST_SYSENTER_ESP:
5435 case VMX_VMCS64_GUEST_CR0:
5436 case VMX_VMCS64_GUEST_CR4:
5437 case VMX_VMCS64_GUEST_CR3:
5438 case VMX_VMCS64_GUEST_DR7:
5439 case VMX_VMCS64_GUEST_RIP:
5440 case VMX_VMCS64_GUEST_RSP:
5441 case VMX_VMCS64_GUEST_CS_BASE:
5442 case VMX_VMCS64_GUEST_DS_BASE:
5443 case VMX_VMCS64_GUEST_ES_BASE:
5444 case VMX_VMCS64_GUEST_FS_BASE:
5445 case VMX_VMCS64_GUEST_GS_BASE:
5446 case VMX_VMCS64_GUEST_SS_BASE:
5447            /* Queue a 64-bit value as we can't set it in 32-bit host mode. */
5448 if (u64Val >> 32ULL)
5449 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
5450 else
5451 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
5452
5453 return rc;
5454
5455 default:
5456 AssertMsgFailed(("Unexpected field %x\n", idxField));
5457 return VERR_INVALID_PARAMETER;
5458 }
5459}
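/*
 * Illustration of the first branch above (the variable name is only for the example): a 64-bit
 * VMCS field is accessible as two 32-bit halves, the high half living at the field encoding + 1,
 * so e.g. a TSC offset would be written as:
 *
 *     VMXWriteVMCS32(VMX_VMCS_CTRL_TSC_OFFSET_FULL,     (uint32_t)u64TSCOffset);
 *     VMXWriteVMCS32(VMX_VMCS_CTRL_TSC_OFFSET_FULL + 1, (uint32_t)(u64TSCOffset >> 32));
 */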
5460
5461
5462/**
5463 * Cache VMCS writes for performance reasons (Darwin) and for running 64-bit guests on 32-bit hosts.
5464 *
5465 * @param pVCpu Pointer to the VMCPU.
5466 * @param idxField VMCS field index.
5467 * @param u64Val      16-, 32- or 64-bit value.
5468 */
5469VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5470{
5471 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
5472
5473 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5474 ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5475
5476 /* Make sure there are no duplicates. */
5477 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
5478 {
5479 if (pCache->Write.aField[i] == idxField)
5480 {
5481 pCache->Write.aFieldVal[i] = u64Val;
5482 return VINF_SUCCESS;
5483 }
5484 }
5485
5486 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5487 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5488 pCache->Write.cValidEntries++;
5489 return VINF_SUCCESS;
5490}
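/*
 * The queued fields are not written to the VMCS here; the cache is handed to the 64-bit world
 * switcher (see the VMCSCACHE parameter of VMXR0SwitcherStartVM64 above), which is presumably
 * where the actual VMWRITEs happen from 64-bit mode.
 */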
5491
5492#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
5493