VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@45092

Last change on this file since 45092 was 45092, checked in by vboxsync, 12 years ago

VMM/VMMR0: HM bits, cleanup header a bit.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 230.2 KB
1/* $Id: HWVMXR0.cpp 45092 2013-03-19 17:07:12Z vboxsync $ */
2/** @file
3 * HM VMX (VT-x) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HM
23#include <iprt/asm-amd64-x86.h>
24#include <VBox/vmm/hm.h>
25#include <VBox/vmm/pgm.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/selm.h>
29#include <VBox/vmm/iom.h>
30#ifdef VBOX_WITH_REM
31# include <VBox/vmm/rem.h>
32#endif
33#include <VBox/vmm/tm.h>
34#include "HMInternal.h"
35#include <VBox/vmm/vm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/err.h>
38#include <VBox/log.h>
39#include <iprt/assert.h>
40#include <iprt/param.h>
41#include <iprt/string.h>
42#include <iprt/time.h>
43#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
44# include <iprt/thread.h>
45#endif
46#include <iprt/x86.h>
47#include "HWVMXR0.h"
48
49#include "dtrace/VBoxVMM.h"
50
51
52/*******************************************************************************
53* Defined Constants And Macros *
54*******************************************************************************/
55#if defined(RT_ARCH_AMD64)
56# define VMX_IS_64BIT_HOST_MODE() (true)
57#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
58# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
59#else
60# define VMX_IS_64BIT_HOST_MODE() (false)
61#endif
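/*
 * Note: with RT_ARCH_AMD64 the host is always in 64-bit mode; with
 * VBOX_WITH_HYBRID_32BIT_KERNEL (presumably a 32-bit kernel on a 64-bit capable
 * CPU, e.g. 32-bit Darwin) the answer is determined at runtime via
 * g_fVMXIs64bitHost (see HMR0A.asm); otherwise the host is a plain 32-bit one.
 */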
62
63# define VMX_WRITE_SELREG(REG, reg) \
64 do \
65 { \
66 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_##REG, pCtx->reg.Sel); \
67 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_##REG##_LIMIT, pCtx->reg.u32Limit); \
68 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_##REG##_BASE, pCtx->reg.u64Base); \
69 if ((pCtx->eflags.u32 & X86_EFL_VM)) \
70 { \
71 /* Must override this or else VT-x will fail with invalid guest state errors. */ \
72 /* DPL=3, present, code/data, r/w/accessed. */ \
73 /** @todo we shouldn't have to do this, if it is not 0xf3 it means we screwed up elsewhere (recompiler). */ \
74 /** @todo VT-x docs explicitly mention 0xF3. Why not just val = 0xf3? */ \
75 val = (pCtx->reg.Attr.u & ~0xFF) | 0xF3; \
76 } \
77 else \
78 if ( CPUMIsGuestInRealModeEx(pCtx) \
79 && !pVM->hm.s.vmx.fUnrestrictedGuest) \
80 { \
81 /** @todo shouldn't the 'if' condition above check for 'pRealModeTSS' ? */ \
82 /* Must override this or else VT-x will fail with invalid guest state errors. */ \
83 /* DPL=3, present, code/data, r/w/accessed. */ \
84 val = 0xf3; \
85 } \
86 else \
87 if ( ( pCtx->reg.Sel \
88 || !CPUMIsGuestInPagedProtectedModeEx(pCtx) \
89 || (!pCtx->cs.Attr.n.u1DefBig && !CPUMIsGuestIn64BitCodeEx(pCtx)) \
90 ) \
91 && pCtx->reg.Attr.n.u1Present == 1) \
92 { \
93 val = pCtx->reg.Attr.u | X86_SEL_TYPE_ACCESSED; \
94 } \
95 else \
96 val = 0x10000; /* Invalid guest state error otherwise. (BIT(16) = Unusable) */ \
97 \
98 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_##REG##_ACCESS_RIGHTS, val); \
99 } while (0)
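/*
 * For reference, the access-rights values used above follow the VMCS segment
 * access-rights layout: bits 3:0 = type, bit 4 = S (code/data), bits 6:5 = DPL,
 * bit 7 = P and bit 16 = "unusable". So 0xF3 encodes a present, DPL 3,
 * read/write, accessed data segment, while 0x10000 marks the register unusable.
 */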
100
101# define VMX_READ_SELREG(REG, reg) \
102 do \
103 { \
104 VMXReadCachedVmcs(VMX_VMCS16_GUEST_FIELD_##REG, &val); \
105 pCtx->reg.Sel = val; \
106 pCtx->reg.ValidSel = val; \
107 pCtx->reg.fFlags = CPUMSELREG_FLAGS_VALID; \
108 VMXReadCachedVmcs(VMX_VMCS32_GUEST_##REG##_LIMIT, &val); \
109 pCtx->reg.u32Limit = val; \
110 VMXReadCachedVmcs(VMX_VMCS_GUEST_##REG##_BASE, &val); \
111 pCtx->reg.u64Base = val; \
112 VMXReadCachedVmcs(VMX_VMCS32_GUEST_##REG##_ACCESS_RIGHTS, &val); \
113 pCtx->reg.Attr.u = val; \
114 } while (0)
115
116/* Don't read from the cache in this macro; used only in case of failure where the cache is out of sync. */
117# define VMX_LOG_SELREG(REG, szSelReg, val) \
118 do \
119 { \
120 VMXReadVmcs(VMX_VMCS16_GUEST_FIELD_##REG, &(val)); \
121 Log(("%s Selector %x\n", szSelReg, (val))); \
122 VMXReadVmcs(VMX_VMCS32_GUEST_##REG##_LIMIT, &(val)); \
123 Log(("%s Limit %x\n", szSelReg, (val))); \
124 VMXReadVmcs(VMX_VMCS_GUEST_##REG##_BASE, &(val)); \
125 Log(("%s Base %RX64\n", szSelReg, (uint64_t)(val))); \
126 VMXReadVmcs(VMX_VMCS32_GUEST_##REG##_ACCESS_RIGHTS, &(val)); \
127 Log(("%s Attributes %x\n", szSelReg, (val))); \
128 } while (0)
129
130#define VMXSetupCachedReadVmcs(pCache, idxField) \
131{ \
132 Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \
133 pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \
134 pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \
135}
136#define VMX_SETUP_SELREG(REG, pCache) \
137{ \
138 VMXSetupCachedReadVmcs(pCache, VMX_VMCS16_GUEST_FIELD_##REG); \
139 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_##REG##_LIMIT); \
140 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_##REG##_BASE); \
141 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_##REG##_ACCESS_RIGHTS); \
142}
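/*
 * The two macros above only register fields in the per-VCPU VMCS read cache:
 * the field index is recorded and its value slot zeroed. The cached values are
 * filled in elsewhere (presumably in bulk by the world-switcher code) and are
 * fetched with VMXReadCachedVmcs, see VMX_READ_SELREG above.
 */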
143
144
145/*******************************************************************************
146* Global Variables *
147*******************************************************************************/
148/* IO operation lookup arrays. */
149static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
150static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
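/*
 * Both tables are indexed by the size-of-access field of the I/O-instruction
 * VM-exit qualification (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes; the value 2 is
 * not used, hence the zero entries). g_aIOSize gives the access width in bytes
 * and g_aIOOpAnd the mask used to truncate the operand to that width.
 */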
151
152#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
153/** See HMR0A.asm. */
154extern "C" uint32_t g_fVMXIs64bitHost;
155#endif
156
157
158/*******************************************************************************
159* Local Functions *
160*******************************************************************************/
161static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu);
162static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu);
163static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu);
164static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu);
165static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush);
166static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr);
167static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
168static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
169static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx);
170
171
172/**
173 * Updates the error from the VMCS into HMCPU's lasterror record.
174 *
175 * @param pVM Pointer to the VM.
176 * @param pVCpu Pointer to the VMCPU.
177 * @param rc The error code.
178 */
179static void hmR0VmxCheckError(PVM pVM, PVMCPU pVCpu, int rc)
180{
181 if (rc == VERR_VMX_GENERIC)
182 {
183 RTCCUINTREG instrError;
184
185 VMXReadVmcs(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
186 pVCpu->hm.s.vmx.lasterror.u32InstrError = instrError;
187 }
188 pVM->hm.s.lLastError = rc;
189}
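/*
 * Note: the VM-instruction error field read above is only defined after a
 * VMfailValid result, which is presumably what VERR_VMX_GENERIC corresponds to
 * here; for any other failure the field contents would be stale.
 */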
190
191
192/**
193 * Sets up and activates VT-x on the current CPU.
194 *
195 * @returns VBox status code.
196 * @param pCpu Pointer to the CPU info struct.
197 * @param pVM Pointer to the VM. (can be NULL after a resume!!)
198 * @param pvCpuPage Pointer to the global CPU page.
199 * @param HCPhysCpuPage Physical address of the global CPU page.
200 * @param fEnabledByHost Set if SUPR0EnableVTx or similar was used to enable
201 * VT-x/AMD-V on the host.
202 */
203VMMR0DECL(int) VMXR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost)
204{
205 if (!fEnabledByHost)
206 {
207 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
208 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
209
210 if (pVM)
211 {
212 /* Set revision dword at the beginning of the VMXON structure. */
213 *(uint32_t *)pvCpuPage = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.msr.vmx_basic_info);
214 }
215
216 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
217 * (which can have very bad consequences!!!)
218 */
219
220 /** @todo r=bird: Why is this code different than the probing code earlier
221 * on? It just sets VMXE if needed and doesn't check that it isn't
222 * set. Mac OS X host_vmxoff may leave this set and we'll fail here
223 * and debug-assert in the calling code. This is what caused the
224 * "regression" after backing out the SUPR0EnableVTx code hours before
225 * 4.2.0GA (reboot fixed the issue). I've changed here to do the same
226 * as the init code. */
227 uint64_t uCr4 = ASMGetCR4();
228 if (!(uCr4 & X86_CR4_VMXE))
229 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE); /* Make sure the VMX instructions don't cause #UD faults. */
230
231 /*
232 * Enter VM root mode.
233 */
234 int rc = VMXEnable(HCPhysCpuPage);
235 if (RT_FAILURE(rc))
236 {
237 ASMSetCR4(uCr4);
238 return VERR_VMX_VMXON_FAILED;
239 }
240 }
241
242 /*
243 * Flush all VPIDs (in case we or any other hypervisor have been using VPIDs) so that
244 * we can avoid an explicit flush while using new VPIDs. We would still need to flush
245 * each time while reusing a VPID after hitting the MaxASID limit once.
246 */
247 if ( pVM
248 && pVM->hm.s.vmx.fVpid
249 && (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS))
250 {
251 hmR0VmxFlushVPID(pVM, NULL /* pvCpu */, VMX_FLUSH_VPID_ALL_CONTEXTS, 0 /* GCPtr */);
252 pCpu->fFlushAsidBeforeUse = false;
253 }
254 else
255 pCpu->fFlushAsidBeforeUse = true;
256
257 /*
258 * Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}.
259 */
260 ++pCpu->cTlbFlushes;
261
262 return VINF_SUCCESS;
263}
264
265
266/**
267 * Deactivates VT-x on the current CPU.
268 *
269 * @returns VBox status code.
270 * @param pCpu Pointer to the CPU info struct.
271 * @param pvCpuPage Pointer to the global CPU page.
272 * @param HCPhysCpuPage Physical address of the global CPU page.
273 */
274VMMR0DECL(int) VMXR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
275{
276 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
277 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
278 NOREF(pCpu);
279
280 /* If we're somehow not in VMX root mode, then we shouldn't dare leave it. */
281 if (!(ASMGetCR4() & X86_CR4_VMXE))
282 return VERR_VMX_NOT_IN_VMX_ROOT_MODE;
283
284 /* Leave VMX Root Mode. */
285 VMXDisable();
286
287 /* And clear the X86_CR4_VMXE bit. */
288 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
289 return VINF_SUCCESS;
290}
291
292VMMR0DECL(int) VMXR0GlobalInit(void)
293{
294 /* Nothing to do. */
295 return VINF_SUCCESS;
296}
297
298VMMR0DECL(void) VMXR0GlobalTerm(void)
299{
300 /* Nothing to do. */
301}
302
303/**
304 * Does Ring-0 per VM VT-x initialization.
305 *
306 * @returns VBox status code.
307 * @param pVM Pointer to the VM.
308 */
309VMMR0DECL(int) VMXR0InitVM(PVM pVM)
310{
311 int rc;
312
313#ifdef LOG_ENABLED
314 SUPR0Printf("VMXR0InitVM %p\n", pVM);
315#endif
316
317 pVM->hm.s.vmx.hMemObjApicAccess = NIL_RTR0MEMOBJ;
318
319 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
320 {
321 /* Allocate one page for the APIC-access page (used for filtering accesses to the APIC). */
322 rc = RTR0MemObjAllocCont(&pVM->hm.s.vmx.hMemObjApicAccess, PAGE_SIZE, false /* fExecutable */);
323 AssertRC(rc);
324 if (RT_FAILURE(rc))
325 return rc;
326
327 pVM->hm.s.vmx.pbApicAccess = (uint8_t *)RTR0MemObjAddress(pVM->hm.s.vmx.hMemObjApicAccess);
328 pVM->hm.s.vmx.HCPhysApicAccess = RTR0MemObjGetPagePhysAddr(pVM->hm.s.vmx.hMemObjApicAccess, 0);
329 ASMMemZero32(pVM->hm.s.vmx.pbApicAccess, PAGE_SIZE);
330 }
331 else
332 {
333 pVM->hm.s.vmx.hMemObjApicAccess = 0;
334 pVM->hm.s.vmx.pbApicAccess = 0;
335 pVM->hm.s.vmx.HCPhysApicAccess = 0;
336 }
337
338#ifdef VBOX_WITH_CRASHDUMP_MAGIC
339 {
340 rc = RTR0MemObjAllocCont(&pVM->hm.s.vmx.hMemObjScratch, PAGE_SIZE, false /* fExecutable */);
341 AssertRC(rc);
342 if (RT_FAILURE(rc))
343 return rc;
344
345 pVM->hm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hm.s.vmx.hMemObjScratch);
346 pVM->hm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hm.s.vmx.hMemObjScratch, 0);
347
348 ASMMemZero32(pVM->hm.s.vmx.pbScratch, PAGE_SIZE);
349 strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic");
350 *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
351 }
352#endif
353
354 /* Allocate VMCSs for all guest CPUs. */
355 for (VMCPUID i = 0; i < pVM->cCpus; i++)
356 {
357 PVMCPU pVCpu = &pVM->aCpus[i];
358
359 pVCpu->hm.s.vmx.hMemObjVmcs = NIL_RTR0MEMOBJ;
360
361 /* Allocate one page for the VM control structure (VMCS). */
362 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjVmcs, PAGE_SIZE, false /* fExecutable */);
363 AssertRC(rc);
364 if (RT_FAILURE(rc))
365 return rc;
366
367 pVCpu->hm.s.vmx.pvVmcs = RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjVmcs);
368 pVCpu->hm.s.vmx.HCPhysVmcs = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjVmcs, 0);
369 ASMMemZeroPage(pVCpu->hm.s.vmx.pvVmcs);
370
371 pVCpu->hm.s.vmx.cr0_mask = 0;
372 pVCpu->hm.s.vmx.cr4_mask = 0;
373
374 /* Allocate one page for the virtual APIC page for TPR caching. */
375 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjVirtApic, PAGE_SIZE, false /* fExecutable */);
376 AssertRC(rc);
377 if (RT_FAILURE(rc))
378 return rc;
379
380 pVCpu->hm.s.vmx.pbVirtApic = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjVirtApic);
381 pVCpu->hm.s.vmx.HCPhysVirtApic = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjVirtApic, 0);
382 ASMMemZeroPage(pVCpu->hm.s.vmx.pbVirtApic);
383
384 /* Allocate the MSR bitmap if this feature is supported. */
385 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
386 {
387 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, PAGE_SIZE, false /* fExecutable */);
388 AssertRC(rc);
389 if (RT_FAILURE(rc))
390 return rc;
391
392 pVCpu->hm.s.vmx.pvMsrBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjMsrBitmap);
393 pVCpu->hm.s.vmx.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjMsrBitmap, 0);
394 memset(pVCpu->hm.s.vmx.pvMsrBitmap, 0xff, PAGE_SIZE);
395 }
396
397#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
398 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
399 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjGuestMsr, PAGE_SIZE, false /* fExecutable */);
400 AssertRC(rc);
401 if (RT_FAILURE(rc))
402 return rc;
403
404 pVCpu->hm.s.vmx.pvGuestMsr = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjGuestMsr);
405 pVCpu->hm.s.vmx.HCPhysGuestMsr = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjGuestMsr, 0);
406 Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf));
407 memset(pVCpu->hm.s.vmx.pvGuestMsr, 0, PAGE_SIZE);
408
409 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
410 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.vmx.hMemObjHostMsr, PAGE_SIZE, false /* fExecutable */);
411 AssertRC(rc);
412 if (RT_FAILURE(rc))
413 return rc;
414
415 pVCpu->hm.s.vmx.pvHostMsr = (uint8_t *)RTR0MemObjAddress(pVCpu->hm.s.vmx.hMemObjHostMsr);
416 pVCpu->hm.s.vmx.HCPhysHostMsr = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.vmx.hMemObjHostMsr, 0);
417 Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf));
418 memset(pVCpu->hm.s.vmx.pvHostMsr, 0, PAGE_SIZE);
419#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
420
421 /* Current guest paging mode. */
422 pVCpu->hm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
423
424#ifdef LOG_ENABLED
425 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hm.s.vmx.pvVmcs, (uint32_t)pVCpu->hm.s.vmx.HCPhysVmcs);
426#endif
427 }
428
429 return VINF_SUCCESS;
430}
431
432
433/**
434 * Does Ring-0 per VM VT-x termination.
435 *
436 * @returns VBox status code.
437 * @param pVM Pointer to the VM.
438 */
439VMMR0DECL(int) VMXR0TermVM(PVM pVM)
440{
441 for (VMCPUID i = 0; i < pVM->cCpus; i++)
442 {
443 PVMCPU pVCpu = &pVM->aCpus[i];
444
445 if (pVCpu->hm.s.vmx.hMemObjVmcs != NIL_RTR0MEMOBJ)
446 {
447 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjVmcs, false);
448 pVCpu->hm.s.vmx.hMemObjVmcs = NIL_RTR0MEMOBJ;
449 pVCpu->hm.s.vmx.pvVmcs = 0;
450 pVCpu->hm.s.vmx.HCPhysVmcs = 0;
451 }
452 if (pVCpu->hm.s.vmx.hMemObjVirtApic != NIL_RTR0MEMOBJ)
453 {
454 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjVirtApic, false);
455 pVCpu->hm.s.vmx.hMemObjVirtApic = NIL_RTR0MEMOBJ;
456 pVCpu->hm.s.vmx.pbVirtApic = 0;
457 pVCpu->hm.s.vmx.HCPhysVirtApic = 0;
458 }
459 if (pVCpu->hm.s.vmx.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
460 {
461 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjMsrBitmap, false);
462 pVCpu->hm.s.vmx.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
463 pVCpu->hm.s.vmx.pvMsrBitmap = 0;
464 pVCpu->hm.s.vmx.HCPhysMsrBitmap = 0;
465 }
466#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
467 if (pVCpu->hm.s.vmx.hMemObjHostMsr != NIL_RTR0MEMOBJ)
468 {
469 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjHostMsr, false);
470 pVCpu->hm.s.vmx.hMemObjHostMsr = NIL_RTR0MEMOBJ;
471 pVCpu->hm.s.vmx.pvHostMsr = 0;
472 pVCpu->hm.s.vmx.HCPhysHostMsr = 0;
473 }
474 if (pVCpu->hm.s.vmx.hMemObjGuestMsr != NIL_RTR0MEMOBJ)
475 {
476 RTR0MemObjFree(pVCpu->hm.s.vmx.hMemObjGuestMsr, false);
477 pVCpu->hm.s.vmx.hMemObjGuestMsr = NIL_RTR0MEMOBJ;
478 pVCpu->hm.s.vmx.pvGuestMsr = 0;
479 pVCpu->hm.s.vmx.HCPhysGuestMsr = 0;
480 }
481#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
482 }
483 if (pVM->hm.s.vmx.hMemObjApicAccess != NIL_RTR0MEMOBJ)
484 {
485 RTR0MemObjFree(pVM->hm.s.vmx.hMemObjApicAccess, false);
486 pVM->hm.s.vmx.hMemObjApicAccess = NIL_RTR0MEMOBJ;
487 pVM->hm.s.vmx.pbApicAccess = 0;
488 pVM->hm.s.vmx.HCPhysApicAccess = 0;
489 }
490#ifdef VBOX_WITH_CRASHDUMP_MAGIC
491 if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ)
492 {
493 ASMMemZero32(pVM->hm.s.vmx.pScratch, PAGE_SIZE);
494 RTR0MemObjFree(pVM->hm.s.vmx.hMemObjScratch, false);
495 pVM->hm.s.vmx.hMemObjScratch = NIL_RTR0MEMOBJ;
496 pVM->hm.s.vmx.pScratch = 0;
497 pVM->hm.s.vmx.pScratchPhys = 0;
498 }
499#endif
500 return VINF_SUCCESS;
501}
502
503
504/**
505 * Sets up VT-x for the specified VM.
506 *
507 * @returns VBox status code.
508 * @param pVM Pointer to the VM.
509 */
510VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
511{
512 int rc = VINF_SUCCESS;
513 uint32_t val;
514
515 AssertReturn(pVM, VERR_INVALID_PARAMETER);
516
517 /* Initialize these always, see hmR3InitFinalizeR0().*/
518 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NONE;
519 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NONE;
520
521 /* Determine optimal flush type for EPT. */
522 if (pVM->hm.s.fNestedPaging)
523 {
524 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT)
525 {
526 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT)
527 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_SINGLE_CONTEXT;
528 else if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS)
529 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_ALL_CONTEXTS;
530 else
531 {
532 /*
533 * Should never really happen. EPT is supported but no suitable flush types are supported.
534 * We cannot ignore EPT at this point as we've already set up Unrestricted Guest execution.
535 */
536 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
537 return VERR_VMX_GENERIC;
538 }
539 }
540 else
541 {
542 /*
543 * Should never really happen. EPT is supported but INVEPT instruction is not supported.
544 */
545 pVM->hm.s.vmx.enmFlushEpt = VMX_FLUSH_EPT_NOT_SUPPORTED;
546 return VERR_VMX_GENERIC;
547 }
548 }
549
550 /* Determine optimal flush type for VPID. */
551 if (pVM->hm.s.vmx.fVpid)
552 {
553 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID)
554 {
555 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
556 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_SINGLE_CONTEXT;
557 else if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS)
558 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_ALL_CONTEXTS;
559 else
560 {
561 /*
562 * Neither SINGLE nor ALL-context flush types for VPID are supported by the CPU.
563 * We do not handle other flush type combinations; ignore the VPID capabilities.
564 */
565 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
566 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_INDIV_ADDR supported. Ignoring VPID.\n"));
567 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS)
568 Log(("VMXR0SetupVM: Only VMX_FLUSH_VPID_SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n"));
569 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
570 pVM->hm.s.vmx.fVpid = false;
571 }
572 }
573 else
574 {
575 /*
576 * Should not really happen. VPID is supported but the INVVPID instruction is not.
577 * Ignore the VPID capabilities as our code relies on INVVPID for selective flushing.
578 */
579 Log(("VMXR0SetupVM: VPID supported without INVEPT support. Ignoring VPID.\n"));
580 pVM->hm.s.vmx.enmFlushVpid = VMX_FLUSH_VPID_NOT_SUPPORTED;
581 pVM->hm.s.vmx.fVpid = false;
582 }
583 }
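    /*
     * Both capability checks above prefer a single-context flush over an
     * all-contexts flush when the CPU offers it: a single-context INVEPT/INVVPID
     * only invalidates translations tagged with the current EPTP/VPID instead of
     * every context on the CPU.
     */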
584
585 for (VMCPUID i = 0; i < pVM->cCpus; i++)
586 {
587 PVMCPU pVCpu = &pVM->aCpus[i];
588
589 AssertPtr(pVCpu->hm.s.vmx.pvVmcs);
590
591 /* Set revision dword at the beginning of the VMCS structure. */
592 *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hm.s.vmx.msr.vmx_basic_info);
593
594 /*
595 * Clear and activate the VMCS.
596 */
597 Log(("HCPhysVmcs = %RHp\n", pVCpu->hm.s.vmx.HCPhysVmcs));
598 rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
599 if (RT_FAILURE(rc))
600 goto vmx_end;
601
602 rc = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
603 if (RT_FAILURE(rc))
604 goto vmx_end;
605
606 /*
607 * VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
608 * Set required bits to one and zero according to the MSR capabilities.
609 */
610 val = pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
611 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT /* External interrupts */
612 | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT; /* Non-maskable interrupts */
613
614 /*
615 * Enable the VMX preemption timer.
616 */
617 if (pVM->hm.s.vmx.fUsePreemptTimer)
618 val |= VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_PREEMPT_TIMER;
619 val &= pVM->hm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
620
621 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, val);
622 AssertRC(rc);
623 pVCpu->hm.s.vmx.u32PinCtls = val;
624
625 /*
626 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
627 * Set required bits to one and zero according to the MSR capabilities.
628 */
629 val = pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
630 /* Program which events cause VM-exits and which features we want to use. */
631 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
632 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
633 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
634 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
635 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
636 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
637 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside
638 the guest (host thinks the cpu load is high) */
639
640 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
641 if (!pVM->hm.s.fNestedPaging)
642 {
643 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
644 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
645 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
646 }
647
648 /*
649 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch
650 * failure with an invalid control fields error. (combined with some other exit reasons)
651 */
652 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
653 {
654 /* CR8 reads come from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
655 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
656 Assert(pVM->hm.s.vmx.pbApicAccess);
657 }
658 else
659 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
660 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
661
662 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
663 {
664 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
665 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
666 }
667
668 /* We will use the secondary control if it's present. */
669 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
670
671 /* Mask away the bits that the CPU doesn't support */
672 /** @todo make sure they don't conflict with the above requirements. */
673 val &= pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
674 pVCpu->hm.s.vmx.u32ProcCtls = val;
675
676 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, val);
677 AssertRC(rc);
678
679 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
680 {
681 /*
682 * VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
683 * Set required bits to one and zero according to the MSR capabilities.
684 */
685 val = pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
686 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
687
688 if (pVM->hm.s.fNestedPaging)
689 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
690
691 if (pVM->hm.s.vmx.fVpid)
692 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
693
694 if (pVM->hm.s.fHasIoApic)
695 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
696
697 if (pVM->hm.s.vmx.fUnrestrictedGuest)
698 val |= VMX_VMCS_CTRL_PROC_EXEC2_UNRESTRICTED_GUEST;
699
700 if (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
701 val |= VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP;
702
703 /* Mask away the bits that the CPU doesn't support */
704 /** @todo make sure they don't conflict with the above requirements. */
705 val &= pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
706 pVCpu->hm.s.vmx.u32ProcCtls2 = val;
707 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS2, val);
708 AssertRC(rc);
709 }
710
711 /*
712 * VMX_VMCS_CTRL_CR3_TARGET_COUNT
713 * Set required bits to one and zero according to the MSR capabilities.
714 */
715 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0);
716 AssertRC(rc);
717
718 /*
719 * Forward all exception except #NM & #PF to the guest.
720 * We always need to check pagefaults since our shadow page table can be out of sync.
721 * And we always lazily sync the FPU & XMM state. .
722 */
723
724 /** @todo Possible optimization:
725 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
726 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
727 * registers ourselves of course.
728 *
729 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
730 */
731
732 /*
733 * Don't filter page faults, all of them should cause a world switch.
734 */
735 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0);
736 AssertRC(rc);
737 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0);
738 AssertRC(rc);
739
740 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0);
741 AssertRC(rc);
742 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0);
743 AssertRC(rc);
744 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0);
745 AssertRC(rc);
746
747 /*
748 * Set the MSR bitmap address.
749 */
750 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
751 {
752 Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap);
753
754 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap);
755 AssertRC(rc);
756
757 /*
758 * Allow the guest to directly modify these MSRs; they are loaded/stored automatically
759 * using MSR-load/store areas in the VMCS.
760 */
761 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
762 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
763 hmR0VmxSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
764 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
765 hmR0VmxSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
766 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
767 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
768 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
769 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
770 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
771 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_TSC_AUX, true, true);
772 }
773
774#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
775 /*
776 * Set the guest & host MSR load/store physical addresses.
777 */
778 Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr);
779 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
780 AssertRC(rc);
781 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr);
782 AssertRC(rc);
783 Assert(pVCpu->hm.s.vmx.HCPhysHostMsr);
784 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr);
785 AssertRC(rc);
786#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
787
788 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
789 AssertRC(rc);
790 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0);
791 AssertRC(rc);
792 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0);
793 AssertRC(rc);
794
795 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
796 {
797 Assert(pVM->hm.s.vmx.hMemObjApicAccess);
798 /* Optional */
799 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0);
800 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic);
801
802 if (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
803 rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess);
804
805 AssertRC(rc);
806 }
807
808 /* Set link pointer to -1. Not currently used. */
809 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
810 AssertRC(rc);
811
812 /*
813 * Clear the VMCS, marking it inactive. Implementation-specific data is cleared and the
814 * VMCS data is written back to memory.
815 */
816 rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
817 AssertRC(rc);
818
819 /*
820 * Configure the VMCS read cache.
821 */
822 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
823
824 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_RIP);
825 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_RSP);
826 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_RFLAGS);
827 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
828 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
829 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_CR0);
830 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
831 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_CR4);
832 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_DR7);
833 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
834 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_SYSENTER_EIP);
835 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_SYSENTER_ESP);
836 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
837 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_GDTR_BASE);
838 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
839 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_IDTR_BASE);
840
841 VMX_SETUP_SELREG(ES, pCache);
842 VMX_SETUP_SELREG(SS, pCache);
843 VMX_SETUP_SELREG(CS, pCache);
844 VMX_SETUP_SELREG(DS, pCache);
845 VMX_SETUP_SELREG(FS, pCache);
846 VMX_SETUP_SELREG(GS, pCache);
847 VMX_SETUP_SELREG(LDTR, pCache);
848 VMX_SETUP_SELREG(TR, pCache);
849
850 /*
851 * Status code VMCS reads.
852 */
853 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_REASON);
854 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
855 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
856 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE);
857 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
858 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
859 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
860 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_IDT_INFO);
861 VMXSetupCachedReadVmcs(pCache, VMX_VMCS32_RO_IDT_ERROR_CODE);
862
863 if (pVM->hm.s.fNestedPaging)
864 {
865 VMXSetupCachedReadVmcs(pCache, VMX_VMCS_GUEST_CR3);
866 VMXSetupCachedReadVmcs(pCache, VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL);
867 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
868 }
869 else
870 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
871 } /* for each VMCPU */
872
873 /*
874 * Setup the right TLB function based on CPU capabilities.
875 */
876 if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid)
877 pVM->hm.s.vmx.pfnFlushTaggedTlb = hmR0VmxSetupTLBBoth;
878 else if (pVM->hm.s.fNestedPaging)
879 pVM->hm.s.vmx.pfnFlushTaggedTlb = hmR0VmxSetupTLBEPT;
880 else if (pVM->hm.s.vmx.fVpid)
881 pVM->hm.s.vmx.pfnFlushTaggedTlb = hmR0VmxSetupTLBVPID;
882 else
883 pVM->hm.s.vmx.pfnFlushTaggedTlb = hmR0VmxSetupTLBDummy;
884
885vmx_end:
886 hmR0VmxCheckError(pVM, &pVM->aCpus[0], rc);
887 return rc;
888}
889
890
891/**
892 * Sets the permission bits for the specified MSR.
893 *
894 * @param pVCpu Pointer to the VMCPU.
895 * @param ulMSR The MSR value.
896 * @param fRead Whether reading is allowed.
897 * @param fWrite Whether writing is allowed.
898 */
899static void hmR0VmxSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
900{
901 unsigned ulBit;
902 uint8_t *pvMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap;
903
904 /*
905 * Layout:
906 * 0x000 - 0x3ff - Low MSR read bits
907 * 0x400 - 0x7ff - High MSR read bits
908 * 0x800 - 0xbff - Low MSR write bits
909 * 0xc00 - 0xfff - High MSR write bits
910 */
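    /*
     * Worked example: MSR_K8_LSTAR (0xC0000082) falls in the high range, so ulBit
     * becomes 0x82 and pvMsrBitmap is advanced by 0x400. Allowing reads then
     * clears bit 0x82 at offset 0x400 and allowing writes clears bit 0x82 at
     * offset 0xC00 (0x800 + 0x400), matching the layout above.
     */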
911 if (ulMSR <= 0x00001FFF)
912 {
913 /* Pentium-compatible MSRs */
914 ulBit = ulMSR;
915 }
916 else if ( ulMSR >= 0xC0000000
917 && ulMSR <= 0xC0001FFF)
918 {
919 /* AMD Sixth Generation x86 Processor MSRs */
920 ulBit = (ulMSR - 0xC0000000);
921 pvMsrBitmap += 0x400;
922 }
923 else
924 {
925 AssertFailed();
926 return;
927 }
928
929 Assert(ulBit <= 0x1fff);
930 if (fRead)
931 ASMBitClear(pvMsrBitmap, ulBit);
932 else
933 ASMBitSet(pvMsrBitmap, ulBit);
934
935 if (fWrite)
936 ASMBitClear(pvMsrBitmap + 0x800, ulBit);
937 else
938 ASMBitSet(pvMsrBitmap + 0x800, ulBit);
939}
940
941
942/**
943 * Injects an event (trap or external interrupt).
944 *
945 * @returns VBox status code. Note that it may return VINF_EM_RESET to
946 * indicate a triple fault when injecting X86_XCPT_DF.
947 *
948 * @param pVM Pointer to the VM.
949 * @param pVCpu Pointer to the VMCPU.
950 * @param pCtx Pointer to the guest CPU Context.
951 * @param intInfo VMX interrupt info.
952 * @param cbInstr Opcode length of faulting instruction.
953 * @param errCode Error code (optional).
954 */
955static int hmR0VmxInjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
956{
957 int rc;
958 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
959
960#ifdef VBOX_WITH_STATISTICS
961 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
962#endif
963
964#ifdef VBOX_STRICT
965 if (iGate == 0xE)
966 {
967 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate,
968 (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
969 }
970 else if (iGate < 0x20)
971 {
972 LogFlow(("hmR0VmxInjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip,
973 errCode));
974 }
975 else
976 {
977 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
978 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT
979 || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
980 Assert( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT
981 || pCtx->eflags.u32 & X86_EFL_IF);
982 }
983#endif
984
985 if ( CPUMIsGuestInRealModeEx(pCtx)
986 && pVM->hm.s.vmx.pRealModeTSS)
987 {
988 RTGCPHYS GCPhysHandler;
989 uint16_t offset, ip;
990 RTSEL sel;
991
992 /*
993 * Injecting events doesn't work right with real mode emulation.
994 * (#GP if we try to inject external hardware interrupts)
995 * Inject the interrupt or trap directly instead.
996 *
997 * ASSUMES no access handlers for the bits we read or write below (should be safe).
998 */
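        /*
         * In real mode the IDT is just the 16-bit IVT: entry 'iGate' occupies 4 bytes
         * at idtr.pIdt + iGate * 4, holding the handler offset (low word) and segment
         * (high word). The code below checks that the entry lies within the IDT limit,
         * builds the same stack frame a real INT would (FLAGS, CS, IP) and then points
         * CS:IP at the handler.
         */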
999 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
1000
1001 /*
1002 * Check if the interrupt handler is present.
1003 */
1004 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
1005 {
1006 Log(("IDT cbIdt violation\n"));
1007 if (iGate != X86_XCPT_DF)
1008 {
1009 uint32_t intInfo2;
1010
1011 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : (uint32_t)X86_XCPT_GP;
1012 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1013 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1014 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1015
1016 return hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
1017 }
1018 Log(("Triple fault -> reset the VM!\n"));
1019 return VINF_EM_RESET;
1020 }
1021 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT
1022 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
1023 || iGate == 4)
1024 {
1025 ip = pCtx->ip + cbInstr;
1026 }
1027 else
1028 ip = pCtx->ip;
1029
1030 /*
1031 * Read the selector:offset pair of the interrupt handler.
1032 */
1033 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
1034 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
1035 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
1036
1037 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
1038
1039 /*
1040 * Construct the stack frame.
1041 */
1042 /** @todo Check stack limit. */
1043 pCtx->sp -= 2;
1044 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->eflags.u));
1045 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
1046 pCtx->sp -= 2;
1047 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss.Sel, pCtx->sp, pCtx->cs.Sel));
1048 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
1049 pCtx->sp -= 2;
1050 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss.Sel, pCtx->sp, ip));
1051 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ss.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
1052
1053 /*
1054 * Update the CPU state for executing the handler.
1055 */
1056 pCtx->rip = offset;
1057 pCtx->cs.Sel = sel;
1058 pCtx->cs.u64Base = sel << 4;
1059 pCtx->eflags.u &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC);
1060
1061 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_SEGMENT_REGS;
1062 return VINF_SUCCESS;
1063 }
1064
1065 /*
1066 * Set event injection state.
1067 */
1068 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
1069 rc |= VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
1070 rc |= VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
1071
1072 AssertRC(rc);
1073 return rc;
1074}
1075
1076
1077/**
1078 * Checks for pending guest interrupts and injects them.
1079 *
1080 * @returns VBox status code.
1081 * @param pVM Pointer to the VM.
1082 * @param pVCpu Pointer to the VMCPU.
1083 * @param pCtx Pointer to the guest CPU context.
1084 */
1085static int hmR0VmxCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
1086{
1087 int rc;
1088
1089 /*
1090 * Dispatch any pending interrupts (injected before, but a VM exit occurred prematurely).
1091 */
1092 if (pVCpu->hm.s.Event.fPending)
1093 {
1094 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntrInfo,
1095 pVCpu->hm.s.Event.u32ErrCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
1096 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntReinject);
1097 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, pVCpu->hm.s.Event.u64IntrInfo, 0, pVCpu->hm.s.Event.u32ErrCode);
1098 AssertRC(rc);
1099
1100 pVCpu->hm.s.Event.fPending = false;
1101 return VINF_SUCCESS;
1102 }
1103
1104 /*
1105 * If an active trap is already pending, we must forward it first!
1106 */
1107 if (!TRPMHasTrap(pVCpu))
1108 {
1109 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
1110 {
1111 RTGCUINTPTR intInfo;
1112
1113 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
1114
1115 intInfo = X86_XCPT_NMI;
1116 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1117 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1118
1119 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
1120 AssertRC(rc);
1121
1122 return VINF_SUCCESS;
1123 }
1124
1125 /** @todo SMI interrupts. */
1126
1127 /*
1128 * When external interrupts are pending, we should exit the VM when IF is set.
1129 */
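        /*
         * If the guest currently has interrupts disabled nothing can be injected;
         * interrupt-window exiting is enabled below instead, so the CPU forces a
         * VM-exit as soon as the guest is able to accept an interrupt again, at
         * which point the pending interrupt is dispatched.
         */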
1130 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
1131 {
1132 if (!(pCtx->eflags.u32 & X86_EFL_IF))
1133 {
1134 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INT_WINDOW_EXIT))
1135 {
1136 LogFlow(("Enable irq window exit!\n"));
1137 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INT_WINDOW_EXIT;
1138 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
1139 AssertRC(rc);
1140 }
1141 /* else nothing to do but wait */
1142 }
1143 else if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1144 {
1145 uint8_t u8Interrupt;
1146
1147 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
1148 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu,
1149 u8Interrupt, u8Interrupt, rc, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
1150 if (RT_SUCCESS(rc))
1151 {
1152 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
1153 AssertRC(rc);
1154 }
1155 else
1156 {
1157 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
1158 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
1159 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
1160 /* Just continue */
1161 }
1162 }
1163 else
1164 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
1165 }
1166 }
1167
1168#ifdef VBOX_STRICT
1169 if (TRPMHasTrap(pVCpu))
1170 {
1171 uint8_t u8Vector;
1172 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
1173 AssertRC(rc);
1174 }
1175#endif
1176
1177 if ( (pCtx->eflags.u32 & X86_EFL_IF)
1178 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1179 && TRPMHasTrap(pVCpu)
1180 )
1181 {
1182 uint8_t u8Vector;
1183 TRPMEVENT enmType;
1184 RTGCUINTPTR intInfo;
1185 RTGCUINT errCode;
1186
1187 /*
1188 * If a new event is pending, dispatch it now.
1189 */
1190 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
1191 AssertRC(rc);
1192 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
1193 Assert(enmType != TRPM_SOFTWARE_INT);
1194
1195 /*
1196 * Clear the pending trap.
1197 */
1198 rc = TRPMResetTrap(pVCpu);
1199 AssertRC(rc);
1200
1201 intInfo = u8Vector;
1202 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
1203
1204 if (enmType == TRPM_TRAP)
1205 {
1206 switch (u8Vector)
1207 {
1208 case X86_XCPT_DF:
1209 case X86_XCPT_TS:
1210 case X86_XCPT_NP:
1211 case X86_XCPT_SS:
1212 case X86_XCPT_GP:
1213 case X86_XCPT_PF:
1214 case X86_XCPT_AC:
1215 {
1216 /** @todo r=ramshankar: setting this bit would blow up for real-mode guests with
1217 * unrestricted guest execution. */
1218 /* Valid error codes. */
1219 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
1220 break;
1221 }
1222
1223 default:
1224 break;
1225 }
1226
1227 if ( u8Vector == X86_XCPT_BP
1228 || u8Vector == X86_XCPT_OF)
1229 {
1230 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1231 }
1232 else
1233 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1234 }
1235 else
1236 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
1237
1238 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntInject);
1239 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
1240 AssertRC(rc);
1241 } /* if (interrupts can be dispatched) */
1242
1243 return VINF_SUCCESS;
1244}
1245
1246/**
1247 * Checks for pending VMX events and converts them to TRPM. Before we execute any instruction
1248 * outside of VMX, any pending VMX event must be converted so that it can be delivered properly.
1249 *
1250 * @returns VBox status code.
1251 * @param pVCpu Pointer to the VMCPU.
1252 */
1253static int hmR0VmxCheckPendingEvent(PVMCPU pVCpu)
1254{
1255 if (pVCpu->hm.s.Event.fPending)
1256 {
1257 TRPMEVENT enmTrapType;
1258
1259 /* If a trap was already pending, we did something wrong! */
1260 Assert((TRPMQueryTrap(pVCpu, NULL, NULL) == VERR_TRPM_NO_ACTIVE_TRAP));
1261
1262 /*
1263 * Clear the pending event and move it over to TRPM for the rest
1264 * of the world to see.
1265 */
1266 pVCpu->hm.s.Event.fPending = false;
1267 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo))
1268 {
1269 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT:
1270 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI:
1271 enmTrapType = TRPM_HARDWARE_INT;
1272 break;
1273 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT:
1274 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /** @todo Is classifying #BP, #OF as TRPM_SOFTWARE_INT correct? */
1275 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DB_XCPT:
1276 enmTrapType = TRPM_SOFTWARE_INT;
1277 break;
1278 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT:
1279 enmTrapType = TRPM_TRAP;
1280 break;
1281 default:
1282 enmTrapType = TRPM_32BIT_HACK; /* Can't get here. */
1283 AssertFailed();
1284 }
1285 TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.u64IntrInfo), enmTrapType);
1286 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntrInfo))
1287 TRPMSetErrorCode(pVCpu, pVCpu->hm.s.Event.u32ErrCode);
1288 //@todo: Is there any situation where we need to call TRPMSetFaultAddress()?
1289 }
1290 return VINF_SUCCESS;
1291}
1292
1293/**
1294 * Saves the host state into the VMCS.
1295 *
1296 * @returns VBox status code.
1297 * @param pVM Pointer to the VM.
1298 * @param pVCpu Pointer to the VMCPU.
1299 */
1300VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1301{
1302 int rc = VINF_SUCCESS;
1303 NOREF(pVM);
1304
1305 /*
1306 * Host CPU Context.
1307 */
1308 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
1309 {
1310 RTIDTR idtr;
1311 RTGDTR gdtr;
1312 RTSEL SelTR;
1313 PCX86DESCHC pDesc;
1314 uintptr_t trBase;
1315 RTSEL cs;
1316 RTSEL ss;
1317 uint64_t cr3;
1318
1319 /*
1320 * Control registers.
1321 */
1322 rc = VMXWriteVmcs(VMX_VMCS_HOST_CR0, ASMGetCR0());
1323 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1324#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1325 if (VMX_IS_64BIT_HOST_MODE())
1326 {
1327 cr3 = hmR0Get64bitCR3();
1328 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_CR3, cr3);
1329 }
1330 else
1331#endif
1332 {
1333 cr3 = ASMGetCR3();
1334 rc |= VMXWriteVmcs(VMX_VMCS_HOST_CR3, cr3);
1335 }
1336 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1337 rc |= VMXWriteVmcs(VMX_VMCS_HOST_CR4, ASMGetCR4());
1338 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1339 AssertRC(rc);
1340
1341 /*
1342 * Selector registers.
1343 */
1344#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1345 if (VMX_IS_64BIT_HOST_MODE())
1346 {
1347 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1348 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1349 }
1350 else
1351 {
1352 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1353 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1354 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1355 }
1356#else
1357 cs = ASMGetCS();
1358 ss = ASMGetSS();
1359#endif
1360 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1361 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1362 rc = VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_CS, cs);
1363 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1364 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_DS, 0);
1365 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_ES, 0);
1366#if HC_ARCH_BITS == 32
1367 if (!VMX_IS_64BIT_HOST_MODE())
1368 {
1369 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_FS, 0);
1370 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_GS, 0);
1371 }
1372#endif
1373 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_SS, ss);
1374 SelTR = ASMGetTR();
1375 rc |= VMXWriteVmcs(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1376 AssertRC(rc);
1377 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1378 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1379 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1380 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1381 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1382 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1383 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1384
1385 /*
1386 * GDTR & IDTR.
1387 */
1388#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1389 if (VMX_IS_64BIT_HOST_MODE())
1390 {
1391 X86XDTR64 gdtr64, idtr64;
1392 hmR0Get64bitGdtrAndIdtr(&gdtr64, &idtr64);
1393 rc = VMXWriteVmcs64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1394 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1395 AssertRC(rc);
1396 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1397 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1398 gdtr.cbGdt = gdtr64.cb;
1399 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1400 }
1401 else
1402#endif
1403 {
1404 ASMGetGDTR(&gdtr);
1405 rc = VMXWriteVmcs(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1406 ASMGetIDTR(&idtr);
1407 rc |= VMXWriteVmcs(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1408 AssertRC(rc);
1409 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1410 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1411 }
1412
1413 /*
1414 * Save the base address of the TR selector.
1415 */
1416 if (SelTR > gdtr.cbGdt)
1417 {
1418 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1419 return VERR_VMX_INVALID_HOST_STATE;
1420 }
1421
1422 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1423#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1424 if (VMX_IS_64BIT_HOST_MODE())
1425 {
1426 uint64_t trBase64 = X86DESC64_BASE((PX86DESC64)pDesc);
1427 rc = VMXWriteVmcs64(VMX_VMCS_HOST_TR_BASE, trBase64);
1428 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1429 AssertRC(rc);
1430 }
1431 else
1432#endif
1433 {
1434#if HC_ARCH_BITS == 64
1435 trBase = X86DESC64_BASE(pDesc);
1436#else
1437 trBase = X86DESC_BASE(pDesc);
1438#endif
1439 rc = VMXWriteVmcs(VMX_VMCS_HOST_TR_BASE, trBase);
1440 AssertRC(rc);
1441 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1442 }
1443
1444 /*
1445 * FS base and GS base.
1446 */
1447#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1448 if (VMX_IS_64BIT_HOST_MODE())
1449 {
1450 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1451 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1452 rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1453 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1454 }
1455#endif
1456 AssertRC(rc);
1457
1458 /*
1459 * Sysenter MSRs.
1460 */
1461 /** @todo expensive!! */
1462 rc = VMXWriteVmcs(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1463 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1464#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1465 if (VMX_IS_64BIT_HOST_MODE())
1466 {
1467 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1468 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1469 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1470 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1471 }
1472 else
1473 {
1474 rc |= VMXWriteVmcs(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1475 rc |= VMXWriteVmcs(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1476 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1477 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1478 }
1479#elif HC_ARCH_BITS == 32
1480 rc |= VMXWriteVmcs(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1481 rc |= VMXWriteVmcs(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1482 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1483 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1484#else
1485 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1486 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1487 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1488 rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1489#endif
1490 AssertRC(rc);
1491
1492
1493#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1494 /*
1495 * Store all host MSRs in the VM-Exit load area, so they will be reloaded after
1496 * the world switch back to the host.
1497 */
1498 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvHostMsr;
1499 unsigned idxMsr = 0;
1500
1501 uint32_t u32HostExtFeatures = ASMCpuId_EDX(0x80000001);
1502 if (u32HostExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
1503 {
1504 pMsr->u32IndexMSR = MSR_K6_EFER;
1505 pMsr->u32Reserved = 0;
1506# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1507 if (CPUMIsGuestInLongMode(pVCpu))
1508 {
1509 /* Must match the EFER value in our 64 bits switcher. */
1510 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1511 }
1512 else
1513# endif
1514 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1515 pMsr++; idxMsr++;
1516 }
1517
1518# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1519 if (VMX_IS_64BIT_HOST_MODE())
1520 {
1521 pMsr->u32IndexMSR = MSR_K6_STAR;
1522 pMsr->u32Reserved = 0;
1523 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1524 pMsr++; idxMsr++;
1525 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1526 pMsr->u32Reserved = 0;
1527 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1528 pMsr++; idxMsr++;
1529 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1530 pMsr->u32Reserved = 0;
1531 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1532 pMsr++; idxMsr++;
1533
1534 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
1535#if 0
1536 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1537 pMsr->u32Reserved = 0;
1538 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1539 pMsr++; idxMsr++;
1540#endif
1541 }
1542# endif
1543
1544 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
1545 {
1546 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
1547 pMsr->u32Reserved = 0;
1548 pMsr->u64Value = ASMRdMsr(MSR_K8_TSC_AUX);
1549 pMsr++; idxMsr++;
1550 }
1551
1552 /** @todo r=ramshankar: check IA32_VMX_MISC bits 27:25 for valid idxMsr
1553 * range. */
1554 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1555 AssertRC(rc);
1556#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1557
1558 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_HOST_CONTEXT;
1559 }
1560 return rc;
1561}
1562
1563
1564/**
1565 * Loads the 4 PDPEs into the guest state when nested paging is used and the
1566 * guest operates in PAE mode.
1567 *
1568 * @returns VBox status code.
1569 * @param pVCpu Pointer to the VMCPU.
1570 * @param pCtx Pointer to the guest CPU context.
1571 */
1572static int hmR0VmxLoadPaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1573{
1574 if (CPUMIsGuestInPAEModeEx(pCtx))
1575 {
1576 X86PDPE aPdpes[4];
1577 int rc = PGMGstGetPaePdpes(pVCpu, &aPdpes[0]);
1578 AssertRCReturn(rc, rc);
1579
1580 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, aPdpes[0].u); AssertRCReturn(rc, rc);
1581 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, aPdpes[1].u); AssertRCReturn(rc, rc);
1582 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, aPdpes[2].u); AssertRCReturn(rc, rc);
1583 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, aPdpes[3].u); AssertRCReturn(rc, rc);
1584 }
1585 return VINF_SUCCESS;
1586}
1587
1588
1589/**
1590 * Saves the 4 PDPEs into the guest state when nested paging is used and the
1591 * guest operates in PAE mode.
1592 *
1593 * @returns VBox status code.
1594 * @param pVCpu Pointer to the VM CPU.
1595 * @param pCtx Pointer to the guest CPU context.
1596 *
1597 * @remarks Tell PGM about CR3 changes before calling this helper.
1598 */
1599static int hmR0VmxSavePaePdpes(PVMCPU pVCpu, PCPUMCTX pCtx)
1600{
1601 if (CPUMIsGuestInPAEModeEx(pCtx))
1602 {
1603 int rc;
1604 X86PDPE aPdpes[4];
1605 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &aPdpes[0].u); AssertRCReturn(rc, rc);
1606 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &aPdpes[1].u); AssertRCReturn(rc, rc);
1607 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &aPdpes[2].u); AssertRCReturn(rc, rc);
1608 rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &aPdpes[3].u); AssertRCReturn(rc, rc);
1609
1610 rc = PGMGstUpdatePaePdpes(pVCpu, &aPdpes[0]);
1611 AssertRCReturn(rc, rc);
1612 }
1613 return VINF_SUCCESS;
1614}
1615
1616
1617/**
1618 * Updates the exception bitmap according to the current CPU state.
1619 *
1620 * @param pVM Pointer to the VM.
1621 * @param pVCpu Pointer to the VMCPU.
1622 * @param pCtx Pointer to the guest CPU context.
1623 */
1624static void hmR0VmxUpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1625{
1626 uint32_t u32TrapMask;
1627 Assert(pCtx);
1628
1629 /*
1630 * Set up a mask for intercepting traps.
1631 */
1632 /** @todo Do we really need to always intercept #DB? */
1633 u32TrapMask = RT_BIT(X86_XCPT_DB)
1634 | RT_BIT(X86_XCPT_NM)
1635#ifdef VBOX_ALWAYS_TRAP_PF
1636 | RT_BIT(X86_XCPT_PF)
1637#endif
1638#ifdef VBOX_STRICT
1639 | RT_BIT(X86_XCPT_BP)
1640 | RT_BIT(X86_XCPT_DB)
1641 | RT_BIT(X86_XCPT_DE)
1642 | RT_BIT(X86_XCPT_NM)
1643 | RT_BIT(X86_XCPT_UD)
1644 | RT_BIT(X86_XCPT_NP)
1645 | RT_BIT(X86_XCPT_SS)
1646 | RT_BIT(X86_XCPT_GP)
1647 | RT_BIT(X86_XCPT_MF)
1648#endif
1649 ;
1650
1651 /*
1652 * Without nested paging, #PF must be intercepted to implement shadow paging.
1653 */
1654 /** @todo NP state won't change so maybe we should build the initial trap mask up front? */
1655 if (!pVM->hm.s.fNestedPaging)
1656 u32TrapMask |= RT_BIT(X86_XCPT_PF);
1657
1658 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
1659 if (!(pCtx->cr0 & X86_CR0_NE))
1660 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1661
1662#ifdef VBOX_STRICT
1663 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1664#endif
1665
1666 /*
1667 * Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise).
1668 */
1669 /** @todo Despite the claim to intercept everything, with NP we do not intercept #PF. Should we? */
1670 if ( CPUMIsGuestInRealModeEx(pCtx)
1671 && pVM->hm.s.vmx.pRealModeTSS)
1672 {
1673 u32TrapMask |= RT_BIT(X86_XCPT_DE)
1674 | RT_BIT(X86_XCPT_DB)
1675 | RT_BIT(X86_XCPT_NMI)
1676 | RT_BIT(X86_XCPT_BP)
1677 | RT_BIT(X86_XCPT_OF)
1678 | RT_BIT(X86_XCPT_BR)
1679 | RT_BIT(X86_XCPT_UD)
1680 | RT_BIT(X86_XCPT_DF)
1681 | RT_BIT(X86_XCPT_CO_SEG_OVERRUN)
1682 | RT_BIT(X86_XCPT_TS)
1683 | RT_BIT(X86_XCPT_NP)
1684 | RT_BIT(X86_XCPT_SS)
1685 | RT_BIT(X86_XCPT_GP)
1686 | RT_BIT(X86_XCPT_MF)
1687 | RT_BIT(X86_XCPT_AC)
1688 | RT_BIT(X86_XCPT_MC)
1689 | RT_BIT(X86_XCPT_XF)
1690 ;
1691 }
1692
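    /* Quick sanity sketch (illustrative, not from the source): each set bit n in the bitmap makes
       exception vector n cause a VM-exit. In the common non-strict case with nested paging and
       CR0.NE set, only RT_BIT(X86_XCPT_DB) | RT_BIT(X86_XCPT_NM) remain, i.e. 0x82. */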
1693 int rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1694 AssertRC(rc);
1695}
1696
1697
1698/**
1699 * Loads a minimal guest state.
1700 *
1701 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1702 *
1703 * @param pVM Pointer to the VM.
1704 * @param pVCpu Pointer to the VMCPU.
1705 * @param pCtx Pointer to the guest CPU context.
1706 */
1707VMMR0DECL(void) VMXR0LoadMinimalGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1708{
1709 int rc;
1710 X86EFLAGS eflags;
1711
1712 Assert(!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_ALL_GUEST));
1713
1714 /*
1715 * Load EIP, ESP and EFLAGS.
1716 */
1717 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_RIP, pCtx->rip);
1718 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_RSP, pCtx->rsp);
1719 AssertRC(rc);
1720
1721 /*
1722 * Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1.
1723 */
1724 eflags = pCtx->eflags;
1725 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1726 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
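    /* Sketch of the masking (derived from the comment above, not from the VMX_EFLAGS_* definitions):
       the and-mask clears bits 22-31, 15, 5 and 3, the or-mask sets bit 1. E.g. a stray
       eflags.u32 of 0xFFC08247 would be reduced to 0x00000247 before being written below. */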
1727
1728 /*
1729 * Check if real mode emulation using v86 mode.
1730 */
1731 if ( CPUMIsGuestInRealModeEx(pCtx)
1732 && pVM->hm.s.vmx.pRealModeTSS)
1733 {
1734 pVCpu->hm.s.vmx.RealMode.eflags = eflags;
1735
1736 eflags.Bits.u1VM = 1;
1737 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1738 }
1739 rc = VMXWriteVmcs(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1740 AssertRC(rc);
1741}
1742
1743
1744/**
1745 * Loads the guest state.
1746 *
1747 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1748 *
1749 * @returns VBox status code.
1750 * @param pVM Pointer to the VM.
1751 * @param pVCpu Pointer to the VMCPU.
1752 * @param pCtx Pointer to the guest CPU context.
1753 */
1754VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1755{
1756 int rc = VINF_SUCCESS;
1757 RTGCUINTPTR val;
1758
1759 /*
1760 * VMX_VMCS_CTRL_ENTRY_CONTROLS
1761 * Set required bits to one and zero according to the MSR capabilities.
1762 */
1763 val = pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0;
1764
1765 /*
1766 * Load guest debug controls (DR7 & IA32_DEBUGCTL_MSR).
1767     * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs.
1768 */
1769 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1770
1771 if (CPUMIsGuestInLongModeEx(pCtx))
1772 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA32E_MODE_GUEST;
1773 /* else Must be zero when AMD64 is not available. */
1774
1775 /*
1776 * Mask away the bits that the CPU doesn't support.
1777 */
1778 val &= pVM->hm.s.vmx.msr.vmx_entry.n.allowed1;
1779 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_CONTROLS, val);
1780 AssertRC(rc);
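    /* In other words (paraphrasing the comments above): disallowed0 supplies the bits the CPU
       insists on being 1, the desired features are then OR'ed in, and allowed1 strips anything this
       CPU cannot do. The same clamp-against-the-capability-MSR pattern is used for the exit
       controls just below. */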
1781
1782 /*
1783 * VMX_VMCS_CTRL_EXIT_CONTROLS
1784 * Set required bits to one and zero according to the MSR capabilities.
1785 */
1786 val = pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0;
1787
1788 /*
1789 * Save debug controls (DR7 & IA32_DEBUGCTL_MSR)
1790     * Forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs.
1791 */
1792 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1793
1794#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1795 if (VMX_IS_64BIT_HOST_MODE())
1796 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE;
1797 /* else Must be zero when AMD64 is not available. */
1798#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1799 if (CPUMIsGuestInLongModeEx(pCtx))
1800 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE; /* our switcher goes to long mode */
1801 else
1802 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_ADDR_SPACE_SIZE));
1803#endif
1804 val &= pVM->hm.s.vmx.msr.vmx_exit.n.allowed1;
1805
1806 /*
1807 * Don't acknowledge external interrupts on VM-exit.
1808 */
1809 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_CONTROLS, val);
1810 AssertRC(rc);
1811
1812 /*
1813 * Guest CPU context: ES, CS, SS, DS, FS, GS.
1814 */
1815 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SEGMENT_REGS)
1816 {
1817 if (pVM->hm.s.vmx.pRealModeTSS)
1818 {
1819 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1820 if (pVCpu->hm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1821 {
1822 /*
1823 * Correct weird requirements for switching to protected mode.
1824 */
1825 if ( pVCpu->hm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1826 && enmGuestMode >= PGMMODE_PROTECTED)
1827 {
1828#ifdef VBOX_WITH_REM
1829 /*
1830                     * Flush the recompiler code cache as it's quite possible the guest will rewrite code
1831                     * that it will later execute in real mode (OpenBSD 4.0 is one such example).
1832 */
1833 REMFlushTBs(pVM);
1834#endif
1835
1836 /*
1837 * DPL of all hidden selector registers must match the current CPL (0).
1838 */
1839 pCtx->cs.Attr.n.u2Dpl = 0;
1840 pCtx->cs.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1841
1842 pCtx->ds.Attr.n.u2Dpl = 0;
1843 pCtx->es.Attr.n.u2Dpl = 0;
1844 pCtx->fs.Attr.n.u2Dpl = 0;
1845 pCtx->gs.Attr.n.u2Dpl = 0;
1846 pCtx->ss.Attr.n.u2Dpl = 0;
1847 }
1848 pVCpu->hm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1849 }
1850 }
1851
1852 VMX_WRITE_SELREG(ES, es);
1853 AssertRC(rc);
1854
1855 VMX_WRITE_SELREG(CS, cs);
1856 AssertRC(rc);
1857
1858 VMX_WRITE_SELREG(SS, ss);
1859 AssertRC(rc);
1860
1861 VMX_WRITE_SELREG(DS, ds);
1862 AssertRC(rc);
1863
1864 VMX_WRITE_SELREG(FS, fs);
1865 AssertRC(rc);
1866
1867 VMX_WRITE_SELREG(GS, gs);
1868 AssertRC(rc);
1869 }
1870
1871 /*
1872 * Guest CPU context: LDTR.
1873 */
1874 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_LDTR)
1875 {
1876 if (pCtx->ldtr.Sel == 0)
1877 {
1878 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1879 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1880            rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_LDTR_BASE, 0); /* @todo using VMXWriteVmcs (without the "64") should yield the same result. */
1881 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1882 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1883 }
1884 else
1885 {
1886 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr.Sel);
1887 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit);
1888            rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtr.u64Base); /* @todo using VMXWriteVmcs (without the "64") should yield the same result. */
1889 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtr.Attr.u);
1890 }
1891 AssertRC(rc);
1892 }
1893
1894 /*
1895 * Guest CPU context: TR.
1896 */
1897 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_TR)
1898 {
1899 /*
1900 * Real mode emulation using v86 mode with CR4.VME (interrupt redirection
1901 * using the int bitmap in the TSS).
1902 */
1903 if ( CPUMIsGuestInRealModeEx(pCtx)
1904 && pVM->hm.s.vmx.pRealModeTSS)
1905 {
1906 RTGCPHYS GCPhys;
1907
1908 /* We convert it here every time as PCI regions could be reconfigured. */
1909 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys);
1910 AssertRC(rc);
1911
1912 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_TR, 0);
1913 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_TR_LIMIT, HM_VTX_TSS_SIZE);
1914 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1915
1916 X86DESCATTR attr;
1917
1918 attr.u = 0;
1919 attr.n.u1Present = 1;
1920 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1921 val = attr.u;
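            /* I.e. val should come out as 0x8b here (present, system, 32-bit busy TSS) --
               the TSS counterpart of the 0x82 (present, LDT) used for the null LDTR above. */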
1922 }
1923 else
1924 {
1925 rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr.Sel);
1926 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->tr.u32Limit);
1927 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_TR_BASE, pCtx->tr.u64Base);
1928
1929 val = pCtx->tr.Attr.u;
1930
1931 /* The TSS selector must be busy (REM bugs? see defect #XXXX). */
1932 if (!(val & X86_SEL_TYPE_SYS_TSS_BUSY_MASK))
1933 {
1934 if (val & 0xf)
1935 val |= X86_SEL_TYPE_SYS_TSS_BUSY_MASK;
1936 else
1937 /* Default if no TR selector has been set (otherwise vmlaunch will fail!) */
1938 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1939 }
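            /* E.g. an 'available' 32-bit TSS (type 9) is promoted to 'busy' (type 11) here; the
               AssertMsg below then only accepts the two busy TSS types VT-x allows in guest TR. */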
1940 AssertMsg((val & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (val & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY,
1941 ("%#x\n", val));
1942 }
1943 rc |= VMXWriteVmcs(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1944 AssertRC(rc);
1945 }
1946
1947 /*
1948 * Guest CPU context: GDTR.
1949 */
1950 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_GDTR)
1951 {
1952 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1953 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1954 AssertRC(rc);
1955 }
1956
1957 /*
1958 * Guest CPU context: IDTR.
1959 */
1960 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_IDTR)
1961 {
1962 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1963 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1964 AssertRC(rc);
1965 }
1966
1967 /*
1968 * Sysenter MSRs.
1969 */
1970 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_MSR)
1971 {
1972 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1973 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1974 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1975 AssertRC(rc);
1976 }
1977
1978 /*
1979 * Guest CPU context: Control registers.
1980 */
1981 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0)
1982 {
1983 val = pCtx->cr0;
1984 rc = VMXWriteVmcs(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1985 Log2(("Guest CR0-shadow %08x\n", val));
1986 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1987 {
1988 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1989 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1990 }
1991 else
1992 {
1993 /** @todo check if we support the old style mess correctly. */
1994 if (!(val & X86_CR0_NE))
1995 Log(("Forcing X86_CR0_NE!!!\n"));
1996
1997 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1998 }
1999 /* Protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
2000 if (!pVM->hm.s.vmx.fUnrestrictedGuest)
2001 val |= X86_CR0_PE | X86_CR0_PG;
2002
2003 if (pVM->hm.s.fNestedPaging)
2004 {
2005 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
2006 {
2007 /* Disable CR3 read/write monitoring as we don't need it for EPT. */
2008 pVCpu->hm.s.vmx.u32ProcCtls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2009 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
2010 }
2011 else
2012 {
2013                /* Re-enable CR3 read/write monitoring as our identity-mapped page table is active. */
2014 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2015 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2016 }
2017 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2018 AssertRC(rc);
2019 }
2020 else
2021 {
2022            /* Note: We must also set CR0.WP as we rely on write-protecting various pages for which supervisor writes must be caught. */
2023 val |= X86_CR0_WP;
2024 }
2025
2026 /* Always enable caching. */
2027 val &= ~(X86_CR0_CD|X86_CR0_NW);
2028
2029 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_CR0, val);
2030 Log2(("Guest CR0 %08x\n", val));
2031
2032 /*
2033         * CR0 flags owned by the host; if the guest attempts to change them, the VM will exit.
2034 */
2035 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
2036 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
2037 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
2038 | X86_CR0_CD /* Bit not restored during VM-exit! */
2039 | X86_CR0_NW /* Bit not restored during VM-exit! */
2040 | X86_CR0_NE;
2041
2042 /*
2043         * When the guest's FPU state is active, we no longer care about the FPU-related bits.
2044 */
2045 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
2046 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_MP;
2047
2048 pVCpu->hm.s.vmx.cr0_mask = val;
2049
2050 rc |= VMXWriteVmcs(VMX_VMCS_CTRL_CR0_MASK, val);
2051 Log2(("Guest CR0-mask %08x\n", val));
2052 AssertRC(rc);
2053 }
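    /* How the mask/shadow pair above behaves (standard VMX semantics, summarised here for
       readability): for every bit set in CR0_MASK the guest reads the CR0_READ_SHADOW value and any
       attempt to change that bit causes a VM-exit, so e.g. the PE/PG bits forced to 1 above are not
       visible to a guest running in (emulated) real mode. The CR4 block below works the same way. */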
2054
2055 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR4)
2056 {
2057 rc = VMXWriteVmcs(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
2058 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
2059 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
2060 val = pCtx->cr4 | (uint32_t)pVM->hm.s.vmx.msr.vmx_cr4_fixed0;
2061
2062 if (!pVM->hm.s.fNestedPaging)
2063 {
2064 switch (pVCpu->hm.s.enmShadowMode)
2065 {
2066 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
2067 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
2068 case PGMMODE_32_BIT: /* 32-bit paging. */
2069 val &= ~X86_CR4_PAE;
2070 break;
2071
2072 case PGMMODE_PAE: /* PAE paging. */
2073 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
2074 /** Must use PAE paging as we could use physical memory > 4 GB */
2075 val |= X86_CR4_PAE;
2076 break;
2077
2078 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
2079 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
2080#ifdef VBOX_ENABLE_64_BITS_GUESTS
2081 break;
2082#else
2083 AssertFailed();
2084 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2085#endif
2086 default: /* shut up gcc */
2087 AssertFailed();
2088 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2089 }
2090 }
2091 else if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
2092 && !pVM->hm.s.vmx.fUnrestrictedGuest)
2093 {
2094 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
2095 val |= X86_CR4_PSE;
2096            /* Our identity mapping is a 32-bit page directory. */
2097 val &= ~X86_CR4_PAE;
2098 }
2099
2100 /*
2101 * Turn off VME if we're in emulated real mode.
2102 */
2103 if ( CPUMIsGuestInRealModeEx(pCtx)
2104 && pVM->hm.s.vmx.pRealModeTSS)
2105 {
2106 val &= ~X86_CR4_VME;
2107 }
2108
2109 rc |= VMXWriteVmcs64(VMX_VMCS_GUEST_CR4, val);
2110 Log2(("Guest CR4 %08x\n", val));
2111
2112 /*
2113         * CR4 flags owned by the host; if the guest attempts to change them, the VM will exit.
2114 */
2115 val = 0
2116 | X86_CR4_VME
2117 | X86_CR4_PAE
2118 | X86_CR4_PGE
2119 | X86_CR4_PSE
2120 | X86_CR4_VMXE;
2121 pVCpu->hm.s.vmx.cr4_mask = val;
2122
2123 rc |= VMXWriteVmcs(VMX_VMCS_CTRL_CR4_MASK, val);
2124 Log2(("Guest CR4-mask %08x\n", val));
2125 AssertRC(rc);
2126 }
2127
2128#if 0
2129 /* Enable single stepping if requested and CPU supports it. */
2130 if (pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG)
2131 if (DBGFIsStepping(pVCpu))
2132 {
2133 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
2134 rc = VMXWriteVmcs(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2135 AssertRC(rc);
2136 }
2137#endif
2138
2139 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR3)
2140 {
2141 if (pVM->hm.s.fNestedPaging)
2142 {
2143 Assert(PGMGetHyperCR3(pVCpu));
2144 pVCpu->hm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
2145
2146 Assert(!(pVCpu->hm.s.vmx.GCPhysEPTP & 0xfff));
2147 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
2148 pVCpu->hm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
2149 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
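            /* Layout reminder (from the SDM, stated here as a hint rather than derived from the
               macros): EPTP bits 2:0 hold the memory type (6 = write-back), bits 5:3 the page-walk
               length minus one (3 for a 4-level walk), and bits 51:12 the PML4 table address --
               hence the page-aligned assertion above. */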
2150
2151 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.GCPhysEPTP);
2152 AssertRC(rc);
2153
2154 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
2155 && !pVM->hm.s.vmx.fUnrestrictedGuest)
2156 {
2157 RTGCPHYS GCPhys;
2158
2159 /* We convert it here every time as PCI regions could be reconfigured. */
2160 rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
2161 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hm.s.vmx.pNonPagingModeEPTPageTable));
2162
2163 /*
2164 * We use our identity mapping page table here as we need to map guest virtual to
2165 * guest physical addresses; EPT will take care of the translation to host physical addresses.
2166 */
2167 val = GCPhys;
2168 }
2169 else
2170 {
2171 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
2172 val = pCtx->cr3;
2173 rc = hmR0VmxLoadPaePdpes(pVCpu, pCtx);
2174 AssertRCReturn(rc, rc);
2175 }
2176 }
2177 else
2178 {
2179 val = PGMGetHyperCR3(pVCpu);
2180 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
2181 }
2182
2183 /* Save our shadow CR3 register. */
2184 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_CR3, val);
2185 AssertRC(rc);
2186 }
2187
2188 /*
2189 * Guest CPU context: Debug registers.
2190 */
2191 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_DEBUG)
2192 {
2193 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
2194 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
2195
2196 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2197 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2198 pCtx->dr[7] |= 0x400; /* must be one */
2199
2200 /* Resync DR7 */
2201 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
2202 AssertRC(rc);
2203
2204#ifdef DEBUG
2205 /* Sync the hypervisor debug state now if any breakpoint is armed. */
2206 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
2207 && !CPUMIsHyperDebugStateActive(pVCpu)
2208 && !DBGFIsStepping(pVCpu))
2209 {
2210 /* Save the host and load the hypervisor debug state. */
2211 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2212 AssertRC(rc);
2213
2214 /* DRx intercepts remain enabled. */
2215
2216 /* Override dr7 with the hypervisor value. */
2217 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
2218 AssertRC(rc);
2219 }
2220 else
2221#endif
2222 /* Sync the debug state now if any breakpoint is armed. */
2223 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
2224 && !CPUMIsGuestDebugStateActive(pVCpu)
2225 && !DBGFIsStepping(pVCpu))
2226 {
2227 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
2228
2229 /* Disable DRx move intercepts. */
2230 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2231 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2232 AssertRC(rc);
2233
2234 /* Save the host and load the guest debug state. */
2235 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2236 AssertRC(rc);
2237 }
2238
2239 /* IA32_DEBUGCTL MSR. */
2240 rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0);
2241 AssertRC(rc);
2242
2243 /** @todo do we really ever need this? */
2244 rc |= VMXWriteVmcs(VMX_VMCS_GUEST_PENDING_DEBUG_EXCEPTIONS, 0);
2245 AssertRC(rc);
2246 }
2247
2248 /*
2249 * 64-bit guest mode.
2250 */
2251 if (CPUMIsGuestInLongModeEx(pCtx))
2252 {
2253#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
2254 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2255#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2256 pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
2257#else
2258# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
2259 if (!pVM->hm.s.fAllow64BitGuests)
2260 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
2261# endif
2262 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64;
2263#endif
2264 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_MSR)
2265 {
2266 /* Update these as wrmsr might have changed them. */
2267 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_FS_BASE, pCtx->fs.u64Base);
2268 AssertRC(rc);
2269 rc = VMXWriteVmcs64(VMX_VMCS_GUEST_GS_BASE, pCtx->gs.u64Base);
2270 AssertRC(rc);
2271 }
2272 }
2273 else
2274 {
2275 pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32;
2276 }
2277
2278 hmR0VmxUpdateExceptionBitmap(pVM, pVCpu, pCtx);
2279
2280#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2281 /*
2282 * Store all guest MSRs in the VM-entry load area, so they will be loaded
2283 * during VM-entry and restored into the VM-exit store area during VM-exit.
2284 */
2285 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvGuestMsr;
2286 unsigned idxMsr = 0;
2287
2288 uint32_t u32GstExtFeatures;
2289 uint32_t u32Temp;
2290 CPUMGetGuestCpuId(pVCpu, 0x80000001, &u32Temp, &u32Temp, &u32Temp, &u32GstExtFeatures);
2291
2292 if (u32GstExtFeatures & (X86_CPUID_EXT_FEATURE_EDX_NX | X86_CPUID_EXT_FEATURE_EDX_LONG_MODE))
2293 {
2294 pMsr->u32IndexMSR = MSR_K6_EFER;
2295 pMsr->u32Reserved = 0;
2296 pMsr->u64Value = pCtx->msrEFER;
2297 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
2298 if (!CPUMIsGuestInLongModeEx(pCtx))
2299 pMsr->u64Value &= ~(MSR_K6_EFER_LMA | MSR_K6_EFER_LME);
2300 pMsr++; idxMsr++;
2301
2302 if (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE)
2303 {
2304 pMsr->u32IndexMSR = MSR_K8_LSTAR;
2305 pMsr->u32Reserved = 0;
2306            pMsr->u64Value = pCtx->msrLSTAR; /* 64-bit mode syscall rip */
2307 pMsr++; idxMsr++;
2308 pMsr->u32IndexMSR = MSR_K6_STAR;
2309 pMsr->u32Reserved = 0;
2310 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
2311 pMsr++; idxMsr++;
2312 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
2313 pMsr->u32Reserved = 0;
2314 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
2315 pMsr++; idxMsr++;
2316
2317 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2318#if 0
2319 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
2320 pMsr->u32Reserved = 0;
2321 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
2322 pMsr++; idxMsr++;
2323#endif
2324 }
2325 }
2326
2327 if ( pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP
2328 && (u32GstExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP))
2329 {
2330 pMsr->u32IndexMSR = MSR_K8_TSC_AUX;
2331 pMsr->u32Reserved = 0;
2332 rc = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &pMsr->u64Value);
2333 AssertRC(rc);
2334 pMsr++; idxMsr++;
2335 }
2336
2337 pVCpu->hm.s.vmx.cGuestMsrs = idxMsr;
2338
2339 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
2340 AssertRC(rc);
2341
2342 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
2343 AssertRC(rc);
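    /* Note (summarising the effect of the two counts above): the same idxMsr entries in pvGuestMsr
       are loaded into the CPU at VM-entry and written back into the same area at VM-exit, which is
       what lets VMXR0SaveGuestState() simply walk cGuestMsrs entries to sync them into pCtx. */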
2344#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2345
2346#if 0 /* Temp move for testing. */
2347 bool fOffsettedTsc;
2348 if (pVM->hm.s.vmx.fUsePreemptTimer)
2349 {
2350 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset);
2351
2352 /* Make sure the returned values have sane upper and lower boundaries. */
2353 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
2354
2355 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
2356 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
2357
2358 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
2359 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
2360 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
2361 AssertRC(rc);
2362 }
2363 else
2364 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset);
2365
2366 if (fOffsettedTsc)
2367 {
2368 uint64_t u64CurTSC = ASMReadTSC();
2369 if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset > TMCpuTickGetLastSeen(pVCpu))
2370 {
2371 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
2372 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset);
2373 AssertRC(rc);
2374
2375 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2376 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2377 AssertRC(rc);
2378 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
2379 }
2380 else
2381 {
2382 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
2383 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
2384 pVCpu->hm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset,
2385 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hm.s.vmx.u64TSCOffset,
2386 TMCpuTickGet(pVCpu)));
2387 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2388 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2389 AssertRC(rc);
2390 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
2391 }
2392 }
2393 else
2394 {
2395 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
2396 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
2397 AssertRC(rc);
2398 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
2399 }
2400#endif
2401
2402 /* Done with the major changes */
2403 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_ALL_GUEST;
2404
2405 /* Minimal guest state update (ESP, EIP, EFLAGS mostly) */
2406 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
2407 return rc;
2408}
2409
2410
2411/**
2412 * Syncs back the guest state from VMCS.
2413 *
2414 * @returns VBox status code.
2415 * @param pVM Pointer to the VM.
2416 * @param pVCpu Pointer to the VMCPU.
2417 * @param pCtx Pointer to the guest CPU context.
2418 */
2419DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2420{
2421 RTGCUINTREG val, valShadow;
2422 RTGCUINTPTR uInterruptState;
2423 int rc;
2424
2425 /* First sync back EIP, ESP, and EFLAGS. */
2426 rc = VMXReadCachedVmcs(VMX_VMCS_GUEST_RIP, &val);
2427 AssertRC(rc);
2428 pCtx->rip = val;
2429 rc = VMXReadCachedVmcs(VMX_VMCS_GUEST_RSP, &val);
2430 AssertRC(rc);
2431 pCtx->rsp = val;
2432 rc = VMXReadCachedVmcs(VMX_VMCS_GUEST_RFLAGS, &val);
2433 AssertRC(rc);
2434 pCtx->eflags.u32 = val;
2435
2436 /* Take care of instruction fusing (sti, mov ss) */
2437 rc |= VMXReadCachedVmcs(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
2438 uInterruptState = val;
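    /* Interruptibility-state layout (VMX-defined, noted here for the assertion below): bit 0 =
       blocking by STI, bit 1 = blocking by MOV SS, bits 2/3 = SMI/NMI blocking -- which is why
       only the values 1 and 2 are expected in this path. */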
2439 if (uInterruptState != 0)
2440 {
2441 Assert(uInterruptState <= 2); /* only sti & mov ss */
2442 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
2443 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
2444 }
2445 else
2446 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2447
2448 /* Control registers. */
2449 VMXReadCachedVmcs(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
2450 VMXReadCachedVmcs(VMX_VMCS_GUEST_CR0, &val);
2451 val = (valShadow & pVCpu->hm.s.vmx.cr0_mask) | (val & ~pVCpu->hm.s.vmx.cr0_mask);
2452 CPUMSetGuestCR0(pVCpu, val);
2453
2454 VMXReadCachedVmcs(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
2455 VMXReadCachedVmcs(VMX_VMCS_GUEST_CR4, &val);
2456 val = (valShadow & pVCpu->hm.s.vmx.cr4_mask) | (val & ~pVCpu->hm.s.vmx.cr4_mask);
2457 CPUMSetGuestCR4(pVCpu, val);
2458
2459 /*
2460     * No reason to sync back the CRx registers; the guest cannot change them, except in the
2461     * nested paging case where it can change CR3 and CR4.
2462 */
2463 if ( pVM->hm.s.fNestedPaging
2464 && CPUMIsGuestInPagedProtectedModeEx(pCtx)) /** @todo check if we will always catch mode switches and such... */
2465 {
2466 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
2467
2468 /* Can be updated behind our back in the nested paging case. */
2469 CPUMSetGuestCR2(pVCpu, pCache->cr2);
2470
2471 VMXReadCachedVmcs(VMX_VMCS_GUEST_CR3, &val);
2472
2473 if (val != pCtx->cr3)
2474 {
2475 CPUMSetGuestCR3(pVCpu, val);
2476 PGMUpdateCR3(pVCpu, val);
2477 }
2478 rc = hmR0VmxSavePaePdpes(pVCpu, pCtx);
2479 AssertRCReturn(rc, rc);
2480 }
2481
2482 /* Sync back DR7. */
2483 VMXReadCachedVmcs(VMX_VMCS_GUEST_DR7, &val);
2484 pCtx->dr[7] = val;
2485
2486 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2487 VMX_READ_SELREG(ES, es);
2488 VMX_READ_SELREG(SS, ss);
2489 VMX_READ_SELREG(CS, cs);
2490 VMX_READ_SELREG(DS, ds);
2491 VMX_READ_SELREG(FS, fs);
2492 VMX_READ_SELREG(GS, gs);
2493
2494 /* System MSRs */
2495 VMXReadCachedVmcs(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2496 pCtx->SysEnter.cs = val;
2497 VMXReadCachedVmcs(VMX_VMCS_GUEST_SYSENTER_EIP, &val);
2498 pCtx->SysEnter.eip = val;
2499 VMXReadCachedVmcs(VMX_VMCS_GUEST_SYSENTER_ESP, &val);
2500 pCtx->SysEnter.esp = val;
2501
2502 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2503 VMX_READ_SELREG(LDTR, ldtr);
2504
2505 VMXReadCachedVmcs(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2506 pCtx->gdtr.cbGdt = val;
2507 VMXReadCachedVmcs(VMX_VMCS_GUEST_GDTR_BASE, &val);
2508 pCtx->gdtr.pGdt = val;
2509
2510 VMXReadCachedVmcs(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2511 pCtx->idtr.cbIdt = val;
2512 VMXReadCachedVmcs(VMX_VMCS_GUEST_IDTR_BASE, &val);
2513 pCtx->idtr.pIdt = val;
2514
2515 /* Real mode emulation using v86 mode. */
2516 if ( CPUMIsGuestInRealModeEx(pCtx)
2517 && pVM->hm.s.vmx.pRealModeTSS)
2518 {
2519 /* Hide our emulation flags */
2520 pCtx->eflags.Bits.u1VM = 0;
2521
2522 /* Restore original IOPL setting as we always use 0. */
2523 pCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2524
2525 /* Force a TR resync every time in case we switch modes. */
2526 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_TR;
2527 }
2528 else
2529 {
2530 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2531 VMX_READ_SELREG(TR, tr);
2532 }
2533
2534#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2535 /*
2536 * Save the possibly changed MSRs that we automatically restore and save during a world switch.
2537 */
2538 for (unsigned i = 0; i < pVCpu->hm.s.vmx.cGuestMsrs; i++)
2539 {
2540 PVMXMSR pMsr = (PVMXMSR)pVCpu->hm.s.vmx.pvGuestMsr;
2541 pMsr += i;
2542
2543 switch (pMsr->u32IndexMSR)
2544 {
2545 case MSR_K8_LSTAR:
2546 pCtx->msrLSTAR = pMsr->u64Value;
2547 break;
2548 case MSR_K6_STAR:
2549 pCtx->msrSTAR = pMsr->u64Value;
2550 break;
2551 case MSR_K8_SF_MASK:
2552 pCtx->msrSFMASK = pMsr->u64Value;
2553 break;
2554 /* The KERNEL_GS_BASE MSR doesn't work reliably with auto load/store. See @bugref{6208} */
2555#if 0
2556 case MSR_K8_KERNEL_GS_BASE:
2557 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2558 break;
2559#endif
2560 case MSR_K8_TSC_AUX:
2561 CPUMSetGuestMsr(pVCpu, MSR_K8_TSC_AUX, pMsr->u64Value);
2562 break;
2563
2564 case MSR_K6_EFER:
2565 /* EFER can't be changed without causing a VM-exit. */
2566 /* Assert(pCtx->msrEFER == pMsr->u64Value); */
2567 break;
2568
2569 default:
2570 AssertFailed();
2571 return VERR_HM_UNEXPECTED_LD_ST_MSR;
2572 }
2573 }
2574#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2575 return VINF_SUCCESS;
2576}
2577
2578
2579/**
2580 * Dummy placeholder for TLB flush handling before VM-entry. Used in the case
2581 * where neither EPT nor VPID is supported by the CPU.
2582 *
2583 * @param pVM Pointer to the VM.
2584 * @param pVCpu Pointer to the VMCPU.
2585 */
2586static DECLCALLBACK(void) hmR0VmxSetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2587{
2588 NOREF(pVM);
2589 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2590 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2591 pVCpu->hm.s.TlbShootdown.cPages = 0;
2592 return;
2593}
2594
2595
2596/**
2597 * Sets up the tagged TLB for EPT+VPID.
2598 *
2599 * @param pVM Pointer to the VM.
2600 * @param pVCpu Pointer to the VMCPU.
2601 */
2602static DECLCALLBACK(void) hmR0VmxSetupTLBBoth(PVM pVM, PVMCPU pVCpu)
2603{
2604 PHMGLOBLCPUINFO pCpu;
2605
2606 Assert(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid);
2607
2608 pCpu = HMR0GetCurrentCpu();
2609
2610 /*
2611     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2612     * This can happen both for start & resume due to long jumps back to ring-3.
2613     * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
2614     * or the host CPU has come back online after a suspend/resume, so we cannot reuse the current ASID anymore.
2615 */
2616 bool fNewAsid = false;
2617 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2618 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2619 {
2620 pVCpu->hm.s.fForceTLBFlush = true;
2621 fNewAsid = true;
2622 }
2623
2624 /*
2625 * Check for explicit TLB shootdowns.
2626 */
2627 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2628 pVCpu->hm.s.fForceTLBFlush = true;
2629
2630 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2631
2632 if (pVCpu->hm.s.fForceTLBFlush)
2633 {
2634 if (fNewAsid)
2635 {
2636 ++pCpu->uCurrentAsid;
2637 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2638 {
2639 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
2640 pCpu->cTlbFlushes++;
2641 pCpu->fFlushAsidBeforeUse = true;
2642 }
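            /* Worked example (numbers invented purely for illustration): with uMaxAsid = 64,
               handing out the 64th ASID on this host CPU wraps uCurrentAsid back to 1, bumps
               cTlbFlushes (invalidating every VCPU's cached ASID) and forces a flush before the
               recycled ASID is used again. */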
2643
2644 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2645 if (pCpu->fFlushAsidBeforeUse)
2646 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2647 }
2648 else
2649 {
2650 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT)
2651 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_SINGLE_CONTEXT, 0 /* GCPtr */);
2652 else
2653 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2654 }
2655
2656 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2657 pVCpu->hm.s.fForceTLBFlush = false;
2658 }
2659 else
2660 {
2661 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
2662 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
2663 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
2664 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
2665
2666 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2667 * not be executed. See hmQueueInvlPage() where it is commented
2668 * out. Support individual entry flushing someday. */
2669 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2670 {
2671 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
2672
2673 /*
2674 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2675 * as supported by the CPU.
2676 */
2677 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2678 {
2679 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
2680 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
2681 }
2682 else
2683 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2684 }
2685 else
2686 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2687 }
2688
2689 pVCpu->hm.s.TlbShootdown.cPages = 0;
2690 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2691
2692 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2693 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2694 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2695 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
2696 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2697 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2698
2699 /* Update VMCS with the VPID. */
2700 int rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2701 AssertRC(rc);
2702}
2703
2704
2705/**
2706 * Sets up the tagged TLB for EPT only.
2707 *
2709 * @param pVM Pointer to the VM.
2710 * @param pVCpu Pointer to the VMCPU.
2711 */
2712static DECLCALLBACK(void) hmR0VmxSetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2713{
2714 PHMGLOBLCPUINFO pCpu;
2715
2716 Assert(pVM->hm.s.fNestedPaging);
2717 Assert(!pVM->hm.s.vmx.fVpid);
2718
2719 pCpu = HMR0GetCurrentCpu();
2720
2721 /*
2722     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2723     * This can happen both for start & resume due to long jumps back to ring-3.
2724     * A change in the TLB flush count implies the host CPU has come back online after a suspend/resume.
2725 */
2726 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2727 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2728 {
2729 pVCpu->hm.s.fForceTLBFlush = true;
2730 }
2731
2732 /*
2733 * Check for explicit TLB shootdown flushes.
2734 */
2735 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2736 pVCpu->hm.s.fForceTLBFlush = true;
2737
2738 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2739 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2740
2741 if (pVCpu->hm.s.fForceTLBFlush)
2742 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2743 else
2744 {
2745 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2746 * not be executed. See hmQueueInvlPage() where it is commented
2747 * out. Support individual entry flushing someday. */
2748 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2749 {
2750 /*
2751 * We cannot flush individual entries without VPID support. Flush using EPT.
2752 */
2753 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
2754 hmR0VmxFlushEPT(pVM, pVCpu, pVM->hm.s.vmx.enmFlushEpt);
2755 }
2756 }
2757    pVCpu->hm.s.TlbShootdown.cPages = 0;
2758 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2759
2760#ifdef VBOX_WITH_STATISTICS
2761 /** @todo r=ramshankar: this is not accurate anymore with the VPID+EPT
2762 * handling. Should be fixed later. */
2763 if (pVCpu->hm.s.fForceTLBFlush)
2764 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2765 else
2766 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2767#endif
2768}
2769
2770
2771/**
2772 * Sets up the tagged TLB for VPID.
2773 *
2775 * @param pVM Pointer to the VM.
2776 * @param pVCpu Pointer to the VMCPU.
2777 */
2778static DECLCALLBACK(void) hmR0VmxSetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2779{
2780 PHMGLOBLCPUINFO pCpu;
2781
2782 Assert(pVM->hm.s.vmx.fVpid);
2783 Assert(!pVM->hm.s.fNestedPaging);
2784
2785 pCpu = HMR0GetCurrentCpu();
2786
2787 /*
2788     * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
2789     * This can happen both for start & resume due to long jumps back to ring-3.
2790     * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB
2791     * or the host CPU has come back online after a suspend/resume, so we cannot reuse the current ASID anymore.
2792 */
2793 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
2794 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
2795 {
2796 /* Force a TLB flush on VM entry. */
2797 pVCpu->hm.s.fForceTLBFlush = true;
2798 }
2799
2800 /*
2801 * Check for explicit TLB shootdown flushes.
2802 */
2803 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2804 pVCpu->hm.s.fForceTLBFlush = true;
2805
2806 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
2807
2808 if (pVCpu->hm.s.fForceTLBFlush)
2809 {
2810 ++pCpu->uCurrentAsid;
2811 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
2812 {
2813 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
2814 pCpu->cTlbFlushes++;
2815 pCpu->fFlushAsidBeforeUse = true;
2816 }
2817
2818 pVCpu->hm.s.fForceTLBFlush = false;
2819 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
2820 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
2821 if (pCpu->fFlushAsidBeforeUse)
2822 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2823 }
2824 else
2825 {
2826 AssertMsg(pVCpu->hm.s.uCurrentAsid && pCpu->uCurrentAsid,
2827 ("hm->uCurrentAsid=%lu hm->cTlbFlushes=%lu cpu->uCurrentAsid=%lu cpu->cTlbFlushes=%lu\n",
2828 pVCpu->hm.s.uCurrentAsid, pVCpu->hm.s.cTlbFlushes,
2829 pCpu->uCurrentAsid, pCpu->cTlbFlushes));
2830
2831 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
2832 * not be executed. See hmQueueInvlPage() where it is commented
2833 * out. Support individual entry flushing someday. */
2834 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2835 {
2836 /*
2837 * Flush individual guest entries using VPID from the TLB or as little as possible with EPT
2838 * as supported by the CPU.
2839 */
2840 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
2841 {
2842 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
2843 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, pVCpu->hm.s.TlbShootdown.aPages[i]);
2844 }
2845 else
2846 hmR0VmxFlushVPID(pVM, pVCpu, pVM->hm.s.vmx.enmFlushVpid, 0 /* GCPtr */);
2847 }
2848 }
2849 pVCpu->hm.s.TlbShootdown.cPages = 0;
2850 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2851
2852 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
2853 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
2854 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
2855 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
2856 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
2857 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
2858
2859 int rc = VMXWriteVmcs(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hm.s.uCurrentAsid);
2860 AssertRC(rc);
2861
2862# ifdef VBOX_WITH_STATISTICS
2863 /** @todo r=ramshankar: this is not accurate anymore with EPT+VPID handling.
2864 * Should be fixed later. */
2865 if (pVCpu->hm.s.fForceTLBFlush)
2866 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
2867 else
2868 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
2869# endif
2870}
2871
2872
2873/**
2874 * Runs guest code in a VT-x VM.
2875 *
2876 * @returns VBox status code.
2877 * @param pVM Pointer to the VM.
2878 * @param pVCpu Pointer to the VMCPU.
2879 * @param pCtx Pointer to the guest CPU context.
2880 */
2881VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2882{
2883 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
2884 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1);
2885 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2);
2886
2887 VBOXSTRICTRC rc = VINF_SUCCESS;
2888 int rc2;
2889 RTGCUINTREG val;
2890 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2891 RTGCUINTREG instrError, cbInstr;
2892 RTGCUINTPTR exitQualification = 0;
2893 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2894 RTGCUINTPTR errCode, instrInfo;
2895 bool fSetupTPRCaching = false;
2896 uint64_t u64OldLSTAR = 0;
2897 uint8_t u8LastTPR = 0;
2898 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2899 unsigned cResume = 0;
2900#ifdef VBOX_STRICT
2901 RTCPUID idCpuCheck;
2902 bool fWasInLongMode = false;
2903#endif
2904#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2905 uint64_t u64LastTime = RTTimeMilliTS();
2906#endif
2907
2908 Assert(!(pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2909 || (pVCpu->hm.s.vmx.pbVirtApic && pVM->hm.s.vmx.pbApicAccess));
2910
2911 /*
2912 * Check if we need to use TPR shadowing.
2913 */
2914 if ( CPUMIsGuestInLongModeEx(pCtx)
2915 || ( (( pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
2916 || pVM->hm.s.fTRPPatchingAllowed)
2917 && pVM->hm.s.fHasIoApic)
2918 )
2919 {
2920 fSetupTPRCaching = true;
2921 }
2922
2923 Log2(("\nE"));
2924
2925 /* This is not ideal, but if we don't clear the event injection in the VMCS right here,
2926 * we may end up injecting some stale event into a VM, including injecting an event that
2927 * originated before a VM reset *after* the VM has been reset. See @bugref{6220}.
2928 */
2929 VMXWriteVmcs(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0);
2930
2931#ifdef VBOX_STRICT
2932 {
2933 RTCCUINTREG val2;
2934
2935 rc2 = VMXReadVmcs(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, &val2);
2936 AssertRC(rc2);
2937 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2938
2939 /* allowed zero */
2940 if ((val2 & pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2941 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2942
2943 /* allowed one */
2944 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2945 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2946
2947 rc2 = VMXReadVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, &val2);
2948 AssertRC(rc2);
2949 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2950
2951 /*
2952 * Must be set according to the MSR, but can be cleared if nested paging is used.
2953 */
2954 if (pVM->hm.s.fNestedPaging)
2955 {
2956 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2957 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2958 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2959 }
2960
2961 /* allowed zero */
2962 if ((val2 & pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2963 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2964
2965 /* allowed one */
2966 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2967 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2968
2969 rc2 = VMXReadVmcs(VMX_VMCS32_CTRL_ENTRY_CONTROLS, &val2);
2970 AssertRC(rc2);
2971 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2972
2973 /* allowed zero */
2974 if ((val2 & pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_entry.n.disallowed0)
2975 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2976
2977 /* allowed one */
2978 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2979 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2980
2981 rc2 = VMXReadVmcs(VMX_VMCS32_CTRL_EXIT_CONTROLS, &val2);
2982 AssertRC(rc2);
2983 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2984
2985 /* allowed zero */
2986 if ((val2 & pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hm.s.vmx.msr.vmx_exit.n.disallowed0)
2987 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2988
2989 /* allowed one */
2990 if ((val2 & ~pVM->hm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2991 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2992 }
2993 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2994#endif /* VBOX_STRICT */
2995
2996#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2997 pVCpu->hm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2998#endif
2999
3000 /*
3001 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
3002 */
3003ResumeExecution:
3004 if (!STAM_REL_PROFILE_ADV_IS_RUNNING(&pVCpu->hm.s.StatEntry))
3005 STAM_REL_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit2, &pVCpu->hm.s.StatEntry, x);
3006 AssertMsg(pVCpu->hm.s.idEnteredCpu == RTMpCpuId(),
3007 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
3008 (int)pVCpu->hm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
3009 Assert(!HMR0SuspendPending());
3010 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
3011 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
3012
3013 /*
3014 * Safety precaution; looping for too long here can have a very bad effect on the host.
3015 */
3016 if (RT_UNLIKELY(++cResume > pVM->hm.s.cMaxResumeLoops))
3017 {
3018 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
3019 rc = VINF_EM_RAW_INTERRUPT;
3020 goto end;
3021 }
3022
3023 /*
3024 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
3025 */
3026 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
3027 {
3028 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
3029 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
3030 {
3031 /*
3032 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
3033 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
3034             * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process. This could
3035             * break the guest. Sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
3036 */
3037 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
3038 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
3039 rc2 = VMXWriteVmcs(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
3040 AssertRC(rc2);
3041 }
3042 }
3043 else
3044 {
3045 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
3046 rc2 = VMXWriteVmcs(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
3047 AssertRC(rc2);
3048 }
3049
3050#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
3051 if (RT_UNLIKELY((cResume & 0xf) == 0))
3052 {
3053 uint64_t u64CurTime = RTTimeMilliTS();
3054
3055 if (RT_UNLIKELY(u64CurTime > u64LastTime))
3056 {
3057 u64LastTime = u64CurTime;
3058 TMTimerPollVoid(pVM, pVCpu);
3059 }
3060 }
3061#endif
3062
3063 /*
3064 * Check for pending actions that force us to go back to ring-3.
3065 */
3066 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
3067 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
3068 {
3069 /* Check if a sync operation is pending. */
3070 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
3071 {
3072 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
3073 if (rc != VINF_SUCCESS)
3074 {
3075 AssertRC(VBOXSTRICTRC_VAL(rc));
3076 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
3077 goto end;
3078 }
3079 }
3080
3081#ifdef DEBUG
3082 /* Intercept X86_XCPT_DB if stepping is enabled */
3083 if (!DBGFIsStepping(pVCpu))
3084#endif
3085 {
3086 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK)
3087 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
3088 {
3089 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchToR3);
3090 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
3091 goto end;
3092 }
3093 }
3094
3095 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
3096 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
3097 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
3098 {
3099 rc = VINF_EM_PENDING_REQUEST;
3100 goto end;
3101 }
3102
3103 /* Check if a pgm pool flush is in progress. */
3104 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
3105 {
3106 rc = VINF_PGM_POOL_FLUSH_PENDING;
3107 goto end;
3108 }
3109
3110 /* Check if DMA work is pending (2nd+ run). */
3111 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
3112 {
3113 rc = VINF_EM_RAW_TO_R3;
3114 goto end;
3115 }
3116 }
3117
3118#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3119 /*
3120     * Exit to ring-3 if preemption or other ring-3 work is pending.
3121 *
3122 * Interrupts are disabled before the call to make sure we don't miss any interrupt
3123 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
3124 * further down, but hmR0VmxCheckPendingInterrupt makes that impossible.)
3125 *
3126     * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
3127 * shootdowns rely on this.
3128 */
3129 uOldEFlags = ASMIntDisableFlags();
3130 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
3131 {
3132 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptPending);
3133 rc = VINF_EM_RAW_INTERRUPT;
3134 goto end;
3135 }
3136 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3137#endif
3138
3139 /*
3140 * When external interrupts are pending, we should exit the VM when IF is set.
3141 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!
3142 */
3143 rc = hmR0VmxCheckPendingInterrupt(pVM, pVCpu, pCtx);
3144 if (RT_FAILURE(rc))
3145 goto end;
3146
3147 /** @todo check timers?? */
3148
3149 /*
3150 * TPR caching using CR8 is only available in 64-bit mode.
3151     * Note: AMD has a 32-bit exception to this (X86_CPUID_AMD_FEATURE_ECX_CR8L), but it appears to be missing on Intel CPUs.
3152     * Note: We can't do this in LoadGuestState() as PDMApicGetTPR can jump back to ring-3 (lock)!! (no longer true).
3153 */
3154    /** @todo query and update the TPR only when it could have been changed (mmio
3155     *        access & wrmsr (x2apic)). */
3156 if (fSetupTPRCaching)
3157 {
3158 /* TPR caching in CR8 */
3159 bool fPending;
3160
3161 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
3162 AssertRC(rc2);
3163 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
3164 pVCpu->hm.s.vmx.pbVirtApic[0x80] = u8LastTPR;
3165
3166 /*
3167 * Two options here:
3168 * - external interrupt pending, but masked by the TPR value.
3169         *   -> a CR8 update that lowers the current TPR value should cause an exit
3170         * - no pending interrupts
3171         *   -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
3172 */
3173
3174 /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
3175 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0);
3176 AssertRC(VBOXSTRICTRC_VAL(rc));
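        /* Example of the threshold above (illustrative values): an APIC TPR of 0x50 with an
           interrupt pending gives a threshold of 5, so a guest CR8 write that drops the TPR below
           that triggers a TPR-below-threshold VM-exit and lets us deliver the interrupt. */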
3177
3178 if (pVM->hm.s.fTPRPatchingActive)
3179 {
3180 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3181 /* Our patch code uses LSTAR for TPR caching. */
3182 pCtx->msrLSTAR = u8LastTPR;
3183
3184 /** @todo r=ramshankar: we should check for MSR-bitmap support here. */
3185 if (fPending)
3186 {
3187 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
3188 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
3189 }
3190 else
3191 {
3192 /*
3193                 * No interrupts are pending, so we don't need to be explicitly notified.
3194 * There are enough world switches for detecting pending interrupts.
3195 */
3196 hmR0VmxSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
3197 }
3198 }
3199 }
3200
3201#ifdef LOG_ENABLED
3202 if ( pVM->hm.s.fNestedPaging
3203 || pVM->hm.s.vmx.fVpid)
3204 {
3205 PHMGLOBLCPUINFO pCpu = HMR0GetCurrentCpu();
3206 if (pVCpu->hm.s.idLastCpu != pCpu->idCpu)
3207 {
3208 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hm.s.idLastCpu,
3209 pCpu->idCpu));
3210 }
3211 else if (pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
3212 {
3213 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hm.s.cTlbFlushes,
3214 pCpu->cTlbFlushes));
3215 }
3216 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
3217 LogFlow(("Manual TLB flush\n"));
3218 }
3219#endif
3220#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
3221 PGMRZDynMapFlushAutoSet(pVCpu);
3222#endif
3223
3224 /*
3225 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING-3!
3226 * (until the actual world switch)
3227 */
3228#ifdef VBOX_STRICT
3229 idCpuCheck = RTMpCpuId();
3230#endif
3231#ifdef LOG_ENABLED
3232 VMMR0LogFlushDisable(pVCpu);
3233#endif
3234
3235 /*
3236 * Save the host state first.
3237 */
3238 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_HOST_CONTEXT)
3239 {
3240 rc = VMXR0SaveHostState(pVM, pVCpu);
3241 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3242 {
3243 VMMR0LogFlushEnable(pVCpu);
3244 goto end;
3245 }
3246 }
3247
3248 /*
3249 * Load the guest state.
3250 */
3251 if (!pVCpu->hm.s.fContextUseFlags)
3252 {
3253 VMXR0LoadMinimalGuestState(pVM, pVCpu, pCtx);
3254 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadMinimal);
3255 }
3256 else
3257 {
3258 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
3259 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3260 {
3261 VMMR0LogFlushEnable(pVCpu);
3262 goto end;
3263 }
3264 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
3265 }
3266
3267#if 1 /* Moved for testing. */
3268 bool fOffsettedTsc;
3269 if (pVM->hm.s.vmx.fUsePreemptTimer)
3270 {
3271 uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVCpu, &fOffsettedTsc, &pVCpu->hm.s.vmx.u64TSCOffset);
3272
3273 /* Make sure the returned values have sane upper and lower boundaries. */
3274 uint64_t u64CpuHz = SUPGetCpuHzFromGIP(g_pSUPGlobalInfoPage);
3275
3276 cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64 of a second */
3277 cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */
3278
3279 cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift;
3280 uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16);
3281 rc = VMXWriteVmcs(VMX_VMCS32_GUEST_PREEMPTION_TIMER_VALUE, cPreemptionTickCount);
3282 AssertRC(VBOXSTRICTRC_VAL(rc));
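        /* Rough example, assuming a 2 GHz TSC and cPreemptTimerShift = 5: the deadline gets clamped to the
           range [2e9/2048, 2e9/64] ticks, i.e. roughly 0.5 ms to 15.6 ms of guest execution, and is then
           divided by 2^5 because the VMX-preemption timer counts down at the TSC rate divided by
           2^cPreemptTimerShift (advertised in IA32_VMX_MISC). */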
3283 }
3284 else
3285 fOffsettedTsc = TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hm.s.vmx.u64TSCOffset);
3286
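    /* With TSC offsetting the guest reads host-TSC + u64TSCOffset directly; this is only safe while that sum
       stays ahead of the last TSC value the guest has already seen, otherwise the guest TSC would appear to
       go backwards and we intercept and emulate RDTSC/RDTSCP instead (the else paths below). */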
3287 if (fOffsettedTsc)
3288 {
3289 uint64_t u64CurTSC = ASMReadTSC();
3290 if (u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset > TMCpuTickGetLastSeen(pVCpu))
3291 {
3292 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET, applies to RDTSCP too. */
3293 rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, pVCpu->hm.s.vmx.u64TSCOffset);
3294 AssertRC(VBOXSTRICTRC_VAL(rc));
3295
3296 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
3297 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
3298 AssertRC(VBOXSTRICTRC_VAL(rc));
3299 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
3300 }
3301 else
3302 {
3303 /* Fall back to rdtsc, rdtscp emulation as we would otherwise pass decreasing tsc values to the guest. */
3304 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
3305 pVCpu->hm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hm.s.vmx.u64TSCOffset,
3306 TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hm.s.vmx.u64TSCOffset,
3307 TMCpuTickGet(pVCpu)));
3308 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
3309 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
3310 AssertRC(VBOXSTRICTRC_VAL(rc));
3311 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
3312 }
3313 }
3314 else
3315 {
3316 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
3317 rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
3318 AssertRC(VBOXSTRICTRC_VAL(rc));
3319 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
3320 }
3321#endif
3322
3323#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3324 /*
3325 * Disable interrupts to make sure a poke will interrupt execution.
3326 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
3327 */
3328 uOldEFlags = ASMIntDisableFlags();
3329 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
3330#endif
3331
3332     /* Non-register guest-context state. */
3333 /** @todo change me according to cpu state */
3334 rc2 = VMXWriteVmcs(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_VMCS_GUEST_ACTIVITY_ACTIVE);
3335 AssertRC(rc2);
3336
3337 /* Set TLB flush state as checked until we return from the world switch. */
3338 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);
3339 /* Deal with tagged TLB setup and invalidation. */
3340 pVM->hm.s.vmx.pfnFlushTaggedTlb(pVM, pVCpu);
3341
3342 /*
3343 * Manual save and restore:
3344 * - General purpose registers except RIP, RSP
3345 *
3346 * Trashed:
3347 * - CR2 (we don't care)
3348 * - LDTR (reset to 0)
3349 * - DRx (presumably not changed at all)
3350 * - DR7 (reset to 0x400)
3351 * - EFLAGS (reset to RT_BIT(1); not relevant)
3352 */
3353
3354 /* All done! Let's start VM execution. */
3355 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
3356 Assert(idCpuCheck == RTMpCpuId());
3357
3358#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3359 pVCpu->hm.s.vmx.VMCSCache.cResume = cResume;
3360 pVCpu->hm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
3361#endif
3362
3363 /*
3364 * Save the current TPR value in the LSTAR MSR so our patches can access it.
3365 */
3366 if (pVM->hm.s.fTPRPatchingActive)
3367 {
3368 Assert(pVM->hm.s.fTPRPatchingActive);
3369 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3370 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
3371 }
3372
3373 TMNotifyStartOfExecution(pVCpu);
3374
3375#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3376 /*
3377 * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that
3378     * RDTSCPs (that don't cause exits) read the guest MSR. See @bugref{3324}.
3379 */
3380 if ( (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3381 && !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3382 {
3383 pVCpu->hm.s.u64HostTSCAux = ASMRdMsr(MSR_K8_TSC_AUX);
3384 uint64_t u64GuestTSCAux = 0;
3385 rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTSCAux);
3386 AssertRC(rc2);
3387 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTSCAux);
3388 }
3389#endif
3390
3391#ifdef VBOX_WITH_KERNEL_USING_XMM
3392 rc = hmR0VMXStartVMWrapXMM(pVCpu->hm.s.fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM);
3393#else
3394 rc = pVCpu->hm.s.vmx.pfnStartVM(pVCpu->hm.s.fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu);
3395#endif
3396 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false);
3397 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits);
3398
3399     /* Record what is possibly the last TSC value seen by the guest (it may be too high); only relevant when we're in TSC offsetting mode. */
3400 if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
3401 {
3402#ifndef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
3403 /* Restore host's TSC_AUX. */
3404 if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_VMCS_CTRL_PROC_EXEC2_RDTSCP)
3405 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTSCAux);
3406#endif
3407
3408 TMCpuTickSetLastSeen(pVCpu,
3409                          ASMReadTSC() + pVCpu->hm.s.vmx.u64TSCOffset - 0x400 /* guesstimate of world switch overhead in clock ticks */);
3410 }
3411
3412 TMNotifyEndOfExecution(pVCpu);
3413 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
3414 Assert(!(ASMGetFlags() & X86_EFL_IF));
3415
3416 /*
3417 * Restore the host LSTAR MSR if the guest could have changed it.
3418 */
3419 if (pVM->hm.s.fTPRPatchingActive)
3420 {
3421 Assert(pVM->hm.s.fTPRPatchingActive);
3422 pVCpu->hm.s.vmx.pbVirtApic[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
3423 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
3424 }
3425
3426 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
3427 ASMSetFlags(uOldEFlags);
3428#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
3429 uOldEFlags = ~(RTCCUINTREG)0;
3430#endif
3431
3432 AssertMsg(!pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries=%d\n",
3433 pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries));
3434
3435 /* In case we execute a goto ResumeExecution later on. */
3436 pVCpu->hm.s.fResumeVM = true;
3437 pVCpu->hm.s.fForceTLBFlush = false;
3438
3439 /*
3440 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3441 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
3442 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
3443 */
3444
3445 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3446 {
3447 hmR0VmxReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
3448 VMMR0LogFlushEnable(pVCpu);
3449 goto end;
3450 }
3451
3452 /* Success. Query the guest state and figure out what has happened. */
3453
3454 /* Investigate why there was a VM-exit. */
3455 rc2 = VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3456 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
3457
3458 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
3459 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3460 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
3461 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
3462 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
3463 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &errCode);
3464 rc2 |= VMXReadCachedVmcs(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
3465 rc2 |= VMXReadCachedVmcs(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
3466 AssertRC(rc2);
3467
3468 /*
3469 * Sync back the guest state.
3470 */
3471 rc2 = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
3472 AssertRC(rc2);
3473
3474 /* Note! NOW IT'S SAFE FOR LOGGING! */
3475 VMMR0LogFlushEnable(pVCpu);
3476 Log2(("Raw exit reason %08x\n", exitReason));
3477#if ARCH_BITS == 64 /* for the time being */
3478 VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, pCtx, exitReason);
3479#endif
3480
3481 /*
3482 * Check if an injected event was interrupted prematurely.
3483 */
3484 rc2 = VMXReadCachedVmcs(VMX_VMCS32_RO_IDT_INFO, &val);
3485 AssertRC(rc2);
3486 pVCpu->hm.s.Event.u64IntrInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
3487 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hm.s.Event.u64IntrInfo)
3488 /* Ignore 'int xx' as they'll be restarted anyway. */
3489 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT
3490 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3491 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT)
3492 {
3493 Assert(!pVCpu->hm.s.Event.fPending);
3494 pVCpu->hm.s.Event.fPending = true;
3495 /* Error code present? */
3496 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntrInfo))
3497 {
3498 rc2 = VMXReadCachedVmcs(VMX_VMCS32_RO_IDT_ERROR_CODE, &val);
3499 AssertRC(rc2);
3500 pVCpu->hm.s.Event.u32ErrCode = val;
3501 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n",
3502 pVCpu->hm.s.Event.u64IntrInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
3503 }
3504 else
3505 {
3506 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hm.s.Event.u64IntrInfo,
3507 (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3508 pVCpu->hm.s.Event.u32ErrCode = 0;
3509 }
3510 }
3511#ifdef VBOX_STRICT
3512 else if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hm.s.Event.u64IntrInfo)
3513              /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
3514 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hm.s.Event.u64IntrInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT)
3515 {
3516 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n",
3517 pVCpu->hm.s.Event.u64IntrInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
3518 }
3519
3520 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
3521 HMDumpRegs(pVM, pVCpu, pCtx);
3522#endif
3523
3524 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3525 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
3526 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
3527 Log2(("Interruption error code %d\n", (uint32_t)errCode));
3528 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
3529
3530 /*
3531 * Sync back the TPR if it was changed.
3532 */
3533 if ( fSetupTPRCaching
3534 && u8LastTPR != pVCpu->hm.s.vmx.pbVirtApic[0x80])
3535 {
3536 rc2 = PDMApicSetTPR(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[0x80]);
3537 AssertRC(rc2);
3538 }
3539
3540#ifdef DBGFTRACE_ENABLED /** @todo DTrace later. */
3541 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x %016RX64 at %04:%08RX64 %RX64",
3542 exitReason, (uint64_t)exitQualification, pCtx->cs.Sel, pCtx->rip, (uint64_t)intInfo);
3543#endif
3544 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
3545
3546 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
3547 Assert(rc == VINF_SUCCESS); /* might consider VERR_IPE_UNINITIALIZED_STATUS here later... */
3548 switch (exitReason)
3549 {
3550 case VMX_EXIT_XCPT_NMI: /* 0 Exception or non-maskable interrupt (NMI). */
3551 case VMX_EXIT_EXT_INT: /* 1 External interrupt. */
3552 {
3553 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
3554
3555 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3556 {
3557 Assert(exitReason == VMX_EXIT_EXT_INT);
3558 /* External interrupt; leave to allow it to be dispatched again. */
3559 rc = VINF_EM_RAW_INTERRUPT;
3560 break;
3561 }
3562 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub3, y3);
3563 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
3564 {
3565 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
3566                     /* Non-maskable interrupt; return to ring-3 and let it be handled there. */
3567 rc = VINF_EM_RAW_INTERRUPT;
3568 break;
3569
3570 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT_INT: /* External hardware interrupt. */
3571 AssertFailed(); /* can't come here; fails the first check. */
3572 break;
3573
3574 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DB_XCPT: /* Unknown why we get this type for #DB */
3575 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_XCPT: /* Software exception. (#BP or #OF) */
3576 Assert(vector == 1 || vector == 3 || vector == 4);
3577 /* no break */
3578 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT: /* Hardware exception. */
3579 Log2(("Hardware/software interrupt %d\n", vector));
3580 switch (vector)
3581 {
3582 case X86_XCPT_NM:
3583 {
3584 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
3585
3586 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
3587 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
3588 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3589 if (rc == VINF_SUCCESS)
3590 {
3591 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3592
3593 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
3594
3595 /* Continue execution. */
3596 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
3597
3598 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3599 goto ResumeExecution;
3600 }
3601
3602 Log(("Forward #NM fault to the guest\n"));
3603 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
3604 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3605 cbInstr, 0);
3606 AssertRC(rc2);
3607 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3608 goto ResumeExecution;
3609 }
3610
3611 case X86_XCPT_PF: /* Page fault */
3612 {
3613#ifdef VBOX_ALWAYS_TRAP_PF
3614 if (pVM->hm.s.fNestedPaging)
3615 {
3616 /*
3617 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3618 */
3619 Log(("Guest page fault at %RGv cr2=%RGv error code %RGv rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification,
3620 errCode, (RTGCPTR)pCtx->rsp));
3621
3622 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
3623
3624 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3625
3626 /* Now we must update CR2. */
3627 pCtx->cr2 = exitQualification;
3628 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3629 cbInstr, errCode);
3630 AssertRC(rc2);
3631
3632 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3633 goto ResumeExecution;
3634 }
3635#else
3636 Assert(!pVM->hm.s.fNestedPaging);
3637#endif
3638
3639#ifdef VBOX_HM_WITH_GUEST_PATCHING
3640             /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
3641 if ( pVM->hm.s.fTRPPatchingAllowed
3642 && pVM->hm.s.pGuestPatchMem
3643 && (exitQualification & 0xfff) == 0x080
3644 && !(errCode & X86_TRAP_PF_P) /* not present */
3645 && CPUMGetGuestCPL(pVCpu) == 0
3646 && !CPUMIsGuestInLongModeEx(pCtx)
3647 && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
3648 {
3649 RTGCPHYS GCPhysApicBase, GCPhys;
3650 GCPhysApicBase = pCtx->msrApicBase;
3651 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3652
3653 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3654 if ( rc == VINF_SUCCESS
3655 && GCPhys == GCPhysApicBase)
3656 {
3657 /* Only attempt to patch the instruction once. */
3658 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3659 if (!pPatch)
3660 {
3661 rc = VINF_EM_HM_PATCH_TPR_INSTR;
3662 break;
3663 }
3664 }
3665 }
3666#endif
3667
3668 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
3669 /* Exit qualification contains the linear address of the page fault. */
3670 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3671 TRPMSetErrorCode(pVCpu, errCode);
3672 TRPMSetFaultAddress(pVCpu, exitQualification);
3673
3674 /* Shortcut for APIC TPR reads and writes. */
3675 if ( (exitQualification & 0xfff) == 0x080
3676 && !(errCode & X86_TRAP_PF_P) /* not present */
3677 && fSetupTPRCaching
3678 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3679 {
3680 RTGCPHYS GCPhysApicBase, GCPhys;
3681 GCPhysApicBase = pCtx->msrApicBase;
3682 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3683
3684 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
3685 if ( rc == VINF_SUCCESS
3686 && GCPhys == GCPhysApicBase)
3687 {
3688 Log(("Enable VT-x virtual APIC access filtering\n"));
3689 rc2 = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess,
3690 X86_PTE_RW | X86_PTE_P);
3691 AssertRC(rc2);
3692 }
3693 }
3694
3695 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
3696 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
3697 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3698
3699 if (rc == VINF_SUCCESS)
3700 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3701                         Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3702 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
3703
3704 TRPMResetTrap(pVCpu);
3705 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3706 goto ResumeExecution;
3707 }
3708 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3709 {
3710 /*
3711 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
3712 */
3713 Log2(("Forward page fault to the guest\n"));
3714
3715 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3716 /* The error code might have been changed. */
3717 errCode = TRPMGetErrorCode(pVCpu);
3718
3719 TRPMResetTrap(pVCpu);
3720
3721 /* Now we must update CR2. */
3722 pCtx->cr2 = exitQualification;
3723 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3724 cbInstr, errCode);
3725 AssertRC(rc2);
3726
3727 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3728 goto ResumeExecution;
3729 }
3730#ifdef VBOX_STRICT
3731 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
3732 Log2(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
3733#endif
3734 /* Need to go back to the recompiler to emulate the instruction. */
3735 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
3736 TRPMResetTrap(pVCpu);
3737
3738 /* If event delivery caused the #PF (shadow or not), tell TRPM. */
3739 hmR0VmxCheckPendingEvent(pVCpu);
3740 break;
3741 }
3742
3743 case X86_XCPT_MF: /* Floating point exception. */
3744 {
3745 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
3746 if (!(pCtx->cr0 & X86_CR0_NE))
3747 {
3748 /* old style FPU error reporting needs some extra work. */
3749 /** @todo don't fall back to the recompiler, but do it manually. */
3750 rc = VINF_EM_RAW_EMULATE_INSTR;
3751 break;
3752 }
3753 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3754 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3755 cbInstr, errCode);
3756 AssertRC(rc2);
3757
3758 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3759 goto ResumeExecution;
3760 }
3761
3762 case X86_XCPT_DB: /* Debug exception. */
3763 {
3764 uint64_t uDR6;
3765
3766 /*
3767 * DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
3768 *
3769 * Exit qualification bits:
3770 * 3:0 B0-B3 which breakpoint condition was met
3771 * 12:4 Reserved (0)
3772 * 13 BD - debug register access detected
3773 * 14 BS - single step execution or branch taken
3774 * 63:15 Reserved (0)
3775 */
3776 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
3777
3778 /* Note that we don't support guest and host-initiated debugging at the same time. */
3779
3780 uDR6 = X86_DR6_INIT_VAL;
3781 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
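                /* For example, a single-step trap sets only BS (bit 14) in the exit qualification while a hit
                   on breakpoint 0 sets only B0 (bit 0); these get merged into the architectural DR6 init
                   pattern before the trap is handed to DBGF below. */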
3782 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3783 if (rc == VINF_EM_RAW_GUEST_TRAP)
3784 {
3785 /* Update DR6 here. */
3786 pCtx->dr[6] = uDR6;
3787
3788 /* Resync DR6 if the debug state is active. */
3789 if (CPUMIsGuestDebugStateActive(pVCpu))
3790 ASMSetDR6(pCtx->dr[6]);
3791
3792 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
3793 pCtx->dr[7] &= ~X86_DR7_GD;
3794
3795 /* Paranoia. */
3796 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3797 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3798 pCtx->dr[7] |= 0x400; /* must be one */
3799
3800 /* Resync DR7 */
3801 rc2 = VMXWriteVmcs64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
3802 AssertRC(rc2);
3803
3804 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip,
3805 exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3806 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3807 cbInstr, errCode);
3808 AssertRC(rc2);
3809
3810 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3811 goto ResumeExecution;
3812 }
3813 /* Return to ring 3 to deal with the debug exit code. */
3814 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3815 break;
3816 }
3817
3818 case X86_XCPT_BP: /* Breakpoint. */
3819 {
3820 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP);
3821 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3822 if (rc == VINF_EM_RAW_GUEST_TRAP)
3823 {
3824 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs.Sel, pCtx->rip));
3825 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3826 cbInstr, errCode);
3827 AssertRC(rc2);
3828 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3829 goto ResumeExecution;
3830 }
3831 if (rc == VINF_SUCCESS)
3832 {
3833 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3834 goto ResumeExecution;
3835 }
3836 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
3837 break;
3838 }
3839
3840 case X86_XCPT_GP: /* General protection failure exception. */
3841 {
3842 uint32_t cbOp;
3843 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
3844
3845 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
3846#ifdef VBOX_STRICT
3847 if ( !CPUMIsGuestInRealModeEx(pCtx)
3848 || !pVM->hm.s.vmx.pRealModeTSS)
3849 {
3850 Log(("Trap %x at %04X:%RGv errorCode=%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, errCode));
3851 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
3852 cbInstr, errCode);
3853 AssertRC(rc2);
3854 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
3855 goto ResumeExecution;
3856 }
3857#endif
3858 Assert(CPUMIsGuestInRealModeEx(pCtx));
3859
3860 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
3861
3862 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, &cbOp);
3863 if (RT_SUCCESS(rc2))
3864 {
3865 bool fUpdateRIP = true;
3866
3867 rc = VINF_SUCCESS;
3868 Assert(cbOp == pDis->cbInstr);
3869 switch (pDis->pCurInstr->uOpcode)
3870 {
3871 case OP_CLI:
3872 pCtx->eflags.Bits.u1IF = 0;
3873 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCli);
3874 break;
3875
3876 case OP_STI:
3877 pCtx->eflags.Bits.u1IF = 1;
3878 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->cbInstr);
3879 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3880 rc2 = VMXWriteVmcs(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE,
3881 VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3882 AssertRC(rc2);
3883 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitSti);
3884 break;
3885
3886 case OP_HLT:
3887 fUpdateRIP = false;
3888 rc = VINF_EM_HALT;
3889 pCtx->rip += pDis->cbInstr;
3890 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
3891 break;
3892
3893 case OP_POPF:
3894 {
3895 RTGCPTR GCPtrStack;
3896 uint32_t cbParm;
3897 uint32_t uMask;
3898 X86EFLAGS eflags;
3899
3900 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3901 {
3902 cbParm = 4;
3903 uMask = 0xffffffff;
3904 }
3905 else
3906 {
3907 cbParm = 2;
3908 uMask = 0xffff;
3909 }
3910
3911 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3912 if (RT_FAILURE(rc2))
3913 {
3914 rc = VERR_EM_INTERPRETER;
3915 break;
3916 }
3917 eflags.u = 0;
3918 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3919 if (RT_FAILURE(rc2))
3920 {
3921 rc = VERR_EM_INTERPRETER;
3922 break;
3923 }
3924 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3925 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
3926 | (eflags.u & X86_EFL_POPF_BITS & uMask);
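                            /* E.g. for a 16-bit POPF (uMask = 0xffff) only the low word of the stack image is
                               considered, and only the flags POPF may legally change are replaced; bits outside
                               X86_EFL_POPF_BITS (such as VM and RF) keep their current values. */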
3927 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3928 pCtx->eflags.Bits.u1RF = 0;
3929 pCtx->esp += cbParm;
3930 pCtx->esp &= uMask;
3931
3932 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPopf);
3933 break;
3934 }
3935
3936 case OP_PUSHF:
3937 {
3938 RTGCPTR GCPtrStack;
3939 uint32_t cbParm;
3940 uint32_t uMask;
3941 X86EFLAGS eflags;
3942
3943 if (pDis->fPrefix & DISPREFIX_OPSIZE)
3944 {
3945 cbParm = 4;
3946 uMask = 0xffffffff;
3947 }
3948 else
3949 {
3950 cbParm = 2;
3951 uMask = 0xffff;
3952 }
3953
3954 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0,
3955 &GCPtrStack);
3956 if (RT_FAILURE(rc2))
3957 {
3958 rc = VERR_EM_INTERPRETER;
3959 break;
3960 }
3961 eflags = pCtx->eflags;
3962 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3963 eflags.Bits.u1RF = 0;
3964 eflags.Bits.u1VM = 0;
3965
3966 rc2 = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3967 if (RT_FAILURE(rc2))
3968 {
3969 rc = VERR_EM_INTERPRETER;
3970 break;
3971 }
3972 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3973 pCtx->esp -= cbParm;
3974 pCtx->esp &= uMask;
3975 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPushf);
3976 break;
3977 }
3978
3979 case OP_IRET:
3980 {
3981 RTGCPTR GCPtrStack;
3982 uint32_t uMask = 0xffff;
3983 uint16_t aIretFrame[3];
3984
3985 if (pDis->fPrefix & (DISPREFIX_OPSIZE | DISPREFIX_ADDRSIZE))
3986 {
3987 rc = VERR_EM_INTERPRETER;
3988 break;
3989 }
3990
3991 rc2 = SELMToFlatEx(pVCpu, DISSELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3992 if (RT_FAILURE(rc2))
3993 {
3994 rc = VERR_EM_INTERPRETER;
3995 break;
3996 }
3997 rc2 = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3998 if (RT_FAILURE(rc2))
3999 {
4000 rc = VERR_EM_INTERPRETER;
4001 break;
4002 }
4003 pCtx->ip = aIretFrame[0];
4004 pCtx->cs.Sel = aIretFrame[1];
4005 pCtx->cs.ValidSel = aIretFrame[1];
4006 pCtx->cs.u64Base = (uint32_t)pCtx->cs.Sel << 4;
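                            /* Real-mode segmentation: the hidden CS base is simply the selector shifted left
                               by four, so keep it in sync with the selector loaded from the iret frame. */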
4007 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask))
4008 | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
4009 pCtx->sp += sizeof(aIretFrame);
4010
4011 LogFlow(("iret to %04x:%x\n", pCtx->cs.Sel, pCtx->ip));
4012 fUpdateRIP = false;
4013 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIret);
4014 break;
4015 }
4016
4017 case OP_INT:
4018 {
4019 uint32_t intInfo2;
4020
4021 LogFlow(("Realmode: INT %x\n", pDis->Param1.uValue & 0xff));
4022 intInfo2 = pDis->Param1.uValue & 0xff;
4023 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4024 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
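                            /* The injection field is laid out with the vector in bits 7:0, the type (software
                               interrupt here) in bits 10:8 and the valid bit at bit 31; software interrupts
                               carry no error code. */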
4025
4026 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
4027 AssertRC(VBOXSTRICTRC_VAL(rc));
4028 fUpdateRIP = false;
4029 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
4030 break;
4031 }
4032
4033 case OP_INTO:
4034 {
4035 if (pCtx->eflags.Bits.u1OF)
4036 {
4037 uint32_t intInfo2;
4038
4039 LogFlow(("Realmode: INTO\n"));
4040 intInfo2 = X86_XCPT_OF;
4041 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4042 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4043
4044 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
4045 AssertRC(VBOXSTRICTRC_VAL(rc));
4046 fUpdateRIP = false;
4047 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
4048 }
4049 break;
4050 }
4051
4052 case OP_INT3:
4053 {
4054 uint32_t intInfo2;
4055
4056 LogFlow(("Realmode: INT 3\n"));
4057 intInfo2 = 3;
4058 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4059 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW_INT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4060
4061 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
4062 AssertRC(VBOXSTRICTRC_VAL(rc));
4063 fUpdateRIP = false;
4064 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInt);
4065 break;
4066 }
4067
4068 default:
4069 rc = EMInterpretInstructionDisasState(pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, EMCODETYPE_SUPERVISOR);
4070 fUpdateRIP = false;
4071 break;
4072 }
4073
4074 if (rc == VINF_SUCCESS)
4075 {
4076 if (fUpdateRIP)
4077 pCtx->rip += cbOp; /* Move on to the next instruction. */
4078
4079 /*
4080 * LIDT, LGDT can end up here. In the future CRx changes as well. Just reload the
4081 * whole context to be done with it.
4082 */
4083 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
4084
4085 /* Only resume if successful. */
4086 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4087 goto ResumeExecution;
4088 }
4089 }
4090 else
4091 rc = VERR_EM_INTERPRETER;
4092
4093 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT,
4094 ("Unexpected rc=%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4095 break;
4096 }
4097
4098#ifdef VBOX_STRICT
4099 case X86_XCPT_XF: /* SIMD exception. */
4100 case X86_XCPT_DE: /* Divide error. */
4101             case X86_XCPT_UD: /* Invalid opcode exception. */
4102 case X86_XCPT_SS: /* Stack segment exception. */
4103 case X86_XCPT_NP: /* Segment not present exception. */
4104 {
4105 switch (vector)
4106 {
4107 case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); break;
4108 case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); break;
4109 case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); break;
4110 case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); break;
4111 case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); break;
4112 }
4113
4114 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip));
4115 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4116 cbInstr, errCode);
4117 AssertRC(rc2);
4118
4119 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4120 goto ResumeExecution;
4121 }
4122#endif
4123 default:
4124 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk);
4125 if ( CPUMIsGuestInRealModeEx(pCtx)
4126 && pVM->hm.s.vmx.pRealModeTSS)
4127 {
4128 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs.Sel, pCtx->eip, errCode));
4129 rc = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4130 cbInstr, errCode);
4131 AssertRC(VBOXSTRICTRC_VAL(rc)); /* Strict RC check below. */
4132
4133 /* Go back to ring-3 in case of a triple fault. */
4134 if ( vector == X86_XCPT_DF
4135 && rc == VINF_EM_RESET)
4136 {
4137 break;
4138 }
4139
4140 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4141 goto ResumeExecution;
4142 }
4143 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
4144 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
4145 break;
4146 } /* switch (vector) */
4147
4148 break;
4149
4150 default:
4151 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
4152 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
4153 break;
4154 }
4155
4156 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub3, y3);
4157 break;
4158 }
4159
4160 /*
4161 * 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed
4162 * by the configuration of the EPT paging structures.
4163 */
4164 case VMX_EXIT_EPT_VIOLATION:
4165 {
4166 RTGCPHYS GCPhys;
4167
4168 Assert(pVM->hm.s.fNestedPaging);
4169
4170 rc2 = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
4171 AssertRC(rc2);
4172 Assert(((exitQualification >> 7) & 3) != 2);
4173
4174 /* Determine the kind of violation. */
4175 errCode = 0;
4176 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
4177 errCode |= X86_TRAP_PF_ID;
4178
4179 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
4180 errCode |= X86_TRAP_PF_RW;
4181
4182 /* If the page is present, then it's a page level protection fault. */
4183 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
4184 errCode |= X86_TRAP_PF_P;
4185 else
4186 {
4187 /* Shortcut for APIC TPR reads and writes. */
4188 if ( (GCPhys & 0xfff) == 0x080
4189 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4190 && fSetupTPRCaching
4191 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4192 {
4193 RTGCPHYS GCPhysApicBase;
4194 GCPhysApicBase = pCtx->msrApicBase;
4195 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4196 if (GCPhys == GCPhysApicBase + 0x80)
4197 {
4198 Log(("Enable VT-x virtual APIC access filtering\n"));
4199 rc2 = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess,
4200 X86_PTE_RW | X86_PTE_P);
4201 AssertRC(rc2);
4202 }
4203 }
4204 }
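            /* Example: a guest write to a guest-physical page whose EPT entry is present but not writable
               ends up with errCode = X86_TRAP_PF_RW | X86_TRAP_PF_P, i.e. the same shape as a regular #PF
               error code, which is what PGM expects below. */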
4205 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
4206
4207 /* GCPhys contains the guest physical address of the page fault. */
4208 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
4209 TRPMSetErrorCode(pVCpu, errCode);
4210 TRPMSetFaultAddress(pVCpu, GCPhys);
4211
4212 /* Handle the pagefault trap for the nested shadow table. */
4213 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
4214
4215 /*
4216 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment below, @bugref{6043}.
4217 */
4218 if ( rc == VINF_SUCCESS
4219 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4220 || rc == VERR_PAGE_NOT_PRESENT)
4221 {
4222 /* We've successfully synced our shadow pages, so let's just continue execution. */
4223                 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
4224 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
4225
4226 TRPMResetTrap(pVCpu);
4227 goto ResumeExecution;
4228 }
4229
4230#ifdef VBOX_STRICT
4231 if (rc != VINF_EM_RAW_EMULATE_INSTR)
4232 LogFlow(("PGMTrap0eHandlerNestedPaging at %RGv failed with %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4233#endif
4234 /* Need to go back to the recompiler to emulate the instruction. */
4235 TRPMResetTrap(pVCpu);
4236 break;
4237 }
4238
4239 case VMX_EXIT_EPT_MISCONFIG:
4240 {
4241 RTGCPHYS GCPhys;
4242
4243 Assert(pVM->hm.s.fNestedPaging);
4244
4245 rc2 = VMXReadVmcs64(VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL, &GCPhys);
4246 AssertRC(rc2);
4247 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
4248
4249 /* Shortcut for APIC TPR reads and writes. */
4250 if ( (GCPhys & 0xfff) == 0x080
4251 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
4252 && fSetupTPRCaching
4253 && (pVM->hm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
4254 {
4255 RTGCPHYS GCPhysApicBase = pCtx->msrApicBase;
4256 GCPhysApicBase &= PAGE_BASE_GC_MASK;
4257 if (GCPhys == GCPhysApicBase + 0x80)
4258 {
4259 Log(("Enable VT-x virtual APIC access filtering\n"));
4260 rc2 = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess,
4261 X86_PTE_RW | X86_PTE_P);
4262 AssertRC(rc2);
4263 }
4264 }
4265
4266 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX);
4267
4268 /*
4269 * If we succeed, resume execution.
4270              * Alternatively, if we fail to interpret the instruction because we couldn't get the guest physical address
4271              * of the page containing the instruction via the guest's page tables (we would have invalidated the guest page
4272              * in the host TLB), resume execution anyway; the resulting guest page fault lets the guest handle this
4273 * weird case. See @bugref{6043}.
4274 */
4275 if ( rc == VINF_SUCCESS
4276 || rc == VERR_PAGE_TABLE_NOT_PRESENT
4277 || rc == VERR_PAGE_NOT_PRESENT)
4278 {
4279 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhys, (RTGCPTR)pCtx->rip));
4280 goto ResumeExecution;
4281 }
4282
4283 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhys, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
4284 break;
4285 }
4286
4287 case VMX_EXIT_INT_WINDOW: /* 7 Interrupt window exiting. */
4288 /* Clear VM-exit on IF=1 change. */
4289 LogFlow(("VMX_EXIT_INT_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip,
4290 VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
4291 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INT_WINDOW_EXIT;
4292 rc2 = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
4293 AssertRC(rc2);
4294 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
4295 goto ResumeExecution; /* we check for pending guest interrupts there */
4296
4297 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
4298 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
4299 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
4300 /* Skip instruction and continue directly. */
4301 pCtx->rip += cbInstr;
4302 /* Continue execution.*/
4303 goto ResumeExecution;
4304
4305 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4306 {
4307 Log2(("VMX: Cpuid %x\n", pCtx->eax));
4308 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
4309 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4310 if (rc == VINF_SUCCESS)
4311 {
4312 /* Update EIP and continue execution. */
4313 Assert(cbInstr == 2);
4314 pCtx->rip += cbInstr;
4315 goto ResumeExecution;
4316 }
4317 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4318 rc = VINF_EM_RAW_EMULATE_INSTR;
4319 break;
4320 }
4321
4322 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4323 {
4324 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
4325 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
4326 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4327 if (rc == VINF_SUCCESS)
4328 {
4329 /* Update EIP and continue execution. */
4330 Assert(cbInstr == 2);
4331 pCtx->rip += cbInstr;
4332 goto ResumeExecution;
4333 }
4334 rc = VINF_EM_RAW_EMULATE_INSTR;
4335 break;
4336 }
4337
4338 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4339 {
4340 Log2(("VMX: Rdtsc\n"));
4341 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
4342 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4343 if (rc == VINF_SUCCESS)
4344 {
4345 /* Update EIP and continue execution. */
4346 Assert(cbInstr == 2);
4347 pCtx->rip += cbInstr;
4348 goto ResumeExecution;
4349 }
4350 rc = VINF_EM_RAW_EMULATE_INSTR;
4351 break;
4352 }
4353
4354 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4355 {
4356 Log2(("VMX: Rdtscp\n"));
4357 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtscp);
4358 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
4359 if (rc == VINF_SUCCESS)
4360 {
4361 /* Update EIP and continue execution. */
4362 Assert(cbInstr == 3);
4363 pCtx->rip += cbInstr;
4364 goto ResumeExecution;
4365 }
4366 rc = VINF_EM_RAW_EMULATE_INSTR;
4367 break;
4368 }
4369
4370 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4371 {
4372 Log2(("VMX: invlpg\n"));
4373 Assert(!pVM->hm.s.fNestedPaging);
4374
4375 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
4376 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
4377 if (rc == VINF_SUCCESS)
4378 {
4379 /* Update EIP and continue execution. */
4380 pCtx->rip += cbInstr;
4381 goto ResumeExecution;
4382 }
4383 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, VBOXSTRICTRC_VAL(rc)));
4384 break;
4385 }
4386
4387 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4388 {
4389 Log2(("VMX: monitor\n"));
4390
4391 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
4392 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4393 if (rc == VINF_SUCCESS)
4394 {
4395 /* Update EIP and continue execution. */
4396 pCtx->rip += cbInstr;
4397 goto ResumeExecution;
4398 }
4399 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4400 break;
4401 }
4402
4403 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4404 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
4405 if ( pVM->hm.s.fTPRPatchingActive
4406 && pCtx->ecx == MSR_K8_LSTAR)
4407 {
4408 Assert(!CPUMIsGuestInLongModeEx(pCtx));
4409 if ((pCtx->eax & 0xff) != u8LastTPR)
4410 {
4411 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
4412
4413 /* Our patch code uses LSTAR for TPR caching. */
4414 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
4415 AssertRC(rc2);
4416 }
4417
4418 /* Skip the instruction and continue. */
4419 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
4420
4421 /* Only resume if successful. */
4422 goto ResumeExecution;
4423 }
4424 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_MSR;
4425 /* no break */
4426 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4427 {
4428 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hm.s.StatExitRdmsr : &pVCpu->hm.s.StatExitWrmsr);
4429
4430 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
4431 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
4432 if (rc == VINF_SUCCESS)
4433 {
4434 /* EIP has been updated already. */
4435 /* Only resume if successful. */
4436 goto ResumeExecution;
4437 }
4438 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n",
4439 (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
4440 break;
4441 }
4442
4443 case VMX_EXIT_MOV_CRX: /* 28 Control-register accesses. */
4444 {
4445 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub2, y2);
4446
4447 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
4448 {
4449 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
4450 {
4451 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
4452 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4453 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4454 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
4455 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
4456 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
4457 {
4458 case 0:
4459 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0 | HM_CHANGED_GUEST_CR3;
4460 break;
4461 case 2:
4462 break;
4463 case 3:
4464 Assert(!pVM->hm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
4465 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR3;
4466 break;
4467 case 4:
4468 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR4;
4469 break;
4470 case 8:
4471 /* CR8 contains the APIC TPR */
4472 Assert(!(pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1
4473 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4474 break;
4475
4476 default:
4477 AssertFailed();
4478 break;
4479 }
4480 break;
4481 }
4482
4483 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
4484 {
4485 Log2(("VMX: mov x, crx\n"));
4486 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
4487
4488 Assert( !pVM->hm.s.fNestedPaging
4489 || !CPUMIsGuestInPagedProtectedModeEx(pCtx)
4490 || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != DISCREG_CR3);
4491
4492 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
4493 Assert( VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8
4494 || !(pVM->hm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
4495
4496 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4497 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
4498 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
4499 break;
4500 }
4501
4502 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
4503 {
4504 Log2(("VMX: clts\n"));
4505 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts);
4506 rc = EMInterpretCLTS(pVM, pVCpu);
4507 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
4508 break;
4509 }
4510
4511 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
4512 {
4513 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
4514 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw);
4515 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
4516 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
4517 break;
4518 }
4519 }
4520
4521 /* Update EIP if no error occurred. */
4522 if (RT_SUCCESS(rc))
4523 pCtx->rip += cbInstr;
4524
4525 if (rc == VINF_SUCCESS)
4526 {
4527 /* Only resume if successful. */
4528 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub2, y2);
4529 goto ResumeExecution;
4530 }
4531 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
4532 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub2, y2);
4533 break;
4534 }
4535
4536 case VMX_EXIT_MOV_DRX: /* 29 Debug-register accesses. */
4537 {
4538 if ( !DBGFIsStepping(pVCpu)
4539 && !CPUMIsHyperDebugStateActive(pVCpu))
4540 {
4541 /* Disable DRx move intercepts. */
4542 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4543 rc2 = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
4544 AssertRC(rc2);
4545
4546 /* Save the host and load the guest debug state. */
4547 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
4548 AssertRC(rc2);
4549
4550#ifdef LOG_ENABLED
4551 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4552 {
4553 Log(("VMX_EXIT_MOV_DRX: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4554 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4555 }
4556 else
4557 Log(("VMX_EXIT_MOV_DRX: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
4558#endif
4559
4560#ifdef VBOX_WITH_STATISTICS
4561 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
4562 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4563 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
4564 else
4565 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
4566#endif
4567
4568 goto ResumeExecution;
4569 }
4570
4571 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first
4572 * time and restore DRx registers afterwards */
4573 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
4574 {
4575 Log2(("VMX: mov DRx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4576 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
4577 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
4578 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4579 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
4580 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
4581 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
4582 Log2(("DR7=%08x\n", pCtx->dr[7]));
4583 }
4584 else
4585 {
4586 Log2(("VMX: mov x, DRx\n"));
4587 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
4588 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
4589 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
4590 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
4591 }
4592 /* Update EIP if no error occurred. */
4593 if (RT_SUCCESS(rc))
4594 pCtx->rip += cbInstr;
4595
4596 if (rc == VINF_SUCCESS)
4597 {
4598 /* Only resume if successful. */
4599 goto ResumeExecution;
4600 }
4601 Assert(rc == VERR_EM_INTERPRETER);
4602 break;
4603 }
4604
4605 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
4606 case VMX_EXIT_IO_INSTR: /* 30 I/O instruction. */
4607 {
4608 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExit2Sub1, y1);
4609 uint32_t uPort;
4610 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
4611 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
4612
4613 /** @todo necessary to make the distinction? */
4614 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
4615 uPort = pCtx->edx & 0xffff;
4616 else
4617 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
4618
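            /* The exit qualification encodes the access size minus one (0 = byte, 1 = word, 3 = dword), hence
               the sanity check below rejecting 2 and anything above 3 before g_aIOSize/g_aIOOpAnd are indexed. */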
4619 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4)) /* paranoia */
4620 {
4621 rc = fIOWrite ? VINF_IOM_R3_IOPORT_WRITE : VINF_IOM_R3_IOPORT_READ;
4622 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4623 break;
4624 }
4625
4626 uint32_t cbSize = g_aIOSize[uIOWidth];
4627 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
4628 {
4629 /* ins/outs */
4630 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
4631
4632 /* Disassemble manually to deal with segment prefixes. */
4633 /** @todo VMX_VMCS_RO_EXIT_GUEST_LINEAR_ADDR contains the flat pointer
4634 * operand of the instruction. */
4635 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
4636 rc2 = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
4637                 if (RT_SUCCESS(rc2)) /* Note: check the disassembly status (rc2), not rc. */
4638 {
4639 if (fIOWrite)
4640 {
4641 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4642 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
4643 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4644 }
4645 else
4646 {
4647 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
4648 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
4649 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->fPrefix, (DISCPUMODE)pDis->uAddrMode, cbSize);
4650 }
4651 }
4652 else
4653 rc = VINF_EM_RAW_EMULATE_INSTR;
4654 }
4655 else
4656 {
4657 /* Normal in/out */
4658 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
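                /* g_aIOOpAnd is assumed to hold the operand mask for each access width (byte/word/dword), so
                   for instance a one-byte IN only merges the low 8 bits of the read value back into EAX below. */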
4659
4660 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
4661
4662 if (fIOWrite)
4663 {
4664 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
4665 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
4666 if (rc == VINF_IOM_R3_IOPORT_WRITE)
4667 HMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4668 }
4669 else
4670 {
4671 uint32_t u32Val = 0;
4672
4673 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
4674 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
4675 if (IOM_SUCCESS(rc))
4676 {
4677 /* Write back to the EAX register. */
4678 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
4679 }
4680 else
4681 if (rc == VINF_IOM_R3_IOPORT_READ)
4682 HMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
4683 }
4684 }
4685
4686 /*
4687              * Handle the I/O return codes.
4688 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
4689 */
4690 if (IOM_SUCCESS(rc))
4691 {
4692 /* Update EIP and continue execution. */
4693 pCtx->rip += cbInstr;
4694 if (RT_LIKELY(rc == VINF_SUCCESS))
4695 {
4696 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
4697 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
4698 {
4699 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
4700 for (unsigned i = 0; i < 4; i++)
4701 {
4702 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
4703
4704 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
4705 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
4706 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
4707 {
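                                    /* Breakpoint i matches when the accessed port lies within [DRi, DRi + len),
                                       the breakpoint is enabled locally or globally, and its R/W field selects
                                       I/O accesses (which also requires CR4.DE to be set). */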
4708 uint64_t uDR6;
4709
4710 Assert(CPUMIsGuestDebugStateActive(pVCpu));
4711
4712 uDR6 = ASMGetDR6();
4713
4714 /* Clear all breakpoint status flags and set the one we just hit. */
4715 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
4716 uDR6 |= (uint64_t)RT_BIT(i);
4717
4718 /*
4719 * Note: AMD64 Architecture Programmer's Manual 13.1:
4720                              * Bits 15:13 of the DR6 register are never cleared by the processor and must
4721 * be cleared by software after the contents have been read.
4722 */
4723 ASMSetDR6(uDR6);
4724
4725 /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */
4726 pCtx->dr[7] &= ~X86_DR7_GD;
4727
4728 /* Paranoia. */
4729 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
4730 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
4731 pCtx->dr[7] |= 0x400; /* must be one */
4732
4733 /* Resync DR7 */
4734 rc2 = VMXWriteVmcs64(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
4735 AssertRC(rc2);
4736
4737 /* Construct inject info. */
4738 intInfo = X86_XCPT_DB;
4739 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
4740 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HW_XCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
4741
4742 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
4743 rc2 = hmR0VmxInjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo),
4744 0 /* cbInstr */, 0 /* errCode */);
4745 AssertRC(rc2);
4746
4747 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4748 goto ResumeExecution;
4749 }
4750 }
4751 }
4752 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4753 goto ResumeExecution;
4754 }
4755 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4756 break;
4757 }
4758
4759#ifdef VBOX_STRICT
4760 if (rc == VINF_IOM_R3_IOPORT_READ)
4761 Assert(!fIOWrite);
4762 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
4763 Assert(fIOWrite);
4764 else
4765 {
4766 AssertMsg( RT_FAILURE(rc)
4767 || rc == VINF_EM_RAW_EMULATE_INSTR
4768 || rc == VINF_EM_RAW_GUEST_TRAP
4769 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
4770 }
4771#endif
4772 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2Sub1, y1);
4773 break;
4774 }
4775
4776 case VMX_EXIT_TPR_BELOW_THRESHOLD: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4777 LogFlow(("VMX_EXIT_TPR_BELOW_THRESHOLD\n"));
4778 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
4779 goto ResumeExecution;
4780
4781 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4782 on the APIC-access page. */
4783 {
4784 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
4785 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
4786
4787 switch (uAccessType)
4788 {
4789 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
4790 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
4791 {
4792 RTGCPHYS GCPhys = pCtx->msrApicBase;
4793 GCPhys &= PAGE_BASE_GC_MASK;
4794 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
4795
4796 LogFlow(("Apic access at %RGp\n", GCPhys));
4797 rc = IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW,
4798 CPUMCTX2CORE(pCtx), GCPhys);
4799 if (rc == VINF_SUCCESS)
4800 goto ResumeExecution; /* rip already updated */
4801 break;
4802 }
4803
4804 default:
4805 rc = VINF_EM_RAW_EMULATE_INSTR;
4806 break;
4807 }
4808 break;
4809 }
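/*
 * Note: For linear APIC accesses the exit qualification only carries the offset into the
 * APIC-access page, so the faulting guest-physical address is rebuilt above from the
 * page-aligned IA32_APIC_BASE value plus that offset and handed to IOMMMIOPhysHandler.
 * On VINF_SUCCESS the access has been emulated and RIP already advanced, hence the direct
 * resume; other access types are simply punted to the instruction emulator.
 */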
4810
4811 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4812 if (!TMTimerPollBool(pVM, pVCpu))
4813 goto ResumeExecution;
4814 rc = VINF_EM_RAW_TIMER_PENDING;
4815 break;
4816
4817 default:
4818 /* The rest is handled after syncing the entire CPU state. */
4819 break;
4820 }
4821
4822
4823 /*
4824 * Note: The guest state is not entirely synced back at this stage!
4825 */
4826
4827 /* Investigate why there was a VM-exit. (part 2) */
4828 switch (exitReason)
4829 {
4830 case VMX_EXIT_XCPT_NMI: /* 0 Exception or non-maskable interrupt (NMI). */
4831 case VMX_EXIT_EXT_INT: /* 1 External interrupt. */
4832 case VMX_EXIT_EPT_VIOLATION:
4833 case VMX_EXIT_EPT_MISCONFIG: /* 49 EPT misconfig is used by the PGM/MMIO optimizations. */
4834 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
4835 /* Already handled above. */
4836 break;
4837
4838 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
4839 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
4840 break;
4841
4842 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
4843 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
4844 rc = VINF_EM_RAW_INTERRUPT;
4845 AssertFailed(); /* Can't happen. Yet. */
4846 break;
4847
4848 case VMX_EXIT_IO_SMI: /* 5 I/O system-management interrupt (SMI). */
4849 case VMX_EXIT_SMI: /* 6 Other SMI. */
4850 rc = VINF_EM_RAW_INTERRUPT;
4851 AssertFailed(); /* Can't happen afaik. */
4852 break;
4853
4854 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
4855 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
4856 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
4857 && pVCpu->hm.s.Event.fPending)
4858 {
4859 /* Caused by an injected interrupt. */
4860 pVCpu->hm.s.Event.fPending = false;
4861
4862 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.u64IntrInfo)));
4863 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hm.s.Event.u64IntrInfo));
4864 //@todo: Why do we assume this had to be a hardware interrupt? What about software interrupts or exceptions?
4865 rc2 = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hm.s.Event.u64IntrInfo), TRPM_HARDWARE_INT);
4866 AssertRC(rc2);
4867 }
4868 /* else Exceptions and software interrupts can just be restarted. */
4869 rc = VERR_EM_INTERPRETER;
4870 break;
4871
4872 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
4873 /* Check if external interrupts are pending; if so, don't switch back. */
4874 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
4875 pCtx->rip++; /* skip hlt */
4876 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
4877 goto ResumeExecution;
4878
4879 rc = VINF_EM_HALT;
4880 break;
4881
4882 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4883 Log2(("VMX: mwait\n"));
4884 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
4885 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4886 if ( rc == VINF_EM_HALT
4887 || rc == VINF_SUCCESS)
4888 {
4889 /* Update EIP and continue execution. */
4890 pCtx->rip += cbInstr;
4891
4892 /* Check if external interrupts are pending; if so, don't switch back. */
4893 if ( rc == VINF_SUCCESS
4894 || ( rc == VINF_EM_HALT
4895 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4896 )
4897 goto ResumeExecution;
4898 }
4899 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
4900 break;
4901
4902 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4903 AssertFailed(); /* can't happen. */
4904 rc = VERR_EM_INTERPRETER;
4905 break;
4906
4907 case VMX_EXIT_MTF: /* 37 Exit due to Monitor Trap Flag. */
4908 LogFlow(("VMX_EXIT_MTF at %RGv\n", (RTGCPTR)pCtx->rip));
4909 pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_TRAP_FLAG;
4910 rc2 = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
4911 AssertRC(rc2);
4912 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf);
4913#if 0
4914 DBGFDoneStepping(pVCpu);
4915#endif
4916 rc = VINF_EM_DBG_STOP;
4917 break;
4918
4919 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4920 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4921 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4922 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4923 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4924 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4925 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4926 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4927 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4928 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4929 /** @todo inject #UD immediately */
4930 rc = VERR_EM_INTERPRETER;
4931 break;
4932
4933 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4934 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4935 case VMX_EXIT_INVLPG: /* 14 Guest software attempted to execute INVLPG. */
4936 case VMX_EXIT_MOV_CRX: /* 28 Control-register accesses. */
4937 case VMX_EXIT_MOV_DRX: /* 29 Debug-register accesses. */
4938 case VMX_EXIT_IO_INSTR: /* 30 I/O instruction. */
4939 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4940 case VMX_EXIT_RDTSCP: /* 51 Guest software attempted to execute RDTSCP. */
4941 /* already handled above */
4942 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4943 || rc == VINF_EM_RAW_INTERRUPT
4944 || rc == VERR_EM_INTERPRETER
4945 || rc == VINF_EM_RAW_EMULATE_INSTR
4946 || rc == VINF_PGM_SYNC_CR3
4947 || rc == VINF_IOM_R3_IOPORT_READ
4948 || rc == VINF_IOM_R3_IOPORT_WRITE
4949 || rc == VINF_EM_RAW_GUEST_TRAP
4950 || rc == VINF_TRPM_XCPT_DISPATCHED
4951 || rc == VINF_EM_RESCHEDULE_REM,
4952 ("rc = %d\n", VBOXSTRICTRC_VAL(rc)));
4953 break;
4954
4955 case VMX_EXIT_TPR_BELOW_THRESHOLD: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4956 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4957 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4958 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4959 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4960 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address
4961 on the APIC-access page. */
4962 {
4963 /*
4964 * If we decided to emulate them here, then we must sync the MSRs that could have been changed (sysenter, FS/GS base).
4965 */
4966 rc = VERR_EM_INTERPRETER;
4967 break;
4968 }
4969
4970 case VMX_EXIT_INT_WINDOW: /* 7 Interrupt window. */
4971 Assert(rc == VINF_EM_RAW_INTERRUPT);
4972 break;
4973
4974 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4975 {
4976#ifdef VBOX_STRICT
4977 RTCCUINTREG val2 = 0;
4978
4979 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4980
4981 VMXReadVmcs(VMX_VMCS_GUEST_RIP, &val2);
4982 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4983
4984 VMXReadVmcs(VMX_VMCS_GUEST_CR0, &val2);
4985 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4986
4987 VMXReadVmcs(VMX_VMCS_GUEST_CR3, &val2);
4988 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4989
4990 VMXReadVmcs(VMX_VMCS_GUEST_CR4, &val2);
4991 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4992
4993 VMXReadVmcs(VMX_VMCS_GUEST_RFLAGS, &val2);
4994 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4995
4996 VMX_LOG_SELREG(CS, "CS", val2);
4997 VMX_LOG_SELREG(DS, "DS", val2);
4998 VMX_LOG_SELREG(ES, "ES", val2);
4999 VMX_LOG_SELREG(FS, "FS", val2);
5000 VMX_LOG_SELREG(GS, "GS", val2);
5001 VMX_LOG_SELREG(SS, "SS", val2);
5002 VMX_LOG_SELREG(TR, "TR", val2);
5003 VMX_LOG_SELREG(LDTR, "LDTR", val2);
5004
5005 VMXReadVmcs(VMX_VMCS_GUEST_GDTR_BASE, &val2);
5006 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
5007 VMXReadVmcs(VMX_VMCS_GUEST_IDTR_BASE, &val2);
5008 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
5009#endif /* VBOX_STRICT */
5010 rc = VERR_VMX_INVALID_GUEST_STATE;
5011 break;
5012 }
5013
5014 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
5015 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
5016 default:
5017 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
5018 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
5019 break;
5020
5021 }
5022
5023end:
5024 /* We are now going back to ring-3, so clear the action flag. */
5025 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
5026
5027 /*
5028 * Signal changes for the recompiler.
5029 */
5030 CPUMSetChangedFlags(pVCpu,
5031 CPUM_CHANGED_SYSENTER_MSR
5032 | CPUM_CHANGED_LDTR
5033 | CPUM_CHANGED_GDTR
5034 | CPUM_CHANGED_IDTR
5035 | CPUM_CHANGED_TR
5036 | CPUM_CHANGED_HIDDEN_SEL_REGS);
5037
5038 /*
5039 * If we executed vmlaunch/vmresume and an external IRQ was pending, then we don't have to do a full sync the next time.
5040 */
5041 if ( exitReason == VMX_EXIT_EXT_INT
5042 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
5043 {
5044 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
5045 /* On the next entry we'll only sync the host context. */
5046 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT;
5047 }
5048 else
5049 {
5050 /* On the next entry we'll sync everything. */
5051 /** @todo we can do better than this */
5052 /* Not in the VINF_PGM_CHANGE_MODE though! */
5053 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
5054 }
5055
5056 /* Translate into a less severe return code */
5057 if (rc == VERR_EM_INTERPRETER)
5058 rc = VINF_EM_RAW_EMULATE_INSTR;
5059 else if (rc == VERR_VMX_INVALID_VMCS_PTR)
5060 {
5061 /* Try to extract more information about what might have gone wrong here. */
5062 VMXGetActivateVMCS(&pVCpu->hm.s.vmx.lasterror.u64VMCSPhys);
5063 pVCpu->hm.s.vmx.lasterror.u32VMCSRevision = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs;
5064 pVCpu->hm.s.vmx.lasterror.idEnteredCpu = pVCpu->hm.s.idEnteredCpu;
5065 pVCpu->hm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
5066 }
5067
5068 /* Just set the correct state here instead of trying to catch every goto above. */
5069 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
5070
5071#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
5072 /* Restore interrupts if we exited after disabling them. */
5073 if (uOldEFlags != ~(RTCCUINTREG)0)
5074 ASMSetFlags(uOldEFlags);
5075#endif
5076
5077 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
5078 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
5079 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
5080 Log2(("X"));
5081 return VBOXSTRICTRC_TODO(rc);
5082}
5083
5084
5085/**
5086 * Enters the VT-x session.
5087 *
5088 * @returns VBox status code.
5089 * @param pVM Pointer to the VM.
5090 * @param pVCpu Pointer to the VMCPU.
5091 * @param pCpu Pointer to the CPU info struct.
5092 */
5093VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
5094{
5095 Assert(pVM->hm.s.vmx.fSupported);
5096 NOREF(pCpu);
5097
5098 unsigned cr4 = ASMGetCR4();
5099 if (!(cr4 & X86_CR4_VMXE))
5100 {
5101 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
5102 return VERR_VMX_X86_CR4_VMXE_CLEARED;
5103 }
5104
5105 /* Activate the VMCS. */
5106 int rc = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
5107 if (RT_FAILURE(rc))
5108 return rc;
5109
5110 pVCpu->hm.s.fResumeVM = false;
5111 return VINF_SUCCESS;
5112}
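/*
 * A rough usage sketch (caller side, assumed; not part of this file): the generic HM
 * ring-0 code is expected to bracket guest execution so that the VMCS is current on the
 * host CPU while the guest runs and is cleared again before the VCPU may migrate:
 *
 *     int rc = VMXR0Enter(pVM, pVCpu, pCpu);    // VMPTRLD this VCPU's VMCS
 *     if (RT_SUCCESS(rc))
 *     {
 *         // ... run guest code using the execution loop earlier in this file ...
 *         rc = VMXR0Leave(pVM, pVCpu, pCtx);    // save debug state, VMCLEAR the VMCS
 *     }
 *
 * Clearing fResumeVM above forces the next entry to use VMLAUNCH rather than VMRESUME,
 * which is required after the VMCS has been cleared.
 */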
5113
5114
5115/**
5116 * Leaves the VT-x session.
5117 *
5118 * @returns VBox status code.
5119 * @param pVM Pointer to the VM.
5120 * @param pVCpu Pointer to the VMCPU.
5121 * @param pCtx Pointer to the guest CPU context.
5122 */
5123VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
5124{
5125 Assert(pVM->hm.s.vmx.fSupported);
5126
5127#ifdef DEBUG
5128 if (CPUMIsHyperDebugStateActive(pVCpu))
5129 {
5130 CPUMR0LoadHostDebugState(pVM, pVCpu);
5131 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
5132 }
5133 else
5134#endif
5135
5136 /*
5137 * Save the guest debug state if necessary.
5138 */
5139 if (CPUMIsGuestDebugStateActive(pVCpu))
5140 {
5141 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
5142
5143 /* Enable DRx move intercepts again. */
5144 pVCpu->hm.s.vmx.u32ProcCtls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
5145 int rc = VMXWriteVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, pVCpu->hm.s.vmx.u32ProcCtls);
5146 AssertRC(rc);
5147
5148 /* Resync the debug registers the next time. */
5149 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
5150 }
5151 else
5152 Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
5153
5154 /*
5155 * Clear VMCS, marking it inactive, clearing implementation-specific data and writing
5156 * VMCS data back to memory.
5157 */
5158 int rc = VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
5159 AssertRC(rc);
5160
5161 return VINF_SUCCESS;
5162}
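/*
 * Note: The VMCLEAR above is what makes it safe to VMPTRLD this VMCS on another host CPU
 * later: it forces the processor to write any cached VMCS state back to the in-memory
 * VMCS region, marks the VMCS as not current and sets its launch state to "clear".
 */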
5163
5164
5165/**
5166 * Flush the TLB using EPT.
5167 *
5168 * @returns VBox status code.
5169 * @param pVM Pointer to the VM.
5170 * @param pVCpu Pointer to the VMCPU.
5171 * @param enmFlush Type of flush.
5172 */
5173static void hmR0VmxFlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_EPT enmFlush)
5174{
5175 uint64_t descriptor[2];
5176
5177 LogFlow(("hmR0VmxFlushEPT %d\n", enmFlush));
5178 Assert(pVM->hm.s.fNestedPaging);
5179 descriptor[0] = pVCpu->hm.s.vmx.GCPhysEPTP;
5180 descriptor[1] = 0; /* MBZ. Intel spec. 33.3 VMX Instructions */
5181 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
5182 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %x %RGv failed with %d\n", enmFlush, pVCpu->hm.s.vmx.GCPhysEPTP, rc));
5183#ifdef VBOX_WITH_STATISTICS
5184 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging);
5185#endif
5186}
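/*
 * Note: The INVEPT descriptor built above follows the Intel layout: the first quadword
 * holds the EPT pointer (only consulted for the single-context flush type) and the second
 * quadword must be zero. A single-context flush invalidates EPT-derived mappings for the
 * given EPTP only, while an all-contexts flush invalidates them for every EPTP.
 */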
5187
5188
5189/**
5190 * Flush the TLB using VPID.
5191 *
5192 * @returns VBox status code.
5193 * @param pVM Pointer to the VM.
5194 * @param pVCpu Pointer to the VMCPU (can be NULL depending on @a
5195 * enmFlush).
5196 * @param enmFlush Type of flush.
5197 * @param GCPtr Virtual address of the page to flush (can be 0 depending
5198 * on @a enmFlush).
5199 */
5200static void hmR0VmxFlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH_VPID enmFlush, RTGCPTR GCPtr)
5201{
5202 uint64_t descriptor[2];
5203
5204 Assert(pVM->hm.s.vmx.fVpid);
5205 if (enmFlush == VMX_FLUSH_VPID_ALL_CONTEXTS)
5206 {
5207 descriptor[0] = 0;
5208 descriptor[1] = 0;
5209 }
5210 else
5211 {
5212 AssertPtr(pVCpu);
5213 AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
5214 AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid));
5215 descriptor[0] = pVCpu->hm.s.uCurrentAsid;
5216 descriptor[1] = GCPtr;
5217 }
5218 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]); NOREF(rc);
5219 AssertMsg(rc == VINF_SUCCESS,
5220 ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc));
5221#ifdef VBOX_WITH_STATISTICS
5222 if (pVCpu)
5223 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
5224#endif
5225}
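/*
 * Note: The INVVPID descriptor mirrors the Intel layout: bits 15:0 of the first quadword
 * hold the VPID (the remaining bits are reserved and must be zero) and the second quadword
 * holds the linear address, which is only consulted for the individual-address flush type.
 * For the all-contexts type both fields are ignored, which is why pVCpu may be NULL there.
 */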
5226
5227
5228/**
5229 * Invalidates a guest page by guest virtual address. Only relevant for
5230 * EPT/VPID, otherwise there is nothing really to invalidate.
5231 *
5232 * @returns VBox status code.
5233 * @param pVM Pointer to the VM.
5234 * @param pVCpu Pointer to the VMCPU.
5235 * @param GCVirt Guest virtual address of the page to invalidate.
5236 */
5237VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
5238{
5239 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
5240
5241 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
5242
5243 if (!fFlushPending)
5244 {
5245 /*
5246 * We must invalidate the guest TLB entry in either case; we cannot ignore it even for the EPT case.
5247 * See @bugref{6043} and @bugref{6177}.
5248 *
5249 * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VMENTRY in hmR0VmxSetupTLB*() as this
5250 * function may be called in a loop with individual addresses.
5251 */
5252 if (pVM->hm.s.vmx.fVpid)
5253 {
5254 /* If we can flush just this page do it, otherwise flush as little as possible. */
5255 if (pVM->hm.s.vmx.msr.vmx_ept_vpid_caps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR)
5256 hmR0VmxFlushVPID(pVM, pVCpu, VMX_FLUSH_VPID_INDIV_ADDR, GCVirt);
5257 else
5258 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5259 }
5260 else if (pVM->hm.s.fNestedPaging)
5261 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5262 }
5263
5264 return VINF_SUCCESS;
5265}
5266
5267
5268/**
5269 * Invalidates a guest page by physical address. Only relevant for EPT/VPID,
5270 * otherwise there is nothing really to invalidate.
5271 *
5272 * NOTE: Assumes the current instruction references this physical page through a virtual address!!
5273 *
5274 * @returns VBox status code.
5275 * @param pVM Pointer to the VM.
5276 * @param pVCpu Pointer to the VMCPU.
5277 * @param GCPhys Guest physical address of the page to invalidate.
5278 */
5279VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
5280{
5281 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
5282
5283 /*
5284 * We cannot flush a page by guest-physical address. invvpid takes only a linear address
5285 * while invept only flushes by EPT, not individual addresses. We update the force flag here
5286 * and flush before VMENTRY in hmR0VmxSetupTLB*(). This function might be called in a loop.
5287 */
5288 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
5289 return VINF_SUCCESS;
5290}
5291
5292
5293/**
5294 * Report world switch error and dump some useful debug info.
5295 *
5296 * @param pVM Pointer to the VM.
5297 * @param pVCpu Pointer to the VMCPU.
5298 * @param rc Return code.
5299 * @param pCtx Pointer to the current guest CPU context (not updated).
5300 */
5301static void hmR0VmxReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, VBOXSTRICTRC rc, PCPUMCTX pCtx)
5302{
5303 NOREF(pVM);
5304
5305 switch (VBOXSTRICTRC_VAL(rc))
5306 {
5307 case VERR_VMX_INVALID_VMXON_PTR:
5308 AssertFailed();
5309 break;
5310
5311 case VERR_VMX_UNABLE_TO_START_VM:
5312 case VERR_VMX_UNABLE_TO_RESUME_VM:
5313 {
5314 int rc2;
5315 RTCCUINTREG exitReason, instrError;
5316
5317 rc2 = VMXReadVmcs(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
5318 rc2 |= VMXReadVmcs(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
5319 AssertRC(rc2);
5320 if (rc2 == VINF_SUCCESS)
5321 {
5322 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason,
5323 (uint32_t)instrError));
5324 Log(("Current stack %08x\n", &rc2));
5325
5326 pVCpu->hm.s.vmx.lasterror.u32InstrError = instrError;
5327 pVCpu->hm.s.vmx.lasterror.u32ExitReason = exitReason;
5328
5329#ifdef VBOX_STRICT
5330 RTGDTR gdtr;
5331 PCX86DESCHC pDesc;
5332 RTCCUINTREG val;
5333
5334 ASMGetGDTR(&gdtr);
5335
5336 VMXReadVmcs(VMX_VMCS_GUEST_RIP, &val);
5337 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
5338 VMXReadVmcs(VMX_VMCS32_CTRL_PIN_EXEC_CONTROLS, &val);
5339 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
5340 VMXReadVmcs(VMX_VMCS32_CTRL_PROC_EXEC_CONTROLS, &val);
5341 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
5342 VMXReadVmcs(VMX_VMCS32_CTRL_ENTRY_CONTROLS, &val);
5343 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
5344 VMXReadVmcs(VMX_VMCS32_CTRL_EXIT_CONTROLS, &val);
5345 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
5346
5347 VMXReadVmcs(VMX_VMCS_HOST_CR0, &val);
5348 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
5349 VMXReadVmcs(VMX_VMCS_HOST_CR3, &val);
5350 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
5351 VMXReadVmcs(VMX_VMCS_HOST_CR4, &val);
5352 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
5353
5354 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_CS, &val);
5355 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
5356 VMXReadVmcs(VMX_VMCS_GUEST_RFLAGS, &val);
5357 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
5358
5359 if (val < gdtr.cbGdt)
5360 {
5361 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5362 HMR0DumpDescriptor(pDesc, val, "CS: ");
5363 }
5364
5365 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_DS, &val);
5366 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
5367 if (val < gdtr.cbGdt)
5368 {
5369 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5370 HMR0DumpDescriptor(pDesc, val, "DS: ");
5371 }
5372
5373 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_ES, &val);
5374 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
5375 if (val < gdtr.cbGdt)
5376 {
5377 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5378 HMR0DumpDescriptor(pDesc, val, "ES: ");
5379 }
5380
5381 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_FS, &val);
5382 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
5383 if (val < gdtr.cbGdt)
5384 {
5385 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5386 HMR0DumpDescriptor(pDesc, val, "FS: ");
5387 }
5388
5389 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_GS, &val);
5390 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
5391 if (val < gdtr.cbGdt)
5392 {
5393 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5394 HMR0DumpDescriptor(pDesc, val, "GS: ");
5395 }
5396
5397 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_SS, &val);
5398 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
5399 if (val < gdtr.cbGdt)
5400 {
5401 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5402 HMR0DumpDescriptor(pDesc, val, "SS: ");
5403 }
5404
5405 VMXReadVmcs(VMX_VMCS16_HOST_FIELD_TR, &val);
5406 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
5407 if (val < gdtr.cbGdt)
5408 {
5409 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
5410 HMR0DumpDescriptor(pDesc, val, "TR: ");
5411 }
5412
5413 VMXReadVmcs(VMX_VMCS_HOST_TR_BASE, &val);
5414 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
5415 VMXReadVmcs(VMX_VMCS_HOST_GDTR_BASE, &val);
5416 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
5417 VMXReadVmcs(VMX_VMCS_HOST_IDTR_BASE, &val);
5418 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
5419 VMXReadVmcs(VMX_VMCS32_HOST_SYSENTER_CS, &val);
5420 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
5421 VMXReadVmcs(VMX_VMCS_HOST_SYSENTER_EIP, &val);
5422 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
5423 VMXReadVmcs(VMX_VMCS_HOST_SYSENTER_ESP, &val);
5424 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
5425 VMXReadVmcs(VMX_VMCS_HOST_RSP, &val);
5426 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
5427 VMXReadVmcs(VMX_VMCS_HOST_RIP, &val);
5428 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
5429# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5430 if (VMX_IS_64BIT_HOST_MODE())
5431 {
5432 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
5433 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
5434 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
5435 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
5436 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
5437 Log(("MSR_K8_KERNEL_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE)));
5438 }
5439# endif
5440#endif /* VBOX_STRICT */
5441 }
5442 break;
5443 }
5444
5445 default:
5446 /* impossible */
5447 AssertMsgFailed(("%Rrc (%#x)\n", VBOXSTRICTRC_VAL(rc), VBOXSTRICTRC_VAL(rc)));
5448 break;
5449 }
5450}
5451
5452
5453#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5454/**
5455 * Prepares for and executes VMLAUNCH (64-bit guest mode).
5456 *
5457 * @returns VBox status code.
5458 * @param fResume Whether to vmlaunch/vmresume.
5459 * @param pCtx Pointer to the guest CPU context.
5460 * @param pCache Pointer to the VMCS cache.
5461 * @param pVM Pointer to the VM.
5462 * @param pVCpu Pointer to the VMCPU.
5463 */
5464DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
5465{
5466 uint32_t aParam[6];
5467 PHMGLOBLCPUINFO pCpu;
5468 RTHCPHYS HCPhysCpuPage;
5469 int rc;
5470
5471 pCpu = HMR0GetCurrentCpu();
5472 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5473
5474#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5475 pCache->uPos = 1;
5476 pCache->interPD = PGMGetInterPaeCR3(pVM);
5477 pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0;
5478#endif
5479
5480#ifdef DEBUG
5481 pCache->TestIn.HCPhysCpuPage= 0;
5482 pCache->TestIn.HCPhysVmcs = 0;
5483 pCache->TestIn.pCache = 0;
5484 pCache->TestOut.HCPhysVmcs = 0;
5485 pCache->TestOut.pCache = 0;
5486 pCache->TestOut.pCtx = 0;
5487 pCache->TestOut.eflags = 0;
5488#endif
5489
5490 aParam[0] = (uint32_t)(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */
5491 aParam[1] = (uint32_t)(HCPhysCpuPage >> 32); /* Param 1: VMXON physical address - Hi. */
5492 aParam[2] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */
5493 aParam[3] = (uint32_t)(pVCpu->hm.s.vmx.HCPhysVmcs >> 32); /* Param 2: VMCS physical address - Hi. */
5494 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache);
5495 aParam[5] = 0;
5496
5497#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5498 pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8;
5499 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1;
5500#endif
5501 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
5502
5503#ifdef VBOX_WITH_CRASHDUMP_MAGIC
5504 Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5);
5505 Assert(pCtx->dr[4] == 10);
5506 *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff;
5507#endif
5508
5509#ifdef DEBUG
5510 AssertMsg(pCache->TestIn.HCPhysCpuPage== HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage));
5511 AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
5512 pVCpu->hm.s.vmx.HCPhysVmcs));
5513 AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs,
5514 pCache->TestOut.HCPhysVmcs));
5515 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache,
5516 pCache->TestOut.pCache));
5517 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache),
5518 ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache)));
5519 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx,
5520 pCache->TestOut.pCtx));
5521 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
5522#endif
5523 return rc;
5524}
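/*
 * Note: All parameters for the 64-bit handler are passed as 32-bit values (the VMXON page
 * and VMCS physical addresses split into lo/hi halves) because they are pushed one 32-bit
 * word at a time via CPUMPushHyper in VMXR0Execute64BitsHandler below; the 64-bit side is
 * expected to reassemble them. The TestIn/TestOut fields are a DEBUG-build sanity check
 * that the same VMCS and cache pointers survived the round trip through the switcher.
 */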
5525
5526
5527# ifdef VBOX_STRICT
5528static bool hmR0VmxIsValidReadField(uint32_t idxField)
5529{
5530 switch (idxField)
5531 {
5532 case VMX_VMCS_GUEST_RIP:
5533 case VMX_VMCS_GUEST_RSP:
5534 case VMX_VMCS_GUEST_RFLAGS:
5535 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
5536 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
5537 case VMX_VMCS_GUEST_CR0:
5538 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
5539 case VMX_VMCS_GUEST_CR4:
5540 case VMX_VMCS_GUEST_DR7:
5541 case VMX_VMCS32_GUEST_SYSENTER_CS:
5542 case VMX_VMCS_GUEST_SYSENTER_EIP:
5543 case VMX_VMCS_GUEST_SYSENTER_ESP:
5544 case VMX_VMCS32_GUEST_GDTR_LIMIT:
5545 case VMX_VMCS_GUEST_GDTR_BASE:
5546 case VMX_VMCS32_GUEST_IDTR_LIMIT:
5547 case VMX_VMCS_GUEST_IDTR_BASE:
5548 case VMX_VMCS16_GUEST_FIELD_CS:
5549 case VMX_VMCS32_GUEST_CS_LIMIT:
5550 case VMX_VMCS_GUEST_CS_BASE:
5551 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
5552 case VMX_VMCS16_GUEST_FIELD_DS:
5553 case VMX_VMCS32_GUEST_DS_LIMIT:
5554 case VMX_VMCS_GUEST_DS_BASE:
5555 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
5556 case VMX_VMCS16_GUEST_FIELD_ES:
5557 case VMX_VMCS32_GUEST_ES_LIMIT:
5558 case VMX_VMCS_GUEST_ES_BASE:
5559 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
5560 case VMX_VMCS16_GUEST_FIELD_FS:
5561 case VMX_VMCS32_GUEST_FS_LIMIT:
5562 case VMX_VMCS_GUEST_FS_BASE:
5563 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
5564 case VMX_VMCS16_GUEST_FIELD_GS:
5565 case VMX_VMCS32_GUEST_GS_LIMIT:
5566 case VMX_VMCS_GUEST_GS_BASE:
5567 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
5568 case VMX_VMCS16_GUEST_FIELD_SS:
5569 case VMX_VMCS32_GUEST_SS_LIMIT:
5570 case VMX_VMCS_GUEST_SS_BASE:
5571 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
5572 case VMX_VMCS16_GUEST_FIELD_LDTR:
5573 case VMX_VMCS32_GUEST_LDTR_LIMIT:
5574 case VMX_VMCS_GUEST_LDTR_BASE:
5575 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
5576 case VMX_VMCS16_GUEST_FIELD_TR:
5577 case VMX_VMCS32_GUEST_TR_LIMIT:
5578 case VMX_VMCS_GUEST_TR_BASE:
5579 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
5580 case VMX_VMCS32_RO_EXIT_REASON:
5581 case VMX_VMCS32_RO_VM_INSTR_ERROR:
5582 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
5583 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE:
5584 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
5585 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
5586 case VMX_VMCS_RO_EXIT_QUALIFICATION:
5587 case VMX_VMCS32_RO_IDT_INFO:
5588 case VMX_VMCS32_RO_IDT_ERROR_CODE:
5589 case VMX_VMCS_GUEST_CR3:
5590 case VMX_VMCS64_EXIT_GUEST_PHYS_ADDR_FULL:
5591 return true;
5592 }
5593 return false;
5594}
5595
5596
5597static bool hmR0VmxIsValidWriteField(uint32_t idxField)
5598{
5599 switch (idxField)
5600 {
5601 case VMX_VMCS_GUEST_LDTR_BASE:
5602 case VMX_VMCS_GUEST_TR_BASE:
5603 case VMX_VMCS_GUEST_GDTR_BASE:
5604 case VMX_VMCS_GUEST_IDTR_BASE:
5605 case VMX_VMCS_GUEST_SYSENTER_EIP:
5606 case VMX_VMCS_GUEST_SYSENTER_ESP:
5607 case VMX_VMCS_GUEST_CR0:
5608 case VMX_VMCS_GUEST_CR4:
5609 case VMX_VMCS_GUEST_CR3:
5610 case VMX_VMCS_GUEST_DR7:
5611 case VMX_VMCS_GUEST_RIP:
5612 case VMX_VMCS_GUEST_RSP:
5613 case VMX_VMCS_GUEST_CS_BASE:
5614 case VMX_VMCS_GUEST_DS_BASE:
5615 case VMX_VMCS_GUEST_ES_BASE:
5616 case VMX_VMCS_GUEST_FS_BASE:
5617 case VMX_VMCS_GUEST_GS_BASE:
5618 case VMX_VMCS_GUEST_SS_BASE:
5619 return true;
5620 }
5621 return false;
5622}
5623# endif /* VBOX_STRICT */
5624
5625
5626/**
5627 * Executes the specified handler in 64-bit mode.
5628 *
5629 * @returns VBox status code.
5630 * @param pVM Pointer to the VM.
5631 * @param pVCpu Pointer to the VMCPU.
5632 * @param pCtx Pointer to the guest CPU context.
5633 * @param pfnHandler Pointer to the RC handler function.
5634 * @param cbParam Number of parameters.
5635 * @param paParam Array of 32-bit parameters.
5636 */
5637VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
5638 uint32_t *paParam)
5639{
5640 int rc, rc2;
5641 PHMGLOBLCPUINFO pCpu;
5642 RTHCPHYS HCPhysCpuPage;
5643 RTHCUINTREG uOldEFlags;
5644
5645 AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
5646 Assert(pfnHandler);
5647 Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField));
5648 Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField));
5649
5650#ifdef VBOX_STRICT
5651 for (unsigned i=0;i<pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries;i++)
5652 Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i]));
5653
5654 for (unsigned i=0;i<pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries;i++)
5655 Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i]));
5656#endif
5657
5658 /* Disable interrupts. */
5659 uOldEFlags = ASMIntDisableFlags();
5660
5661#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
5662 RTCPUID idHostCpu = RTMpCpuId();
5663 CPUMR0SetLApic(pVM, idHostCpu);
5664#endif
5665
5666 pCpu = HMR0GetCurrentCpu();
5667 HCPhysCpuPage = RTR0MemObjGetPagePhysAddr(pCpu->hMemObj, 0);
5668
5669 /* Clear VMCS. Marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */
5670 VMXClearVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
5671
5672 /* Leave VMX Root Mode. */
5673 VMXDisable();
5674
5675 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5676
5677 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
5678 CPUMSetHyperEIP(pVCpu, pfnHandler);
5679 for (int i=(int)cbParam-1;i>=0;i--)
5680 CPUMPushHyper(pVCpu, paParam[i]);
5681
5682 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
5683
5684 /* Call switcher. */
5685 rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
5686 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
5687
5688 /* Make sure the VMX instructions don't cause #UD faults. */
5689 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
5690
5691 /* Enter VMX Root Mode */
5692 rc2 = VMXEnable(HCPhysCpuPage);
5693 if (RT_FAILURE(rc2))
5694 {
5695 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
5696 ASMSetFlags(uOldEFlags);
5697 return VERR_VMX_VMXON_FAILED;
5698 }
5699
5700 rc2 = VMXActivateVMCS(pVCpu->hm.s.vmx.HCPhysVmcs);
5701 AssertRC(rc2);
5702 Assert(!(ASMGetFlags() & X86_EFL_IF));
5703 ASMSetFlags(uOldEFlags);
5704 return rc;
5705}
5706#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
5707
5708
5709#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
5710/**
5711 * Executes VMWRITE.
5712 *
5713 * @returns VBox status code
5714 * @param pVCpu Pointer to the VMCPU.
5715 * @param idxField VMCS field index.
5716 * @param u64Val 16, 32 or 64-bit value.
5717 */
5718VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5719{
5720 int rc;
5721 switch (idxField)
5722 {
5723 case VMX_VMCS64_CTRL_TSC_OFFSET_FULL:
5724 case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL:
5725 case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL:
5726 case VMX_VMCS64_CTRL_MSR_BITMAP_FULL:
5727 case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL:
5728 case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL:
5729 case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL:
5730 case VMX_VMCS64_CTRL_VAPIC_PAGEADDR_FULL:
5731 case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL:
5732 case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL:
5733 case VMX_VMCS64_GUEST_PDPTE0_FULL:
5734 case VMX_VMCS64_GUEST_PDPTE1_FULL:
5735 case VMX_VMCS64_GUEST_PDPTE2_FULL:
5736 case VMX_VMCS64_GUEST_PDPTE3_FULL:
5737 case VMX_VMCS64_GUEST_DEBUGCTL_FULL:
5738 case VMX_VMCS64_GUEST_EFER_FULL:
5739 case VMX_VMCS64_CTRL_EPTP_FULL:
5740 /* These fields consist of two parts, which are both writable in 32-bit mode. */
5741 rc = VMXWriteVmcs32(idxField, u64Val);
5742 rc |= VMXWriteVmcs32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
5743 AssertRC(rc);
5744 return rc;
5745
5746 case VMX_VMCS_GUEST_LDTR_BASE:
5747 case VMX_VMCS_GUEST_TR_BASE:
5748 case VMX_VMCS_GUEST_GDTR_BASE:
5749 case VMX_VMCS_GUEST_IDTR_BASE:
5750 case VMX_VMCS_GUEST_SYSENTER_EIP:
5751 case VMX_VMCS_GUEST_SYSENTER_ESP:
5752 case VMX_VMCS_GUEST_CR0:
5753 case VMX_VMCS_GUEST_CR4:
5754 case VMX_VMCS_GUEST_CR3:
5755 case VMX_VMCS_GUEST_DR7:
5756 case VMX_VMCS_GUEST_RIP:
5757 case VMX_VMCS_GUEST_RSP:
5758 case VMX_VMCS_GUEST_CS_BASE:
5759 case VMX_VMCS_GUEST_DS_BASE:
5760 case VMX_VMCS_GUEST_ES_BASE:
5761 case VMX_VMCS_GUEST_FS_BASE:
5762 case VMX_VMCS_GUEST_GS_BASE:
5763 case VMX_VMCS_GUEST_SS_BASE:
5764 /* Queue a 64-bit value as we can't set it in 32-bit host mode. */
5765 if (u64Val >> 32ULL)
5766 rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val);
5767 else
5768 rc = VMXWriteVmcs32(idxField, (uint32_t)u64Val);
5769
5770 return rc;
5771
5772 default:
5773 AssertMsgFailed(("Unexpected field %x\n", idxField));
5774 return VERR_INVALID_PARAMETER;
5775 }
5776}
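/*
 * Note: This relies on the VMCS encoding rule that the "high" half of a 64-bit field with
 * encoding N is addressed as N + 1, so both halves can be written with 32-bit VMWRITEs.
 * A hedged usage sketch from 32-bit host code (field and value purely illustrative):
 *
 *     rc = VMXWriteVmcs64Ex(pVCpu, VMX_VMCS64_GUEST_EFER_FULL, pCtx->msrEFER);
 */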
5777
5778
5779/**
5780 * Cache VMCS writes for running 64-bit guests on 32-bit hosts.
5781 *
5782 * @param pVCpu Pointer to the VMCPU.
5783 * @param idxField VMCS field index.
5784 * @param u64Val 16, 32 or 64-bit value.
5785 */
5786VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
5787{
5788 PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache;
5789
5790 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1,
5791 ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
5792
5793 /* Make sure there are no duplicates. */
5794 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
5795 {
5796 if (pCache->Write.aField[i] == idxField)
5797 {
5798 pCache->Write.aFieldVal[i] = u64Val;
5799 return VINF_SUCCESS;
5800 }
5801 }
5802
5803 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
5804 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
5805 pCache->Write.cValidEntries++;
5806 return VINF_SUCCESS;
5807}
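/*
 * Note: Entries queued here are not written to the VMCS immediately; the cache is handed
 * to the switcher (see aParam[4] in VMXR0SwitcherStartVM64 above) and the queued fields
 * are presumably replayed with full-width VMWRITEs from 64-bit mode shortly before the
 * actual VMLAUNCH/VMRESUME.
 */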
5808
5809#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_HYBRID_32BIT_KERNEL */
5810