source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@15555

Last change on this file since 15555 was 15555, checked in by vboxsync, 2008-12-15

Clear PAE bit when in 32 bits shadow mode

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 150.8 KB
1/* $Id: HWVMXR0.cpp 15555 2008-12-15 21:46:55Z vboxsync $ */
2/** @file
3 * HWACCM VMX - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_HWACCM
27#include <VBox/hwaccm.h>
28#include "HWACCMInternal.h"
29#include <VBox/vm.h>
30#include <VBox/x86.h>
31#include <VBox/pgm.h>
32#include <VBox/pdm.h>
33#include <VBox/err.h>
34#include <VBox/log.h>
35#include <VBox/selm.h>
36#include <VBox/iom.h>
37#include <iprt/param.h>
38#include <iprt/assert.h>
39#include <iprt/asm.h>
40#include <iprt/string.h>
41#include "HWVMXR0.h"
42
43/*******************************************************************************
44* Defined Constants And Macros *
45*******************************************************************************/
46#if defined(RT_ARCH_AMD64)
47# define VMX_IS_64BIT_HOST_MODE() (true)
48#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
49# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
50#else
51# define VMX_IS_64BIT_HOST_MODE() (false)
52#endif
53
54/*******************************************************************************
55* Global Variables *
56*******************************************************************************/
57/* IO operation lookup arrays. */
58static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
59static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
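/* A minimal usage sketch (hypothetical local names, not code from this file):
 * the size encoding taken from the I/O VM-exit qualification indexes both
 * tables, yielding the access width and the mask for the partial EAX access.
 *
 *   uint32_t cbSize  = g_aIOSize[uIOSizeField];   // 0 marks an invalid encoding
 *   uint32_t uAndVal = g_aIOOpAnd[uIOSizeField];
 *   uint32_t uValue  = pCtx->eax & uAndVal;       // value an OUT would write
 */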
60
61#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
62/** See HWACCMR0A.asm. */
63extern "C" uint32_t g_fVMXIs64bitHost;
64#endif
65
66/*******************************************************************************
67* Local Functions *
68*******************************************************************************/
69static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx);
70static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu);
71static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu);
72static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu);
73static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys);
74static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr);
75static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
76#ifdef VBOX_STRICT
77static bool vmxR0IsValidReadField(uint32_t idxField);
78static bool vmxR0IsValidWriteField(uint32_t idxField);
79#endif
80
81static void VMXR0CheckError(PVM pVM, PVMCPU pVCpu, int rc)
82{
83 if (rc == VERR_VMX_GENERIC)
84 {
85 RTCCUINTREG instrError;
86
87 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
88 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
89 }
90 pVM->hwaccm.s.lLastError = rc;
91}
92
93/**
94 * Sets up and activates VT-x on the current CPU
95 *
96 * @returns VBox status code.
97 * @param pCpu CPU info struct
98 * @param pVM The VM to operate on. (can be NULL after a resume!!)
99 * @param pvPageCpu Pointer to the global cpu page
100 * @param pPageCpuPhys Physical address of the global cpu page
101 */
102VMMR0DECL(int) VMXR0EnableCpu(PHWACCM_CPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
103{
104 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
105 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
106
107#ifdef LOG_ENABLED
108 SUPR0Printf("VMXR0EnableCpu cpu %d page (%x) %x\n", pCpu->idCpu, pvPageCpu, (uint32_t)pPageCpuPhys);
109#endif
110 if (pVM)
111 {
112 /* Set revision dword at the beginning of the VMXON structure. */
113 *(uint32_t *)pvPageCpu = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
114 }
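 /* Note: bits 30:0 of MSR_IA32_VMX_BASIC hold the VMCS revision identifier; the
  * VMXON region (and every VMCS) must begin with it or VMXON/VMPTRLD will fail.
  */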
115
116 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
117 * (which can have very bad consequences!!!)
118 */
119
120 /* Make sure the VMX instructions don't cause #UD faults. */
121 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
122
123 /* Enter VMX Root Mode */
124 int rc = VMXEnable(pPageCpuPhys);
125 if (RT_FAILURE(rc))
126 {
127 if (pVM)
128 VMXR0CheckError(pVM, &pVM->aCpus[0], rc);
129 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
130 return VERR_VMX_VMXON_FAILED;
131 }
132 return VINF_SUCCESS;
133}
134
135/**
136 * Deactivates VT-x on the current CPU
137 *
138 * @returns VBox status code.
139 * @param pCpu CPU info struct
140 * @param pvPageCpu Pointer to the global cpu page
141 * @param pPageCpuPhys Physical address of the global cpu page
142 */
143VMMR0DECL(int) VMXR0DisableCpu(PHWACCM_CPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
144{
145 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
146 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
147
148 /* Leave VMX Root Mode. */
149 VMXDisable();
150
151 /* And clear the X86_CR4_VMXE bit */
152 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
153
154#ifdef LOG_ENABLED
155 SUPR0Printf("VMXR0DisableCpu cpu %d\n", pCpu->idCpu);
156#endif
157 return VINF_SUCCESS;
158}
159
160/**
161 * Does Ring-0 per VM VT-x init.
162 *
163 * @returns VBox status code.
164 * @param pVM The VM to operate on.
165 */
166VMMR0DECL(int) VMXR0InitVM(PVM pVM)
167{
168 int rc;
169
170#ifdef LOG_ENABLED
171 SUPR0Printf("VMXR0InitVM %x\n", pVM);
172#endif
173
174 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
175
176 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
177 {
178 /* Allocate one page for the virtual APIC mmio cache. */
179 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
180 AssertRC(rc);
181 if (RT_FAILURE(rc))
182 return rc;
183
184 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
185 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
186 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
187 }
188 else
189 {
190 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
191 pVM->hwaccm.s.vmx.pAPIC = 0;
192 pVM->hwaccm.s.vmx.pAPICPhys = 0;
193 }
194
195 /* Allocate the MSR bitmap if this feature is supported. */
196 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
197 {
198 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
199 AssertRC(rc);
200 if (RT_FAILURE(rc))
201 return rc;
202
203 pVM->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjMSRBitmap);
204 pVM->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
205 memset(pVM->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
206 }
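 /* The bitmap just initialized covers four 1 KB regions (read-low, read-high,
  * write-low, write-high MSR ranges); a set bit forces a VM-exit for that MSR
  * access, so filling it with 0xff intercepts everything. A hypothetical sketch
  * of letting the guest read one low-range MSR directly (not done here):
  *
  *   ASMBitClear(pVM->hwaccm.s.vmx.pMSRBitmap, MSR_IA32_SYSENTER_CS);
  */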
207
208 /* Allocate VMCSs for all guest CPUs. */
209 for (unsigned i=0;i<pVM->cCPUs;i++)
210 {
211 PVMCPU pVCpu = &pVM->aCpus[i];
212
213 pVCpu->hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
214
215 /* Allocate one page for the VM control structure (VMCS). */
216 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjVMCS, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
217 AssertRC(rc);
218 if (RT_FAILURE(rc))
219 return rc;
220
221 pVCpu->hwaccm.s.vmx.pVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjVMCS);
222 pVCpu->hwaccm.s.vmx.pVMCSPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjVMCS, 0);
223 ASMMemZero32(pVCpu->hwaccm.s.vmx.pVMCS, PAGE_SIZE);
224
225 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
226 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
227
228 /* Current guest paging mode. */
229 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
230
231#ifdef LOG_ENABLED
232 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.pVMCSPhys);
233#endif
234 }
235
236 return VINF_SUCCESS;
237}
238
239/**
240 * Does Ring-0 per VM VT-x termination.
241 *
242 * @returns VBox status code.
243 * @param pVM The VM to operate on.
244 */
245VMMR0DECL(int) VMXR0TermVM(PVM pVM)
246{
247 for (unsigned i=0;i<pVM->cCPUs;i++)
248 {
249 if (pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS != NIL_RTR0MEMOBJ)
250 {
251 RTR0MemObjFree(pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS, false);
252 pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
253 pVM->aCpus[i].hwaccm.s.vmx.pVMCS = 0;
254 pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys = 0;
255 }
256 }
257 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
258 {
259 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
260 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
261 pVM->hwaccm.s.vmx.pAPIC = 0;
262 pVM->hwaccm.s.vmx.pAPICPhys = 0;
263 }
264 if (pVM->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
265 {
266 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, false);
267 pVM->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
268 pVM->hwaccm.s.vmx.pMSRBitmap = 0;
269 pVM->hwaccm.s.vmx.pMSRBitmapPhys = 0;
270 }
271 return VINF_SUCCESS;
272}
273
274/**
275 * Sets up VT-x for the specified VM
276 *
277 * @returns VBox status code.
278 * @param pVM The VM to operate on.
279 */
280VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
281{
282 int rc = VINF_SUCCESS;
283 uint32_t val;
284
285 AssertReturn(pVM, VERR_INVALID_PARAMETER);
286
287 for (unsigned i=0;i<pVM->cCPUs;i++)
288 {
289 PVMCPU pVCpu = &pVM->aCpus[i];
290
291 Assert(pVCpu->hwaccm.s.vmx.pVMCS);
292
293 /* Set revision dword at the beginning of the VMCS structure. */
294 *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
295
296 /* Clear VM Control Structure. */
297 Log(("pVMCSPhys = %RHp\n", pVCpu->hwaccm.s.vmx.pVMCSPhys));
298 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
299 if (RT_FAILURE(rc))
300 goto vmx_end;
301
302 /* Activate the VM Control Structure. */
303 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
304 if (RT_FAILURE(rc))
305 goto vmx_end;
306
307 /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
308 * Set required bits to one and zero according to the MSR capabilities.
309 */
310 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
311 /* External and non-maskable interrupts cause VM-exits. */
312 val = val | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
313 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
314
315 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
316 AssertRC(rc);
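 /* The pattern above repeats for every control field below: the capability MSR
  * pair dictates which bits must be 1 (disallowed0) and which may be 1
  * (allowed1). A hypothetical helper expressing it (sketch only, not part of
  * this file):
  *
  *   static uint32_t vmxR0ClampControls(uint32_t fDesired, uint32_t fDisallowed0, uint32_t fAllowed1)
  *   {
  *       return (fDesired | fDisallowed0) & fAllowed1; // force required bits, drop unsupported ones
  *   }
  */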
317
318 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
319 * Set required bits to one and zero according to the MSR capabilities.
320 */
321 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
322 /* Program which events cause VM-exits and which features we want to use. */
323 val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
324 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
325 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
326 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
327 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
328
329 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
330 if (!pVM->hwaccm.s.fNestedPaging)
331 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
332 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
333 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
334
335 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error when combined with certain other exit reasons. */
336 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
337 {
338 /* CR8 reads come from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
339 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
340 Assert(pVM->hwaccm.s.vmx.pAPIC);
341 }
342 else
343 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
344 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
345
346#ifdef VBOX_WITH_VTX_MSR_BITMAPS
347 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
348 {
349 Assert(pVM->hwaccm.s.vmx.pMSRBitmapPhys);
350 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
351 }
352#endif
353
354 /* We will use the secondary control if it's present. */
355 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
356
357 /* Mask away the bits that the CPU doesn't support */
358 /** @todo make sure they don't conflict with the above requirements. */
359 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
360 pVCpu->hwaccm.s.vmx.proc_ctls = val;
361
362 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
363 AssertRC(rc);
364
365 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
366 {
367 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
368 * Set required bits to one and zero according to the MSR capabilities.
369 */
370 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
371 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
372
373#ifdef HWACCM_VTX_WITH_EPT
374 if (pVM->hwaccm.s.fNestedPaging)
375 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
376#endif /* HWACCM_VTX_WITH_EPT */
377#ifdef HWACCM_VTX_WITH_VPID
378 else
379 if (pVM->hwaccm.s.vmx.fVPID)
380 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
381#endif /* HWACCM_VTX_WITH_VPID */
382
383 /* Mask away the bits that the CPU doesn't support */
384 /** @todo make sure they don't conflict with the above requirements. */
385 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
386
387 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
388 AssertRC(rc);
389 }
390
391 /* VMX_VMCS_CTRL_CR3_TARGET_COUNT
392 * Set required bits to one and zero according to the MSR capabilities.
393 */
394 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
395 AssertRC(rc);
396
397 /* Forward all exceptions except #NM & #PF to the guest.
398 * We always need to check page faults since our shadow page table can be out of sync.
399 * And we always lazily sync the FPU & XMM state.
400 */
401
402 /** @todo Possible optimization:
403 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
404 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
405 * registers ourselves of course.
406 *
407 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
408 */
409
410 /* Don't filter page faults; all of them should cause a switch. */
411 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
412 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
413 AssertRC(rc);
414
415 /* Init TSC offset to zero. */
416 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
417 AssertRC(rc);
418
419 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
420 AssertRC(rc);
421
422 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
423 AssertRC(rc);
424
425 /* Set the MSR bitmap address. */
426 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
427 {
428 /* Optional */
429 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVM->hwaccm.s.vmx.pMSRBitmapPhys);
430 AssertRC(rc);
431 }
432
433 /* Clear MSR controls. */
434 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, 0);
435 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, 0);
436 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, 0);
437 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
438 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, 0);
439 AssertRC(rc);
440
441 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
442 {
443 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
444 /* Optional */
445 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
446 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
447 AssertRC(rc);
448 }
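 /* With the TPR shadow active, guest CR8 reads are satisfied from the virtual
  * APIC page programmed above and CR8 writes only trap when they drop the TPR
  * below VMX_VMCS_CTRL_TPR_THRESHOLD. Illustrative update of the threshold to
  * the priority class of the highest pending interrupt (hypothetical variable):
  *
  *   rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, u8PendingIrq >> 4);
  */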
449
450 /* Set link pointer to -1. Not currently used. */
451 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
452 AssertRC(rc);
453
454 /* Clear the VM control structure: this marks it inactive, clears implementation-specific data and writes the VMCS data back to memory. */
455 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
456 AssertRC(rc);
457
458 /* Configure the VMCS read cache. */
459 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
460
461 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
462 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
463 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
464 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
465 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
466 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
467 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
468 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
469 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
470 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
471 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
472 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
473 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
474 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
475 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
476 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
477
478 VMX_SETUP_SELREG(ES, pCache);
479 VMX_SETUP_SELREG(SS, pCache);
480 VMX_SETUP_SELREG(CS, pCache);
481 VMX_SETUP_SELREG(DS, pCache);
482 VMX_SETUP_SELREG(FS, pCache);
483 VMX_SETUP_SELREG(GS, pCache);
484 VMX_SETUP_SELREG(LDTR, pCache);
485 VMX_SETUP_SELREG(TR, pCache);
486
487 /* Status code VMCS reads. */
488 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
489 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
490 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
491 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
492 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
493 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
494 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
495 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
496 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
497
498 if (pVM->hwaccm.s.fNestedPaging)
499 {
500 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
501 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
502 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
503 }
504 else
505 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
506 } /* for each VMCPU */
507
508 /* Choose the right TLB setup function. */
509 if (pVM->hwaccm.s.fNestedPaging)
510 {
511 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBEPT;
512
513 /* Default values for flushing. */
514 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
515 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
516
517 /* If the capabilities specify we can do more, then make use of it. */
518 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_INDIV)
519 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
520 else
521 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
522 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
523
524 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
525 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
526 }
527#ifdef HWACCM_VTX_WITH_VPID
528 else
529 if (pVM->hwaccm.s.vmx.fVPID)
530 {
531 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBVPID;
532
533 /* Default values for flushing. */
534 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
535 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
536
537 /* If the capabilities specify we can do more, then make use of it. */
538 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV)
539 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
540 else
541 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
542 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
543
544 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
545 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
546 }
547#endif /* HWACCM_VTX_WITH_VPID */
548 else
549 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBDummy;
550
551vmx_end:
552 VMXR0CheckError(pVM, &pVM->aCpus[0], rc);
553 return rc;
554}
555
556
557/**
558 * Injects an event (trap or external interrupt)
559 *
560 * @returns VBox status code.
561 * @param pVM The VM to operate on.
562 * @param pVCpu The VMCPU to operate on.
563 * @param pCtx CPU Context
564 * @param intInfo VMX interrupt info
565 * @param cbInstr Opcode length of faulting instruction
566 * @param errCode Error code (optional)
567 */
568static int VMXR0InjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
569{
570 int rc;
571 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
572
573#ifdef VBOX_STRICT
574 if (iGate == 0xE)
575 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%08x intInfo=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
576 else
577 if (iGate < 0x20)
578 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode));
579 else
580 {
581 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
582 Assert(!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS));
583 Assert(pCtx->eflags.u32 & X86_EFL_IF);
584 }
585#endif
586
587#ifdef HWACCM_VMX_EMULATE_REALMODE
588 if (CPUMIsGuestInRealModeEx(pCtx))
589 {
590 RTGCPHYS GCPhysHandler;
591 uint16_t offset, ip;
592 RTSEL sel;
593
594 /* Injecting events doesn't work right with real mode emulation.
595 * (#GP if we try to inject external hardware interrupts)
596 * Inject the interrupt or trap directly instead.
597 */
598 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
599
600 /* Check if the interrupt handler is present. */
601 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
602 {
603 Log(("IDT cbIdt violation\n"));
604 if (iGate != X86_XCPT_DF)
605 {
606 RTGCUINTPTR intInfo;
607
608 intInfo = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
609 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
610 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
611 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
612
613 return VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0 /* no error code according to the Intel docs */);
614 }
615 Log(("Triple fault -> reset the VM!\n"));
616 return VINF_EM_RESET;
617 }
618 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
619 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
620 || iGate == 4)
621 {
622 ip = pCtx->ip + cbInstr;
623 }
624 else
625 ip = pCtx->ip;
626
627 /* Read the selector:offset pair of the interrupt handler. */
628 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
629 PGMPhysRead(pVM, GCPhysHandler, &offset, sizeof(offset));
630 PGMPhysRead(pVM, GCPhysHandler + 2, &sel, sizeof(sel));
631
632 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
633
634 /* Construct the stack frame. */
635 /** @todo should check stack limit. */
636 pCtx->sp -= 2;
637 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
638 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t));
639 pCtx->sp -= 2;
640 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
641 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t));
642 pCtx->sp -= 2;
643 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
644 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip));
645
646 /* Update the CPU state for executing the handler. */
647 pCtx->rip = offset;
648 pCtx->cs = sel;
649 pCtx->csHid.u64Base = sel << 4;
650 pCtx->eflags.u &= ~(X86_EFL_IF|X86_EFL_TF|X86_EFL_RF|X86_EFL_AC);
651
652 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
653 return VINF_SUCCESS;
654 }
655#endif /* HWACCM_VMX_EMULATE_REALMODE */
656
657 /* Set event injection state. */
658 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
659
660 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
661 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
662
663 AssertRC(rc);
664 return rc;
665}
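/* Layout of the interruption-information word assembled by VMXR0InjectEvent
 * (per the Intel SDM): bits 7:0 vector, bits 10:8 type, bit 11 deliver error
 * code, bit 31 valid. For example, a #GP with error code would be encoded as
 * (illustrative only):
 *
 *   uint32_t intInfo = X86_XCPT_GP
 *                    | (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT)
 *                    | VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID
 *                    | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
 */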
666
667
668/**
669 * Checks for pending guest interrupts and injects them
670 *
671 * @returns VBox status code.
672 * @param pVM The VM to operate on.
673 * @param pVCpu The VMCPU to operate on.
674 * @param pCtx CPU Context
675 */
676static int VMXR0CheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
677{
678 int rc;
679
680 /* Dispatch any pending interrupts. (injected before, but a VM exit occurred prematurely) */
681 if (pVCpu->hwaccm.s.Event.fPending)
682 {
683 Log(("Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
684 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
685 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
686 AssertRC(rc);
687
688 pVCpu->hwaccm.s.Event.fPending = false;
689 return VINF_SUCCESS;
690 }
691
692 if (pVM->hwaccm.s.fInjectNMI)
693 {
694 RTGCUINTPTR intInfo;
695
696 intInfo = X86_XCPT_NMI;
697 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
698 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
699
700 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
701 AssertRC(rc);
702
703 pVM->hwaccm.s.fInjectNMI = false;
704 return VINF_SUCCESS;
705 }
706
707 /* When external interrupts are pending, we should exit the VM when IF is set. */
708 if ( !TRPMHasTrap(pVM)
709 && VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)))
710 {
711 if (!(pCtx->eflags.u32 & X86_EFL_IF))
712 {
713 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
714 {
715 LogFlow(("Enable irq window exit!\n"));
716 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
717 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
718 AssertRC(rc);
719 }
720 /* else nothing to do but wait */
721 }
722 else
723 if (!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
724 {
725 uint8_t u8Interrupt;
726
727 rc = PDMGetInterrupt(pVM, &u8Interrupt);
728 Log(("Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
729 if (RT_SUCCESS(rc))
730 {
731 rc = TRPMAssertTrap(pVM, u8Interrupt, TRPM_HARDWARE_INT);
732 AssertRC(rc);
733 }
734 else
735 {
736 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
737 Assert(!VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)));
738 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
739 /* Just continue */
740 }
741 }
742 else
743 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
744 }
745
746#ifdef VBOX_STRICT
747 if (TRPMHasTrap(pVM))
748 {
749 uint8_t u8Vector;
750 rc = TRPMQueryTrapAll(pVM, &u8Vector, 0, 0, 0);
751 AssertRC(rc);
752 }
753#endif
754
755 if ( pCtx->eflags.u32 & X86_EFL_IF
756 && (!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
757 && TRPMHasTrap(pVM)
758 )
759 {
760 uint8_t u8Vector;
761 int rc;
762 TRPMEVENT enmType;
763 RTGCUINTPTR intInfo;
764 RTGCUINT errCode;
765
766 /* If a new event is pending, then dispatch it now. */
767 rc = TRPMQueryTrapAll(pVM, &u8Vector, &enmType, &errCode, 0);
768 AssertRC(rc);
769 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
770 Assert(enmType != TRPM_SOFTWARE_INT);
771
772 /* Clear the pending trap. */
773 rc = TRPMResetTrap(pVM);
774 AssertRC(rc);
775
776 intInfo = u8Vector;
777 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
778
779 if (enmType == TRPM_TRAP)
780 {
781 switch (u8Vector) {
782 case 8:
783 case 10:
784 case 11:
785 case 12:
786 case 13:
787 case 14:
788 case 17:
789 /* Valid error codes. */
790 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
791 break;
792 default:
793 break;
794 }
795 if (u8Vector == X86_XCPT_BP || u8Vector == X86_XCPT_OF)
796 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
797 else
798 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
799 }
800 else
801 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
802
803 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
804 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
805 AssertRC(rc);
806 } /* if (interrupts can be dispatched) */
807
808 return VINF_SUCCESS;
809}
810
811/**
812 * Saves the host state
813 *
814 * @returns VBox status code.
815 * @param pVM The VM to operate on.
816 * @param pVCpu The VMCPU to operate on.
817 */
818VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
819{
820 int rc = VINF_SUCCESS;
821
822 /*
823 * Host CPU Context
824 */
825 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
826 {
827 RTIDTR idtr;
828 RTGDTR gdtr;
829 RTSEL SelTR;
830 PX86DESCHC pDesc;
831 uintptr_t trBase;
832 RTSEL cs;
833 RTSEL ss;
834 uint64_t cr3;
835
836 /* Control registers */
837 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
838#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
839 if (VMX_IS_64BIT_HOST_MODE())
840 {
841 cr3 = hwaccmR0Get64bitCR3();
842 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
843 }
844 else
845#endif
846 {
847 cr3 = ASMGetCR3();
848 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
849 }
850 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
851 AssertRC(rc);
852 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
853 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
854 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
855
856 /* Selector registers. */
857#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
858 if (VMX_IS_64BIT_HOST_MODE())
859 {
860 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
861 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
862 }
863 else
864 {
865 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
866 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
867 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
868 }
869#else
870 cs = ASMGetCS();
871 ss = ASMGetSS();
872#endif
873 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
874 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
875 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
876 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
877 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
878 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
879#if HC_ARCH_BITS == 32
880 if (!VMX_IS_64BIT_HOST_MODE())
881 {
882 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
883 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
884 }
885#endif
886 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
887 SelTR = ASMGetTR();
888 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
889 AssertRC(rc);
890 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
891 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
892 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
893 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
894 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
895 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
896 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
897
898 /* GDTR & IDTR */
899#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
900 if (VMX_IS_64BIT_HOST_MODE())
901 {
902 X86XDTR64 gdtr64, idtr64;
903 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
904 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
905 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
906 AssertRC(rc);
907 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
908 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
909 gdtr.cbGdt = gdtr64.cb;
910 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
911 }
912 else
913#endif
914 {
915 ASMGetGDTR(&gdtr);
916 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
917 ASMGetIDTR(&idtr);
918 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
919 AssertRC(rc);
920 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
921 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
922 }
923
924
925 /* Save the base address of the TR selector. */
926 if (SelTR > gdtr.cbGdt)
927 {
928 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
929 return VERR_VMX_INVALID_HOST_STATE;
930 }
931
932#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
933 if (VMX_IS_64BIT_HOST_MODE())
934 {
935 pDesc = &((PX86DESCHC)gdtr.pGdt)[SelTR >> X86_SEL_SHIFT_HC]; /// ????
936 uint64_t trBase64 = X86DESC64_BASE(*(PX86DESC64)pDesc);
937 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
938 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
939 AssertRC(rc);
940 }
941 else
942#endif
943 {
944 pDesc = &((PX86DESCHC)gdtr.pGdt)[SelTR >> X86_SEL_SHIFT_HC];
945#if HC_ARCH_BITS == 64
946 trBase = X86DESC64_BASE(*pDesc);
947#else
948 trBase = X86DESC_BASE(*pDesc);
949#endif
950 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
951 AssertRC(rc);
952 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
953 }
954
955 /* FS and GS base. */
956#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
957 if (VMX_IS_64BIT_HOST_MODE())
958 {
959 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
960 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
961 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
962 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
963 }
964#endif
965 AssertRC(rc);
966
967 /* Sysenter MSRs. */
968 /** @todo expensive!! */
969 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
970 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
971#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
972 if (VMX_IS_64BIT_HOST_MODE())
973 {
974 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
975 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
976 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
977 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
978 }
979 else
980 {
981 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
982 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
983 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
984 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
985 }
986#elif HC_ARCH_BITS == 32
987 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
988 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
989 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
990 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
991#else
992 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
993 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
994 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
995 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
996#endif
997 AssertRC(rc);
998
999#if 0 /* @todo deal with 32/64 */
1000 /* Restore the host EFER - on CPUs that support it. */
1001 if (pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1 & VMX_VMCS_CTRL_EXIT_CONTROLS_LOAD_HOST_EFER_MSR)
1002 {
1003 uint64_t msrEFER = ASMRdMsr(MSR_IA32_EFER);
1004 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FIELD_EFER_FULL, msrEFER);
1005 AssertRC(rc);
1006 }
1007#endif
1008 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1009 }
1010 return rc;
1011}
1012
1013/**
1014 * Prefetch the 4 PDPT pointers (PAE and nested paging only)
1015 *
1016 * @param pVM The VM to operate on.
1017 * @param pVCpu The VMCPU to operate on.
1018 * @param pCtx Guest context
1019 */
1020static void vmxR0PrefetchPAEPdptrs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1021{
1022 if (CPUMIsGuestInPAEModeEx(pCtx))
1023 {
1024 X86PDPE Pdpe;
1025
1026 for (unsigned i=0;i<4;i++)
1027 {
1028 Pdpe = PGMGstGetPaePDPtr(pVM, i);
1029 int rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL + i*2, Pdpe.u);
1030 AssertRC(rc);
1031 }
1032 }
1033}
1034
1035/**
1036 * Update the exception bitmap according to the current CPU state
1037 *
1038 * @param pVM The VM to operate on.
1039 * @param pVCpu The VMCPU to operate on.
1040 * @param pCtx Guest context
1041 */
1042static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1043{
1044 uint32_t u32TrapMask;
1045 Assert(pCtx);
1046
1047 u32TrapMask = HWACCM_VMX_TRAP_MASK;
1048#ifndef DEBUG
1049 if (pVM->hwaccm.s.fNestedPaging)
1050 u32TrapMask &= ~RT_BIT(X86_XCPT_PF); /* no longer need to intercept #PF. */
1051#endif
1052
1053 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
1054 if ( CPUMIsGuestFPUStateActive(pVCpu) == true
1055 && !(pCtx->cr0 & X86_CR0_NE)
1056 && !pVCpu->hwaccm.s.fFPUOldStyleOverride)
1057 {
1058 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1059 pVCpu->hwaccm.s.fFPUOldStyleOverride = true;
1060 }
1061
1062#ifdef DEBUG
1063 /* Intercept X86_XCPT_DB if stepping is enabled */
1064 if (DBGFIsStepping(pVM))
1065 u32TrapMask |= RT_BIT(X86_XCPT_DB);
1066#endif
1067
1068#ifdef VBOX_STRICT
1069 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1070#endif
1071
1072# ifdef HWACCM_VMX_EMULATE_REALMODE
1073 /* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
1074 if (CPUMIsGuestInRealModeEx(pCtx))
1075 u32TrapMask |= HWACCM_VMX_TRAP_MASK_REALMODE;
1076# endif /* HWACCM_VMX_EMULATE_REALMODE */
1077
1078 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1079 AssertRC(rc);
1080}
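/* Each bit in the exception bitmap written above corresponds to an exception
 * vector; a set bit makes that exception cause a VM-exit instead of being
 * delivered through the guest IDT. E.g. (illustrative only):
 *
 *   u32TrapMask |= RT_BIT(X86_XCPT_UD);  // would trap every invalid opcode
 */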
1081
1082/**
1083 * Loads the guest state
1084 *
1085 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1086 *
1087 * @returns VBox status code.
1088 * @param pVM The VM to operate on.
1089 * @param pVCpu The VMCPU to operate on.
1090 * @param pCtx Guest context
1091 */
1092VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1093{
1094 int rc = VINF_SUCCESS;
1095 RTGCUINTPTR val;
1096 X86EFLAGS eflags;
1097
1098 /* VMX_VMCS_CTRL_ENTRY_CONTROLS
1099 * Set required bits to one and zero according to the MSR capabilities.
1100 */
1101 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1102 /* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1103 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1104#if 0 /* @todo deal with 32/64 */
1105 /* Required for the EFER write below, not supported on all CPUs. */
1106 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_GUEST_EFER_MSR;
1107#endif
1108 /* 64 bits guest mode? */
1109 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1110 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1111 /* else Must be zero when AMD64 is not available. */
1112
1113 /* Mask away the bits that the CPU doesn't support */
1114 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1115 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1116 AssertRC(rc);
1117
1118 /* VMX_VMCS_CTRL_EXIT_CONTROLS
1119 * Set required bits to one and zero according to the MSR capabilities.
1120 */
1121 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1122
1123 /* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1124#if 0 /* @todo deal with 32/64 */
1125 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG | VMX_VMCS_CTRL_EXIT_CONTROLS_LOAD_HOST_EFER_MSR;
1126#else
1127 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1128#endif
1129
1130#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1131 if (VMX_IS_64BIT_HOST_MODE())
1132 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1133 /* else: Must be zero when AMD64 is not available. */
1134#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1135 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1136 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1137 else
1138 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1139#endif
1140 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1141 /* Don't acknowledge external interrupts on VM-exit. */
1142 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1143 AssertRC(rc);
1144
1145 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1146 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1147 {
1148#ifdef HWACCM_VMX_EMULATE_REALMODE
1149 PGMMODE enmGuestMode = PGMGetGuestMode(pVM);
1150 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1151 {
1152 /* Correct weird requirements for switching to protected mode. */
1153 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1154 && enmGuestMode >= PGMMODE_PROTECTED)
1155 {
1156 /* DPL of all hidden selector registers must match the current CPL (0). */
1157 pCtx->csHid.Attr.n.u2Dpl = 0;
1158 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1159
1160 pCtx->dsHid.Attr.n.u2Dpl = 0;
1161 pCtx->esHid.Attr.n.u2Dpl = 0;
1162 pCtx->fsHid.Attr.n.u2Dpl = 0;
1163 pCtx->gsHid.Attr.n.u2Dpl = 0;
1164 pCtx->ssHid.Attr.n.u2Dpl = 0;
1165 }
1166 else
1167 /* Switching from protected mode to real mode. */
1168 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode >= PGMMODE_PROTECTED
1169 && enmGuestMode == PGMMODE_REAL)
1170 {
1171 /* The limit must also be adjusted. */
1172 pCtx->csHid.u32Limit &= 0xffff;
1173 pCtx->dsHid.u32Limit &= 0xffff;
1174 pCtx->esHid.u32Limit &= 0xffff;
1175 pCtx->fsHid.u32Limit &= 0xffff;
1176 pCtx->gsHid.u32Limit &= 0xffff;
1177 pCtx->ssHid.u32Limit &= 0xffff;
1178
1179 Assert(pCtx->csHid.u64Base <= 0xfffff);
1180 Assert(pCtx->dsHid.u64Base <= 0xfffff);
1181 Assert(pCtx->esHid.u64Base <= 0xfffff);
1182 Assert(pCtx->fsHid.u64Base <= 0xfffff);
1183 Assert(pCtx->gsHid.u64Base <= 0xfffff);
1184 }
1185 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1186 }
1187 else
1188 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1189 if ( CPUMIsGuestInRealModeEx(pCtx)
1190 && pCtx->csHid.u64Base == 0xffff0000)
1191 {
1192 pCtx->csHid.u64Base = 0xf0000;
1193 pCtx->cs = 0xf000;
1194 }
1195#endif /* HWACCM_VMX_EMULATE_REALMODE */
1196
1197 VMX_WRITE_SELREG(ES, es);
1198 AssertRC(rc);
1199
1200 VMX_WRITE_SELREG(CS, cs);
1201 AssertRC(rc);
1202
1203 VMX_WRITE_SELREG(SS, ss);
1204 AssertRC(rc);
1205
1206 VMX_WRITE_SELREG(DS, ds);
1207 AssertRC(rc);
1208
1209 /* The base values in the hidden fs & gs registers are not in sync with the msrs; they are cut to 32 bits. */
1210 VMX_WRITE_SELREG(FS, fs);
1211 AssertRC(rc);
1212
1213 VMX_WRITE_SELREG(GS, gs);
1214 AssertRC(rc);
1215 }
1216
1217 /* Guest CPU context: LDTR. */
1218 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1219 {
1220 if (pCtx->ldtr == 0)
1221 {
1222 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1223 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1224 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1225 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1226 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1227 }
1228 else
1229 {
1230 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr);
1231 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1232 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1233 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1234 }
1235 AssertRC(rc);
1236 }
1237 /* Guest CPU context: TR. */
1238 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1239 {
1240#ifdef HWACCM_VMX_EMULATE_REALMODE
1241 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1242 if (CPUMIsGuestInRealModeEx(pCtx))
1243 {
1244 RTGCPHYS GCPhys;
1245
1246 /* We convert it here every time as pci regions could be reconfigured. */
1247 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1248 AssertRC(rc);
1249
1250 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1251 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1252 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1253
1254 X86DESCATTR attr;
1255
1256 attr.u = 0;
1257 attr.n.u1Present = 1;
1258 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1259 val = attr.u;
1260 }
1261 else
1262#endif /* HWACCM_VMX_EMULATE_REALMODE */
1263 {
1264 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr);
1265 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1266 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->trHid.u64Base);
1267
1268 val = pCtx->trHid.Attr.u;
1269
1270 /* The TSS selector must be busy. */
1271 if ((val & 0xF) == X86_SEL_TYPE_SYS_286_TSS_AVAIL)
1272 val = (val & ~0xF) | X86_SEL_TYPE_SYS_286_TSS_BUSY;
1273 else
1274 /* Default even if no TR selector has been set (otherwise vmlaunch will fail!) */
1275 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1276
1277 }
1278 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1279 AssertRC(rc);
1280 }
1281 /* Guest CPU context: GDTR. */
1282 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1283 {
1284 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1285 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1286 AssertRC(rc);
1287 }
1288 /* Guest CPU context: IDTR. */
1289 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1290 {
1291 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1292 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1293 AssertRC(rc);
1294 }
1295
1296 /*
1297 * Sysenter MSRs (unconditional)
1298 */
1299 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1300 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1301 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1302 AssertRC(rc);
1303
1304 /* Control registers */
1305 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1306 {
1307 val = pCtx->cr0;
1308 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1309 Log2(("Guest CR0-shadow %08x\n", val));
1310 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1311 {
1312 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1313 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1314 }
1315 else
1316 {
1317 /** @todo check if we support the old style mess correctly. */
1318 if (!(val & X86_CR0_NE))
1319 Log(("Forcing X86_CR0_NE!!!\n"));
1320
1321 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1322 }
1323 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1324 val |= X86_CR0_PE | X86_CR0_PG;
1325 if (pVM->hwaccm.s.fNestedPaging)
1326 {
1327 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1328 {
1329 /* Disable cr3 read/write monitoring as we don't need it for EPT. */
1330 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1331 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1332 }
1333 else
1334 {
1335 /* Reenable cr3 read/write monitoring as our identity mapped page table is active. */
1336 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1337 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1338 }
1339 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1340 AssertRC(rc);
1341 }
1342 else
1343 {
1344 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1345 val |= X86_CR0_WP;
1346 }
1347
1348 /* Always enable caching. */
1349 val &= ~(X86_CR0_CD|X86_CR0_NW);
1350
1351 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1352 Log2(("Guest CR0 %08x\n", val));
1353 /* CR0 flags owned by the host; if the guest attempts to change them, then
1354 * the VM will exit.
1355 */
1356 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1357 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1358 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1359 | X86_CR0_TS
1360 | X86_CR0_ET /* Bit not restored during VM-exit! */
1361 | X86_CR0_CD /* Bit not restored during VM-exit! */
1362 | X86_CR0_NW /* Bit not restored during VM-exit! */
1363 | X86_CR0_NE
1364 | X86_CR0_MP;
1365 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1366
1367 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1368 Log2(("Guest CR0-mask %08x\n", val));
1369 AssertRC(rc);
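 /* For every bit set in this mask the guest reads the value from the CR0 read
  * shadow written above, and attempts to change it cause a VM-exit; cleared
  * bits are read from and written to the real guest CR0. Roughly:
  *
  *   cr0AsSeenByGuest = (shadow & cr0_mask) | (guestCr0 & ~cr0_mask);
  *
  * (illustrative pseudo-expression, not code from this file)
  */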
1370 }
1371 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1372 {
1373 /* CR4 */
1374 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1375 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1376 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1377 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1378
1379 if (!pVM->hwaccm.s.fNestedPaging)
1380 {
1381 switch(pVCpu->hwaccm.s.enmShadowMode)
1382 {
1383 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1384 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1385 case PGMMODE_32_BIT: /* 32-bit paging. */
1386 val &= ~X86_CR4_PAE;
1387 break;
1388
1389 case PGMMODE_PAE: /* PAE paging. */
1390 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1391 /** @todo use normal 32 bits paging */
1392 val |= X86_CR4_PAE;
1393 break;
1394
1395 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1396 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1397#ifdef VBOX_ENABLE_64_BITS_GUESTS
1398 break;
1399#else
1400 AssertFailed();
1401 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1402#endif
1403 default: /* shut up gcc */
1404 AssertFailed();
1405 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1406 }
1407 }
1408 else
1409 if (!CPUMIsGuestInPagedProtectedModeEx(pCtx))
1410 {
1411 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1412 val |= X86_CR4_PSE;
1413 /* Our identity mapping is a 32 bits page directory. */
1414 val &= ~X86_CR4_PAE;
1415 }
1416
1417#ifdef HWACCM_VMX_EMULATE_REALMODE
1418 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1419 if (CPUMIsGuestInRealModeEx(pCtx))
1420 val |= X86_CR4_VME;
1421#endif /* HWACCM_VMX_EMULATE_REALMODE */
1422
1423 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1424 Log2(("Guest CR4 %08x\n", val));
1425 /* CR4 flags owned by the host; if the guest attempts to change them, then
1426 * the VM will exit.
1427 */
1428 val = 0
1429#ifdef HWACCM_VMX_EMULATE_REALMODE
1430 | X86_CR4_VME
1431#endif
1432 | X86_CR4_PAE
1433 | X86_CR4_PGE
1434 | X86_CR4_PSE
1435 | X86_CR4_VMXE;
1436 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1437
1438 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1439 Log2(("Guest CR4-mask %08x\n", val));
1440 AssertRC(rc);
1441 }
1442
1443 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1444 {
1445 if (pVM->hwaccm.s.fNestedPaging)
1446 {
1447 AssertMsg( PGMGetEPTCR3(pVM) == PGMGetHyperCR3(pVM)
1448 || VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL),
1449 ("%RHp vs %RHp\n", PGMGetEPTCR3(pVM), PGMGetHyperCR3(pVM)));
1450 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetEPTCR3(pVM);
1451
1452 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1453 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
1454 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1455 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
1456
1457 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1458 AssertRC(rc);
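 /* EPTP layout (Intel SDM): bits 2:0 memory type (6 = write-back, set above),
  * bits 5:3 page-walk length minus one, bits 63:12 physical address of the EPT
  * PML4 table; hence the memtype/walk-length constants OR'ed into the
  * page-aligned root above.
  */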
1459
1460 if (!CPUMIsGuestInPagedProtectedModeEx(pCtx))
1461 {
1462 RTGCPHYS GCPhys;
1463
1464 /* We convert it here every time as pci regions could be reconfigured. */
1465 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1466 AssertRC(rc);
1467
1468 /* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will
1469 * take care of the translation to host physical addresses.
1470 */
1471 val = GCPhys;
1472 }
1473 else
1474 {
1475 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1476 val = pCtx->cr3;
1477 /* Prefetch the four PDPT entries in PAE mode. */
1478 vmxR0PrefetchPAEPdptrs(pVM, pVCpu, pCtx);
1479 }
1480 }
1481 else
1482 {
1483 val = PGMGetHyperCR3(pVM);
1484 Assert(val || VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL));
1485 }
1486
1487 /* Save our shadow CR3 register. */
1488 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
1489 AssertRC(rc);
1490 }
1491
1492 /* Debug registers. */
1493 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1494 {
1495 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
1496 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
1497
1498 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
1499 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
1500 pCtx->dr[7] |= 0x400; /* must be one */
1501
1502 /* Resync DR7 */
1503 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
1504 AssertRC(rc);
1505
1506 /* Sync the debug state now if any breakpoint is armed. */
1507 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
1508 && !CPUMIsGuestDebugStateActive(pVM)
1509 && !DBGFIsStepping(pVM))
1510 {
1511 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
1512
1513 /* Disable drx move intercepts. */
1514 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
1515 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1516 AssertRC(rc);
1517
1518 /* Save the host and load the guest debug state. */
1519 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1520 AssertRC(rc);
1521 }
1522
1523 /* IA32_DEBUGCTL MSR. */
1524 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
1525 AssertRC(rc);
1526
1527 /** @todo do we really ever need this? */
1528 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
1529 AssertRC(rc);
1530 }
1531
1532 /* EIP, ESP and EFLAGS */
1533 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1534 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1535 AssertRC(rc);
1536
1537 /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */
1538 eflags = pCtx->eflags;
1539 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1540 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1541
1542#ifdef HWACCM_VMX_EMULATE_REALMODE
1543 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1544 if (CPUMIsGuestInRealModeEx(pCtx))
1545 {
1546 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1547
1548 eflags.Bits.u1VM = 1;
1549 eflags.Bits.u2IOPL = 3;
1550 }
1551#endif /* HWACCM_VMX_EMULATE_REALMODE */
1552 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1553 AssertRC(rc);
1554
1555 /* TSC offset. */
1556 uint64_t u64TSCOffset;
1557
1558 if (TMCpuTickCanUseRealTSC(pVM, &u64TSCOffset))
1559 {
1560 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET. */
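 /* With RDTSC exiting cleared below, a guest RDTSC reads the host TSC plus u64TSCOffset (provided TSC
  * offsetting is enabled in the processor controls). */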
1561 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, u64TSCOffset);
1562 AssertRC(rc);
1563
1564 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1565 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1566 AssertRC(rc);
1567 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
1568 }
1569 else
1570 {
1571 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1572 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1573 AssertRC(rc);
1574 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
1575 }
1576
1577 /* 64-bit guest mode? */
1578 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1579 {
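 /* If 64-bit guests aren't enabled at all this fails outright; otherwise pick the world-switch routine:
  * a 32-bit host without the hybrid kernel has to go through the 64-bit switcher stub, everything else
  * uses the native 64-bit VM-entry code. */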
1580#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
1581 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1582#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1583 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
1584#else
1585# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1586 if (!pVM->hwaccm.s.fAllow64BitGuests)
1587 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1588# endif
1589 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
1590#endif
1591 /* Unconditionally update these as wrmsr might have changed them. */
1592 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base);
1593 AssertRC(rc);
1594 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base);
1595 AssertRC(rc);
1596 }
1597 else
1598 {
1599 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
1600 }
1601
1602#if 0 /* @todo deal with 32/64 */
1603 /* Unconditionally update the guest EFER - on CPUs that support it. */
1604 if (pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1 & VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_GUEST_EFER_MSR)
1605 {
1606 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_EFER_FULL, pCtx->msrEFER);
1607 AssertRC(rc);
1608 }
1609#endif
1610
1611 vmxR0UpdateExceptionBitmap(pVM, pVCpu, pCtx);
1612
1613 /* Done. */
1614 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
1615
1616 return rc;
1617}
1618
1619/**
1620 * Syncs back the guest state
1621 *
1622 * @returns VBox status code.
1623 * @param pVM The VM to operate on.
1624 * @param pVCpu The VMCPU to operate on.
1625 * @param pCtx Guest context
1626 */
1627DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1628{
1629 RTGCUINTREG val, valShadow;
1630 RTGCUINTPTR uInterruptState;
1631 int rc;
1632
1633 /* Let's first sync back eip, esp, and eflags. */
1634 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
1635 AssertRC(rc);
1636 pCtx->rip = val;
1637 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
1638 AssertRC(rc);
1639 pCtx->rsp = val;
1640 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
1641 AssertRC(rc);
1642 pCtx->eflags.u32 = val;
1643
1644 /* Take care of instruction fusing (sti, mov ss) */
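 /* Interruptibility-state values: 1 = blocking by STI, 2 = blocking by MOV SS; hence the <= 2 assertion below. */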
1645 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
1646 uInterruptState = val;
1647 if (uInterruptState != 0)
1648 {
1649 Assert(uInterruptState <= 2); /* only sti & mov ss */
1650 Log(("uInterruptState %x eip=%RGv\n", uInterruptState, pCtx->rip));
1651 EMSetInhibitInterruptsPC(pVM, pCtx->rip);
1652 }
1653 else
1654 VM_FF_CLEAR(pVM, VM_FF_INHIBIT_INTERRUPTS);
1655
1656 /* Control registers. */
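 /* Bits covered by cr0_mask/cr4_mask are owned by us, so the guest-visible value for those bits comes from
  * the read shadow; the remaining bits are taken from the real VMCS field. */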
1657 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
1658 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
1659 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
1660 CPUMSetGuestCR0(pVM, val);
1661
1662 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
1663 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
1664 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
1665 CPUMSetGuestCR4(pVM, val);
1666
1667 /* Note: no reason to sync back the CRx registers. They can't be changed by the guest. */
1668 /* Note: only in the nested paging case can CR3 & CR4 be changed by the guest. */
1669 if ( pVM->hwaccm.s.fNestedPaging
1670 && CPUMIsGuestInPagedProtectedModeEx(pCtx))
1671 {
1672 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
1673
1674 /* Can be updated behind our back in the nested paging case. */
1675 CPUMSetGuestCR2(pVM, pCache->cr2);
1676
1677 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
1678
1679 if (val != pCtx->cr3)
1680 {
1681 CPUMSetGuestCR3(pVM, val);
1682 PGMUpdateCR3(pVM, val);
1683 }
1684 /* Prefetch the four PDPT entries in PAE mode. */
1685 vmxR0PrefetchPAEPdptrs(pVM, pVCpu, pCtx);
1686 }
1687
1688 /* Sync back DR7 here. */
1689 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
1690 pCtx->dr[7] = val;
1691
1692 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1693 VMX_READ_SELREG(ES, es);
1694 VMX_READ_SELREG(SS, ss);
1695 VMX_READ_SELREG(CS, cs);
1696 VMX_READ_SELREG(DS, ds);
1697 VMX_READ_SELREG(FS, fs);
1698 VMX_READ_SELREG(GS, gs);
1699
1700 /*
1701 * System MSRs
1702 */
1703 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
1704 pCtx->SysEnter.cs = val;
1705 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
1706 pCtx->SysEnter.eip = val;
1707 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
1708 pCtx->SysEnter.esp = val;
1709
1710 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
1711 VMX_READ_SELREG(LDTR, ldtr);
1712
1713 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
1714 pCtx->gdtr.cbGdt = val;
1715 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
1716 pCtx->gdtr.pGdt = val;
1717
1718 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
1719 pCtx->idtr.cbIdt = val;
1720 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
1721 pCtx->idtr.pIdt = val;
1722
1723#ifdef HWACCM_VMX_EMULATE_REALMODE
1724 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1725 if (CPUMIsGuestInRealModeEx(pCtx))
1726 {
1727 /* Hide our emulation flags */
1728 pCtx->eflags.Bits.u1VM = 0;
1729 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
1730
1731 /* Force a TR resync every time in case we switch modes. */
1732 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
1733 }
1734 else
1735#endif /* HWACCM_VMX_EMULATE_REALMODE */
1736 {
1737 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
1738 VMX_READ_SELREG(TR, tr);
1739 }
1740 return VINF_SUCCESS;
1741}
1742
1743/**
1744 * Dummy placeholder
1745 *
1746 * @param pVM The VM to operate on.
1747 * @param pVCpu The VMCPU to operate on.
1748 */
1749static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu)
1750{
1751 NOREF(pVM);
1752 NOREF(pVCpu);
1753 return;
1754}
1755
1756/**
1757 * Setup the tagged TLB for EPT
1758 *
1759 * @returns VBox status code.
1760 * @param pVM The VM to operate on.
1761 * @param pVCpu The VMCPU to operate on.
1762 */
1763static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu)
1764{
1765 PHWACCM_CPUINFO pCpu;
1766
1767 Assert(pVM->hwaccm.s.fNestedPaging);
1768 Assert(!pVM->hwaccm.s.vmx.fVPID);
1769
1770 /* Deal with tagged TLBs if VPID or EPT is supported. */
1771 pCpu = HWACCMR0GetCurrentCpu();
1772 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
1773 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
1774 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
1775 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
1776 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1777 {
1778 /* Force a TLB flush on VM entry. */
1779 pVCpu->hwaccm.s.fForceTLBFlush = true;
1780 }
1781 else
1782 Assert(!pCpu->fFlushTLB);
1783
1784 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
1785 pCpu->fFlushTLB = false;
1786
1787 if (pVCpu->hwaccm.s.fForceTLBFlush)
1788 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
1789
1790#ifdef VBOX_WITH_STATISTICS
1791 if (pVCpu->hwaccm.s.fForceTLBFlush)
1792 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
1793 else
1794 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
1795#endif
1796}
1797
1798#ifdef HWACCM_VTX_WITH_VPID
1799/**
1800 * Setup the tagged TLB for VPID
1801 *
1802 * @returns VBox status code.
1803 * @param pVM The VM to operate on.
1804 * @param pVCpu The VMCPU to operate on.
1805 */
1806static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu)
1807{
1808 PHWACCM_CPUINFO pCpu;
1809
1810 Assert(pVM->hwaccm.s.vmx.fVPID);
1811 Assert(!pVM->hwaccm.s.fNestedPaging);
1812
1813 /* Deal with tagged TLBs if VPID or EPT is supported. */
1814 pCpu = HWACCMR0GetCurrentCpu();
1815 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
1816 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
1817 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
1818 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
1819 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1820 {
1821 /* Force a TLB flush on VM entry. */
1822 pVCpu->hwaccm.s.fForceTLBFlush = true;
1823 }
1824 else
1825 Assert(!pCpu->fFlushTLB);
1826
1827 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
1828
1829 /* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without actually flushing the whole TLB (which is expensive). */
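 /* Each host CPU hands out ASIDs sequentially. If a fresh ASID is available, switching to it replaces the
  * flush entirely; only when the pool wraps (or this CPU's TLB was flushed by someone else) do we restart
  * at ASID 1, bump the per-cpu flush generation and do a real flush below. */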
1830 if (pVCpu->hwaccm.s.fForceTLBFlush)
1831 {
1832 if ( ++pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID
1833 || pCpu->fFlushTLB)
1834 {
1835 pCpu->fFlushTLB = false;
1836 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
1837 pCpu->cTLBFlushes++;
1838 }
1839 else
1840 {
1841 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
1842 pVCpu->hwaccm.s.fForceTLBFlush = false;
1843 }
1844
1845 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
1846 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
1847 }
1848 else
1849 {
1850 Assert(!pCpu->fFlushTLB);
1851
1852 if (!pCpu->uCurrentASID || !pVCpu->hwaccm.s.uCurrentASID)
1853 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID = 1;
1854 }
1855 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
1856 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
1857 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
1858
1859 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
1860 AssertRC(rc);
1861
1862 if (pVCpu->hwaccm.s.fForceTLBFlush)
1863 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
1864
1865#ifdef VBOX_WITH_STATISTICS
1866 if (pVCpu->hwaccm.s.fForceTLBFlush)
1867 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
1868 else
1869 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
1870#endif
1871}
1872#endif /* HWACCM_VTX_WITH_VPID */
1873
1874/**
1875 * Runs guest code in a VT-x VM.
1876 *
1877 * @returns VBox status code.
1878 * @param pVM The VM to operate on.
1879 * @param pVCpu The VMCPU to operate on.
1880 * @param pCtx Guest context
1881 */
1882VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1883{
1884 int rc = VINF_SUCCESS;
1885 RTGCUINTREG val;
1886 RTGCUINTREG exitReason, instrError, cbInstr;
1887 RTGCUINTPTR exitQualification;
1888 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
1889 RTGCUINTPTR errCode, instrInfo;
1890 bool fSyncTPR = false;
1891 PHWACCM_CPUINFO pCpu = 0;
1892 unsigned cResume = 0;
1893#ifdef VBOX_STRICT
1894 RTCPUID idCpuCheck;
1895#endif
1896#ifdef VBOX_WITH_STATISTICS
1897 bool fStatEntryStarted = true;
1898 bool fStatExit2Started = false;
1899#endif
1900
1901 Log2(("\nE"));
1902
1903 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
1904
1905#ifdef VBOX_STRICT
1906 {
1907 RTCCUINTREG val;
1908
1909 rc = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
1910 AssertRC(rc);
1911 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val));
1912
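 /* Sanity check the fixed bits: every bit set in 'disallowed0' must be 1 in the control and every bit
  * clear in 'allowed1' must be 0, as dictated by the corresponding VMX capability MSR. */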
1913 /* allowed zero */
1914 if ((val & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
1915 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
1916
1917 /* allowed one */
1918 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
1919 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
1920
1921 rc = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
1922 AssertRC(rc);
1923 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val));
1924
1925 /* These must be set according to the MSR, but may legitimately be cleared when EPT is active; set them here so the checks below still apply. */
1926 if (pVM->hwaccm.s.fNestedPaging)
1927 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
1928 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1929 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1930
1931 /* allowed zero */
1932 if ((val & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
1933 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
1934
1935 /* allowed one */
1936 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
1937 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
1938
1939 rc = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
1940 AssertRC(rc);
1941 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val));
1942
1943 /* allowed zero */
1944 if ((val & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
1945 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
1946
1947 /* allowed one */
1948 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
1949 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
1950
1951 rc = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
1952 AssertRC(rc);
1953 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val));
1954
1955 /* allowed zero */
1956 if ((val & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
1957 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
1958
1959 /* allowed one */
1960 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
1961 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
1962 }
1963#endif
1964
1965 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
1966 */
1967ResumeExecution:
1968 STAM_STATS({
1969 if (fStatExit2Started) { STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = false; }
1970 if (!fStatEntryStarted) { STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = true; }
1971 });
1972 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
1973 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
1974 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
1975 Assert(!HWACCMR0SuspendPending());
1976
1977 /* Safety precaution; looping for too long here can have a very bad effect on the host */
1978 if (++cResume > HWACCM_MAX_RESUME_LOOPS)
1979 {
1980 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
1981 rc = VINF_EM_RAW_INTERRUPT;
1982 goto end;
1983 }
1984
1985 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
1986 if (VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
1987 {
1988 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVM)));
1989 if (pCtx->rip != EMGetInhibitInterruptsPC(pVM))
1990 {
1991 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
1992 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
1993 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
1994 * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
1995 */
1996 VM_FF_CLEAR(pVM, VM_FF_INHIBIT_INTERRUPTS);
1997 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
1998 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
1999 AssertRC(rc);
2000 }
2001 }
2002 else
2003 {
2004 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2005 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2006 AssertRC(rc);
2007 }
2008
2009 /* Check for pending actions that force us to go back to ring 3. */
2010 if (VM_FF_ISPENDING(pVM, VM_FF_TO_R3 | VM_FF_TIMER))
2011 {
2012 VM_FF_CLEAR(pVM, VM_FF_TO_R3);
2013 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2014 rc = VINF_EM_RAW_TO_R3;
2015 goto end;
2016 }
2017 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2018 if (VM_FF_ISPENDING(pVM, VM_FF_REQUEST))
2019 {
2020 rc = VINF_EM_PENDING_REQUEST;
2021 goto end;
2022 }
2023
2024 /* When external interrupts are pending, we should exit the VM when IF is set. */
2025 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
2026 rc = VMXR0CheckPendingInterrupt(pVM, pVCpu, pCtx);
2027 if (RT_FAILURE(rc))
2028 goto end;
2029
2030 /** @todo check timers?? */
2031
2032 /* TPR caching using CR8 is only available in 64-bit mode. */
2033 /* Note the 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), but that appears to be missing in Intel CPUs. */
2034 /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!! */
2035 /**
2036 * @todo reduce overhead
2037 */
2038 if ( (pCtx->msrEFER & MSR_K6_EFER_LMA)
2039 && pVM->hwaccm.s.vmx.pAPIC)
2040 {
2041 /* TPR caching in CR8 */
2042 uint8_t u8TPR;
2043 bool fPending;
2044
2045 int rc = PDMApicGetTPR(pVM, &u8TPR, &fPending);
2046 AssertRC(rc);
2047 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
2048 pVM->hwaccm.s.vmx.pAPIC[0x80] = u8TPR << 4; /* bits 7-4 contain the task priority */
2049
2050 /* Two options here:
2051 * - external interrupt pending, but masked by the TPR value.
2052 * -> a CR8 update that lowers the current TPR value should cause an exit
2053 * - no pending interrupts
2054 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
2055 */
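 /* The TPR threshold causes an exit when a CR8 write drops the TPR below it; programming 0 when nothing
  * is pending effectively disables that exit. */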
2056 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? u8TPR : 0);
2057 AssertRC(rc);
2058
2059 /* Always sync back the TPR; we should optimize this though */ /** @todo optimize TPR sync. */
2060 fSyncTPR = true;
2061 }
2062
2063#if defined(HWACCM_VTX_WITH_EPT) && defined(LOG_ENABLED)
2064 if ( pVM->hwaccm.s.fNestedPaging
2065# ifdef HWACCM_VTX_WITH_VPID
2066 || pVM->hwaccm.s.vmx.fVPID
2067# endif /* HWACCM_VTX_WITH_VPID */
2068 )
2069 {
2070 pCpu = HWACCMR0GetCurrentCpu();
2071 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2072 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2073 {
2074 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
2075 Log(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
2076 else
2077 Log(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2078 }
2079 if (pCpu->fFlushTLB)
2080 Log(("Force TLB flush: first time cpu %d is used -> flush\n", pCpu->idCpu));
2081 else
2082 if (pVCpu->hwaccm.s.fForceTLBFlush)
2083 LogFlow(("Manual TLB flush\n"));
2084 }
2085#endif
2086#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2087 PGMDynMapFlushAutoSet(pVCpu);
2088#endif
2089
2090 /*
2091 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
2092 * (until the actual world switch)
2093 */
2094#ifdef VBOX_STRICT
2095 idCpuCheck = RTMpCpuId();
2096#endif
2097#ifdef LOG_LOGGING
2098 VMMR0LogFlushDisable(pVCpu);
2099#endif
2100 /* Save the host state first. */
2101 rc = VMXR0SaveHostState(pVM, pVCpu);
2102 if (rc != VINF_SUCCESS)
2103 goto end;
2104 /* Load the guest state */
2105 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
2106 if (rc != VINF_SUCCESS)
2107 goto end;
2108
2109 /* Deal with tagged TLB setup and invalidation. */
2110 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
2111
2112 /* Non-register state Guest Context */
2113 /** @todo change me according to cpu state */
2114 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
2115 AssertRC(rc);
2116
2117 STAM_STATS({ STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = false; });
2118
2119 /* Manual save and restore:
2120 * - General purpose registers except RIP, RSP
2121 *
2122 * Trashed:
2123 * - CR2 (we don't care)
2124 * - LDTR (reset to 0)
2125 * - DRx (presumably not changed at all)
2126 * - DR7 (reset to 0x400)
2127 * - EFLAGS (reset to RT_BIT(1); not relevant)
2128 *
2129 */
2130
2131 /* All done! Let's start VM execution. */
2132 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatInGC, z);
2133#ifdef VBOX_STRICT
2134 Assert(idCpuCheck == RTMpCpuId());
2135#endif
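 /* pfnStartVM performs VMLAUNCH on the first entry and VMRESUME afterwards (fResumeVM), returning only
  * when a VM-exit or a failed VM-entry brings us back here. */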
2136 TMNotifyStartOfExecution(pVM);
2137 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
2138 TMNotifyEndOfExecution(pVM);
2139
2140 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
2141
2142 /* In case we execute a goto ResumeExecution later on. */
2143 pVCpu->hwaccm.s.fResumeVM = true;
2144 pVCpu->hwaccm.s.fForceTLBFlush = false;
2145
2146 /*
2147 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2148 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
2149 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2150 */
2151
2152 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatInGC, z);
2153 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit1, v);
2154
2155 if (rc != VINF_SUCCESS)
2156 {
2157 VMXR0ReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
2158 goto end;
2159 }
2160 /* Success. Query the guest state and figure out what has happened. */
2161
2162 /* Investigate why there was a VM-exit. */
2163 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
2164 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
2165
2166 exitReason &= 0xffff; /* bits 0-15 contain the exit code. */
2167 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
2168 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
2169 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
2170 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
2171 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
2172 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
2173 rc |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
2174 AssertRC(rc);
2175
2176 /* Sync back the guest state */
2177 rc = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
2178 AssertRC(rc);
2179
2180 /* Note! NOW IT'S SAFE FOR LOGGING! */
2181#ifdef LOG_LOGGING
2182 VMMR0LogFlushEnable(pVCpu);
2183#endif
2184 Log2(("Raw exit reason %08x\n", exitReason));
2185
2186 /* Check if an injected event was interrupted prematurely. */
2187 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
2188 AssertRC(rc);
2189 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
2190 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2191 /* Ignore 'int xx' as they'll be restarted anyway. */
2192 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
2193 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2194 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2195 {
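 /* The exit interrupted the delivery of this event (the IDT-vectoring info is valid), so remember it and
  * re-inject it on the next VM entry. */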
2196 pVCpu->hwaccm.s.Event.fPending = true;
2197 /* Error code present? */
2198 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
2199 {
2200 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
2201 AssertRC(rc);
2202 pVCpu->hwaccm.s.Event.errCode = val;
2203 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
2204 }
2205 else
2206 {
2207 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2208 pVCpu->hwaccm.s.Event.errCode = 0;
2209 }
2210 }
2211#ifdef VBOX_STRICT
2212 else
2213 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2214 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2215 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2216 {
2217 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2218 }
2219
2220 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
2221 HWACCMDumpRegs(pVM, pCtx);
2222#endif
2223
2224 Log2(("E%d", exitReason));
2225 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
2226 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
2227 Log2(("Interruption error code %d\n", (uint32_t)errCode));
2228 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
2229 Log2(("New EIP=%RGv\n", (RTGCPTR)pCtx->rip));
2230
2231 if (fSyncTPR)
2232 {
2233 rc = PDMApicSetTPR(pVM, pVM->hwaccm.s.vmx.pAPIC[0x80] >> 4);
2234 AssertRC(rc);
2235 }
2236
2237 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, v);
2238 STAM_STATS({ STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = true; });
2239
2240 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
2241 switch (exitReason)
2242 {
2243 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2244 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2245 {
2246 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
2247
2248 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2249 {
2250 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
2251 /* External interrupt; leave to allow it to be dispatched again. */
2252 rc = VINF_EM_RAW_INTERRUPT;
2253 break;
2254 }
2255 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2256 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
2257 {
2258 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
2259 /* External interrupt; leave to allow it to be dispatched again. */
2260 rc = VINF_EM_RAW_INTERRUPT;
2261 break;
2262
2263 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
2264 AssertFailed(); /* can't come here; fails the first check. */
2265 break;
2266
2267 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
2268 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
2269 Assert(vector == 1 || vector == 3 || vector == 4);
2270 /* no break */
2271 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
2272 Log2(("Hardware/software interrupt %d\n", vector));
2273 switch (vector)
2274 {
2275 case X86_XCPT_NM:
2276 {
2277 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
2278
2279 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
2280 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
2281 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
2282 if (rc == VINF_SUCCESS)
2283 {
2284 Assert(CPUMIsGuestFPUStateActive(pVCpu));
2285
2286 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
2287
2288 /* Continue execution. */
2289 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2290
2291 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2292 goto ResumeExecution;
2293 }
2294
2295 Log(("Forward #NM fault to the guest\n"));
2296 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
2297 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
2298 AssertRC(rc);
2299 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2300 goto ResumeExecution;
2301 }
2302
2303 case X86_XCPT_PF: /* Page fault */
2304 {
2305#ifdef DEBUG
2306 if (pVM->hwaccm.s.fNestedPaging)
2307 { /* A genuine pagefault.
2308 * Forward the trap to the guest by injecting the exception and resuming execution.
2309 */
2310 Log(("Guest page fault at %RGv cr2=%RGv error code %x rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
2311
2312 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
2313
2314 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2315
2316 /* Now we must update CR2. */
2317 pCtx->cr2 = exitQualification;
2318 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2319 AssertRC(rc);
2320
2321 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2322 goto ResumeExecution;
2323 }
2324#endif
2325 Assert(!pVM->hwaccm.s.fNestedPaging);
2326
2327 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
2328 /* Exit qualification contains the linear address of the page fault. */
2329 TRPMAssertTrap(pVM, X86_XCPT_PF, TRPM_TRAP);
2330 TRPMSetErrorCode(pVM, errCode);
2331 TRPMSetFaultAddress(pVM, exitQualification);
2332
2333 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
2334 rc = PGMTrap0eHandler(pVM, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
2335 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
2336 if (rc == VINF_SUCCESS)
2337 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2338 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
2339 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
2340
2341 TRPMResetTrap(pVM);
2342
2343 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2344 goto ResumeExecution;
2345 }
2346 else
2347 if (rc == VINF_EM_RAW_GUEST_TRAP)
2348 { /* A genuine pagefault.
2349 * Forward the trap to the guest by injecting the exception and resuming execution.
2350 */
2351 Log2(("Forward page fault to the guest\n"));
2352
2353 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2354 /* The error code might have been changed. */
2355 errCode = TRPMGetErrorCode(pVM);
2356
2357 TRPMResetTrap(pVM);
2358
2359 /* Now we must update CR2. */
2360 pCtx->cr2 = exitQualification;
2361 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2362 AssertRC(rc);
2363
2364 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2365 goto ResumeExecution;
2366 }
2367#ifdef VBOX_STRICT
2368 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2369 Log2(("PGMTrap0eHandler failed with %d\n", rc));
2370#endif
2371 /* Need to go back to the recompiler to emulate the instruction. */
2372 TRPMResetTrap(pVM);
2373 break;
2374 }
2375
2376 case X86_XCPT_MF: /* Floating point exception. */
2377 {
2378 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
2379 if (!(pCtx->cr0 & X86_CR0_NE))
2380 {
2381 /* old style FPU error reporting needs some extra work. */
2382 /** @todo don't fall back to the recompiler, but do it manually. */
2383 rc = VINF_EM_RAW_EMULATE_INSTR;
2384 break;
2385 }
2386 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
2387 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2388 AssertRC(rc);
2389
2390 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2391 goto ResumeExecution;
2392 }
2393
2394 case X86_XCPT_DB: /* Debug exception. */
2395 {
2396 uint64_t uDR6;
2397
2398 /* DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
2399 *
2400 * Exit qualification bits:
2401 * 3:0 B0-B3 which breakpoint condition was met
2402 * 12:4 Reserved (0)
2403 * 13 BD - debug register access detected
2404 * 14 BS - single step execution or branch taken
2405 * 63:15 Reserved (0)
2406 */
2407 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
2408
2409 /* Note that we don't support guest and host-initiated debugging at the same time. */
2410 Assert(DBGFIsStepping(pVM) || CPUMIsGuestInRealModeEx(pCtx));
2411
2412 uDR6 = X86_DR6_INIT_VAL;
2413 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
2414 rc = DBGFR0Trap01Handler(pVM, CPUMCTX2CORE(pCtx), uDR6);
2415 if (rc == VINF_EM_RAW_GUEST_TRAP)
2416 {
2417 /** @todo this isn't working, but we'll never get here normally. */
2418
2419 /* Update DR6 here. */
2420 pCtx->dr[6] = uDR6;
2421
2422 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2423 pCtx->dr[7] &= ~X86_DR7_GD;
2424
2425 /* Paranoia. */
2426 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2427 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2428 pCtx->dr[7] |= 0x400; /* must be one */
2429
2430 /* Resync DR7 */
2431 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
2432 AssertRC(rc);
2433
2434 Log(("Trap %x (debug) at %RGv exit qualification %RX64\n", vector, (RTGCPTR)pCtx->rip, exitQualification));
2435 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2436 AssertRC(rc);
2437
2438 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2439 goto ResumeExecution;
2440 }
2441 /* Return to ring 3 to deal with the debug exit code. */
2442 break;
2443 }
2444
2445 case X86_XCPT_GP: /* General protection fault exception. */
2446 {
2447 uint32_t cbSize;
2448
2449 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
2450#ifdef VBOX_STRICT
2451 if (!CPUMIsGuestInRealModeEx(pCtx))
2452 {
2453 Log(("Trap %x at %04X:%RGv errorCode=%x\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode));
2454 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2455 AssertRC(rc);
2456 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2457 goto ResumeExecution;
2458 }
2459#endif
2460 Assert(CPUMIsGuestInRealModeEx(pCtx));
2461
2462 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %RGv\n", (RTGCPTR)pCtx->rip));
2463 rc = EMInterpretInstruction(pVM, CPUMCTX2CORE(pCtx), 0, &cbSize);
2464 if (rc == VINF_SUCCESS)
2465 {
2466 /* EIP has been updated already. */
2467
2468 /* lidt and lgdt can end up here; in the future CRx changes may as well. Just reload the whole context to be done with it. */
2469 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
2470
2471 /* Only resume if successful. */
2472 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2473 goto ResumeExecution;
2474 }
2475 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", rc));
2476 break;
2477 }
2478
2479#ifdef VBOX_STRICT
2480 case X86_XCPT_DE: /* Divide error. */
2481 case X86_XCPT_UD: /* Invalid opcode exception. */
2482 case X86_XCPT_SS: /* Stack segment exception. */
2483 case X86_XCPT_NP: /* Segment not present exception. */
2484 {
2485 switch(vector)
2486 {
2487 case X86_XCPT_DE:
2488 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
2489 break;
2490 case X86_XCPT_UD:
2491 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
2492 break;
2493 case X86_XCPT_SS:
2494 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
2495 break;
2496 case X86_XCPT_NP:
2497 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
2498 break;
2499 }
2500
2501 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
2502 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2503 AssertRC(rc);
2504
2505 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2506 goto ResumeExecution;
2507 }
2508#endif
2509 default:
2510#ifdef HWACCM_VMX_EMULATE_REALMODE
2511 if (CPUMIsGuestInRealModeEx(pCtx))
2512 {
2513 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode));
2514 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2515 AssertRC(rc);
2516
2517 /* Go back to ring 3 in case of a triple fault. */
2518 if ( vector == X86_XCPT_DF
2519 && rc == VINF_EM_RESET)
2520 break;
2521
2522 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2523 goto ResumeExecution;
2524 }
2525#endif
2526 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
2527 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
2528 break;
2529 } /* switch (vector) */
2530
2531 break;
2532
2533 default:
2534 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
2535 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
2536 break;
2537 }
2538
2539 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2540 break;
2541 }
2542
2543 case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
2544 {
2545 RTGCPHYS GCPhys;
2546
2547 Assert(pVM->hwaccm.s.fNestedPaging);
2548
2549 rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
2550 AssertRC(rc);
2551 Assert(((exitQualification >> 7) & 3) != 2);
2552
2553 /* Determine the kind of violation. */
2554 errCode = 0;
2555 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
2556 errCode |= X86_TRAP_PF_ID;
2557
2558 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
2559 errCode |= X86_TRAP_PF_RW;
2560
2561 /* If the page is present, then it's a page level protection fault. */
2562 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
2563 errCode |= X86_TRAP_PF_P;
2564
2565 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
2566
2567 /* GCPhys contains the guest physical address of the page fault. */
2568 TRPMAssertTrap(pVM, X86_XCPT_PF, TRPM_TRAP);
2569 TRPMSetErrorCode(pVM, errCode);
2570 TRPMSetFaultAddress(pVM, GCPhys);
2571
2572 /* Handle the pagefault trap for the nested shadow table. */
2573 rc = PGMR0Trap0eHandlerNestedPaging(pVM, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
2574 Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
2575 if (rc == VINF_SUCCESS)
2576 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2577 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
2578 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
2579
2580 TRPMResetTrap(pVM);
2581
2582 goto ResumeExecution;
2583 }
2584
2585#ifdef VBOX_STRICT
2586 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2587 LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", rc));
2588#endif
2589 /* Need to go back to the recompiler to emulate the instruction. */
2590 TRPMResetTrap(pVM);
2591 break;
2592 }
2593
2594 case VMX_EXIT_EPT_MISCONFIG:
2595 {
2596 RTGCPHYS GCPhys;
2597
2598 Assert(pVM->hwaccm.s.fNestedPaging);
2599
2600 rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
2601 AssertRC(rc);
2602
2603 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
2604 break;
2605 }
2606
2607 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
2608 /* Clear VM-exit on IF=1 change. */
2609 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
2610 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
2611 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2612 AssertRC(rc);
2613 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
2614 goto ResumeExecution; /* we check for pending guest interrupts there */
2615
2616 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
2617 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
2618 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
2619 /* Skip instruction and continue directly. */
2620 pCtx->rip += cbInstr;
2621 /* Continue execution.*/
2622 goto ResumeExecution;
2623
2624 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
2625 {
2626 Log2(("VMX: Cpuid %x\n", pCtx->eax));
2627 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
2628 rc = EMInterpretCpuId(pVM, CPUMCTX2CORE(pCtx));
2629 if (rc == VINF_SUCCESS)
2630 {
2631 /* Update EIP and continue execution. */
2632 Assert(cbInstr == 2);
2633 pCtx->rip += cbInstr;
2634 goto ResumeExecution;
2635 }
2636 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", rc));
2637 rc = VINF_EM_RAW_EMULATE_INSTR;
2638 break;
2639 }
2640
2641 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
2642 {
2643 Log2(("VMX: Rdtsc\n"));
2644 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
2645 rc = EMInterpretRdtsc(pVM, CPUMCTX2CORE(pCtx));
2646 if (rc == VINF_SUCCESS)
2647 {
2648 /* Update EIP and continue execution. */
2649 Assert(cbInstr == 2);
2650 pCtx->rip += cbInstr;
2651 goto ResumeExecution;
2652 }
2653 AssertMsgFailed(("EMU: rdtsc failed with %Rrc\n", rc));
2654 rc = VINF_EM_RAW_EMULATE_INSTR;
2655 break;
2656 }
2657
2658 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVLPG. */
2659 {
2660 Log2(("VMX: invlpg\n"));
2661 Assert(!pVM->hwaccm.s.fNestedPaging);
2662
2663 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
2664 rc = EMInterpretInvlpg(pVM, CPUMCTX2CORE(pCtx), exitQualification);
2665 if (rc == VINF_SUCCESS)
2666 {
2667 /* Update EIP and continue execution. */
2668 pCtx->rip += cbInstr;
2669 goto ResumeExecution;
2670 }
2671 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, rc));
2672 break;
2673 }
2674
2675 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
2676 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
2677 {
2678 uint32_t cbSize;
2679
2680 /* Note: the Intel manual claims there's a REX version of RDMSR that's slightly different, so we play it safe by completely disassembling the instruction. */
2681 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
2682 rc = EMInterpretInstruction(pVM, CPUMCTX2CORE(pCtx), 0, &cbSize);
2683 if (rc == VINF_SUCCESS)
2684 {
2685 /* EIP has been updated already. */
2686
2687 /* Only resume if successful. */
2688 goto ResumeExecution;
2689 }
2690 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", rc));
2691 break;
2692 }
2693
2694 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
2695 {
2696 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
2697
2698 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
2699 {
2700 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
2701 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
2702 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
2703 rc = EMInterpretCRxWrite(pVM, CPUMCTX2CORE(pCtx),
2704 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
2705 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
2706
2707 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
2708 {
2709 case 0:
2710 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
2711 break;
2712 case 2:
2713 break;
2714 case 3:
2715 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
2716 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
2717 break;
2718 case 4:
2719 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
2720 break;
2721 case 8:
2722 /* CR8 contains the APIC TPR */
2723 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
2724 break;
2725
2726 default:
2727 AssertFailed();
2728 break;
2729 }
2730 /* Check if a sync operation is pending. */
2731 if ( rc == VINF_SUCCESS /* don't bother if we are going to ring 3 anyway */
2732 && VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL))
2733 {
2734 rc = PGMSyncCR3(pVM, CPUMGetGuestCR0(pVM), CPUMGetGuestCR3(pVM), CPUMGetGuestCR4(pVM), VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
2735 AssertRC(rc);
2736 }
2737 break;
2738
2739 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
2740 Log2(("VMX: mov x, crx\n"));
2741 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
2742
2743 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
2744
2745 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
2746 Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
2747
2748 rc = EMInterpretCRxRead(pVM, CPUMCTX2CORE(pCtx),
2749 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
2750 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
2751 break;
2752
2753 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
2754 Log2(("VMX: clts\n"));
2755 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
2756 rc = EMInterpretCLTS(pVM);
2757 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2758 break;
2759
2760 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
2761 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
2762 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
2763 rc = EMInterpretLMSW(pVM, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
2764 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2765 break;
2766 }
2767
2768 /* Update EIP if no error occurred. */
2769 if (RT_SUCCESS(rc))
2770 pCtx->rip += cbInstr;
2771
2772 if (rc == VINF_SUCCESS)
2773 {
2774 /* Only resume if successful. */
2775 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
2776 goto ResumeExecution;
2777 }
2778 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2779 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
2780 break;
2781 }
2782
2783 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
2784 {
2785 if (!DBGFIsStepping(pVM))
2786 {
2787 /* Disable drx move intercepts. */
2788 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2789 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2790 AssertRC(rc);
2791
2792 /* Save the host and load the guest debug state. */
2793 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
2794 AssertRC(rc);
2795
2796#ifdef VBOX_WITH_STATISTICS
2797 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
2798 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
2799 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
2800 else
2801 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
2802#endif
2803
2804 goto ResumeExecution;
2805 }
2806
2807 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
2808 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
2809 {
2810 Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
2811 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
2812 rc = EMInterpretDRxWrite(pVM, CPUMCTX2CORE(pCtx),
2813 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
2814 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
2815 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
2816 Log2(("DR7=%08x\n", pCtx->dr[7]));
2817 }
2818 else
2819 {
2820 Log2(("VMX: mov x, drx\n"));
2821 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
2822 rc = EMInterpretDRxRead(pVM, CPUMCTX2CORE(pCtx),
2823 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
2824 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
2825 }
2826 /* Update EIP if no error occurred. */
2827 if (RT_SUCCESS(rc))
2828 pCtx->rip += cbInstr;
2829
2830 if (rc == VINF_SUCCESS)
2831 {
2832 /* Only resume if successful. */
2833 goto ResumeExecution;
2834 }
2835 Assert(rc == VERR_EM_INTERPRETER);
2836 break;
2837 }
2838
2839 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
2840 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
2841 {
2842 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
2843 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
2844 uint32_t uPort;
2845 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
2846
2847 /** @todo necessary to make the distinction? */
2848 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
2849 {
2850 uPort = pCtx->edx & 0xffff;
2851 }
2852 else
2853 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
2854
2855 /* paranoia */
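 /* The width field encodes the access size minus one, so only 0 (byte), 1 (word) and 3 (dword) are valid;
  * anything else is punted back to ring 3. */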
2856 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4))
2857 {
2858 rc = fIOWrite ? VINF_IOM_HC_IOPORT_WRITE : VINF_IOM_HC_IOPORT_READ;
2859 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
2860 break;
2861 }
2862
2863 uint32_t cbSize = g_aIOSize[uIOWidth];
2864
2865 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
2866 {
2867 /* ins/outs */
2868 DISCPUSTATE Cpu;
2869
2870 /* Disassemble manually to deal with segment prefixes. */
2871 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
2872 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
2873 rc = EMInterpretDisasOne(pVM, CPUMCTX2CORE(pCtx), &Cpu, NULL);
2874 if (rc == VINF_SUCCESS)
2875 {
2876 if (fIOWrite)
2877 {
2878 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
2879 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
2880 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, Cpu.prefix, cbSize);
2881 }
2882 else
2883 {
2884 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
2885 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
2886 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, Cpu.prefix, cbSize);
2887 }
2888 }
2889 else
2890 rc = VINF_EM_RAW_EMULATE_INSTR;
2891 }
2892 else
2893 {
2894 /* normal in/out */
2895 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
2896
2897 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
2898
2899 if (fIOWrite)
2900 {
2901 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
2902 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
2903 }
2904 else
2905 {
2906 uint32_t u32Val = 0;
2907
2908 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
2909 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
2910 if (IOM_SUCCESS(rc))
2911 {
2912 /* Write back to the EAX register. */
2913 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
2914 }
2915 }
2916 }
2917 /*
2918 * Handle the I/O return codes.
2919 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
2920 */
2921 if (IOM_SUCCESS(rc))
2922 {
2923 /* Update EIP and continue execution. */
2924 pCtx->rip += cbInstr;
2925 if (RT_LIKELY(rc == VINF_SUCCESS))
2926 {
2927 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
2928 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
2929 {
2930 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
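 /* Check each of DR0-3: the breakpoint must be enabled (local or global), configured for I/O accesses,
  * and its address range must overlap the port that was just accessed. */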
2931 for (unsigned i=0;i<4;i++)
2932 {
2933 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
2934
2935 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
2936 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
2937 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
2938 {
2939 uint64_t uDR6;
2940
2941 Assert(CPUMIsGuestDebugStateActive(pVM));
2942
2943 uDR6 = ASMGetDR6();
2944
2945 /* Clear all breakpoint status flags and set the one we just hit. */
2946 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
2947 uDR6 |= (uint64_t)RT_BIT(i);
2948
2949 /* Note: AMD64 Architecture Programmer's Manual 13.1:
2950 * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared by software after
2951 * the contents have been read.
2952 */
2953 ASMSetDR6(uDR6);
2954
2955 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2956 pCtx->dr[7] &= ~X86_DR7_GD;
2957
2958 /* Paranoia. */
2959 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2960 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2961 pCtx->dr[7] |= 0x400; /* must be one */
2962
2963 /* Resync DR7 */
2964 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
2965 AssertRC(rc);
2966
2967 /* Construct inject info. */
2968 intInfo = X86_XCPT_DB;
2969 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
2970 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
2971
2972 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
2973 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
2974 AssertRC(rc);
2975
2976 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
2977 goto ResumeExecution;
2978 }
2979 }
2980 }
2981
2982 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
2983 goto ResumeExecution;
2984 }
2985 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
2986 break;
2987 }
2988
2989#ifdef VBOX_STRICT
2990 if (rc == VINF_IOM_HC_IOPORT_READ)
2991 Assert(!fIOWrite);
2992 else if (rc == VINF_IOM_HC_IOPORT_WRITE)
2993 Assert(fIOWrite);
2994 else
2995 AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc));
2996#endif
2997 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
2998 break;
2999 }
3000
3001 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
3002 LogFlow(("VMX_EXIT_TPR\n"));
3003 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
3004 goto ResumeExecution;
3005
3006 default:
3007 /* The rest is handled after syncing the entire CPU state. */
3008 break;
3009 }
3010
3011 /* Note: the guest state isn't entirely synced back at this stage. */
3012
3013 /* Investigate why there was a VM-exit. (part 2) */
3014 switch (exitReason)
3015 {
3016 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3017 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3018 case VMX_EXIT_EPT_VIOLATION:
3019 /* Already handled above. */
3020 break;
3021
3022 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
3023 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
3024 break;
3025
3026 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
3027 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
3028 rc = VINF_EM_RAW_INTERRUPT;
3029 AssertFailed(); /* Can't happen. Yet. */
3030 break;
3031
3032 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
3033 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
3034 rc = VINF_EM_RAW_INTERRUPT;
3035 AssertFailed(); /* Can't happen afaik. */
3036 break;
3037
3038 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch. */
3039 rc = VERR_EM_INTERPRETER;
3040 break;
3041
3042 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
3043 /** Check if external interrupts are pending; if so, don't switch back. */
3044 pCtx->rip++; /* skip hlt */
3045 if ( pCtx->eflags.Bits.u1IF
3046 && VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)))
3047 goto ResumeExecution;
3048
3049 rc = VINF_EM_HALT;
3050 break;
3051
3052 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
3053 AssertFailed(); /* can't happen. */
3054 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
3055 break;
3056
3057 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
3058 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
3059 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
3060 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
3061 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
3062 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
3063 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
3064 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
3065 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
3066 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
3067 /** @todo inject #UD immediately */
3068 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
3069 break;
3070
3071 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
3072 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
3073    case VMX_EXIT_INVPG:                /* 14 Guest software attempted to execute INVLPG. */
3074 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
3075 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
3076 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
3077 /* already handled above */
3078 AssertMsg( rc == VINF_PGM_CHANGE_MODE
3079 || rc == VINF_EM_RAW_INTERRUPT
3080 || rc == VERR_EM_INTERPRETER
3081 || rc == VINF_EM_RAW_EMULATE_INSTR
3082 || rc == VINF_PGM_SYNC_CR3
3083 || rc == VINF_IOM_HC_IOPORT_READ
3084 || rc == VINF_IOM_HC_IOPORT_WRITE
3085 || rc == VINF_EM_RAW_GUEST_TRAP
3086 || rc == VINF_TRPM_XCPT_DISPATCHED
3087 || rc == VINF_EM_RESCHEDULE_REM,
3088 ("rc = %d\n", rc));
3089 break;
3090
3091 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
3092 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
3093 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
3094 /* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
3095 rc = VERR_EM_INTERPRETER;
3096 break;
3097
3098 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
3099 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
3100 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
3101 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
3102 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
3103 break;
3104
3105 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
3106 Assert(rc == VINF_EM_RAW_INTERRUPT);
3107 break;
3108
3109 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
3110 {
3111#ifdef VBOX_STRICT
3112 RTCCUINTREG val = 0;
3113
3114 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
3115
3116 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
3117 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
3118
3119 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val);
3120 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", val));
3121
3122 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val);
3123 Log(("VMX_VMCS_GUEST_CR3 %RGp\n", val));
3124
3125 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val);
3126 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", val));
3127
3128 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
3129 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
3130
3131 VMX_LOG_SELREG(CS, "CS");
3132 VMX_LOG_SELREG(DS, "DS");
3133 VMX_LOG_SELREG(ES, "ES");
3134 VMX_LOG_SELREG(FS, "FS");
3135 VMX_LOG_SELREG(GS, "GS");
3136 VMX_LOG_SELREG(SS, "SS");
3137 VMX_LOG_SELREG(TR, "TR");
3138 VMX_LOG_SELREG(LDTR, "LDTR");
3139
3140 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
3141 Log(("VMX_VMCS_GUEST_GDTR_BASE %RGv\n", val));
3142 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
3143 Log(("VMX_VMCS_GUEST_IDTR_BASE %RGv\n", val));
3144#endif /* VBOX_STRICT */
3145 rc = VERR_VMX_INVALID_GUEST_STATE;
3146 break;
3147 }
3148
3149 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
3150 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
3151 default:
3152 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
3153 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
3154 break;
3155
3156 }
3157end:
3158
3159 /* Signal changes for the recompiler. */
3160 CPUMSetChangedFlags(pVM, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
3161
3162 /* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
3163 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
3164 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3165 {
3166 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
3167 /* On the next entry we'll only sync the host context. */
3168 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
3169 }
3170 else
3171 {
3172 /* On the next entry we'll sync everything. */
3173 /** @todo we can do better than this */
3174 /* Not in the VINF_PGM_CHANGE_MODE though! */
3175 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3176 }
3177
3178 /* translate into a less severe return code */
3179 if (rc == VERR_EM_INTERPRETER)
3180 rc = VINF_EM_RAW_EMULATE_INSTR;
3181 else
3182 /* Try to extract more information about what might have gone wrong here. */
3183 if (rc == VERR_VMX_INVALID_VMCS_PTR)
3184 {
3185 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
3186 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS;
3187 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
3188 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
3189 }
3190
3191 STAM_STATS({
3192 if (fStatExit2Started) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y);
3193 else if (fStatEntryStarted) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
3194 });
3195 Log2(("X"));
3196 return rc;
3197}
3198
3199
3200/**
3201 * Enters the VT-x session
3202 *
3203 * @returns VBox status code.
3204 * @param pVM The VM to operate on.
3205 * @param pVCpu The VMCPU to operate on.
3206 * @param pCpu CPU info struct
3207 */
3208VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHWACCM_CPUINFO pCpu)
3209{
3210 Assert(pVM->hwaccm.s.vmx.fSupported);
3211
3212    RTCCUINTREG cr4 = ASMGetCR4();
3213 if (!(cr4 & X86_CR4_VMXE))
3214 {
3215 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
3216 return VERR_VMX_X86_CR4_VMXE_CLEARED;
3217 }
3218
3219 /* Activate the VM Control Structure. */
3220 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3221 if (RT_FAILURE(rc))
3222 return rc;
3223
3224 pVCpu->hwaccm.s.fResumeVM = false;
3225 return VINF_SUCCESS;
3226}
3227
3228
3229/**
3230 * Leaves the VT-x session
3231 *
3232 * @returns VBox status code.
3233 * @param pVM The VM to operate on.
3234 * @param pVCpu The VMCPU to operate on.
3235 * @param pCtx CPU context
3236 */
3237VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
3238{
3239 Assert(pVM->hwaccm.s.vmx.fSupported);
3240
3241 /* Save the guest debug state if necessary. */
3242 if (CPUMIsGuestDebugStateActive(pVM))
3243 {
3244 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
3245
3246 /* Enable drx move intercepts again. */
3247 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3248 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3249 AssertRC(rc);
3250
3251 /* Resync the debug registers the next time. */
3252 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
3253 }
3254 else
3255 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
3256
3257    /* Clear the VM Control Structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
3258 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3259 AssertRC(rc);
3260
3261 return VINF_SUCCESS;
3262}
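
/* Illustrative sketch (compiled out): how a ring-0 caller is expected to bracket
 * guest execution with VMXR0Enter/VMXR0Leave above. The single-iteration runner
 * name (VMXR0RunGuestCode) is assumed from earlier in this file; pCpu comes from
 * HWACCMR0GetCurrentCpu() as elsewhere here. */
#if 0
static int vmxR0ExampleRunOnce(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
{
    PHWACCM_CPUINFO pCpu = HWACCMR0GetCurrentCpu();

    int rc = VMXR0Enter(pVM, pVCpu, pCpu);              /* activates the VMCS and clears fResumeVM */
    if (RT_SUCCESS(rc))
    {
        rc = VMXR0RunGuestCode(pVM, pVCpu, pCtx);       /* assumed runner; defined earlier in this file */
        int rc2 = VMXR0Leave(pVM, pVCpu, pCtx);         /* saves the debug state and clears the VMCS */
        if (RT_SUCCESS(rc))
            rc = rc2;
    }
    return rc;
}
#endif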
3263
3264/**
3265 * Flush the TLB (EPT)
3266 *
3267 * @returns VBox status code.
3268 * @param pVM The VM to operate on.
3269 * @param pVCpu The VM CPU to operate on.
3270 * @param enmFlush Type of flush
3271 * @param GCPhys Physical address of the page to flush
3272 */
3273static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys)
3274{
3275 uint64_t descriptor[2];
3276
3277    LogFlow(("vmxR0FlushEPT %d %RGp\n", enmFlush, GCPhys));
3278 Assert(pVM->hwaccm.s.fNestedPaging);
3279 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
3280 descriptor[1] = GCPhys;
3281 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
3282 AssertRC(rc);
3283}
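
/* Illustrative sketch (compiled out): how the helper above is typically driven
 * for a single page, mirroring VMXR0InvalidatePhysPage below. pVM, pVCpu and
 * GCPhys are assumed to come from the caller; the page-granular flush type is
 * the one stored in pVM->hwaccm.s.vmx.enmFlushPage. */
#if 0
    if (!pVCpu->hwaccm.s.fForceTLBFlush)    /* pointless if a full TLB flush is pending anyway */
        vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCPhys);
#endif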
3284
3285#ifdef HWACCM_VTX_WITH_VPID
3286/**
3287 * Flush the TLB (VPID)
3288 *
3289 * @returns VBox status code.
3290 * @param pVM The VM to operate on.
3291 * @param pVCpu The VM CPU to operate on.
3292 * @param enmFlush Type of flush
3293 * @param GCPtr Virtual address of the page to flush
3294 */
3295static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr)
3296{
3297#if HC_ARCH_BITS == 32
3298    /* If we get a flush in 64 bits guest mode, then force a full TLB flush. INVVPID probably takes only 32 bits addresses. (@todo) */
3299 if ( CPUMIsGuestInLongMode(pVM)
3300 && !VMX_IS_64BIT_HOST_MODE())
3301 {
3302 pVCpu->hwaccm.s.fForceTLBFlush = true;
3303 }
3304 else
3305#endif
3306 {
3307 uint64_t descriptor[2];
3308
3309 Assert(pVM->hwaccm.s.vmx.fVPID);
3310 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
3311 descriptor[1] = GCPtr;
3312 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]);
3313 AssertRC(rc);
3314 }
3315}
3316#endif /* HWACCM_VTX_WITH_VPID */
3317
3318/**
3319 * Invalidates a guest page
3320 *
3321 * @returns VBox status code.
3322 * @param pVM The VM to operate on.
3323 * @param pVCpu The VM CPU to operate on.
3324 * @param GCVirt Page to invalidate
3325 */
3326VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
3327{
3328 bool fFlushPending = pVCpu->hwaccm.s.fForceTLBFlush;
3329
3330 LogFlow(("VMXR0InvalidatePage %RGv\n", GCVirt));
3331
3332 /* Only relevant if we want to use VPID.
3333 * In the nested paging case we still see such calls, but
3334 * can safely ignore them. (e.g. after cr3 updates)
3335 */
3336#ifdef HWACCM_VTX_WITH_VPID
3337 /* Skip it if a TLB flush is already pending. */
3338 if ( !fFlushPending
3339 && pVM->hwaccm.s.vmx.fVPID)
3340 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCVirt);
3341#endif /* HWACCM_VTX_WITH_VPID */
3342
3343 return VINF_SUCCESS;
3344}
3345
3346/**
3347 * Invalidates a guest page by physical address
3348 *
3349 * NOTE: Assumes the current instruction references this physical page through a virtual address!
3350 *
3351 * @returns VBox status code.
3352 * @param pVM The VM to operate on.
3353 * @param pVCpu The VM CPU to operate on.
3354 * @param GCPhys Page to invalidate
3355 */
3356VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
3357{
3358 bool fFlushPending = pVCpu->hwaccm.s.fForceTLBFlush;
3359
3360 Assert(pVM->hwaccm.s.fNestedPaging);
3361
3362 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
3363
3364 /* Skip it if a TLB flush is already pending. */
3365 if (!fFlushPending)
3366 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCPhys);
3367
3368 return VINF_SUCCESS;
3369}
3370
3371/**
3372 * Report world switch error and dump some useful debug info
3373 *
3374 * @param pVM The VM to operate on.
3375 * @param pVCpu The VMCPU to operate on.
3376 * @param rc Return code
3377 * @param pCtx Current CPU context (not updated)
3378 */
3379static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx)
3380{
3381 switch (rc)
3382 {
3383 case VERR_VMX_INVALID_VMXON_PTR:
3384 AssertFailed();
3385 break;
3386
3387 case VERR_VMX_UNABLE_TO_START_VM:
3388 case VERR_VMX_UNABLE_TO_RESUME_VM:
3389 {
3390 int rc;
3391 RTCCUINTREG exitReason, instrError, val;
3392
3393 rc = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3394 rc |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3395 AssertRC(rc);
3396 if (rc == VINF_SUCCESS)
3397 {
3398 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
3399 Log(("Current stack %08x\n", &rc));
3400
3401 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
3402 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
3403
3404#ifdef VBOX_STRICT
3405 RTGDTR gdtr;
3406 PX86DESCHC pDesc;
3407
3408 ASMGetGDTR(&gdtr);
3409
3410 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
3411 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
3412 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
3413 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
3414 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
3415 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
3416 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
3417 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
3418 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
3419 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
3420
3421 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
3422 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
3423
3424 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
3425 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
3426
3427 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
3428 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
3429
3430            VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
3431            Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
3432
3433            VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
3434            Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
3435
3436            if (val < gdtr.cbGdt)
3437            {
3438                pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3439                HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
3440            }
3441
3442 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
3443 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
3444 if (val < gdtr.cbGdt)
3445 {
3446 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3447 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
3448 }
3449
3450 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
3451 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
3452 if (val < gdtr.cbGdt)
3453 {
3454 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3455 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
3456 }
3457
3458 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
3459 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
3460 if (val < gdtr.cbGdt)
3461 {
3462 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3463 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
3464 }
3465
3466 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
3467 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
3468 if (val < gdtr.cbGdt)
3469 {
3470 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3471 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
3472 }
3473
3474 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
3475 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
3476 if (val < gdtr.cbGdt)
3477 {
3478 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3479 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
3480 }
3481
3482 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
3483 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
3484 if (val < gdtr.cbGdt)
3485 {
3486 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3487 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
3488 }
3489
3490 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
3491 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
3492
3493 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
3494 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
3495 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
3496 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
3497
3498 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
3499 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
3500
3501 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
3502 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
3503
3504 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
3505 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
3506
3507 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
3508 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
3509 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
3510 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
3511
3512# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3513 if (VMX_IS_64BIT_HOST_MODE())
3514 {
3515 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
3516 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
3517 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
3518 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
3519 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
3520 }
3521# endif
3522#endif /* VBOX_STRICT */
3523 }
3524 break;
3525 }
3526
3527 default:
3528 /* impossible */
3529 AssertMsgFailed(("%Rrc (%#x)\n", rc, rc));
3530 break;
3531 }
3532}
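
/* Illustrative sketch (compiled out): the intended call site for the reporter
 * above. rc, pCtx and friends are assumed to come from the world-switch exit
 * path, so the VMCS diagnostics are logged while that VMCS is still current. */
#if 0
    if (RT_UNLIKELY(   rc == VERR_VMX_UNABLE_TO_START_VM
                    || rc == VERR_VMX_UNABLE_TO_RESUME_VM))
        VMXR0ReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
#endif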
3533
3534#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3535/**
3536 * Prepares for and executes VMLAUNCH (64 bits guest mode)
3537 *
3538 * @returns VBox status code
3539 * @param   fResume     vmlaunch/vmresume
3540 * @param pCtx Guest context
3541 * @param pCache VMCS cache
3542 * @param pVM The VM to operate on.
3543 * @param pVCpu The VMCPU to operate on.
3544 */
3545DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
3546{
3547 uint32_t aParam[6];
3548 PHWACCM_CPUINFO pCpu;
3549 RTHCPHYS pPageCpuPhys;
3550 int rc;
3551
3552 pCpu = HWACCMR0GetCurrentCpu();
3553 pPageCpuPhys = RTR0MemObjGetPagePhysAddr(pCpu->pMemObj, 0);
3554
3555#ifdef DEBUG
3556 pCache->TestIn.pPageCpuPhys = 0;
3557 pCache->TestIn.pVMCSPhys = 0;
3558 pCache->TestIn.pCache = 0;
3559 pCache->TestOut.pVMCSPhys = 0;
3560 pCache->TestOut.pCache = 0;
3561 pCache->TestOut.pCtx = 0;
3562#endif
3563
3564 aParam[0] = (uint32_t)(pPageCpuPhys); /* Param 1: VMXON physical address - Lo. */
3565 aParam[1] = (uint32_t)(pPageCpuPhys >> 32); /* Param 1: VMXON physical address - Hi. */
3566 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys); /* Param 2: VMCS physical address - Lo. */
3567 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys >> 32); /* Param 2: VMCS physical address - Hi. */
3568 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
3569 aParam[5] = 0;
3570
3571 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
3572
3573#ifdef DEBUG
3574 AssertMsg(pCache->TestIn.pPageCpuPhys == pPageCpuPhys, ("%RHp vs %RHp\n", pCache->TestIn.pPageCpuPhys, pPageCpuPhys));
3575 AssertMsg(pCache->TestIn.pVMCSPhys == pVCpu->hwaccm.s.vmx.pVMCSPhys, ("%RHp vs %RHp\n", pCache->TestIn.pVMCSPhys, pVCpu->hwaccm.s.vmx.pVMCSPhys));
3576 AssertMsg(pCache->TestIn.pVMCSPhys == pCache->TestOut.pVMCSPhys, ("%RHp vs %RHp\n", pCache->TestIn.pVMCSPhys, pCache->TestOut.pVMCSPhys));
3577 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, pCache->TestOut.pCache));
3578 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache), ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
3579 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, pCache->TestOut.pCtx));
3580#endif
3581
3582 return rc;
3583}
3584
3585/**
3586 * Executes the specified handler in 64 bits mode
3587 *
3588 * @returns VBox status code.
3589 * @param pVM The VM to operate on.
3590 * @param pVCpu The VMCPU to operate on.
3591 * @param pCtx Guest context
3592 * @param pfnHandler RC handler
3593 * @param cbParam Number of parameters
3594 * @param paParam Array of 32 bits parameters
3595 */
3596VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
3597{
3598 int rc, rc2;
3599 RTHCUINTREG uFlags;
3600 PHWACCM_CPUINFO pCpu;
3601 RTHCPHYS pPageCpuPhys;
3602
3603 /* @todo This code is not guest SMP safe (hyper context) */
3604 AssertReturn(pVM->cCPUs == 1, VERR_ACCESS_DENIED);
3605 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_INTERNAL_ERROR);
3606 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
3607 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
3608
3609#ifdef VBOX_STRICT
3610 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries;i++)
3611 Assert(vmxR0IsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
3612
3613 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries;i++)
3614 Assert(vmxR0IsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
3615#endif
3616
3617 pCpu = HWACCMR0GetCurrentCpu();
3618 pPageCpuPhys = RTR0MemObjGetPagePhysAddr(pCpu->pMemObj, 0);
3619
3620    /* Clear the VM Control Structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
3621 VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3622
3623 /* Leave VMX Root Mode. */
3624 VMXDisable();
3625
3626 uFlags = ASMIntDisableFlags();
3627
3628 CPUMSetHyperESP(pVM, VMMGetStackRC(pVM));
3629 CPUMSetHyperEIP(pVM, pfnHandler);
3630 for (int i=(int)cbParam-1;i>=0;i--)
3631 CPUMPushHyper(pVM, paParam[i]);
3632
3633 /* Call switcher. */
3634 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM);
3635
3636#ifdef VBOX_STRICT
3637 RTHCUINTREG uFlagsTest = ASMGetFlags();
3638#endif
3639
3640 ASMSetFlags(uFlags);
3641
3642 /* Make sure the VMX instructions don't cause #UD faults. */
3643 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
3644
3645 /* Enter VMX Root Mode */
3646 rc2 = VMXEnable(pPageCpuPhys);
3647 if (RT_FAILURE(rc2))
3648 {
3649 if (pVM)
3650 VMXR0CheckError(pVM, pVCpu, rc2);
3651 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
3652 return VERR_VMX_VMXON_FAILED;
3653 }
3654
3655 VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3656 Assert(!(uFlagsTest & X86_EFL_IF));
3657
3658 return rc;
3659}
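
/* Illustrative sketch (compiled out): calling the dispatcher above with a
 * hypothetical RC helper (pfnSomeHelper64 is made up; pfnVMXGCStartVM64 above
 * is the real user). Arguments are passed as 32-bit words and pushed onto the
 * hyper stack in reverse order by VMXR0Execute64BitsHandler. pVM, pVCpu, pCtx
 * and GCPhys are assumed to come from the caller. */
#if 0
    RTRCPTR  pfnSomeHelper64 = NIL_RTRCPTR;     /* hypothetical RC helper address */
    uint32_t aParam[2];
    aParam[0] = (uint32_t)GCPhys;               /* low dword of a 64-bit argument  */
    aParam[1] = (uint32_t)(GCPhys >> 32);       /* high dword of the same argument */
    int rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pfnSomeHelper64, 2, &aParam[0]);
#endif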
3660
3661#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
3662
3663
3664#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
3665/**
3666 * Executes VMWRITE
3667 *
3668 * @returns VBox status code
3669 * @param pVCpu The VMCPU to operate on.
3670 * @param idxField VMCS index
3671 * @param u64Val 16, 32 or 64 bits value
3672 */
3673VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
3674{
3675 int rc;
3676
3677 switch (idxField)
3678 {
3679 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
3680 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
3681 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
3682 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
3683 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
3684 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
3685 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
3686 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
3687 case VMX_VMCS_GUEST_LINK_PTR_FULL:
3688 case VMX_VMCS_GUEST_PDPTR0_FULL:
3689 case VMX_VMCS_GUEST_PDPTR1_FULL:
3690 case VMX_VMCS_GUEST_PDPTR2_FULL:
3691 case VMX_VMCS_GUEST_PDPTR3_FULL:
3692 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
3693 case VMX_VMCS_GUEST_EFER_FULL:
3694 case VMX_VMCS_CTRL_EPTP_FULL:
3695 /* These fields consist of two parts, which are both writable in 32 bits mode. */
3696 rc = VMXWriteVMCS32(idxField, u64Val);
3697 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
3698 AssertRC(rc);
3699 return rc;
3700
3701 case VMX_VMCS64_GUEST_LDTR_BASE:
3702 case VMX_VMCS64_GUEST_TR_BASE:
3703 case VMX_VMCS64_GUEST_GDTR_BASE:
3704 case VMX_VMCS64_GUEST_IDTR_BASE:
3705 case VMX_VMCS64_GUEST_SYSENTER_EIP:
3706 case VMX_VMCS64_GUEST_SYSENTER_ESP:
3707 case VMX_VMCS64_GUEST_CR0:
3708 case VMX_VMCS64_GUEST_CR4:
3709 case VMX_VMCS64_GUEST_CR3:
3710 case VMX_VMCS64_GUEST_DR7:
3711 case VMX_VMCS64_GUEST_RIP:
3712 case VMX_VMCS64_GUEST_RSP:
3713 case VMX_VMCS64_GUEST_CS_BASE:
3714 case VMX_VMCS64_GUEST_DS_BASE:
3715 case VMX_VMCS64_GUEST_ES_BASE:
3716 case VMX_VMCS64_GUEST_FS_BASE:
3717 case VMX_VMCS64_GUEST_GS_BASE:
3718 case VMX_VMCS64_GUEST_SS_BASE:
3719 /* Queue a 64 bits value as we can't set it in 32 bits host mode. */
3720 if (u64Val >> 32ULL)
3721 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
3722 else
3723 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
3724
3725 return rc;
3726
3727 default:
3728 AssertMsgFailed(("Unexpected field %x\n", idxField));
3729 return VERR_INVALID_PARAMETER;
3730 }
3731}
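
/* Illustrative sketch (compiled out) for the 32-bit host path above, with made
 * up values: a "FULL" field is written as its two 32-bit halves, while a base
 * address above 4GB is deferred to the VMCS write cache and only reaches the
 * VMCS from 64-bit mode. pVCpu is assumed to come from the caller. */
#if 0
    uint64_t const u64TSCOffset = UINT64_C(0xfffffff000000000);    /* hypothetical */
    uint64_t const u64GSBase    = UINT64_C(0xffffff8000100000);    /* hypothetical */
    int rc  = VMXWriteVMCS64Ex(pVCpu, VMX_VMCS_CTRL_TSC_OFFSET_FULL, u64TSCOffset); /* two VMXWriteVMCS32 calls */
    rc     |= VMXWriteVMCS64Ex(pVCpu, VMX_VMCS64_GUEST_GS_BASE,      u64GSBase);    /* goes through the write cache */
    AssertRC(rc);
#endif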
3732
3733/**
3734 * Cache VMCS writes for performance reasons (Darwin) and for running 64 bits guests on 32 bits hosts.
3735 *
3736 * @param pVCpu The VMCPU to operate on.
3737 * @param idxField VMCS field
3738 * @param u64Val Value
3739 */
3740VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
3741{
3742 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
3743
3744 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
3745
3746 /* Make sure there are no duplicates. */
3747 for (unsigned i=0;i<pCache->Write.cValidEntries;i++)
3748 {
3749 if (pCache->Write.aField[i] == idxField)
3750 {
3751 pCache->Write.aFieldVal[i] = u64Val;
3752 return VINF_SUCCESS;
3753 }
3754 }
3755
3756 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
3757 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
3758 pCache->Write.cValidEntries++;
3759 return VINF_SUCCESS;
3760}
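
/* Illustrative sketch (compiled out) of the deduplication above: queueing the
 * same field twice leaves a single cache entry holding the most recent value.
 * The base addresses are made up; pVCpu is assumed to come from the caller. */
#if 0
    VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_FS_BASE, UINT64_C(0xffffff8000000000));
    VMXWriteCachedVMCSEx(pVCpu, VMX_VMCS64_GUEST_FS_BASE, UINT64_C(0xffffff8000001000));
    /* Write.cValidEntries grows by one; the single entry now holds the second value. */
#endif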
3761
3762#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
3763
3764#ifdef VBOX_STRICT
3765static bool vmxR0IsValidReadField(uint32_t idxField)
3766{
3767 switch(idxField)
3768 {
3769 case VMX_VMCS64_GUEST_RIP:
3770 case VMX_VMCS64_GUEST_RSP:
3771 case VMX_VMCS_GUEST_RFLAGS:
3772 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
3773 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
3774 case VMX_VMCS64_GUEST_CR0:
3775 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
3776 case VMX_VMCS64_GUEST_CR4:
3777 case VMX_VMCS64_GUEST_DR7:
3778 case VMX_VMCS32_GUEST_SYSENTER_CS:
3779 case VMX_VMCS64_GUEST_SYSENTER_EIP:
3780 case VMX_VMCS64_GUEST_SYSENTER_ESP:
3781 case VMX_VMCS32_GUEST_GDTR_LIMIT:
3782 case VMX_VMCS64_GUEST_GDTR_BASE:
3783 case VMX_VMCS32_GUEST_IDTR_LIMIT:
3784 case VMX_VMCS64_GUEST_IDTR_BASE:
3785 case VMX_VMCS16_GUEST_FIELD_CS:
3786 case VMX_VMCS32_GUEST_CS_LIMIT:
3787 case VMX_VMCS64_GUEST_CS_BASE:
3788 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
3789 case VMX_VMCS16_GUEST_FIELD_DS:
3790 case VMX_VMCS32_GUEST_DS_LIMIT:
3791 case VMX_VMCS64_GUEST_DS_BASE:
3792 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
3793 case VMX_VMCS16_GUEST_FIELD_ES:
3794 case VMX_VMCS32_GUEST_ES_LIMIT:
3795 case VMX_VMCS64_GUEST_ES_BASE:
3796 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
3797 case VMX_VMCS16_GUEST_FIELD_FS:
3798 case VMX_VMCS32_GUEST_FS_LIMIT:
3799 case VMX_VMCS64_GUEST_FS_BASE:
3800 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
3801 case VMX_VMCS16_GUEST_FIELD_GS:
3802 case VMX_VMCS32_GUEST_GS_LIMIT:
3803 case VMX_VMCS64_GUEST_GS_BASE:
3804 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
3805 case VMX_VMCS16_GUEST_FIELD_SS:
3806 case VMX_VMCS32_GUEST_SS_LIMIT:
3807 case VMX_VMCS64_GUEST_SS_BASE:
3808 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
3809 case VMX_VMCS16_GUEST_FIELD_LDTR:
3810 case VMX_VMCS32_GUEST_LDTR_LIMIT:
3811 case VMX_VMCS64_GUEST_LDTR_BASE:
3812 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
3813 case VMX_VMCS16_GUEST_FIELD_TR:
3814 case VMX_VMCS32_GUEST_TR_LIMIT:
3815 case VMX_VMCS64_GUEST_TR_BASE:
3816 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
3817 case VMX_VMCS32_RO_EXIT_REASON:
3818 case VMX_VMCS32_RO_VM_INSTR_ERROR:
3819 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
3820 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
3821 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
3822 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
3823 case VMX_VMCS_RO_EXIT_QUALIFICATION:
3824 case VMX_VMCS32_RO_IDT_INFO:
3825 case VMX_VMCS32_RO_IDT_ERRCODE:
3826 case VMX_VMCS64_GUEST_CR3:
3827 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
3828 return true;
3829 }
3830 return false;
3831}
3832
3833static bool vmxR0IsValidWriteField(uint32_t idxField)
3834{
3835 switch(idxField)
3836 {
3837 case VMX_VMCS64_GUEST_LDTR_BASE:
3838 case VMX_VMCS64_GUEST_TR_BASE:
3839 case VMX_VMCS64_GUEST_GDTR_BASE:
3840 case VMX_VMCS64_GUEST_IDTR_BASE:
3841 case VMX_VMCS64_GUEST_SYSENTER_EIP:
3842 case VMX_VMCS64_GUEST_SYSENTER_ESP:
3843 case VMX_VMCS64_GUEST_CR0:
3844 case VMX_VMCS64_GUEST_CR4:
3845 case VMX_VMCS64_GUEST_CR3:
3846 case VMX_VMCS64_GUEST_DR7:
3847 case VMX_VMCS64_GUEST_RIP:
3848 case VMX_VMCS64_GUEST_RSP:
3849 case VMX_VMCS64_GUEST_CS_BASE:
3850 case VMX_VMCS64_GUEST_DS_BASE:
3851 case VMX_VMCS64_GUEST_ES_BASE:
3852 case VMX_VMCS64_GUEST_FS_BASE:
3853 case VMX_VMCS64_GUEST_GS_BASE:
3854 case VMX_VMCS64_GUEST_SS_BASE:
3855 return true;
3856 }
3857 return false;
3858}
3859
3860#endif