VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@ 13898

Last change on this file since 13898 was 13898, checked in by vboxsync, 16 years ago

Moved more data to VMCPU.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 128.3 KB
Line 
1/* $Id: HWVMXR0.cpp 13898 2008-11-06 09:44:29Z vboxsync $ */
2/** @file
3 * HWACCM VMX - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_HWACCM
27#include <VBox/hwaccm.h>
28#include "HWACCMInternal.h"
29#include <VBox/vm.h>
30#include <VBox/x86.h>
31#include <VBox/pgm.h>
32#include <VBox/pdm.h>
33#include <VBox/err.h>
34#include <VBox/log.h>
35#include <VBox/selm.h>
36#include <VBox/iom.h>
37#include <iprt/param.h>
38#include <iprt/assert.h>
39#include <iprt/asm.h>
40#include <iprt/string.h>
41#include "HWVMXR0.h"
42
43/*******************************************************************************
44* Global Variables *
45*******************************************************************************/
46/* IO operation lookup arrays. */
47static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
48static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
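/* Note (illustrative): these tables are presumably indexed by the access-size
 * field of an I/O-instruction exit qualification (0 = byte, 1 = word,
 * 3 = dword; index 2 is unused, hence the zero entries).  A minimal decoding
 * sketch with hypothetical local names, not the exact macros used later in
 * this file:
 *
 *     uint32_t uIOWidth = uExitQualification & 3;   // size field of the qualification
 *     uint32_t cbSize   = g_aIOSize[uIOWidth];      // bytes transferred
 *     uint32_t uAndVal  = g_aIOOpAnd[uIOWidth];     // mask for the relevant part of [e]ax
 */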
49
50/*******************************************************************************
51* Local Functions *
52*******************************************************************************/
53static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx);
54static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu);
55static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu);
56static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu);
57static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys);
58static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr);
59static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
60
61
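/**
 * Records the last VT-x failure for later diagnosis.
 *
 * When the generic VMX error code is returned, the VM-instruction error field
 * of the current VMCS is read as well so ring-3 can report the precise reason.
 *
 * @param pVM   The VM to operate on.
 * @param rc    The status code that triggered the error path.
 */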
62static void VMXR0CheckError(PVM pVM, int rc)
63{
64 if (rc == VERR_VMX_GENERIC)
65 {
66 RTCCUINTREG instrError;
67
68 VMXReadVMCS(VMX_VMCS_RO_VM_INSTR_ERROR, &instrError);
69 pVM->hwaccm.s.vmx.ulLastInstrError = instrError;
70 }
71 pVM->hwaccm.s.lLastError = rc;
72}
73
74/**
75 * Sets up and activates VT-x on the current CPU
76 *
77 * @returns VBox status code.
78 * @param pCpu CPU info struct
79 * @param pVM The VM to operate on. (can be NULL after a resume!!)
80 * @param pvPageCpu Pointer to the global cpu page
81 * @param pPageCpuPhys Physical address of the global cpu page
82 */
83VMMR0DECL(int) VMXR0EnableCpu(PHWACCM_CPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
84{
85 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
86 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
87
88#ifdef LOG_ENABLED
89 SUPR0Printf("VMXR0EnableCpu cpu %d page (%x) %x\n", pCpu->idCpu, pvPageCpu, (uint32_t)pPageCpuPhys);
90#endif
91 if (pVM)
92 {
93 /* Set revision dword at the beginning of the VMXON structure. */
94 *(uint32_t *)pvPageCpu = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
95 }
96
97 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
98 * (which can have very bad consequences!!!)
99 */
100
101 /* Make sure the VMX instructions don't cause #UD faults. */
102 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
103
104 /* Enter VMX Root Mode */
105 int rc = VMXEnable(pPageCpuPhys);
106 if (RT_FAILURE(rc))
107 {
108 if (pVM)
109 VMXR0CheckError(pVM, rc);
110 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
111 return VERR_VMX_VMXON_FAILED;
112 }
113 return VINF_SUCCESS;
114}
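/* Note (illustrative): the function above follows the architectural VMXON
 * sequence: write the VMCS revision identifier from MSR IA32_VMX_BASIC into
 * the VMXON region, set CR4.VMXE before executing VMXON, and clear it again
 * on failure so the CPU is left untouched.  A hedged stand-alone sketch
 * (hypothetical helper, not used by this file):
 *
 *     static int enableVmxOnThisCpu(void *pvVmxonRegion, RTHCPHYS HCPhysVmxon, uint64_t u64VmxBasic)
 *     {
 *         *(uint32_t *)pvVmxonRegion = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(u64VmxBasic);
 *         ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
 *         int rc = VMXEnable(HCPhysVmxon);
 *         if (RT_FAILURE(rc))
 *             ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
 *         return rc;
 *     }
 */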
115
116/**
117 * Deactivates VT-x on the current CPU
118 *
119 * @returns VBox status code.
120 * @param pCpu CPU info struct
121 * @param pvPageCpu Pointer to the global cpu page
122 * @param pPageCpuPhys Physical address of the global cpu page
123 */
124VMMR0DECL(int) VMXR0DisableCpu(PHWACCM_CPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
125{
126 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
127 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
128
129 /* Leave VMX Root Mode. */
130 VMXDisable();
131
132 /* And clear the X86_CR4_VMXE bit */
133 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
134
135#ifdef LOG_ENABLED
136 SUPR0Printf("VMXR0DisableCpu cpu %d\n", pCpu->idCpu);
137#endif
138 return VINF_SUCCESS;
139}
140
141/**
142 * Does Ring-0 per VM VT-x init.
143 *
144 * @returns VBox status code.
145 * @param pVM The VM to operate on.
146 */
147VMMR0DECL(int) VMXR0InitVM(PVM pVM)
148{
149 int rc;
150
151#ifdef LOG_ENABLED
152 SUPR0Printf("VMXR0InitVM %x\n", pVM);
153#endif
154
155 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
156
157 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
158 {
159 /* Allocate one page for the virtual APIC mmio cache. */
160 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
161 AssertRC(rc);
162 if (RT_FAILURE(rc))
163 return rc;
164
165 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
166 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
167 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
168 }
169 else
170 {
171 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
172 pVM->hwaccm.s.vmx.pAPIC = 0;
173 pVM->hwaccm.s.vmx.pAPICPhys = 0;
174 }
175
176 /* Allocate the MSR bitmap if this feature is supported. */
177 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
178 {
179 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
180 AssertRC(rc);
181 if (RT_FAILURE(rc))
182 return rc;
183
184 pVM->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjMSRBitmap);
185 pVM->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
186 memset(pVM->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
187 }
188
189 /* Allocate VMCSs for all guest CPUs. */
190 for (unsigned i=0;i<pVM->cCPUs;i++)
191 {
192 pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
193
194 /* Allocate one page for the VM control structure (VMCS). */
195 rc = RTR0MemObjAllocCont(&pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
196 AssertRC(rc);
197 if (RT_FAILURE(rc))
198 return rc;
199
200 pVM->aCpus[i].hwaccm.s.vmx.pVMCS = RTR0MemObjAddress(pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS);
201 pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys = RTR0MemObjGetPagePhysAddr(pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS, 0);
202 ASMMemZero32(pVM->aCpus[i].hwaccm.s.vmx.pVMCS, PAGE_SIZE);
203
204 pVM->aCpus[i].hwaccm.s.vmx.cr0_mask = 0;
205 pVM->aCpus[i].hwaccm.s.vmx.cr4_mask = 0;
206
207#ifdef LOG_ENABLED
208 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVM->aCpus[i].hwaccm.s.vmx.pVMCS, (uint32_t)pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys);
209#endif
210 }
211
212 /* Current guest paging mode. */
213 pVM->hwaccm.s.vmx.enmCurrGuestMode = PGMMODE_REAL;
214
215 return VINF_SUCCESS;
216}
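/* Note (illustrative): every page allocated above uses the same RTR0MemObj
 * pattern: allocate a page of physically contiguous ring-0 memory, then fetch
 * both its ring-0 virtual address and its physical address for use in VMCS
 * fields.  A minimal sketch of that pattern (hypothetical names):
 *
 *     RTR0MEMOBJ hMemObj = NIL_RTR0MEMOBJ;
 *     int rc = RTR0MemObjAllocCont(&hMemObj, PAGE_SIZE, true);   // executable R0 mapping
 *     if (RT_SUCCESS(rc))
 *     {
 *         void     *pvPage     = RTR0MemObjAddress(hMemObj);
 *         RTHCPHYS  HCPhysPage = RTR0MemObjGetPagePhysAddr(hMemObj, 0);
 *         ASMMemZero32(pvPage, PAGE_SIZE);
 *     }
 */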
217
218/**
219 * Does Ring-0 per VM VT-x termination.
220 *
221 * @returns VBox status code.
222 * @param pVM The VM to operate on.
223 */
224VMMR0DECL(int) VMXR0TermVM(PVM pVM)
225{
226 for (unsigned i=0;i<pVM->cCPUs;i++)
227 {
228 if (pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS != NIL_RTR0MEMOBJ)
229 {
230 RTR0MemObjFree(pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS, false);
231 pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
232 pVM->aCpus[i].hwaccm.s.vmx.pVMCS = 0;
233 pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys = 0;
234 }
235 }
236 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
237 {
238 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
239 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
240 pVM->hwaccm.s.vmx.pAPIC = 0;
241 pVM->hwaccm.s.vmx.pAPICPhys = 0;
242 }
243 if (pVM->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
244 {
245 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, false);
246 pVM->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
247 pVM->hwaccm.s.vmx.pMSRBitmap = 0;
248 pVM->hwaccm.s.vmx.pMSRBitmapPhys = 0;
249 }
250 return VINF_SUCCESS;
251}
252
253/**
254 * Sets up VT-x for the specified VM
255 *
256 * @returns VBox status code.
257 * @param pVM The VM to operate on.
258 */
259VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
260{
261 int rc = VINF_SUCCESS;
262 uint32_t val;
263
264 AssertReturn(pVM, VERR_INVALID_PARAMETER);
265
266 for (unsigned i=0;i<pVM->cCPUs;i++)
267 {
268 Assert(pVM->aCpus[i].hwaccm.s.vmx.pVMCS);
269
270 /* Set revision dword at the beginning of the VMCS structure. */
271 *(uint32_t *)pVM->aCpus[i].hwaccm.s.vmx.pVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
272
273 /* Clear VM Control Structure. */
274 Log(("pVMCSPhys = %RHp\n", pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys));
275 rc = VMXClearVMCS(pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys);
276 if (RT_FAILURE(rc))
277 goto vmx_end;
278
279 /* Activate the VM Control Structure. */
280 rc = VMXActivateVMCS(pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys);
281 if (RT_FAILURE(rc))
282 goto vmx_end;
283
284 /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
285 * Set required bits to one and zero according to the MSR capabilities.
286 */
287 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
288 /* External and non-maskable interrupts cause VM-exits. */
289 val = val | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
290 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
291
292 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
293 AssertRC(rc);
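        /* Note (illustrative): every execution-control field in this function is
         * built with the same pattern dictated by the capability MSRs: bits the
         * CPU requires to be 1 come from 'disallowed0', the features we want are
         * OR-ed in, and the result is masked with 'allowed1' so unsupported bits
         * are dropped again.  In short:
         *
         *     val  = msr.disallowed0;   // must-be-one bits
         *     val |= fDesiredBits;      // what we would like to enable
         *     val &= msr.allowed1;      // strip what the CPU doesn't support
         */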
294
295 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
296 * Set required bits to one and zero according to the MSR capabilities.
297 */
298 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
299 /* Program which events cause VM-exits and which features we want to use. */
300 val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
301 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
302 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
303 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
304 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
305
306 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
307 if (!pVM->hwaccm.s.fNestedPaging)
308 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
309 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
310 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
311
312 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error. (combined with some other exit reasons) */
313
314#if HC_ARCH_BITS == 64
315 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
316 {
317 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold */
318 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
319 Assert(pVM->hwaccm.s.vmx.pAPIC);
320 }
321 else
322 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
323 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
324#endif
325
326#ifdef VBOX_WITH_VTX_MSR_BITMAPS
327 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
328 {
329 Assert(pVM->hwaccm.s.vmx.pMSRBitmapPhys);
330 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
331 }
332#endif
333
334 /* We will use the secondary control if it's present. */
335 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
336
337 /* Mask away the bits that the CPU doesn't support */
338 /** @todo make sure they don't conflict with the above requirements. */
339 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
340 pVM->aCpus[i].hwaccm.s.vmx.proc_ctls = val;
341
342 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
343 AssertRC(rc);
344
345 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
346 {
347 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
348 * Set required bits to one and zero according to the MSR capabilities.
349 */
350 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
351 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
352
353#ifdef HWACCM_VTX_WITH_EPT
354 if (pVM->hwaccm.s.fNestedPaging)
355 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
356#endif /* HWACCM_VTX_WITH_EPT */
357#ifdef HWACCM_VTX_WITH_VPID
358 else
359 if (pVM->hwaccm.s.vmx.fVPID)
360 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
361#endif /* HWACCM_VTX_WITH_VPID */
362
363 /* Mask away the bits that the CPU doesn't support */
364 /** @todo make sure they don't conflict with the above requirements. */
365 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
366
367 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
368 AssertRC(rc);
369 }
370
371 /* VMX_VMCS_CTRL_CR3_TARGET_COUNT
372 * Set required bits to one and zero according to the MSR capabilities.
373 */
374 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
375 AssertRC(rc);
376
377 /* VMX_VMCS_CTRL_EXIT_CONTROLS
378 * Set required bits to one and zero according to the MSR capabilities.
379 */
380 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
381
382 /* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
383 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
384#if HC_ARCH_BITS == 64
385 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
386#else
387 /* else Must be zero when AMD64 is not available. */
388#endif
389 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
390 /* Don't acknowledge external interrupts on VM-exit. */
391 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
392 AssertRC(rc);
393
394 /* Forward all exceptions except #NM & #PF to the guest.
395 * We always need to check pagefaults since our shadow page table can be out of sync.
396 * And we always lazily sync the FPU & XMM state.
397 */
398
399 /** @todo Possible optimization:
400 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
401 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
402 * registers ourselves of course.
403 *
404 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
405 */
406
407 /* Don't filter page faults; all of them should cause a switch. */
408 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
409 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
410 AssertRC(rc);
411
412 /* Init TSC offset to zero. */
413 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
414#if HC_ARCH_BITS == 32
415 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_TSC_OFFSET_HIGH, 0);
416#endif
417 AssertRC(rc);
418
419 rc = VMXWriteVMCS(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
420#if HC_ARCH_BITS == 32
421 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_IO_BITMAP_A_HIGH, 0);
422#endif
423 AssertRC(rc);
424
425 rc = VMXWriteVMCS(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
426#if HC_ARCH_BITS == 32
427 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_IO_BITMAP_B_HIGH, 0);
428#endif
429 AssertRC(rc);
430
431 /* Set the MSR bitmap address. */
432 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
433 {
434 /* Optional */
435 rc = VMXWriteVMCS(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVM->hwaccm.s.vmx.pMSRBitmapPhys);
436#if HC_ARCH_BITS == 32
437 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_MSR_BITMAP_HIGH, pVM->hwaccm.s.vmx.pMSRBitmapPhys >> 32ULL);
438#endif
439 AssertRC(rc);
440 }
441
442 /* Clear MSR controls. */
443 rc = VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, 0);
444 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, 0);
445 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, 0);
446#if HC_ARCH_BITS == 32
447 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_HIGH, 0);
448 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_HIGH, 0);
449 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_HIGH, 0); /* high half of the VM-entry MSR-load address; the original wrote VMEXIT_MSR_LOAD_HIGH twice */
450#endif
451 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
452 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, 0);
453 AssertRC(rc);
454
455 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
456 {
457 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
458 /* Optional */
459 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
460 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
461#if HC_ARCH_BITS == 32
462 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VAPIC_PAGEADDR_HIGH, pVM->hwaccm.s.vmx.pAPICPhys >> 32ULL);
463#endif
464 AssertRC(rc);
465 }
466
467 /* Set link pointer to -1. Not currently used. */
468#if HC_ARCH_BITS == 32
469 rc = VMXWriteVMCS(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFF);
470 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LINK_PTR_HIGH, 0xFFFFFFFF);
471#else
472 rc = VMXWriteVMCS(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFF);
473#endif
474 AssertRC(rc);
475
476 /* Clear the VM control structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
477 rc = VMXClearVMCS(pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys);
478 AssertRC(rc);
479 } /* for each VMCPU */
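    /* Note (illustrative): the loop above follows the canonical VMCS lifecycle:
     * VMCLEAR launders the region, VMPTRLD makes it the current VMCS so that
     * VMWRITE targets it, the control fields are written, and a final VMCLEAR
     * flushes the cached state back to memory so the VMCS can later be loaded
     * on any CPU.  Roughly:
     *
     *     VMXClearVMCS(HCPhysVmcs);      // initialise / launder
     *     VMXActivateVMCS(HCPhysVmcs);   // make current for VMWRITE
     *     // ... VMXWriteVMCS(enmField, val) ...
     *     VMXClearVMCS(HCPhysVmcs);      // write back and deactivate
     */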
480
481 /* Choose the right TLB setup function. */
482 if (pVM->hwaccm.s.fNestedPaging)
483 {
484 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBEPT;
485
486 /* Default values for flushing. */
487 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
488 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
489
490 /* If the capabilities specify we can do more, then make use of it. */
491 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_INDIV)
492 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
493 else
494 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
495 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
496
497 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
498 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
499 }
500#ifdef HWACCM_VTX_WITH_VPID
501 else
502 if (pVM->hwaccm.s.vmx.fVPID)
503 {
504 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBVPID;
505
506 /* Default values for flushing. */
507 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
508 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
509
510 /* If the capabilities specify we can do more, then make use of it. */
511 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV)
512 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
513 else
514 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
515 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
516
517 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
518 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
519 }
520#endif /* HWACCM_VTX_WITH_VPID */
521 else
522 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBDummy;
523
524vmx_end:
525 VMXR0CheckError(pVM, rc);
526 return rc;
527}
528
529
530/**
531 * Injects an event (trap or external interrupt)
532 *
533 * @returns VBox status code.
534 * @param pVM The VM to operate on.
535 * @param pVCpu The VMCPU to operate on.
536 * @param pCtx CPU Context
537 * @param intInfo VMX interrupt info
538 * @param cbInstr Opcode length of faulting instruction
539 * @param errCode Error code (optional)
540 */
541static int VMXR0InjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
542{
543 int rc;
544 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
545
546#ifdef VBOX_STRICT
547 if (iGate == 0xE)
548 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%08x intInfo=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
549 else
550 if (iGate < 0x20)
551 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode));
552 else
553 {
554 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
555 Assert(!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS));
556 Assert(pCtx->eflags.u32 & X86_EFL_IF);
557 }
558#endif
559
560#ifdef HWACCM_VMX_EMULATE_REALMODE
561 if (CPUMIsGuestInRealModeEx(pCtx))
562 {
563 RTGCPHYS GCPhysHandler;
564 uint16_t offset, ip;
565 RTSEL sel;
566
567 /* Injecting events doesn't work right with real mode emulation.
568 * (#GP if we try to inject external hardware interrupts)
569 * Inject the interrupt or trap directly instead.
570 */
571 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
572
573 /* Check if the interrupt handler is present. */
574 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
575 {
576 Log(("IDT cbIdt violation\n"));
577 if (iGate != X86_XCPT_DF)
578 {
579 RTGCUINTPTR intInfo;
580
581 intInfo = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
582 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
583 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
584 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
585
586 return VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0 /* no error code according to the Intel docs */);
587 }
588 Log(("Triple fault -> reset the VM!\n"));
589 return VINF_EM_RESET;
590 }
591 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
592 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
593 || iGate == 4)
594 {
595 ip = pCtx->ip + cbInstr;
596 }
597 else
598 ip = pCtx->ip;
599
600 /* Read the selector:offset pair of the interrupt handler. */
601 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
602 PGMPhysRead(pVM, GCPhysHandler, &offset, sizeof(offset));
603 PGMPhysRead(pVM, GCPhysHandler + 2, &sel, sizeof(sel));
604
605 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
606
607 /* Construct the stack frame. */
608 /** @todo should check stack limit. */
609 pCtx->sp -= 2;
610 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
611 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t));
612 pCtx->sp -= 2;
613 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
614 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t));
615 pCtx->sp -= 2;
616 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
617 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip));
618
619 /* Update the CPU state for executing the handler. */
620 pCtx->rip = offset;
621 pCtx->cs = sel;
622 pCtx->csHid.u64Base = sel << 4;
623 pCtx->eflags.u &= ~(X86_EFL_IF|X86_EFL_TF|X86_EFL_RF|X86_EFL_AC);
624
625 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
626 return VINF_SUCCESS;
627 }
628#endif /* HWACCM_VMX_EMULATE_REALMODE */
629
630 /* Set event injection state. */
631 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
632
633 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
634 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
635
636 AssertRC(rc);
637 return rc;
638}
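/* Note (illustrative): the intInfo value written to VMX_VMCS_CTRL_ENTRY_IRQ_INFO
 * above follows the VM-entry interruption-information format: bits 0-7 hold the
 * vector, bits 8-10 the event type, bit 11 the error-code-valid flag and bit 31
 * the valid flag.  A sketch of building it for a hardware exception that pushes
 * an error code:
 *
 *     uint32_t u32IntInfo = u8Vector;
 *     u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT;
 *     u32IntInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
 *     u32IntInfo |= 1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT;
 */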
639
640
641/**
642 * Checks for pending guest interrupts and injects them
643 *
644 * @returns VBox status code.
645 * @param pVM The VM to operate on.
646 * @param pVCpu The VMCPU to operate on.
647 * @param pCtx CPU Context
648 */
649static int VMXR0CheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
650{
651 int rc;
652
653 /* Dispatch any pending interrupts (injected earlier, but a VM-exit occurred before delivery). */
654 if (pVCpu->hwaccm.s.Event.fPending)
655 {
656 Log(("Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
657 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
658 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
659 AssertRC(rc);
660
661 pVCpu->hwaccm.s.Event.fPending = false;
662 return VINF_SUCCESS;
663 }
664
665 /* When external interrupts are pending, we should exit the VM when IF is set. */
666 if ( !TRPMHasTrap(pVM)
667 && VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)))
668 {
669 if (!(pCtx->eflags.u32 & X86_EFL_IF))
670 {
671 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
672 {
673 LogFlow(("Enable irq window exit!\n"));
674 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
675 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
676 AssertRC(rc);
677 }
678 /* else nothing to do but wait */
679 }
680 else
681 if (!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
682 {
683 uint8_t u8Interrupt;
684
685 rc = PDMGetInterrupt(pVM, &u8Interrupt);
686 Log(("Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
687 if (RT_SUCCESS(rc))
688 {
689 rc = TRPMAssertTrap(pVM, u8Interrupt, TRPM_HARDWARE_INT);
690 AssertRC(rc);
691 }
692 else
693 {
694 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
695 Assert(!VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)));
696 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
697 /* Just continue */
698 }
699 }
700 else
701 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
702 }
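    /* Note (illustrative): the interrupt-window exit used above is how a pending
     * interrupt gets delivered while the guest runs with IF clear: instead of
     * polling, the IRQ_WINDOW_EXIT control makes the CPU force a VM-exit the
     * moment the guest becomes interruptible, and the interrupt is then injected
     * on the following VM-entry.
     */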
703
704#ifdef VBOX_STRICT
705 if (TRPMHasTrap(pVM))
706 {
707 uint8_t u8Vector;
708 rc = TRPMQueryTrapAll(pVM, &u8Vector, 0, 0, 0);
709 AssertRC(rc);
710 }
711#endif
712
713 if ( pCtx->eflags.u32 & X86_EFL_IF
714 && (!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
715 && TRPMHasTrap(pVM)
716 )
717 {
718 uint8_t u8Vector;
719 int rc;
720 TRPMEVENT enmType;
721 RTGCUINTPTR intInfo;
722 RTGCUINT errCode;
723
724 /* If a new event is pending, then dispatch it now. */
725 rc = TRPMQueryTrapAll(pVM, &u8Vector, &enmType, &errCode, 0);
726 AssertRC(rc);
727 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
728 Assert(enmType != TRPM_SOFTWARE_INT);
729
730 /* Clear the pending trap. */
731 rc = TRPMResetTrap(pVM);
732 AssertRC(rc);
733
734 intInfo = u8Vector;
735 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
736
737 if (enmType == TRPM_TRAP)
738 {
739 switch (u8Vector) {
740 case 8:
741 case 10:
742 case 11:
743 case 12:
744 case 13:
745 case 14:
746 case 17:
747 /* Valid error codes. */
748 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
749 break;
750 default:
751 break;
752 }
753 if (u8Vector == X86_XCPT_BP || u8Vector == X86_XCPT_OF)
754 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
755 else
756 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
757 }
758 else
759 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
760
761 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
762 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
763 AssertRC(rc);
764 } /* if (interrupts can be dispatched) */
765
766 return VINF_SUCCESS;
767}
768
769/**
770 * Save the host state
771 *
772 * @returns VBox status code.
773 * @param pVM The VM to operate on.
774 * @param pVCpu The VMCPU to operate on.
775 */
776VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
777{
778 int rc = VINF_SUCCESS;
779
780 /*
781 * Host CPU Context
782 */
783 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
784 {
785 RTIDTR idtr;
786 RTGDTR gdtr;
787 RTSEL SelTR;
788 PX86DESCHC pDesc;
789 uintptr_t trBase;
790
791 /* Control registers */
792 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
793 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, ASMGetCR3());
794 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
795 AssertRC(rc);
796 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
797 Log2(("VMX_VMCS_HOST_CR3 %RHp\n", ASMGetCR3()));
798 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
799
800 /* Selector registers. */
801 rc = VMXWriteVMCS(VMX_VMCS_HOST_FIELD_CS, ASMGetCS());
802 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
803 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_DS, 0);
804 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_ES, 0);
805#if HC_ARCH_BITS == 32
806 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_FS, 0);
807 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_GS, 0);
808#endif
809 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_SS, ASMGetSS());
810 SelTR = ASMGetTR();
811 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_TR, SelTR);
812 AssertRC(rc);
813 Log2(("VMX_VMCS_HOST_FIELD_CS %08x\n", ASMGetCS()));
814 Log2(("VMX_VMCS_HOST_FIELD_DS %08x\n", ASMGetDS()));
815 Log2(("VMX_VMCS_HOST_FIELD_ES %08x\n", ASMGetES()));
816 Log2(("VMX_VMCS_HOST_FIELD_FS %08x\n", ASMGetFS()));
817 Log2(("VMX_VMCS_HOST_FIELD_GS %08x\n", ASMGetGS()));
818 Log2(("VMX_VMCS_HOST_FIELD_SS %08x\n", ASMGetSS()));
819 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
820
821 /* GDTR & IDTR */
822 ASMGetGDTR(&gdtr);
823 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
824 ASMGetIDTR(&idtr);
825 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
826 AssertRC(rc);
827 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
828 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
829
830 /* Save the base address of the TR selector. */
831 if (SelTR > gdtr.cbGdt)
832 {
833 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
834 return VERR_VMX_INVALID_HOST_STATE;
835 }
836
837 pDesc = &((PX86DESCHC)gdtr.pGdt)[SelTR >> X86_SEL_SHIFT_HC];
838#if HC_ARCH_BITS == 64
839 trBase = X86DESC64_BASE(*pDesc);
840#else
841 trBase = X86DESC_BASE(*pDesc);
842#endif
843 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
844 AssertRC(rc);
845 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
846
847 /* FS and GS base. */
848#if HC_ARCH_BITS == 64
849 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
850 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
851 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
852 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
853#endif
854 AssertRC(rc);
855
856 /* Sysenter MSRs. */
857 /** @todo expensive!! */
858 rc = VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
859 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
860#if HC_ARCH_BITS == 32
861 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
862 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
863 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
864 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
865#else
866 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
867 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
868 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
869 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
870#endif
871 AssertRC(rc);
872
873 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
874 }
875 return rc;
876}
877
878/**
879 * Prefetch the 4 PDPT pointers (PAE and nested paging only)
880 *
881 * @param pVM The VM to operate on.
882 * @param pCtx Guest context
883 */
884static void vmxR0PrefetchPAEPdptrs(PVM pVM, PCPUMCTX pCtx)
885{
886 if (CPUMIsGuestInPAEModeEx(pCtx))
887 {
888 X86PDPE Pdpe;
889
890 for (unsigned i=0;i<4;i++)
891 {
892 Pdpe = PGMGstGetPaePDPtr(pVM, i);
893 int rc = VMXWriteVMCS(VMX_VMCS_GUEST_PDPTR0_FULL + i*2, Pdpe.u);
894#if HC_ARCH_BITS == 32
895 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_PDPTR0_FULL + i*2 + 1, Pdpe.u >> 32ULL);
896#endif
897 AssertRC(rc);
898 }
899 }
900}
901
902/**
903 * Update the exception bitmap according to the current CPU state
904 *
905 * @param pVM The VM to operate on.
906 * @param pVCpu The VMCPU to operate on.
907 * @param pCtx Guest context
908 */
909static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
910{
911 uint32_t u32TrapMask;
912 Assert(pCtx);
913
914 u32TrapMask = HWACCM_VMX_TRAP_MASK;
915#ifndef DEBUG
916 if (pVM->hwaccm.s.fNestedPaging)
917 u32TrapMask &= ~RT_BIT(X86_XCPT_PF); /* no longer need to intercept #PF. */
918#endif
919
920 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
921 if ( CPUMIsGuestFPUStateActive(pVM) == true
922 && !(pCtx->cr0 & X86_CR0_NE)
923 && !pVCpu->hwaccm.s.fFPUOldStyleOverride)
924 {
925 u32TrapMask |= RT_BIT(X86_XCPT_MF);
926 pVCpu->hwaccm.s.fFPUOldStyleOverride = true;
927 }
928
929#ifdef DEBUG
930 /* Intercept X86_XCPT_DB if stepping is enabled */
931 if (DBGFIsStepping(pVM))
932 u32TrapMask |= RT_BIT(X86_XCPT_DB);
933#endif
934
935#ifdef VBOX_STRICT
936 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
937#endif
938
939# ifdef HWACCM_VMX_EMULATE_REALMODE
940 /* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
941 if (CPUMIsGuestInRealModeEx(pCtx))
942 u32TrapMask |= HWACCM_VMX_TRAP_MASK_REALMODE;
943# endif /* HWACCM_VMX_EMULATE_REALMODE */
944
945 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
946 AssertRC(rc);
947}
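/* Note (illustrative): each bit n set in VMX_VMCS_CTRL_EXCEPTION_BITMAP makes
 * guest exception vector n (0..31) cause a VM-exit; cleared bits are delivered
 * through the guest IDT without leaving the VM.  For example, to additionally
 * intercept #DB while keeping the rest of the mask:
 *
 *     u32TrapMask |= RT_BIT(X86_XCPT_DB);
 *     VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
 */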
948
949/**
950 * Loads the guest state
951 *
952 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
953 *
954 * @returns VBox status code.
955 * @param pVM The VM to operate on.
956 * @param pVCpu The VMCPU to operate on.
957 * @param pCtx Guest context
958 */
959VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
960{
961 int rc = VINF_SUCCESS;
962 RTGCUINTPTR val;
963 X86EFLAGS eflags;
964
965 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
966 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
967 {
968#ifdef HWACCM_VMX_EMULATE_REALMODE
969 PGMMODE enmGuestMode = PGMGetGuestMode(pVM);
970 if (pVM->hwaccm.s.vmx.enmCurrGuestMode != enmGuestMode)
971 {
972 /* Correct weird requirements for switching to protected mode. */
973 if ( pVM->hwaccm.s.vmx.enmCurrGuestMode == PGMMODE_REAL
974 && enmGuestMode >= PGMMODE_PROTECTED)
975 {
976 /* DPL of all hidden selector registers must match the current CPL (0). */
977 pCtx->csHid.Attr.n.u2Dpl = 0;
978 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
979
980 pCtx->dsHid.Attr.n.u2Dpl = 0;
981 pCtx->esHid.Attr.n.u2Dpl = 0;
982 pCtx->fsHid.Attr.n.u2Dpl = 0;
983 pCtx->gsHid.Attr.n.u2Dpl = 0;
984 pCtx->ssHid.Attr.n.u2Dpl = 0;
985 }
986 else
987 /* Switching from protected mode to real mode. */
988 if ( pVM->hwaccm.s.vmx.enmCurrGuestMode >= PGMMODE_PROTECTED
989 && enmGuestMode == PGMMODE_REAL)
990 {
991 /* The limit must also be adjusted. */
992 pCtx->csHid.u32Limit &= 0xffff;
993 pCtx->dsHid.u32Limit &= 0xffff;
994 pCtx->esHid.u32Limit &= 0xffff;
995 pCtx->fsHid.u32Limit &= 0xffff;
996 pCtx->gsHid.u32Limit &= 0xffff;
997 pCtx->ssHid.u32Limit &= 0xffff;
998
999 Assert(pCtx->csHid.u64Base <= 0xfffff);
1000 Assert(pCtx->dsHid.u64Base <= 0xfffff);
1001 Assert(pCtx->esHid.u64Base <= 0xfffff);
1002 Assert(pCtx->fsHid.u64Base <= 0xfffff);
1003 Assert(pCtx->gsHid.u64Base <= 0xfffff);
1004 }
1005 pVM->hwaccm.s.vmx.enmCurrGuestMode = enmGuestMode;
1006 }
1007 else
1008 /* VT-x will fail with an invalid guest state otherwise... (CPU state after a reset) */
1009 if ( CPUMIsGuestInRealModeEx(pCtx)
1010 && pCtx->csHid.u64Base == 0xffff0000)
1011 {
1012 pCtx->csHid.u64Base = 0xf0000;
1013 pCtx->cs = 0xf000;
1014 }
1015#endif /* HWACCM_VMX_EMULATE_REALMODE */
1016
1017 VMX_WRITE_SELREG(ES, es);
1018 AssertRC(rc);
1019
1020 VMX_WRITE_SELREG(CS, cs);
1021 AssertRC(rc);
1022
1023 VMX_WRITE_SELREG(SS, ss);
1024 AssertRC(rc);
1025
1026 VMX_WRITE_SELREG(DS, ds);
1027 AssertRC(rc);
1028
1029 /* The base values in the hidden fs & gs registers are not in sync with the msrs; they are cut to 32 bits. */
1030 VMX_WRITE_SELREG(FS, fs);
1031 AssertRC(rc);
1032
1033 VMX_WRITE_SELREG(GS, gs);
1034 AssertRC(rc);
1035 }
1036
1037 /* Guest CPU context: LDTR. */
1038 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1039 {
1040 if (pCtx->ldtr == 0)
1041 {
1042 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_LDTR, 0);
1043 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_LIMIT, 0);
1044 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_BASE, 0);
1045 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1046 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1047 }
1048 else
1049 {
1050 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_LDTR, pCtx->ldtr);
1051 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1052 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1053 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1054 }
1055 AssertRC(rc);
1056 }
1057 /* Guest CPU context: TR. */
1058 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1059 {
1060#ifdef HWACCM_VMX_EMULATE_REALMODE
1061 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1062 if (CPUMIsGuestInRealModeEx(pCtx))
1063 {
1064 RTGCPHYS GCPhys;
1065
1066 /* We convert it here every time as pci regions could be reconfigured. */
1067 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1068 AssertRC(rc);
1069
1070 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_TR, 0);
1071 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1072 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1073
1074 X86DESCATTR attr;
1075
1076 attr.u = 0;
1077 attr.n.u1Present = 1;
1078 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1079 val = attr.u;
1080 }
1081 else
1082#endif /* HWACCM_VMX_EMULATE_REALMODE */
1083 {
1084 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_TR, pCtx->tr);
1085 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1086 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_BASE, pCtx->trHid.u64Base);
1087
1088 val = pCtx->trHid.Attr.u;
1089
1090 /* The TSS selector must be busy. */
1091 if ((val & 0xF) == X86_SEL_TYPE_SYS_286_TSS_AVAIL)
1092 val = (val & ~0xF) | X86_SEL_TYPE_SYS_286_TSS_BUSY;
1093 else
1094 /* Default even if no TR selector has been set (otherwise vmlaunch will fail!) */
1095 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1096
1097 }
1098 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_ACCESS_RIGHTS, val);
1099 AssertRC(rc);
1100 }
1101 /* Guest CPU context: GDTR. */
1102 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1103 {
1104 rc = VMXWriteVMCS(VMX_VMCS_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1105 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1106 AssertRC(rc);
1107 }
1108 /* Guest CPU context: IDTR. */
1109 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1110 {
1111 rc = VMXWriteVMCS(VMX_VMCS_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1112 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1113 AssertRC(rc);
1114 }
1115
1116 /*
1117 * Sysenter MSRs (unconditional)
1118 */
1119 rc = VMXWriteVMCS(VMX_VMCS_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1120 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1121 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1122 AssertRC(rc);
1123
1124 /* Control registers */
1125 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1126 {
1127 val = pCtx->cr0;
1128 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1129 Log2(("Guest CR0-shadow %08x\n", val));
1130 if (CPUMIsGuestFPUStateActive(pVM) == false)
1131 {
1132 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1133 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1134 }
1135 else
1136 {
1137 /** @todo check if we support the old style mess correctly. */
1138 if (!(val & X86_CR0_NE))
1139 Log(("Forcing X86_CR0_NE!!!\n"));
1140
1141 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1142 }
1143 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1144 val |= X86_CR0_PE | X86_CR0_PG;
1145 if (pVM->hwaccm.s.fNestedPaging)
1146 {
1147 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1148 {
1149 /* Disable cr3 read/write monitoring as we don't need it for EPT. */
1150 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1151 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1152 }
1153 else
1154 {
1155 /* Reenable cr3 read/write monitoring as our identity mapped page table is active. */
1156 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1157 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1158 }
1159 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1160 AssertRC(rc);
1161 }
1162 else
1163 {
1164 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1165 val |= X86_CR0_WP;
1166 }
1167
1168 /* Always enable caching. */
1169 val &= ~(X86_CR0_CD|X86_CR0_NW);
1170
1171 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_CR0, val);
1172 Log2(("Guest CR0 %08x\n", val));
1173 /* CR0 flags owned by the host; if the guest attempts to change them, then
1174 * the VM will exit.
1175 */
1176 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1177 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1178 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1179 | X86_CR0_TS
1180 | X86_CR0_ET /* Bit not restored during VM-exit! */
1181 | X86_CR0_CD /* Bit not restored during VM-exit! */
1182 | X86_CR0_NW /* Bit not restored during VM-exit! */
1183 | X86_CR0_NE
1184 | X86_CR0_MP;
1185 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1186
1187 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1188 Log2(("Guest CR0-mask %08x\n", val));
1189 AssertRC(rc);
1190 }
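    /* Note (illustrative): the CR0 guest/host mask and the read shadow work as a
     * pair: bits set in the mask are owned by the host, guest reads of those bits
     * return the shadow value, and guest writes that would change them force a
     * VM-exit.  The guest-visible CR0 is therefore reconstructed as done in
     * VMXR0SaveGuestState:
     *
     *     cr0Guest = (shadow & cr0_mask) | (realCr0 & ~cr0_mask);
     */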
1191 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1192 {
1193 /* CR4 */
1194 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1195 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1196 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1197 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1198
1199 if (!pVM->hwaccm.s.fNestedPaging)
1200 {
1201 switch(pVM->hwaccm.s.enmShadowMode)
1202 {
1203 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1204 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1205 case PGMMODE_32_BIT: /* 32-bit paging. */
1206 break;
1207
1208 case PGMMODE_PAE: /* PAE paging. */
1209 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1210 /** @todo use normal 32 bits paging */
1211 val |= X86_CR4_PAE;
1212 break;
1213
1214 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1215 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1216#ifdef VBOX_ENABLE_64_BITS_GUESTS
1217 break;
1218#else
1219 AssertFailed();
1220 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1221#endif
1222 default: /* shut up gcc */
1223 AssertFailed();
1224 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1225 }
1226 }
1227 else
1228 if (!CPUMIsGuestInPagedProtectedModeEx(pCtx))
1229 {
1230 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1231 val |= X86_CR4_PSE;
1232 /* Our identity mapping is a 32 bits page directory. */
1233 val &= ~X86_CR4_PAE;
1234 }
1235
1236#ifdef HWACCM_VMX_EMULATE_REALMODE
1237 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1238 if (CPUMIsGuestInRealModeEx(pCtx))
1239 val |= X86_CR4_VME;
1240#endif /* HWACCM_VMX_EMULATE_REALMODE */
1241
1242 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_CR4, val);
1243 Log2(("Guest CR4 %08x\n", val));
1244 /* CR4 flags owned by the host; if the guest attempts to change them, then
1245 * the VM will exit.
1246 */
1247 val = 0
1248#ifdef HWACCM_VMX_EMULATE_REALMODE
1249 | X86_CR4_VME
1250#endif
1251 | X86_CR4_PAE
1252 | X86_CR4_PGE
1253 | X86_CR4_PSE
1254 | X86_CR4_VMXE;
1255 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1256
1257 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1258 Log2(("Guest CR4-mask %08x\n", val));
1259 AssertRC(rc);
1260 }
1261
1262 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1263 {
1264 if (pVM->hwaccm.s.fNestedPaging)
1265 {
1266 AssertMsg(PGMGetEPTCR3(pVM) == PGMGetHyperCR3(pVM), ("%RHp vs %RHp\n", PGMGetEPTCR3(pVM), PGMGetHyperCR3(pVM)));
1267 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetEPTCR3(pVM);
1268
1269 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1270 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
1271 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1272 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
1273
1274 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1275#if HC_ARCH_BITS == 32
1276 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EPTP_HIGH, (uint32_t)(pVCpu->hwaccm.s.vmx.GCPhysEPTP >> 32ULL));
1277#endif
1278 AssertRC(rc);
1279
1280 if (!CPUMIsGuestInPagedProtectedModeEx(pCtx))
1281 {
1282 RTGCPHYS GCPhys;
1283
1284 /* We convert it here every time as pci regions could be reconfigured. */
1285 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1286 AssertRC(rc);
1287
1288 /* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will
1289 * take care of the translation to host physical addresses.
1290 */
1291 val = GCPhys;
1292 }
1293 else
1294 {
1295 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1296 val = pCtx->cr3;
1297 /* Prefetch the four PDPT entries in PAE mode. */
1298 vmxR0PrefetchPAEPdptrs(pVM, pCtx);
1299 }
1300 }
1301 else
1302 {
1303 val = PGMGetHyperCR3(pVM);
1304 Assert(val);
1305 }
1306
1307 /* Save our shadow CR3 register. */
1308 rc = VMXWriteVMCS(VMX_VMCS_GUEST_CR3, val);
1309 AssertRC(rc);
1310 }
1311
1312 /* Debug registers. */
1313 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1314 {
1315 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
1316 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
1317
1318 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
1319 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
1320 pCtx->dr[7] |= 0x400; /* must be one */
1321
1322 /* Resync DR7 */
1323 rc = VMXWriteVMCS(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
1324 AssertRC(rc);
1325
1326 /* Sync the debug state now if any breakpoint is armed. */
1327 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
1328 && !CPUMIsGuestDebugStateActive(pVM)
1329 && !DBGFIsStepping(pVM))
1330 {
1331 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
1332
1333 /* Disable drx move intercepts. */
1334 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
1335 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1336 AssertRC(rc);
1337
1338 /* Save the host and load the guest debug state. */
1339 rc = CPUMR0LoadGuestDebugState(pVM, pCtx, true /* include DR6 */);
1340 AssertRC(rc);
1341 }
1342
1343 /* IA32_DEBUGCTL MSR. */
1344 rc = VMXWriteVMCS(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
1345 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUGCTL_HIGH, 0);
1346 AssertRC(rc);
1347
1348 /** @todo do we really ever need this? */
1349 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
1350 AssertRC(rc);
1351 }
1352
1353 /* EIP, ESP and EFLAGS */
1354 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RIP, pCtx->rip);
1355 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_RSP, pCtx->rsp);
1356 AssertRC(rc);
1357
1358 /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */
1359 eflags = pCtx->eflags;
1360 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1361 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1362
1363#ifdef HWACCM_VMX_EMULATE_REALMODE
1364 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1365 if (CPUMIsGuestInRealModeEx(pCtx))
1366 {
1367 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1368
1369 eflags.Bits.u1VM = 1;
1370 eflags.Bits.u2IOPL = 3;
1371 }
1372#endif /* HWACCM_VMX_EMULATE_REALMODE */
1373 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1374 AssertRC(rc);
1375
1376 /* TSC offset. */
1377 uint64_t u64TSCOffset;
1378
1379 if (TMCpuTickCanUseRealTSC(pVM, &u64TSCOffset))
1380 {
1381 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */
1382 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TSC_OFFSET_FULL, u64TSCOffset);
1383#if HC_ARCH_BITS == 32
1384 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_TSC_OFFSET_HIGH, (uint32_t)(u64TSCOffset >> 32ULL));
1385#endif
1386 AssertRC(rc);
1387
1388 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1389 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1390 AssertRC(rc);
1391 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
1392 }
1393 else
1394 {
1395 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1396 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1397 AssertRC(rc);
1398 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
1399 }
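    /* Note (illustrative): with TSC offsetting the guest reads
     * host TSC + VMX_VMCS_CTRL_TSC_OFFSET directly and RDTSC never exits; when
     * TM decides the real TSC cannot be used (e.g. it could appear to jump),
     * RDTSC exiting is enabled instead and each guest RDTSC is emulated.
     * Conceptually:
     *
     *     guest TSC value == host TSC + u64TSCOffset
     */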
1400
1401 /* VMX_VMCS_CTRL_ENTRY_CONTROLS
1402 * Set required bits to one and zero according to the MSR capabilities.
1403 */
1404 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1405 /* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1406 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1407
1408 /* 64 bits guest mode? */
1409 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1410 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1411 /* else Must be zero when AMD64 is not available. */
1412
1413 /* Mask away the bits that the CPU doesn't support */
1414 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1415 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1416 AssertRC(rc);
1417
1418 /* 64 bits guest mode? */
1419 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1420 {
1421#if !defined(VBOX_WITH_64_BITS_GUESTS) || HC_ARCH_BITS != 64
1422 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1423#else
1424 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
1425#endif
1426 /* Unconditionally update these as wrmsr might have changed them. */
1427 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FS_BASE, pCtx->fsHid.u64Base);
1428 AssertRC(rc);
1429 rc = VMXWriteVMCS(VMX_VMCS_GUEST_GS_BASE, pCtx->gsHid.u64Base);
1430 AssertRC(rc);
1431 }
1432 else
1433 {
1434 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
1435 }
1436
1437 vmxR0UpdateExceptionBitmap(pVM, pVCpu, pCtx);
1438
1439 /* Done. */
1440 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
1441
1442 return rc;
1443}
1444
1445/**
1446 * Syncs back the guest state
1447 *
1448 * @returns VBox status code.
1449 * @param pVM The VM to operate on.
1450 * @param pVCpu The VMCPU to operate on.
1451 * @param pCtx Guest context
1452 */
1453DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1454{
1455 RTCCUINTREG val, valShadow;
1456 RTGCUINTPTR uInterruptState;
1457 int rc;
1458
1459 /* Let's first sync back eip, esp, and eflags. */
1460 rc = VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val);
1461 AssertRC(rc);
1462 pCtx->rip = val;
1463 rc = VMXReadVMCS(VMX_VMCS_GUEST_RSP, &val);
1464 AssertRC(rc);
1465 pCtx->rsp = val;
1466 rc = VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
1467 AssertRC(rc);
1468 pCtx->eflags.u32 = val;
1469
1470 /* Take care of interrupt inhibition by 'sti' and 'mov ss' (interruptibility state). */
1471 rc |= VMXReadVMCS(VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE, &val);
1472 uInterruptState = val;
1473 if (uInterruptState != 0)
1474 {
1475 Assert(uInterruptState <= 2); /* only sti & mov ss */
1476 Log(("uInterruptState %x eip=%RGv\n", uInterruptState, pCtx->rip));
1477 EMSetInhibitInterruptsPC(pVM, pCtx->rip);
1478 }
1479 else
1480 VM_FF_CLEAR(pVM, VM_FF_INHIBIT_INTERRUPTS);
1481
1482 /* Control registers. */
1483 VMXReadVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
1484 VMXReadVMCS(VMX_VMCS_GUEST_CR0, &val);
1485 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
1486 CPUMSetGuestCR0(pVM, val);
1487
1488 VMXReadVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
1489 VMXReadVMCS(VMX_VMCS_GUEST_CR4, &val);
1490 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
1491 CPUMSetGuestCR4(pVM, val);
1492
1493 /* Note: no reason to sync back the CRx registers. They can't be changed by the guest. */
1494 /* Note: only in the nested paging case can CR3 & CR4 be changed by the guest. */
1495 if ( pVM->hwaccm.s.fNestedPaging
1496 && CPUMIsGuestInPagedProtectedModeEx(pCtx))
1497 {
1498 /* Can be updated behind our back in the nested paging case. */
1499 CPUMSetGuestCR2(pVM, ASMGetCR2());
1500
1501 VMXReadVMCS(VMX_VMCS_GUEST_CR3, &val);
1502
1503 if (val != pCtx->cr3)
1504 {
1505 CPUMSetGuestCR3(pVM, val);
1506 PGMUpdateCR3(pVM, val);
1507 }
1508 /* Prefetch the four PDPT entries in PAE mode. */
1509 vmxR0PrefetchPAEPdptrs(pVM, pCtx);
1510 }
1511
1512 /* Sync back DR7 here. */
1513 VMXReadVMCS(VMX_VMCS_GUEST_DR7, &val);
1514 pCtx->dr[7] = val;
1515
1516 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1517 VMX_READ_SELREG(ES, es);
1518 VMX_READ_SELREG(SS, ss);
1519 VMX_READ_SELREG(CS, cs);
1520 VMX_READ_SELREG(DS, ds);
1521 VMX_READ_SELREG(FS, fs);
1522 VMX_READ_SELREG(GS, gs);
1523
1524 /*
1525 * System MSRs
1526 */
1527 VMXReadVMCS(VMX_VMCS_GUEST_SYSENTER_CS, &val);
1528 pCtx->SysEnter.cs = val;
1529 VMXReadVMCS(VMX_VMCS_GUEST_SYSENTER_EIP, &val);
1530 pCtx->SysEnter.eip = val;
1531 VMXReadVMCS(VMX_VMCS_GUEST_SYSENTER_ESP, &val);
1532 pCtx->SysEnter.esp = val;
1533
1534 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
1535 VMX_READ_SELREG(LDTR, ldtr);
1536
1537 VMXReadVMCS(VMX_VMCS_GUEST_GDTR_LIMIT, &val);
1538 pCtx->gdtr.cbGdt = val;
1539 VMXReadVMCS(VMX_VMCS_GUEST_GDTR_BASE, &val);
1540 pCtx->gdtr.pGdt = val;
1541
1542 VMXReadVMCS(VMX_VMCS_GUEST_IDTR_LIMIT, &val);
1543 pCtx->idtr.cbIdt = val;
1544 VMXReadVMCS(VMX_VMCS_GUEST_IDTR_BASE, &val);
1545 pCtx->idtr.pIdt = val;
1546
1547#ifdef HWACCM_VMX_EMULATE_REALMODE
1548 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1549 if (CPUMIsGuestInRealModeEx(pCtx))
1550 {
1551 /* Hide our emulation flags */
1552 pCtx->eflags.Bits.u1VM = 0;
1553 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
1554
1555 /* Force a TR resync every time in case we switch modes. */
1556 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
1557 }
1558 else
1559#endif /* HWACCM_VMX_EMULATE_REALMODE */
1560 {
1561 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
1562 VMX_READ_SELREG(TR, tr);
1563 }
1564 return VINF_SUCCESS;
1565}
1566
1567/**
1568 * Dummy placeholder
1569 *
1570 * @param pVM The VM to operate on.
1571 * @param pVCpu The VMCPU to operate on.
1572 */
1573static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu)
1574{
1575 NOREF(pVM);
1576 NOREF(pVCpu);
1577 return;
1578}
1579
1580/**
1581 * Setup the tagged TLB for EPT
1582 *
1583 * @returns VBox status code.
1584 * @param pVM The VM to operate on.
1585 * @param pVCpu The VMCPU to operate on.
1586 */
1587static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu)
1588{
1589 PHWACCM_CPUINFO pCpu;
1590
1591 Assert(pVM->hwaccm.s.fNestedPaging);
1592 Assert(!pVM->hwaccm.s.vmx.fVPID);
1593
1594 /* Deal with tagged TLBs if VPID or EPT is supported. */
1595 pCpu = HWACCMR0GetCurrentCpu();
1596 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
1597 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
1598 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
1599 /* If the TLB flush count has changed, another VM has flushed the TLB on this cpu, so our cached mappings may no longer be valid. */
1600 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1601 {
1602 /* Force a TLB flush on VM entry. */
1603 pVCpu->hwaccm.s.fForceTLBFlush = true;
1604 }
1605 else
1606 Assert(!pCpu->fFlushTLB);
1607
1608 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
1609 pCpu->fFlushTLB = false;
1610
1611 if (pVCpu->hwaccm.s.fForceTLBFlush)
1612 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
1613
1614#ifdef VBOX_WITH_STATISTICS
1615 if (pVCpu->hwaccm.s.fForceTLBFlush)
1616 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
1617 else
1618 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
1619#endif
1620}
1621
1622#ifdef HWACCM_VTX_WITH_VPID
1623/**
1624 * Set up the tagged TLB for VPID.
1625 *
1627 * @param pVM The VM to operate on.
1628 * @param pVCpu The VMCPU to operate on.
1629 */
1630static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu)
1631{
1632 PHWACCM_CPUINFO pCpu;
1633
1634 Assert(pVM->hwaccm.s.vmx.fVPID);
1635 Assert(!pVM->hwaccm.s.fNestedPaging);
1636
1637 /* Deal with tagged TLBs if VPID or EPT is supported. */
1638 pCpu = HWACCMR0GetCurrentCpu();
1639 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
1640 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
1641 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
1642 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
1643 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1644 {
1645 /* Force a TLB flush on VM entry. */
1646 pVCpu->hwaccm.s.fForceTLBFlush = true;
1647 }
1648 else
1649 Assert(!pCpu->fFlushTLB);
1650
1651 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
1652
1653 /* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without actually flushing the whole TLB (which is expensive). */
1654 if (pVCpu->hwaccm.s.fForceTLBFlush)
1655 {
1656 if ( ++pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID
1657 || pCpu->fFlushTLB)
1658 {
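            /* The ASID space on this cpu is exhausted (or a full flush was requested): wrap around to 1 and bump
               the flush count, so any VCPU that last ran here notices the mismatch and forces a flush as well. */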
1659 pCpu->fFlushTLB = false;
1660 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
1661 pCpu->cTLBFlushes++;
1662 }
1663 else
1664 {
1665 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
1666 pVCpu->hwaccm.s.fForceTLBFlush = false;
1667 }
1668
1669 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
1670 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
1671 }
1672 else
1673 {
1674 Assert(!pCpu->fFlushTLB);
1675
1676 if (!pCpu->uCurrentASID || !pVCpu->hwaccm.s.uCurrentASID)
1677 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID = 1;
1678 }
1679 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
1680 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
1681 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
1682
1683 int rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
1684 AssertRC(rc);
1685
1686 if (pVCpu->hwaccm.s.fForceTLBFlush)
1687 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
1688
1689#ifdef VBOX_WITH_STATISTICS
1690 if (pVCpu->hwaccm.s.fForceTLBFlush)
1691 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
1692 else
1693 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
1694#endif
1695}
1696#endif /* HWACCM_VTX_WITH_VPID */
1697
1698/**
1699 * Runs guest code in a VT-x VM.
1700 *
1701 * @returns VBox status code.
1702 * @param pVM The VM to operate on.
1703 * @param pVCpu The VMCPU to operate on.
1704 * @param pCtx Guest context
1705 */
1706VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1707{
1708 int rc = VINF_SUCCESS;
1709 RTCCUINTREG val;
1710 RTCCUINTREG exitReason, instrError, cbInstr;
1711 RTGCUINTPTR exitQualification;
1712 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
1713 RTGCUINTPTR errCode, instrInfo;
1714 bool fSyncTPR = false;
1715 PHWACCM_CPUINFO pCpu = 0;
1716 unsigned cResume = 0;
1717#ifdef VBOX_STRICT
1718 RTCPUID idCpuCheck;
1719#endif
1720
1721 Log2(("\nE"));
1722
1723 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
1724
1725#ifdef VBOX_STRICT
1726 rc = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
1727 AssertRC(rc);
1728 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val));
1729
1730 /* allowed zero */
1731 if ((val & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
1732 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
1733
1734 /* allowed one */
1735 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
1736 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
1737
1738 rc = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
1739 AssertRC(rc);
1740 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val));
1741
1742 /* Must be set according to the MSR, but can be cleared in case of EPT. */
1743 if (pVM->hwaccm.s.fNestedPaging)
1744 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
1745 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1746 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1747
1748 /* allowed zero */
1749 if ((val & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
1750 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
1751
1752 /* allowed one */
1753 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
1754 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
1755
1756 rc = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
1757 AssertRC(rc);
1758 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val));
1759
1760 /* allowed zero */
1761 if ((val & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
1762 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
1763
1764 /* allowed one */
1765 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
1766 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
1767
1768 rc = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
1769 AssertRC(rc);
1770 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val));
1771
1772 /* allowed zero */
1773 if ((val & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
1774 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
1775
1776 /* allowed one */
1777 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
1778 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
1779#endif
1780
1781 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
1782 */
1783ResumeExecution:
1784 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
1785 ("Expected %d, I'm %d; cResume=%d exitReason=%RTreg exitQualification=%RTreg\n",
1786 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
1787 Assert(!HWACCMR0SuspendPending());
1788
1789 /* Safety precaution; looping for too long here can have a very bad effect on the host */
1790 if (++cResume > HWACCM_MAX_RESUME_LOOPS)
1791 {
1792 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
1793 rc = VINF_EM_RAW_INTERRUPT;
1794 goto end;
1795 }
1796
1797 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
1798 if (VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
1799 {
1800 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVM)));
1801 if (pCtx->rip != EMGetInhibitInterruptsPC(pVM))
1802 {
1803 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
1804 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
1805 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
1806 * break the guest. Sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
1807 */
1808 VM_FF_CLEAR(pVM, VM_FF_INHIBIT_INTERRUPTS);
1809 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
1810 rc = VMXWriteVMCS(VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE, 0);
1811 AssertRC(rc);
1812 }
1813 }
1814 else
1815 {
1816 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
1817 rc = VMXWriteVMCS(VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE, 0);
1818 AssertRC(rc);
1819 }
1820
1821 /* Check for pending actions that force us to go back to ring 3. */
1822 if (VM_FF_ISPENDING(pVM, VM_FF_TO_R3 | VM_FF_TIMER))
1823 {
1824 VM_FF_CLEAR(pVM, VM_FF_TO_R3);
1825 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
1826 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
1827 rc = VINF_EM_RAW_TO_R3;
1828 goto end;
1829 }
1830 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
1831 if (VM_FF_ISPENDING(pVM, VM_FF_REQUEST))
1832 {
1833 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
1834 rc = VINF_EM_PENDING_REQUEST;
1835 goto end;
1836 }
1837
1838 /* When external interrupts are pending, we should exit the VM when IF is set. */
1839 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
1840 rc = VMXR0CheckPendingInterrupt(pVM, pVCpu, pCtx);
1841 if (RT_FAILURE(rc))
1842 {
1843 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
1844 goto end;
1845 }
1846
1847 /** @todo check timers?? */
1848
1849 /* TPR caching using CR8 is only available in 64-bit mode. */
1850 /* Note the 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), which appears to be missing in Intel CPUs. */
1851 /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!! */
1852 /**
1853 * @todo reduce overhead
1854 */
1855 if ( (pCtx->msrEFER & MSR_K6_EFER_LMA)
1856 && pVM->hwaccm.s.vmx.pAPIC)
1857 {
1858 /* TPR caching in CR8 */
1859 uint8_t u8TPR;
1860 bool fPending;
1861
1862 int rc = PDMApicGetTPR(pVM, &u8TPR, &fPending);
1863 AssertRC(rc);
1864 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
1865 pVM->hwaccm.s.vmx.pAPIC[0x80] = u8TPR << 4; /* bits 7-4 contain the task priority */
1866
1867 /* Two options here:
1868 * - external interrupt pending, but masked by the TPR value.
1869 * -> a CR8 update that lowers the current TPR value should cause an exit
1870 * - no pending interrupts
1871 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
1872 */
1873 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? u8TPR : 0);
1874 AssertRC(rc);
1875
1876 /* Always sync back the TPR; we should optimize this though */ /** @todo optimize TPR sync. */
1877 fSyncTPR = true;
1878 }
1879
1880#if defined(HWACCM_VTX_WITH_EPT) && defined(LOG_ENABLED)
1881 if ( pVM->hwaccm.s.fNestedPaging
1882# ifdef HWACCM_VTX_WITH_VPID
1883 || pVM->hwaccm.s.vmx.fVPID
1884# endif /* HWACCM_VTX_WITH_VPID */
1885 )
1886 {
1887 pCpu = HWACCMR0GetCurrentCpu();
1888 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
1889 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1890 {
1891 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
1892 Log(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
1893 else
1894 Log(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
1895 }
1896 if (pCpu->fFlushTLB)
1897 Log(("Force TLB flush: first time cpu %d is used -> flush\n", pCpu->idCpu));
1898 else
1899 if (pVCpu->hwaccm.s.fForceTLBFlush)
1900 LogFlow(("Manual TLB flush\n"));
1901 }
1902#endif
1903
1904 /*
1905 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
1906 * (until the actual world switch)
1907 */
1908#ifdef VBOX_STRICT
1909 idCpuCheck = RTMpCpuId();
1910#endif
1911 /* Save the host state first. */
1912 rc = VMXR0SaveHostState(pVM, pVCpu);
1913 if (rc != VINF_SUCCESS)
1914 {
1915 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
1916 goto end;
1917 }
1918 /* Load the guest state */
1919 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
1920 if (rc != VINF_SUCCESS)
1921 {
1922 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
1923 goto end;
1924 }
1925
1926 /* Deal with tagged TLB setup and invalidation. */
1927 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
1928
1929 /* Non-register state Guest Context */
1930 /** @todo change me according to cpu state */
1931 rc = VMXWriteVMCS(VMX_VMCS_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
1932 AssertRC(rc);
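    /* The VMX guest activity state distinguishes active, HLT, shutdown and wait-for-SIPI; for now the guest is
       always reported as active (see the todo above). */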
1933
1934 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
1935
1936 /* Manual save and restore:
1937 * - General purpose registers except RIP, RSP
1938 *
1939 * Trashed:
1940 * - CR2 (we don't care)
1941 * - LDTR (reset to 0)
1942 * - DRx (presumably not changed at all)
1943 * - DR7 (reset to 0x400)
1944 * - EFLAGS (reset to RT_BIT(1); not relevant)
1945 *
1946 */
1947
1948 /* All done! Let's start VM execution. */
1949 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatInGC, x);
1950#ifdef VBOX_STRICT
1951 Assert(idCpuCheck == RTMpCpuId());
1952#endif
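    /* fResumeVM presumably selects VMRESUME instead of VMLAUNCH inside pfnStartVM; it is false only for the
       first entry after the VMCS was (re)activated. */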
1953 TMNotifyStartOfExecution(pVM);
1954 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx);
1955 TMNotifyEndOfExecution(pVM);
1956
1957 /* In case we execute a goto ResumeExecution later on. */
1958 pVCpu->hwaccm.s.fResumeVM = true;
1959 pVCpu->hwaccm.s.fForceTLBFlush = false;
1960
1961 /*
1962 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1963 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
1964 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1965 */
1966
1967 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatInGC, x);
1968 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit, x);
1969
1970 if (rc != VINF_SUCCESS)
1971 {
1972 VMXR0ReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
1973 goto end;
1974 }
1975 /* Success. Query the guest state and figure out what has happened. */
1976
1977 /* Investigate why there was a VM-exit. */
1978 rc = VMXReadVMCS(VMX_VMCS_RO_EXIT_REASON, &exitReason);
1979 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
1980
1981 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
1982 rc |= VMXReadVMCS(VMX_VMCS_RO_VM_INSTR_ERROR, &instrError);
1983 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_INSTR_LENGTH, &cbInstr);
1984 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_INTERRUPTION_INFO, &val);
1985 intInfo = val;
1986 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_INTERRUPTION_ERRCODE, &val);
1987 errCode = val; /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
1988 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_INSTR_INFO, &val);
1989 instrInfo = val;
1990 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &val);
1991 exitQualification = val;
1992 AssertRC(rc);
1993
1994 /* Sync back the guest state */
1995 rc = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
1996 AssertRC(rc);
1997
1998 /* Note! NOW IT'S SAFE FOR LOGGING! */
1999 Log2(("Raw exit reason %08x\n", exitReason));
2000
2001 /* Check if an injected event was interrupted prematurely. */
2002 rc = VMXReadVMCS(VMX_VMCS_RO_IDT_INFO, &val);
2003 AssertRC(rc);
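    /* The IDT-vectoring info field describes an event whose delivery was interrupted by this exit; if it is valid
       (and not a software interrupt/exception) it is recorded as pending so it can be reinjected on the next entry. */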
2004 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
2005 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2006 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW)
2007 {
2008 pVCpu->hwaccm.s.Event.fPending = true;
2009 /* Error code present? */
2010 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
2011 {
2012 rc = VMXReadVMCS(VMX_VMCS_RO_IDT_ERRCODE, &val);
2013 AssertRC(rc);
2014 pVCpu->hwaccm.s.Event.errCode = val;
2015 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%08x pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
2016 }
2017 else
2018 {
2019 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%08x\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2020 pVCpu->hwaccm.s.Event.errCode = 0;
2021 }
2022 }
2023
2024#ifdef VBOX_STRICT
2025 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
2026 HWACCMDumpRegs(pVM, pCtx);
2027#endif
2028
2029 Log2(("E%d", exitReason));
2030 Log2(("Exit reason %d, exitQualification %08x\n", exitReason, exitQualification));
2031 Log2(("instrInfo=%d instrError=%d instr length=%d\n", instrInfo, instrError, cbInstr));
2032 Log2(("Interruption error code %d\n", errCode));
2033 Log2(("IntInfo = %08x\n", intInfo));
2034 Log2(("New EIP=%RGv\n", (RTGCPTR)pCtx->rip));
2035
2036 if (fSyncTPR)
2037 {
2038 rc = PDMApicSetTPR(pVM, pVM->hwaccm.s.vmx.pAPIC[0x80] >> 4);
2039 AssertRC(rc);
2040 }
2041
2042 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
2043 switch (exitReason)
2044 {
2045 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2046 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2047 {
2048 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
2049
2050 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2051 {
2052 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
2053 /* External interrupt; leave to allow it to be dispatched again. */
2054 rc = VINF_EM_RAW_INTERRUPT;
2055 break;
2056 }
2057 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
2058 {
2059 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
2060 /* External interrupt; leave to allow it to be dispatched again. */
2061 rc = VINF_EM_RAW_INTERRUPT;
2062 break;
2063
2064 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
2065 AssertFailed(); /* can't come here; fails the first check. */
2066 break;
2067
2068 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
2069 Assert(vector == 3 || vector == 4);
2070 /* no break */
2071 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
2072 Log2(("Hardware/software interrupt %d\n", vector));
2073 switch (vector)
2074 {
2075 case X86_XCPT_NM:
2076 {
2077 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
2078
2079 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
2080 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
2081 rc = CPUMR0LoadGuestFPU(pVM, pCtx);
2082 if (rc == VINF_SUCCESS)
2083 {
2084 Assert(CPUMIsGuestFPUStateActive(pVM));
2085
2086 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
2087
2088 /* Continue execution. */
2089 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2090 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2091
2092 goto ResumeExecution;
2093 }
2094
2095 Log(("Forward #NM fault to the guest\n"));
2096 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
2097 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
2098 AssertRC(rc);
2099 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2100 goto ResumeExecution;
2101 }
2102
2103 case X86_XCPT_PF: /* Page fault */
2104 {
2105#ifdef DEBUG
2106 if (pVM->hwaccm.s.fNestedPaging)
2107 { /* A genuine pagefault.
2108 * Forward the trap to the guest by injecting the exception and resuming execution.
2109 */
2110 Log(("Guest page fault at %RGv cr2=%RGv error code %x rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
2111
2112 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
2113
2114 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2115
2116 /* Now we must update CR2. */
2117 pCtx->cr2 = exitQualification;
2118 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2119 AssertRC(rc);
2120
2121 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2122 goto ResumeExecution;
2123 }
2124#endif
2125 Assert(!pVM->hwaccm.s.fNestedPaging);
2126
2127 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
2128 /* Exit qualification contains the linear address of the page fault. */
2129 TRPMAssertTrap(pVM, X86_XCPT_PF, TRPM_TRAP);
2130 TRPMSetErrorCode(pVM, errCode);
2131 TRPMSetFaultAddress(pVM, exitQualification);
2132
2133 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
2134 rc = PGMTrap0eHandler(pVM, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
2135 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
2136 if (rc == VINF_SUCCESS)
2137 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2138 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
2139 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
2140
2141 TRPMResetTrap(pVM);
2142
2143 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2144 goto ResumeExecution;
2145 }
2146 else
2147 if (rc == VINF_EM_RAW_GUEST_TRAP)
2148 { /* A genuine pagefault.
2149 * Forward the trap to the guest by injecting the exception and resuming execution.
2150 */
2151 Log2(("Forward page fault to the guest\n"));
2152
2153 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2154 /* The error code might have been changed. */
2155 errCode = TRPMGetErrorCode(pVM);
2156
2157 TRPMResetTrap(pVM);
2158
2159 /* Now we must update CR2. */
2160 pCtx->cr2 = exitQualification;
2161 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2162 AssertRC(rc);
2163
2164 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2165 goto ResumeExecution;
2166 }
2167#ifdef VBOX_STRICT
2168 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2169 Log2(("PGMTrap0eHandler failed with %d\n", rc));
2170#endif
2171 /* Need to go back to the recompiler to emulate the instruction. */
2172 TRPMResetTrap(pVM);
2173 break;
2174 }
2175
2176 case X86_XCPT_MF: /* Floating point exception. */
2177 {
2178 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
2179 if (!(pCtx->cr0 & X86_CR0_NE))
2180 {
2181 /* old style FPU error reporting needs some extra work. */
2182 /** @todo don't fall back to the recompiler, but do it manually. */
2183 rc = VINF_EM_RAW_EMULATE_INSTR;
2184 break;
2185 }
2186 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
2187 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2188 AssertRC(rc);
2189
2190 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2191 goto ResumeExecution;
2192 }
2193
2194 case X86_XCPT_DB: /* Debug exception. */
2195 {
2196 uint64_t uDR6;
2197
2198 /* DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
2199 *
2200 * Exit qualification bits:
2201 * 3:0 B0-B3 which breakpoint condition was met
2202 * 12:4 Reserved (0)
2203 * 13 BD - debug register access detected
2204 * 14 BS - single step execution or branch taken
2205 * 63:15 Reserved (0)
2206 */
2207 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
2208
2209 /* Note that we don't support guest and host-initiated debugging at the same time. */
2210 Assert(DBGFIsStepping(pVM) || CPUMIsGuestInRealModeEx(pCtx));
2211
2212 uDR6 = X86_DR6_INIT_VAL;
2213 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
2214 rc = DBGFR0Trap01Handler(pVM, CPUMCTX2CORE(pCtx), uDR6);
2215 if (rc == VINF_EM_RAW_GUEST_TRAP)
2216 {
2217 /** @todo this isn't working, but we'll never get here normally. */
2218
2219 /* Update DR6 here. */
2220 pCtx->dr[6] = uDR6;
2221
2222 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2223 pCtx->dr[7] &= ~X86_DR7_GD;
2224
2225 /* Paranoia. */
2226 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2227 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2228 pCtx->dr[7] |= 0x400; /* must be one */
2229
2230 /* Resync DR7 */
2231 rc = VMXWriteVMCS(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
2232 AssertRC(rc);
2233
2234 Log(("Trap %x (debug) at %RGv exit qualification %RX64\n", vector, (RTGCPTR)pCtx->rip, exitQualification));
2235 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2236 AssertRC(rc);
2237
2238 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2239 goto ResumeExecution;
2240 }
2241 /* Return to ring 3 to deal with the debug exit code. */
2242 break;
2243 }
2244
2245 case X86_XCPT_GP: /* General protection fault exception. */
2246 {
2247 uint32_t cbSize;
2248
2249 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
2250#ifdef VBOX_STRICT
2251 if (!CPUMIsGuestInRealModeEx(pCtx))
2252 {
2253 Log(("Trap %x at %04X:%RGv errorCode=%x\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode));
2254 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2255 AssertRC(rc);
2256 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2257 goto ResumeExecution;
2258 }
2259#endif
2260 Assert(CPUMIsGuestInRealModeEx(pCtx));
2261
2262 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %RGv\n", (RTGCPTR)pCtx->rip));
2263 rc = EMInterpretInstruction(pVM, CPUMCTX2CORE(pCtx), 0, &cbSize);
2264 if (rc == VINF_SUCCESS)
2265 {
2266 /* EIP has been updated already. */
2267
2268 /* lidt, lgdt can end up here. In the future crx changes as well. Just reload the whole context to be done with it. */
2269 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
2270
2271 /* Only resume if successful. */
2272 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2273 goto ResumeExecution;
2274 }
2275 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", rc));
2276 break;
2277 }
2278
2279#ifdef VBOX_STRICT
2280 case X86_XCPT_DE: /* Divide error. */
2281 case X86_XCPT_UD: /* Unknown opcode exception. */
2282 case X86_XCPT_SS: /* Stack segment exception. */
2283 case X86_XCPT_NP: /* Segment not present exception. */
2284 {
2285 switch(vector)
2286 {
2287 case X86_XCPT_DE:
2288 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
2289 break;
2290 case X86_XCPT_UD:
2291 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
2292 break;
2293 case X86_XCPT_SS:
2294 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
2295 break;
2296 case X86_XCPT_NP:
2297 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
2298 break;
2299 }
2300
2301 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
2302 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2303 AssertRC(rc);
2304
2305 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2306 goto ResumeExecution;
2307 }
2308#endif
2309 default:
2310#ifdef HWACCM_VMX_EMULATE_REALMODE
2311 if (CPUMIsGuestInRealModeEx(pCtx))
2312 {
2313 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode));
2314 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2315 AssertRC(rc);
2316
2317 /* Go back to ring 3 in case of a triple fault. */
2318 if ( vector == X86_XCPT_DF
2319 && rc == VINF_EM_RESET)
2320 break;
2321
2322 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2323 goto ResumeExecution;
2324 }
2325#endif
2326 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
2327 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
2328 break;
2329 } /* switch (vector) */
2330
2331 break;
2332
2333 default:
2334 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
2335 AssertFailed();
2336 break;
2337 }
2338
2339 break;
2340 }
2341
2342 case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
2343 {
2344 RTGCPHYS GCPhys;
2345
2346 Assert(pVM->hwaccm.s.fNestedPaging);
2347
2348#if HC_ARCH_BITS == 64
2349 rc = VMXReadVMCS(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
2350 AssertRC(rc);
2351#else
2352 uint32_t val_hi;
2353 rc = VMXReadVMCS(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &val);
2354 AssertRC(rc);
2355 rc = VMXReadVMCS(VMX_VMCS_EXIT_PHYS_ADDR_HIGH, &val_hi);
2356 AssertRC(rc);
2357 GCPhys = RT_MAKE_U64(val, val_hi);
2358#endif
2359
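            /* Exit qualification for EPT violations: bit 7 = guest linear address valid, bit 8 = the access was part
               of a linear-address translation; the assertion below rules out the 'not valid + during translation'
               combination, which should not occur. */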
2360 Assert(((exitQualification >> 7) & 3) != 2);
2361
2362 /* Determine the kind of violation. */
2363 errCode = 0;
2364 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
2365 errCode |= X86_TRAP_PF_ID;
2366
2367 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
2368 errCode |= X86_TRAP_PF_RW;
2369
2370 /* If the page is present, then it's a page level protection fault. */
2371 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
2372 errCode |= X86_TRAP_PF_P;
2373
2374 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
2375
2376 /* GCPhys contains the guest physical address of the page fault. */
2377 TRPMAssertTrap(pVM, X86_XCPT_PF, TRPM_TRAP);
2378 TRPMSetErrorCode(pVM, errCode);
2379 TRPMSetFaultAddress(pVM, GCPhys);
2380
2381 /* Handle the pagefault trap for the nested shadow table. */
2382 rc = PGMR0Trap0eHandlerNestedPaging(pVM, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
2383 Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
2384 if (rc == VINF_SUCCESS)
2385 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2386 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
2387 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
2388
2389 TRPMResetTrap(pVM);
2390
2391 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2392 goto ResumeExecution;
2393 }
2394
2395#ifdef VBOX_STRICT
2396 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2397 LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", rc));
2398#endif
2399 /* Need to go back to the recompiler to emulate the instruction. */
2400 TRPMResetTrap(pVM);
2401 break;
2402 }
2403
2404 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
2405 /* Clear VM-exit on IF=1 change. */
2406 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
2407 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
2408 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2409 AssertRC(rc);
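        /* The interrupt-window exit control was presumably set earlier when a pending interrupt could not be
           delivered; now that the guest can accept interrupts again it is no longer needed. */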
2410 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
2411 goto ResumeExecution; /* we check for pending guest interrupts there */
2412
2413 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
2414 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
2415 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
2416 /* Skip instruction and continue directly. */
2417 pCtx->rip += cbInstr;
2418 /* Continue execution.*/
2419 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2420 goto ResumeExecution;
2421
2422 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
2423 {
2424 Log2(("VMX: Cpuid %x\n", pCtx->eax));
2425 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
2426 rc = EMInterpretCpuId(pVM, CPUMCTX2CORE(pCtx));
2427 if (rc == VINF_SUCCESS)
2428 {
2429 /* Update EIP and continue execution. */
2430 Assert(cbInstr == 2);
2431 pCtx->rip += cbInstr;
2432 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2433 goto ResumeExecution;
2434 }
2435 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", rc));
2436 rc = VINF_EM_RAW_EMULATE_INSTR;
2437 break;
2438 }
2439
2440 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
2441 {
2442 Log2(("VMX: Rdtsc\n"));
2443 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
2444 rc = EMInterpretRdtsc(pVM, CPUMCTX2CORE(pCtx));
2445 if (rc == VINF_SUCCESS)
2446 {
2447 /* Update EIP and continue execution. */
2448 Assert(cbInstr == 2);
2449 pCtx->rip += cbInstr;
2450 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2451 goto ResumeExecution;
2452 }
2453 AssertMsgFailed(("EMU: rdtsc failed with %Rrc\n", rc));
2454 rc = VINF_EM_RAW_EMULATE_INSTR;
2455 break;
2456 }
2457
2458 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVLPG. */
2459 {
2460 Log2(("VMX: invlpg\n"));
2461 Assert(!pVM->hwaccm.s.fNestedPaging);
2462
2463 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
2464 rc = EMInterpretInvlpg(pVM, CPUMCTX2CORE(pCtx), exitQualification);
2465 if (rc == VINF_SUCCESS)
2466 {
2467 /* Update EIP and continue execution. */
2468 pCtx->rip += cbInstr;
2469 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2470 goto ResumeExecution;
2471 }
2472 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, rc));
2473 break;
2474 }
2475
2476 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
2477 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
2478 {
2479 uint32_t cbSize;
2480
2481 /* Note: the Intel manual claims there's a REX version of RDMSR that's slightly different, so we play it safe by completely disassembling the instruction. */
2482 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
2483 rc = EMInterpretInstruction(pVM, CPUMCTX2CORE(pCtx), 0, &cbSize);
2484 if (rc == VINF_SUCCESS)
2485 {
2486 /* EIP has been updated already. */
2487
2488 /* Only resume if successful. */
2489 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2490 goto ResumeExecution;
2491 }
2492 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", rc));
2493 break;
2494 }
2495
2496 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
2497 {
2498 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
2499 {
2500 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
2501 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
2502 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite);
2503 rc = EMInterpretCRxWrite(pVM, CPUMCTX2CORE(pCtx),
2504 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
2505 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
2506
2507 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
2508 {
2509 case 0:
2510 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
2511 break;
2512 case 2:
2513 break;
2514 case 3:
2515 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
2516 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
2517 break;
2518 case 4:
2519 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
2520 break;
2521 case 8:
2522 /* CR8 contains the APIC TPR */
2523 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
2524 break;
2525
2526 default:
2527 AssertFailed();
2528 break;
2529 }
2530 /* Check if a sync operation is pending. */
2531 if ( rc == VINF_SUCCESS /* don't bother if we are going to ring 3 anyway */
2532 && VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL))
2533 {
2534 rc = PGMSyncCR3(pVM, CPUMGetGuestCR0(pVM), CPUMGetGuestCR3(pVM), CPUMGetGuestCR4(pVM), VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
2535 AssertRC(rc);
2536 }
2537 break;
2538
2539 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
2540 Log2(("VMX: mov x, crx\n"));
2541 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead);
2542
2543 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
2544
2545 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
2546 Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
2547
2548 rc = EMInterpretCRxRead(pVM, CPUMCTX2CORE(pCtx),
2549 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
2550 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
2551 break;
2552
2553 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
2554 Log2(("VMX: clts\n"));
2555 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
2556 rc = EMInterpretCLTS(pVM);
2557 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2558 break;
2559
2560 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
2561 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
2562 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
2563 rc = EMInterpretLMSW(pVM, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
2564 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2565 break;
2566 }
2567
2568 /* Update EIP if no error occurred. */
2569 if (RT_SUCCESS(rc))
2570 pCtx->rip += cbInstr;
2571
2572 if (rc == VINF_SUCCESS)
2573 {
2574 /* Only resume if successful. */
2575 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2576 goto ResumeExecution;
2577 }
2578 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2579 break;
2580 }
2581
2582 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
2583 {
2584 if (!DBGFIsStepping(pVM))
2585 {
2586 /* Disable drx move intercepts. */
2587 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2588 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2589 AssertRC(rc);
2590
2591 /* Save the host and load the guest debug state. */
2592 rc = CPUMR0LoadGuestDebugState(pVM, pCtx, true /* include DR6 */);
2593 AssertRC(rc);
2594
2595#ifdef VBOX_WITH_STATISTICS
2596 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
2597 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
2598 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
2599 else
2600 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
2601#endif
2602
2603 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2604 goto ResumeExecution;
2605 }
2606
2607 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
2608 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
2609 {
2610 Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
2611 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
2612 rc = EMInterpretDRxWrite(pVM, CPUMCTX2CORE(pCtx),
2613 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
2614 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
2615 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
2616 Log2(("DR7=%08x\n", pCtx->dr[7]));
2617 }
2618 else
2619 {
2620 Log2(("VMX: mov x, drx\n"));
2621 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
2622 rc = EMInterpretDRxRead(pVM, CPUMCTX2CORE(pCtx),
2623 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
2624 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
2625 }
2626 /* Update EIP if no error occurred. */
2627 if (RT_SUCCESS(rc))
2628 pCtx->rip += cbInstr;
2629
2630 if (rc == VINF_SUCCESS)
2631 {
2632 /* Only resume if successful. */
2633 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2634 goto ResumeExecution;
2635 }
2636 Assert(rc == VERR_EM_INTERPRETER);
2637 break;
2638 }
2639
2640 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
2641 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
2642 {
2643 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
2644 uint32_t uPort;
2645 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
2646
2647 /** @todo necessary to make the distinction? */
2648 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
2649 {
2650 uPort = pCtx->edx & 0xffff;
2651 }
2652 else
2653 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
2654
2655 /* paranoia */
2656 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4))
2657 {
2658 rc = fIOWrite ? VINF_IOM_HC_IOPORT_WRITE : VINF_IOM_HC_IOPORT_READ;
2659 break;
2660 }
2661
2662 uint32_t cbSize = g_aIOSize[uIOWidth];
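            /* uIOWidth encodes the operand size (0 = 1 byte, 1 = 2 bytes, 3 = 4 bytes); the lookup tables map it to
               the byte count and to the operand mask used for the register merge below. */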
2663
2664 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
2665 {
2666 /* ins/outs */
2667 uint32_t prefix = 0;
2668 if (VMX_EXIT_QUALIFICATION_IO_REP(exitQualification))
2669 prefix |= PREFIX_REP;
2670
2671 if (fIOWrite)
2672 {
2673 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
2674 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
2675 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, prefix, cbSize);
2676 }
2677 else
2678 {
2679 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
2680 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
2681 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, prefix, cbSize);
2682 }
2683 }
2684 else
2685 {
2686 /* normal in/out */
2687 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
2688
2689 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
2690
2691 if (fIOWrite)
2692 {
2693 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
2694 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
2695 }
2696 else
2697 {
2698 uint32_t u32Val = 0;
2699
2700 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
2701 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
2702 if (IOM_SUCCESS(rc))
2703 {
2704 /* Write back to the EAX register. */
2705 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
2706 }
2707 }
2708 }
2709 /*
2710 * Handle the I/O return codes.
2711 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
2712 */
2713 if (IOM_SUCCESS(rc))
2714 {
2715 /* Update EIP and continue execution. */
2716 pCtx->rip += cbInstr;
2717 if (RT_LIKELY(rc == VINF_SUCCESS))
2718 {
2719 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
2720 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
2721 {
2722 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
2723 for (unsigned i=0;i<4;i++)
2724 {
2725 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
2726
2727 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
2728 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
2729 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
2730 {
2731 uint64_t uDR6;
2732
2733 Assert(CPUMIsGuestDebugStateActive(pVM));
2734
2735 uDR6 = ASMGetDR6();
2736
2737 /* Clear all breakpoint status flags and set the one we just hit. */
2738 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
2739 uDR6 |= (uint64_t)RT_BIT(i);
2740
2741 /* Note: AMD64 Architecture Programmer's Manual 13.1:
2742 * Bits 15:13 of the DR6 register is never cleared by the processor and must be cleared by software after
2743 * the contents have been read.
2744 */
2745 ASMSetDR6(uDR6);
2746
2747 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2748 pCtx->dr[7] &= ~X86_DR7_GD;
2749
2750 /* Paranoia. */
2751 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2752 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2753 pCtx->dr[7] |= 0x400; /* must be one */
2754
2755 /* Resync DR7 */
2756 rc = VMXWriteVMCS(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
2757 AssertRC(rc);
2758
2759 /* Construct inject info. */
2760 intInfo = X86_XCPT_DB;
2761 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
2762 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
2763
2764 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
2765 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
2766 AssertRC(rc);
2767
2768 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2769 goto ResumeExecution;
2770 }
2771 }
2772 }
2773
2774 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2775 goto ResumeExecution;
2776 }
2777 break;
2778 }
2779
2780#ifdef VBOX_STRICT
2781 if (rc == VINF_IOM_HC_IOPORT_READ)
2782 Assert(!fIOWrite);
2783 else if (rc == VINF_IOM_HC_IOPORT_WRITE)
2784 Assert(fIOWrite);
2785 else
2786 AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc));
2787#endif
2788 break;
2789 }
2790
2791 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
2792 LogFlow(("VMX_EXIT_TPR\n"));
2793 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
2794 goto ResumeExecution;
2795
2796 default:
2797 /* The rest is handled after syncing the entire CPU state. */
2798 break;
2799 }
2800
2801 /* Note: the guest state isn't entirely synced back at this stage. */
2802
2803 /* Investigate why there was a VM-exit. (part 2) */
2804 switch (exitReason)
2805 {
2806 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2807 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2808 case VMX_EXIT_EPT_VIOLATION:
2809 /* Already handled above. */
2810 break;
2811
2812 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
2813 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
2814 break;
2815
2816 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
2817 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
2818 rc = VINF_EM_RAW_INTERRUPT;
2819 AssertFailed(); /* Can't happen. Yet. */
2820 break;
2821
2822 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
2823 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
2824 rc = VINF_EM_RAW_INTERRUPT;
2825 AssertFailed(); /* Can't happen afaik. */
2826 break;
2827
2828 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch. */
2829 rc = VERR_EM_INTERPRETER;
2830 break;
2831
2832 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
2833 /** Check if external interrupts are pending; if so, don't switch back. */
2834 pCtx->rip++; /* skip hlt */
2835 if ( pCtx->eflags.Bits.u1IF
2836 && VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)))
2837 goto ResumeExecution;
2838
2839 rc = VINF_EM_HALT;
2840 break;
2841
2842 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
2843 AssertFailed(); /* can't happen. */
2844 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
2845 break;
2846
2847 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
2848 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
2849 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
2850 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
2851 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
2852 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
2853 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
2854 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
2855 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
2856 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
2857 /** @todo inject #UD immediately */
2858 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
2859 break;
2860
2861 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
2862 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
2863 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVLPG. */
2864 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
2865 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
2866 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
2867 /* already handled above */
2868 AssertMsg( rc == VINF_PGM_CHANGE_MODE
2869 || rc == VINF_EM_RAW_INTERRUPT
2870 || rc == VERR_EM_INTERPRETER
2871 || rc == VINF_EM_RAW_EMULATE_INSTR
2872 || rc == VINF_PGM_SYNC_CR3
2873 || rc == VINF_IOM_HC_IOPORT_READ
2874 || rc == VINF_IOM_HC_IOPORT_WRITE
2875 || rc == VINF_EM_RAW_GUEST_TRAP
2876 || rc == VINF_TRPM_XCPT_DISPATCHED
2877 || rc == VINF_EM_RESCHEDULE_REM,
2878 ("rc = %d\n", rc));
2879 break;
2880
2881 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
2882 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
2883 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
2884 /* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
2885 rc = VERR_EM_INTERPRETER;
2886 break;
2887
2888 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
2889 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
2890 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
2891 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
2892 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
2893 break;
2894
2895 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
2896 Assert(rc == VINF_EM_RAW_INTERRUPT);
2897 break;
2898
2899 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
2900 {
2901#ifdef VBOX_STRICT
2902 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
2903
2904 VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val);
2905 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
2906
2907 VMXReadVMCS(VMX_VMCS_GUEST_CR0, &val);
2908 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", val));
2909
2910 VMXReadVMCS(VMX_VMCS_GUEST_CR3, &val);
2911 Log(("VMX_VMCS_GUEST_CR3 %RGp\n", val));
2912
2913 VMXReadVMCS(VMX_VMCS_GUEST_CR4, &val);
2914 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", val));
2915
2916 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2917 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
2918
2919 VMX_LOG_SELREG(CS, "CS");
2920 VMX_LOG_SELREG(DS, "DS");
2921 VMX_LOG_SELREG(ES, "ES");
2922 VMX_LOG_SELREG(FS, "FS");
2923 VMX_LOG_SELREG(GS, "GS");
2924 VMX_LOG_SELREG(SS, "SS");
2925 VMX_LOG_SELREG(TR, "TR");
2926 VMX_LOG_SELREG(LDTR, "LDTR");
2927
2928 VMXReadVMCS(VMX_VMCS_GUEST_GDTR_BASE, &val);
2929 Log(("VMX_VMCS_GUEST_GDTR_BASE %RGv\n", val));
2930 VMXReadVMCS(VMX_VMCS_GUEST_IDTR_BASE, &val);
2931 Log(("VMX_VMCS_GUEST_IDTR_BASE %RGv\n", val));
2932#endif /* VBOX_STRICT */
2933 rc = VERR_VMX_INVALID_GUEST_STATE;
2934 break;
2935 }
2936
2937 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
2938 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
2939 default:
2940 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
2941 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
2942 break;
2943
2944 }
2945end:
2946
2947 /* Signal changes for the recompiler. */
2948 CPUMSetChangedFlags(pVM, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
2949
2950 /* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
2951 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
2952 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2953 {
2954 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
2955 /* On the next entry we'll only sync the host context. */
2956 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
2957 }
2958 else
2959 {
2960 /* On the next entry we'll sync everything. */
2961 /** @todo we can do better than this */
2962 /* Not in the VINF_PGM_CHANGE_MODE though! */
2963 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
2964 }
2965
2966 /* translate into a less severe return code */
2967 if (rc == VERR_EM_INTERPRETER)
2968 rc = VINF_EM_RAW_EMULATE_INSTR;
2969 else
2970 /* Try to extract more information about what might have gone wrong here. */
2971 if (rc == VERR_VMX_INVALID_VMCS_PTR)
2972 {
2973 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
2974 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS;
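        /* The first 32-bit word of the VMCS region holds the VMCS revision identifier. */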
2975 }
2976
2977 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit, x);
2978
2979 Log2(("X"));
2980 return rc;
2981}
2982
2983
2984/**
2985 * Enters the VT-x session
2986 *
2987 * @returns VBox status code.
2988 * @param pVM The VM to operate on.
2989 * @param pVCpu The VMCPU to operate on.
2990 * @param pCpu CPU info struct
2991 */
2992VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHWACCM_CPUINFO pCpu)
2993{
2994 Assert(pVM->hwaccm.s.vmx.fSupported);
2995
2996 unsigned cr4 = ASMGetCR4();
2997 if (!(cr4 & X86_CR4_VMXE))
2998 {
2999 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
3000 return VERR_VMX_X86_CR4_VMXE_CLEARED;
3001 }
3002
3003 /* Activate the VM Control Structure. */
3004 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3005 if (RT_FAILURE(rc))
3006 return rc;
3007
3008 pVCpu->hwaccm.s.fResumeVM = false;
3009 return VINF_SUCCESS;
3010}
3011
3012
3013/**
3014 * Leaves the VT-x session
3015 *
3016 * @returns VBox status code.
3017 * @param pVM The VM to operate on.
3018 * @param pVCpu The VMCPU to operate on.
3019 * @param pCtx CPU context
3020 */
3021VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
3022{
3023 Assert(pVM->hwaccm.s.vmx.fSupported);
3024
3025 /* Save the guest debug state if necessary. */
3026 if (CPUMIsGuestDebugStateActive(pVM))
3027 {
3028 CPUMR0SaveGuestDebugState(pVM, pCtx, true /* save DR6 */);
3029
3030 /* Enable drx move intercepts again. */
3031 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3032 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3033 AssertRC(rc);
3034
3035 /* Resync the debug registers the next time. */
3036 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
3037 }
3038 else
3039 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
3040
3041 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
3042 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3043 AssertRC(rc);
3044
3045 return VINF_SUCCESS;
3046}
3047
3048/**
3049 * Flush the TLB (EPT).
3050 *
3052 * @param pVM The VM to operate on.
3053 * @param pVCpu The VM CPU to operate on.
3054 * @param enmFlush Type of flush
3055 * @param GCPhys Physical address of the page to flush
3056 */
3057static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys)
3058{
3059 uint64_t descriptor[2];
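    /* INVEPT takes a two-quadword descriptor: the EPT pointer in the first quadword and, here, the physical
       address in the second (ignored by flush types that do not operate on a single page). */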
3060
3061 LogFlow(("vmxR0FlushEPT %d %RGv\n", enmFlush, GCPhys));
3062 Assert(pVM->hwaccm.s.fNestedPaging);
3063 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
3064 descriptor[1] = GCPhys;
3065 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
3066 AssertRC(rc);
3067}
3068
3069#ifdef HWACCM_VTX_WITH_VPID
3070/**
3071 * Flush the TLB (VPID).
3072 *
3074 * @param pVM The VM to operate on.
3075 * @param pVCpu The VM CPU to operate on.
3076 * @param enmFlush Type of flush
3077 * @param GCPtr Virtual address of the page to flush
3078 */
3079static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr)
3080{
3081 uint64_t descriptor[2];
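    /* INVVPID also takes a two-quadword descriptor: the VPID (ASID) in the low 16 bits of the first quadword
       and the linear address in the second. */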
3082
3083 Assert(pVM->hwaccm.s.vmx.fVPID);
3084 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
3085 descriptor[1] = GCPtr;
3086 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]);
3087 AssertRC(rc);
3088}
3089#endif /* HWACCM_VTX_WITH_VPID */
3090
3091/**
3092 * Invalidates a guest page
3093 *
3094 * @returns VBox status code.
3095 * @param pVM The VM to operate on.
3096 * @param pVCpu The VM CPU to operate on.
3097 * @param GCVirt Page to invalidate
3098 */
3099VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
3100{
3101 bool fFlushPending = pVCpu->hwaccm.s.fForceTLBFlush;
3102
3103 LogFlow(("VMXR0InvalidatePage %RGv\n", GCVirt));
3104
3105 /* Only relevant when VPID is in use.
3106 * In the nested paging case we still see such calls (e.g. after CR3
3107 * updates), but we can safely ignore them.
3108 */
3109#ifdef HWACCM_VTX_WITH_VPID
3110 /* Skip it if a TLB flush is already pending. */
3111 if ( !fFlushPending
3112 && pVM->hwaccm.s.vmx.fVPID)
3113 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCVirt);
3114#endif /* HWACCM_VTX_WITH_VPID */
3115
3116 return VINF_SUCCESS;
3117}
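/*
 * Hypothetical caller-side usage, for illustration only (e.g. when INVLPG
 * emulation needs to drop a single guest mapping); GCPtrPage is a placeholder
 * name:
 *
 *     int rc = VMXR0InvalidatePage(pVM, pVCpu, GCPtrPage);
 *     AssertRC(rc);
 */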
3118
3119/**
3120 * Invalidates a guest page by physical address
3121 *
3122 * NOTE: Assumes the current instruction references this physical page through a virtual address!
3123 *
3124 * @returns VBox status code.
3125 * @param pVM The VM to operate on.
3126 * @param pVCpu The VM CPU to operate on.
3127 * @param GCPhys Page to invalidate
3128 */
3129VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
3130{
3131 bool fFlushPending = pVCpu->hwaccm.s.fForceTLBFlush;
3132
3133 Assert(pVM->hwaccm.s.fNestedPaging);
3134
3135 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
3136
3137 /* Skip it if a TLB flush is already pending. */
3138 if (!fFlushPending)
3139 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCPhys);
3140
3141 return VINF_SUCCESS;
3142}
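/*
 * Hypothetical caller-side usage, for illustration only; only valid with
 * nested paging and, per the note above, the current instruction must
 * reference the page through a virtual address.  GCPhysPage is a placeholder
 * name:
 *
 *     int rc = VMXR0InvalidatePhysPage(pVM, pVCpu, GCPhysPage);
 *     AssertRC(rc);
 */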
3143
3144/**
3145 * Report world switch error and dump some useful debug info
3146 *
3147 * @param pVM The VM to operate on.
3148 * @param pVCpu The VMCPU to operate on.
3149 * @param rc Return code
3150 * @param pCtx Current CPU context (not updated)
3151 */
3152static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx)
3153{
3154 switch (rc)
3155 {
3156 case VERR_VMX_INVALID_VMXON_PTR:
3157 AssertFailed();
3158 break;
3159
3160 case VERR_VMX_UNABLE_TO_START_VM:
3161 case VERR_VMX_UNABLE_TO_RESUME_VM:
3162 {
3163 int rc;
3164 RTCCUINTREG exitReason, instrError, val;
3165
3166 rc = VMXReadVMCS(VMX_VMCS_RO_EXIT_REASON, &exitReason);
3167 rc |= VMXReadVMCS(VMX_VMCS_RO_VM_INSTR_ERROR, &instrError);
3168 AssertRC(rc);
3169 if (rc == VINF_SUCCESS)
3170 {
3171 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
3172 Log(("Current stack %p\n", &rc));
3173
3174 pVCpu->hwaccm.s.vmx.lasterror.ulLastInstrError = instrError;
3175 pVCpu->hwaccm.s.vmx.lasterror.ulLastExitReason = exitReason;
3176
3177#ifdef VBOX_STRICT
3178 RTGDTR gdtr;
3179 PX86DESCHC pDesc;
3180
3181 ASMGetGDTR(&gdtr);
3182
3183 VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val);
3184 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
3185 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
3186 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
3187 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
3188 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
3189 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
3190 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
3191 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
3192 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
3193
3194 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
3195 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
3196
3197 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
3198 Log(("VMX_VMCS_HOST_CR3 %RHp\n", (RTHCPHYS)val));
3199
3200 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
3201 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
3202
3203 VMXReadVMCS(VMX_VMCS_HOST_FIELD_CS, &val);
3204 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
3205 if (val < gdtr.cbGdt)
3206 {
3207 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3208 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
3209 }
3210
3211 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
3212 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
3213
3214
3215 VMXReadVMCS(VMX_VMCS_HOST_FIELD_DS, &val);
3216 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
3217 if (val < gdtr.cbGdt)
3218 {
3219 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3220 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
3221 }
3222
3223 VMXReadVMCS(VMX_VMCS_HOST_FIELD_ES, &val);
3224 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
3225 if (val < gdtr.cbGdt)
3226 {
3227 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3228 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
3229 }
3230
3231 VMXReadVMCS(VMX_VMCS_HOST_FIELD_FS, &val);
3232 Log(("VMX_VMCS_HOST_FIELD_FS %08x\n", val));
3233 if (val < gdtr.cbGdt)
3234 {
3235 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3236 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
3237 }
3238
3239 VMXReadVMCS(VMX_VMCS_HOST_FIELD_GS, &val);
3240 Log(("VMX_VMCS_HOST_FIELD_GS %08x\n", val));
3241 if (val < gdtr.cbGdt)
3242 {
3243 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3244 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
3245 }
3246
3247 VMXReadVMCS(VMX_VMCS_HOST_FIELD_SS, &val);
3248 Log(("VMX_VMCS_HOST_FIELD_SS %08x\n", val));
3249 if (val < gdtr.cbGdt)
3250 {
3251 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3252 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
3253 }
3254
3255 VMXReadVMCS(VMX_VMCS_HOST_FIELD_TR, &val);
3256 Log(("VMX_VMCS_HOST_FIELD_TR %08x\n", val));
3257 if (val < gdtr.cbGdt)
3258 {
3259 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3260 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
3261 }
3262
3263 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
3264 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
3265
3266 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
3267 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
3268 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
3269 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
3270
3271 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_CS, &val);
3272 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
3273
3274 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
3275 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
3276
3277 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
3278 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
3279
3280 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
3281 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
3282 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
3283 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
3284
3285# if HC_ARCH_BITS == 64
3286 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
3287 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
3288 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
3289 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
3290 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
3291# endif
3292#endif /* VBOX_STRICT */
3293 }
3294 break;
3295 }
3296
3297 default:
3298 /* impossible */
3299 AssertFailed();
3300 break;
3301 }
3302}