VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@ 13281

Last change on this file since 13281 was 13281, checked in by vboxsync, 16 years ago

Fixed IP in the stack frame for #BP & #OF traps.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 127.1 KB
Line 
1/* $Id: HWVMXR0.cpp 13281 2008-10-15 12:27:38Z vboxsync $ */
2/** @file
3 * HWACCM VMX - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_HWACCM
27#include <VBox/hwaccm.h>
28#include "HWACCMInternal.h"
29#include <VBox/vm.h>
30#include <VBox/x86.h>
31#include <VBox/pgm.h>
32#include <VBox/pdm.h>
33#include <VBox/err.h>
34#include <VBox/log.h>
35#include <VBox/selm.h>
36#include <VBox/iom.h>
37#include <iprt/param.h>
38#include <iprt/assert.h>
39#include <iprt/asm.h>
40#include <iprt/string.h>
41#include "HWVMXR0.h"
42
43/*******************************************************************************
44* Global Variables *
45*******************************************************************************/
/*
 * I/O operation lookup tables (four entries each).
 * NOTE(review): presumably indexed by the access-size encoding of an I/O
 * VM-exit; index 2 maps to 0 in both tables - confirm against the users of
 * these tables.
 */
/** Access width in bytes for each index. */
static uint32_t const g_aIOSize[4] =
{
    1, 2, 0, 4
};
/** AND mask selecting the accessed bits for each index. */
static uint32_t const g_aIOOpAnd[4] =
{
    0x000000ff, 0x0000ffff, 0x00000000, 0xffffffff
};
49
50/*******************************************************************************
51* Local Functions *
52*******************************************************************************/
#ifdef VBOX_STRICT
/* Strict builds: world switch errors are reported by a real function. */
static void VMXR0ReportWorldSwitchError(PVM pVM, int rc, PCPUMCTX pCtx);
#else
/* Non-strict builds: the call expands to an empty statement.
 * No trailing semicolon here: the ';' written at the call site completes the
 * statement, which keeps the macro usable in unbraced if/else bodies. */
# define VMXR0ReportWorldSwitchError(a, b, c) do { } while (0)
#endif /* VBOX_STRICT */
58static void vmxR0SetupTLBEPT(PVM pVM);
59static void vmxR0SetupTLBVPID(PVM pVM);
60static void vmxR0SetupTLBDummy(PVM pVM);
61static void vmxR0FlushEPT(PVM pVM, VMX_FLUSH enmFlush, RTGCPHYS GCPhys);
62static void vmxR0FlushVPID(PVM pVM, VMX_FLUSH enmFlush, RTGCPTR GCPtr);
63static void vmxR0UpdateExceptionBitmap(PVM pVM, PCPUMCTX pCtx);
64
65
66static void VMXR0CheckError(PVM pVM, int rc)
67{
68 if (rc == VERR_VMX_GENERIC)
69 {
70 RTCCUINTREG instrError;
71
72 VMXReadVMCS(VMX_VMCS_RO_VM_INSTR_ERROR, &instrError);
73 pVM->hwaccm.s.vmx.ulLastInstrError = instrError;
74 }
75 pVM->hwaccm.s.lLastError = rc;
76}
77
78/**
79 * Sets up and activates VT-x on the current CPU
80 *
81 * @returns VBox status code.
82 * @param pCpu CPU info struct
83 * @param pVM The VM to operate on.
84 * @param pvPageCpu Pointer to the global cpu page
85 * @param pPageCpuPhys Physical address of the global cpu page
86 */
87VMMR0DECL(int) VMXR0EnableCpu(PHWACCM_CPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
88{
89 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
90 AssertReturn(pVM, VERR_INVALID_PARAMETER);
91 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
92
93 /* Setup Intel VMX. */
94 Assert(pVM->hwaccm.s.vmx.fSupported);
95
96#ifdef LOG_ENABLED
97 SUPR0Printf("VMXR0EnableCpu cpu %d page (%x) %x\n", pCpu->idCpu, pvPageCpu, (uint32_t)pPageCpuPhys);
98#endif
99 /* Set revision dword at the beginning of the VMXON structure. */
100 *(uint32_t *)pvPageCpu = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
101
102 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
103 * (which can have very bad consequences!!!)
104 */
105
106 /* Make sure the VMX instructions don't cause #UD faults. */
107 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
108
109 /* Enter VMX Root Mode */
110 int rc = VMXEnable(pPageCpuPhys);
111 if (VBOX_FAILURE(rc))
112 {
113 VMXR0CheckError(pVM, rc);
114 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
115 return VERR_VMX_VMXON_FAILED;
116 }
117 return VINF_SUCCESS;
118}
119
120/**
121 * Deactivates VT-x on the current CPU
122 *
123 * @returns VBox status code.
124 * @param pCpu CPU info struct
125 * @param pvPageCpu Pointer to the global cpu page
126 * @param pPageCpuPhys Physical address of the global cpu page
127 */
128VMMR0DECL(int) VMXR0DisableCpu(PHWACCM_CPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
129{
130 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
131 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
132
133 /* Leave VMX Root Mode. */
134 VMXDisable();
135
136 /* And clear the X86_CR4_VMXE bit */
137 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
138
139#ifdef LOG_ENABLED
140 SUPR0Printf("VMXR0DisableCpu cpu %d\n", pCpu->idCpu);
141#endif
142 return VINF_SUCCESS;
143}
144
145/**
146 * Does Ring-0 per VM VT-x init.
147 *
148 * @returns VBox status code.
149 * @param pVM The VM to operate on.
150 */
151VMMR0DECL(int) VMXR0InitVM(PVM pVM)
152{
153 int rc;
154
155#ifdef LOG_ENABLED
156 SUPR0Printf("VMXR0InitVM %x\n", pVM);
157#endif
158 pVM->hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
159 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
160
161
162 /* Allocate one page for the VM control structure (VMCS). */
163 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjVMCS, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
164 AssertRC(rc);
165 if (RT_FAILURE(rc))
166 return rc;
167
168 pVM->hwaccm.s.vmx.pVMCS = RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjVMCS);
169 pVM->hwaccm.s.vmx.pVMCSPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjVMCS, 0);
170 ASMMemZero32(pVM->hwaccm.s.vmx.pVMCS, PAGE_SIZE);
171
172 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
173 {
174 /* Allocate one page for the virtual APIC mmio cache. */
175 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
176 AssertRC(rc);
177 if (RT_FAILURE(rc))
178 return rc;
179
180 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
181 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
182 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
183 }
184 else
185 {
186 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
187 pVM->hwaccm.s.vmx.pAPIC = 0;
188 pVM->hwaccm.s.vmx.pAPICPhys = 0;
189 }
190
191 /* Allocate the MSR bitmap if this feature is supported. */
192 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
193 {
194 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
195 AssertRC(rc);
196 if (RT_FAILURE(rc))
197 return rc;
198
199 pVM->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjMSRBitmap);
200 pVM->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
201 memset(pVM->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
202 }
203
204 /* Current guest paging mode. */
205 pVM->hwaccm.s.vmx.enmCurrGuestMode = PGMMODE_REAL;
206
207#ifdef LOG_ENABLED
208 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVM->hwaccm.s.vmx.pVMCS, (uint32_t)pVM->hwaccm.s.vmx.pVMCSPhys);
209#endif
210 return VINF_SUCCESS;
211}
212
213/**
214 * Does Ring-0 per VM VT-x termination.
215 *
216 * @returns VBox status code.
217 * @param pVM The VM to operate on.
218 */
219VMMR0DECL(int) VMXR0TermVM(PVM pVM)
220{
221 if (pVM->hwaccm.s.vmx.pMemObjVMCS != NIL_RTR0MEMOBJ)
222 {
223 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjVMCS, false);
224 pVM->hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
225 pVM->hwaccm.s.vmx.pVMCS = 0;
226 pVM->hwaccm.s.vmx.pVMCSPhys = 0;
227 }
228 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
229 {
230 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
231 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
232 pVM->hwaccm.s.vmx.pAPIC = 0;
233 pVM->hwaccm.s.vmx.pAPICPhys = 0;
234 }
235 if (pVM->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
236 {
237 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, false);
238 pVM->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
239 pVM->hwaccm.s.vmx.pMSRBitmap = 0;
240 pVM->hwaccm.s.vmx.pMSRBitmapPhys = 0;
241 }
242 return VINF_SUCCESS;
243}
244
245/**
246 * Sets up VT-x for the specified VM
247 *
248 * @returns VBox status code.
249 * @param pVM The VM to operate on.
250 */
251VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
252{
253 int rc = VINF_SUCCESS;
254 uint32_t val;
255
256 AssertReturn(pVM, VERR_INVALID_PARAMETER);
257 Assert(pVM->hwaccm.s.vmx.pVMCS);
258
259 /* Set revision dword at the beginning of the VMCS structure. */
260 *(uint32_t *)pVM->hwaccm.s.vmx.pVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
261
262 /* Clear VM Control Structure. */
263 Log(("pVMCSPhys = %VHp\n", pVM->hwaccm.s.vmx.pVMCSPhys));
264 rc = VMXClearVMCS(pVM->hwaccm.s.vmx.pVMCSPhys);
265 if (VBOX_FAILURE(rc))
266 goto vmx_end;
267
268 /* Activate the VM Control Structure. */
269 rc = VMXActivateVMCS(pVM->hwaccm.s.vmx.pVMCSPhys);
270 if (VBOX_FAILURE(rc))
271 goto vmx_end;
272
273 /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
274 * Set required bits to one and zero according to the MSR capabilities.
275 */
276 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
277 /* External and non-maskable interrupts cause VM-exits. */
278 val = val | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
279 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
280
281 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
282 AssertRC(rc);
283
284 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
285 * Set required bits to one and zero according to the MSR capabilities.
286 */
287 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
288 /* Program which event cause VM-exits and which features we want to use. */
289 val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
290 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
291 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
292 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
293 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
294
295 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
296 if (!pVM->hwaccm.s.fNestedPaging)
297 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
298 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
299 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
300
301 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error. (combined with some other exit reasons) */
302
303#if HC_ARCH_BITS == 64
304 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
305 {
306 /* CR8 reads from the APIC shadow page; writes cause an exit is they lower the TPR below the threshold */
307 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
308 Assert(pVM->hwaccm.s.vmx.pAPIC);
309 }
310 else
311 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
312 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
313#endif
314
315#ifdef VBOX_WITH_VTX_MSR_BITMAPS
316 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
317 {
318 Assert(pVM->hwaccm.s.vmx.pMSRBitmapPhys);
319 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
320 }
321#endif
322
323 /* We will use the secondary control if it's present. */
324 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
325
326 /* Mask away the bits that the CPU doesn't support */
327 /** @todo make sure they don't conflict with the above requirements. */
328 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
329 pVM->hwaccm.s.vmx.proc_ctls = val;
330
331 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
332 AssertRC(rc);
333
334 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
335 {
336 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
337 * Set required bits to one and zero according to the MSR capabilities.
338 */
339 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
340 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
341
342#ifdef HWACCM_VTX_WITH_EPT
343 if (pVM->hwaccm.s.fNestedPaging)
344 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
345#endif /* HWACCM_VTX_WITH_EPT */
346#ifdef HWACCM_VTX_WITH_VPID
347 else
348 if (pVM->hwaccm.s.vmx.fVPID)
349 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
350#endif /* HWACCM_VTX_WITH_VPID */
351
352 /* Mask away the bits that the CPU doesn't support */
353 /** @todo make sure they don't conflict with the above requirements. */
354 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
355
356 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
357 AssertRC(rc);
358 }
359
360 /* VMX_VMCS_CTRL_CR3_TARGET_COUNT
361 * Set required bits to one and zero according to the MSR capabilities.
362 */
363 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
364 AssertRC(rc);
365
366 /* VMX_VMCS_CTRL_EXIT_CONTROLS
367 * Set required bits to one and zero according to the MSR capabilities.
368 */
369 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
370
371 /* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
372 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
373#if HC_ARCH_BITS == 64
374 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
375#else
376 /* else Must be zero when AMD64 is not available. */
377#endif
378 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
379 /* Don't acknowledge external interrupts on VM-exit. */
380 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
381 AssertRC(rc);
382
383 /* Forward all exception except #NM & #PF to the guest.
384 * We always need to check pagefaults since our shadow page table can be out of sync.
385 * And we always lazily sync the FPU & XMM state.
386 */
387
388 /** @todo Possible optimization:
389 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
390 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
391 * registers ourselves of course.
392 *
393 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
394 */
395
396 /* Don't filter page faults; all of them should cause a switch. */
397 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
398 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
399 AssertRC(rc);
400
401 /* Init TSC offset to zero. */
402 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
403#if HC_ARCH_BITS == 32
404 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_TSC_OFFSET_HIGH, 0);
405#endif
406 AssertRC(rc);
407
408 rc = VMXWriteVMCS(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
409#if HC_ARCH_BITS == 32
410 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_IO_BITMAP_A_HIGH, 0);
411#endif
412 AssertRC(rc);
413
414 rc = VMXWriteVMCS(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
415#if HC_ARCH_BITS == 32
416 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_IO_BITMAP_B_HIGH, 0);
417#endif
418 AssertRC(rc);
419
420 /* Set the MSR bitmap address. */
421 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
422 {
423 /* Optional */
424 rc = VMXWriteVMCS(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVM->hwaccm.s.vmx.pMSRBitmapPhys);
425#if HC_ARCH_BITS == 32
426 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_MSR_BITMAP_HIGH, pVM->hwaccm.s.vmx.pMSRBitmapPhys >> 32ULL);
427#endif
428 AssertRC(rc);
429 }
430
431 /* Clear MSR controls. */
432 rc = VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, 0);
433 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, 0);
434 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, 0);
435#if HC_ARCH_BITS == 32
436 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_HIGH, 0);
437 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_HIGH, 0);
438 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_HIGH, 0);
439#endif
440 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
441 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, 0);
442 AssertRC(rc);
443
444 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
445 {
446 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
447 /* Optional */
448 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
449 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
450#if HC_ARCH_BITS == 32
451 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_VAPIC_PAGEADDR_HIGH, pVM->hwaccm.s.vmx.pAPICPhys >> 32ULL);
452#endif
453 AssertRC(rc);
454 }
455
456 /* Set link pointer to -1. Not currently used. */
457#if HC_ARCH_BITS == 32
458 rc = VMXWriteVMCS(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFF);
459 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LINK_PTR_HIGH, 0xFFFFFFFF);
460#else
461 rc = VMXWriteVMCS(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFF);
462#endif
463 AssertRC(rc);
464
465 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
466 rc = VMXClearVMCS(pVM->hwaccm.s.vmx.pVMCSPhys);
467 AssertRC(rc);
468
469 /* Choose the right TLB setup function. */
470 if (pVM->hwaccm.s.fNestedPaging)
471 {
472 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBEPT;
473
474 /* Default values for flushing. */
475 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
476 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
477
478 /* If the capabilities specify we can do more, then make use of it. */
479 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_INDIV)
480 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
481 else
482 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
483 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
484
485 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
486 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
487 }
488#ifdef HWACCM_VTX_WITH_VPID
489 else
490 if (pVM->hwaccm.s.vmx.fVPID)
491 {
492 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBVPID;
493
494 /* Default values for flushing. */
495 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
496 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
497
498 /* If the capabilities specify we can do more, then make use of it. */
499 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV)
500 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
501 else
502 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
503 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
504
505 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
506 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
507 }
508#endif /* HWACCM_VTX_WITH_VPID */
509 else
510 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBDummy;
511
512
513vmx_end:
514 VMXR0CheckError(pVM, rc);
515 return rc;
516}
517
518
519/**
520 * Injects an event (trap or external interrupt)
521 *
522 * @returns VBox status code.
523 * @param pVM The VM to operate on.
524 * @param pCtx CPU Context
525 * @param intInfo VMX interrupt info
526 * @param cbInstr Opcode length of faulting instruction
527 * @param errCode Error code (optional)
528 */
529static int VMXR0InjectEvent(PVM pVM, CPUMCTX *pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
530{
531 int rc;
532 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
533
534#ifdef VBOX_STRICT
535 if (iGate == 0xE)
536 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %VGv error code=%08x CR2=%08x intInfo=%08x\n", iGate, pCtx->rip, errCode, pCtx->cr2, intInfo));
537 else
538 if (iGate < 0x20)
539 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %VGv error code=%08x\n", iGate, pCtx->rip, errCode));
540 else
541 {
542 LogFlow(("INJ-EI: %x at %VGv\n", iGate, pCtx->rip));
543 Assert(!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS));
544 Assert(pCtx->eflags.u32 & X86_EFL_IF);
545 }
546#endif
547
548#ifdef HWACCM_VMX_EMULATE_REALMODE
549 if (CPUMIsGuestInRealModeEx(pCtx))
550 {
551 RTGCPHYS GCPhysHandler;
552 uint16_t offset, ip;
553 RTSEL sel;
554
555 /* Injecting events doesn't work right with real mode emulation.
556 * (#GP if we try to inject external hardware interrupts)
557 * Inject the interrupt or trap directly instead.
558 */
559 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
560
561 /* Check if the interrupt handler is present. */
562 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
563 {
564 Log(("IDT cbIdt violation\n"));
565 if (iGate != X86_XCPT_DF)
566 {
567 RTGCUINTPTR intInfo;
568
569 intInfo = (iGate == X86_XCPT_GP) ? X86_XCPT_DF : iGate;
570 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
571 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
572 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
573
574 return VMXR0InjectEvent(pVM, pCtx, intInfo, 0, 0 /* no error code according to the Intel docs */);
575 }
576 Log(("Triple fault -> reset the VM!\n"));
577 return VINF_EM_RESET;
578 }
579 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
580 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
581 || iGate == 4)
582 {
583 ip = pCtx->ip + cbInstr;
584 }
585 else
586 ip = pCtx->ip;
587
588 /* Read the selector:offset pair of the interrupt handler. */
589 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
590 PGMPhysRead(pVM, GCPhysHandler, &offset, sizeof(offset));
591 PGMPhysRead(pVM, GCPhysHandler + 2, &sel, sizeof(sel));
592
593 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
594
595 /* Construct the stack frame. */
596 /** @todo should check stack limit. */
597 pCtx->sp -= 2;
598 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
599 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t));
600 pCtx->sp -= 2;
601 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
602 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t));
603 pCtx->sp -= 2;
604 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
605 PGMPhysWrite(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip));
606
607 /* Update the CPU state for executing the handler. */
608 pCtx->rip = offset;
609 pCtx->cs = sel;
610 pCtx->csHid.u64Base = sel << 4;
611 pCtx->eflags.u &= ~(X86_EFL_IF|X86_EFL_TF|X86_EFL_RF|X86_EFL_AC);
612
613 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
614 return VINF_SUCCESS;
615 }
616#endif /* HWACCM_VMX_EMULATE_REALMODE */
617
618 /* Set event injection state. */
619 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
620
621 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
622 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
623
624 AssertRC(rc);
625 return rc;
626}
627
628
629/**
630 * Checks for pending guest interrupts and injects them
631 *
632 * @returns VBox status code.
633 * @param pVM The VM to operate on.
634 * @param pCtx CPU Context
635 */
636static int VMXR0CheckPendingInterrupt(PVM pVM, CPUMCTX *pCtx)
637{
638 int rc;
639
640 /* Dispatch any pending interrupts. (injected before, but a VM exit occurred prematurely) */
641 if (pVM->hwaccm.s.Event.fPending)
642 {
643 Log(("Reinjecting event %VX64 %08x at %VGv cr2=%RX64\n", pVM->hwaccm.s.Event.intInfo, pVM->hwaccm.s.Event.errCode, pCtx->rip, pCtx->cr2));
644 STAM_COUNTER_INC(&pVM->hwaccm.s.StatIntReinject);
645 rc = VMXR0InjectEvent(pVM, pCtx, pVM->hwaccm.s.Event.intInfo, 0, pVM->hwaccm.s.Event.errCode);
646 AssertRC(rc);
647
648 pVM->hwaccm.s.Event.fPending = false;
649 return VINF_SUCCESS;
650 }
651
652 /* When external interrupts are pending, we should exit the VM when IF is set. */
653 if ( !TRPMHasTrap(pVM)
654 && VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)))
655 {
656 if (!(pCtx->eflags.u32 & X86_EFL_IF))
657 {
658 if (!(pVM->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
659 {
660 LogFlow(("Enable irq window exit!\n"));
661 pVM->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
662 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVM->hwaccm.s.vmx.proc_ctls);
663 AssertRC(rc);
664 }
665 /* else nothing to do but wait */
666 }
667 else
668 if (!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
669 {
670 uint8_t u8Interrupt;
671
672 rc = PDMGetInterrupt(pVM, &u8Interrupt);
673 Log(("Dispatch interrupt: u8Interrupt=%x (%d) rc=%Vrc cs:eip=%04X:%VGv\n", u8Interrupt, u8Interrupt, rc, pCtx->cs, pCtx->rip));
674 if (VBOX_SUCCESS(rc))
675 {
676 rc = TRPMAssertTrap(pVM, u8Interrupt, TRPM_HARDWARE_INT);
677 AssertRC(rc);
678 }
679 else
680 {
681 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
682 Assert(!VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)));
683 STAM_COUNTER_INC(&pVM->hwaccm.s.StatSwitchGuestIrq);
684 /* Just continue */
685 }
686 }
687 else
688 Log(("Pending interrupt blocked at %VGv by VM_FF_INHIBIT_INTERRUPTS!!\n", pCtx->rip));
689 }
690
691#ifdef VBOX_STRICT
692 if (TRPMHasTrap(pVM))
693 {
694 uint8_t u8Vector;
695 rc = TRPMQueryTrapAll(pVM, &u8Vector, 0, 0, 0);
696 AssertRC(rc);
697 }
698#endif
699
700 if ( pCtx->eflags.u32 & X86_EFL_IF
701 && (!VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
702 && TRPMHasTrap(pVM)
703 )
704 {
705 uint8_t u8Vector;
706 int rc;
707 TRPMEVENT enmType;
708 RTGCUINTPTR intInfo;
709 RTGCUINT errCode;
710
711 /* If a new event is pending, then dispatch it now. */
712 rc = TRPMQueryTrapAll(pVM, &u8Vector, &enmType, &errCode, 0);
713 AssertRC(rc);
714 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
715 Assert(enmType != TRPM_SOFTWARE_INT);
716
717 /* Clear the pending trap. */
718 rc = TRPMResetTrap(pVM);
719 AssertRC(rc);
720
721 intInfo = u8Vector;
722 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
723
724 if (enmType == TRPM_TRAP)
725 {
726 switch (u8Vector) {
727 case 8:
728 case 10:
729 case 11:
730 case 12:
731 case 13:
732 case 14:
733 case 17:
734 /* Valid error codes. */
735 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
736 break;
737 default:
738 break;
739 }
740 if (u8Vector == X86_XCPT_BP || u8Vector == X86_XCPT_OF)
741 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
742 else
743 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
744 }
745 else
746 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
747
748 STAM_COUNTER_INC(&pVM->hwaccm.s.StatIntInject);
749 rc = VMXR0InjectEvent(pVM, pCtx, intInfo, 0, errCode);
750 AssertRC(rc);
751 } /* if (interrupts can be dispatched) */
752
753 return VINF_SUCCESS;
754}
755
756/**
757 * Save the host state
758 *
759 * @returns VBox status code.
760 * @param pVM The VM to operate on.
761 */
762VMMR0DECL(int) VMXR0SaveHostState(PVM pVM)
763{
764 int rc = VINF_SUCCESS;
765
766 /*
767 * Host CPU Context
768 */
769 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
770 {
771 RTIDTR idtr;
772 RTGDTR gdtr;
773 RTSEL SelTR;
774 PX86DESCHC pDesc;
775 uintptr_t trBase;
776
777 /* Control registers */
778 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
779 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, ASMGetCR3());
780 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
781 AssertRC(rc);
782 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
783 Log2(("VMX_VMCS_HOST_CR3 %VHp\n", ASMGetCR3()));
784 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
785
786 /* Selector registers. */
787 rc = VMXWriteVMCS(VMX_VMCS_HOST_FIELD_CS, ASMGetCS());
788 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
789 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_DS, 0);
790 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_ES, 0);
791#if HC_ARCH_BITS == 32
792 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_FS, 0);
793 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_GS, 0);
794#endif
795 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_SS, ASMGetSS());
796 SelTR = ASMGetTR();
797 rc |= VMXWriteVMCS(VMX_VMCS_HOST_FIELD_TR, SelTR);
798 AssertRC(rc);
799 Log2(("VMX_VMCS_HOST_FIELD_CS %08x\n", ASMGetCS()));
800 Log2(("VMX_VMCS_HOST_FIELD_DS %08x\n", ASMGetDS()));
801 Log2(("VMX_VMCS_HOST_FIELD_ES %08x\n", ASMGetES()));
802 Log2(("VMX_VMCS_HOST_FIELD_FS %08x\n", ASMGetFS()));
803 Log2(("VMX_VMCS_HOST_FIELD_GS %08x\n", ASMGetGS()));
804 Log2(("VMX_VMCS_HOST_FIELD_SS %08x\n", ASMGetSS()));
805 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
806
807 /* GDTR & IDTR */
808 ASMGetGDTR(&gdtr);
809 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
810 ASMGetIDTR(&idtr);
811 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
812 AssertRC(rc);
813 Log2(("VMX_VMCS_HOST_GDTR_BASE %VHv\n", gdtr.pGdt));
814 Log2(("VMX_VMCS_HOST_IDTR_BASE %VHv\n", idtr.pIdt));
815
816 /* Save the base address of the TR selector. */
817 if (SelTR > gdtr.cbGdt)
818 {
819 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
820 return VERR_VMX_INVALID_HOST_STATE;
821 }
822
823 pDesc = &((PX86DESCHC)gdtr.pGdt)[SelTR >> X86_SEL_SHIFT_HC];
824#if HC_ARCH_BITS == 64
825 trBase = X86DESC64_BASE(*pDesc);
826#else
827 trBase = X86DESC_BASE(*pDesc);
828#endif
829 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
830 AssertRC(rc);
831 Log2(("VMX_VMCS_HOST_TR_BASE %VHv\n", trBase));
832
833 /* FS and GS base. */
834#if HC_ARCH_BITS == 64
835 Log2(("MSR_K8_FS_BASE = %VHv\n", ASMRdMsr(MSR_K8_FS_BASE)));
836 Log2(("MSR_K8_GS_BASE = %VHv\n", ASMRdMsr(MSR_K8_GS_BASE)));
837 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
838 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
839#endif
840 AssertRC(rc);
841
842 /* Sysenter MSRs. */
843 /** @todo expensive!! */
844 rc = VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
845 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
846#if HC_ARCH_BITS == 32
847 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
848 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
849 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %VHv\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
850 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %VHv\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
851#else
852 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %VHv\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
853 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %VHv\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
854 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
855 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
856#endif
857 AssertRC(rc);
858
859 pVM->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
860 }
861 return rc;
862}
863
864/**
865 * Prefetch the 4 PDPT pointers (PAE and nested paging only)
866 *
867 * @param pVM The VM to operate on.
868 * @param pCtx Guest context
869 */
870static void vmxR0PrefetchPAEPdptrs(PVM pVM, PCPUMCTX pCtx)
871{
872 if (CPUMIsGuestInPAEModeEx(pCtx))
873 {
874 X86PDPE Pdpe;
875
876 for (unsigned i=0;i<4;i++)
877 {
878 Pdpe = PGMGstGetPaePDPtr(pVM, i);
879 int rc = VMXWriteVMCS(VMX_VMCS_GUEST_PDPTR0_FULL + i*2, Pdpe.u);
880#if HC_ARCH_BITS == 32
881 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_PDPTR0_FULL + i*2 + 1, Pdpe.u >> 32ULL);
882#endif
883 AssertRC(rc);
884 }
885 }
886}
887
888/**
889 * Update the exception bitmap according to the current CPU state
890 *
891 * @param pVM The VM to operate on.
892 * @param pCtx Guest context
893 */
894static void vmxR0UpdateExceptionBitmap(PVM pVM, PCPUMCTX pCtx)
895{
896 uint32_t u32TrapMask;
897 Assert(pCtx);
898
899 u32TrapMask = HWACCM_VMX_TRAP_MASK;
900#ifndef DEBUG
901 if (pVM->hwaccm.s.fNestedPaging)
902 u32TrapMask &= ~RT_BIT(X86_XCPT_PF); /* no longer need to intercept #PF. */
903#endif
904
905 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
906 if (!pVM->hwaccm.s.fFPUOldStyleOverride)
907 {
908 u32TrapMask |= RT_BIT(X86_XCPT_MF);
909 pVM->hwaccm.s.fFPUOldStyleOverride = true;
910 }
911
912#ifdef DEBUG
913 /* Intercept X86_XCPT_DB if stepping is enabled */
914 if (DBGFIsStepping(pVM))
915 u32TrapMask |= RT_BIT(X86_XCPT_DB);
916#endif
917
918#ifdef VBOX_STRICT
919 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
920#endif
921
922# ifdef HWACCM_VMX_EMULATE_REALMODE
923 /* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
924 if (CPUMIsGuestInRealModeEx(pCtx))
925 u32TrapMask |= HWACCM_VMX_TRAP_MASK_REALMODE;
926# endif /* HWACCM_VMX_EMULATE_REALMODE */
927
928 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
929 AssertRC(rc);
930}
931
932/**
933 * Loads the guest state
934 *
935 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
936 *
937 * @returns VBox status code.
938 * @param pVM The VM to operate on.
939 * @param pCtx Guest context
940 */
941VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, CPUMCTX *pCtx)
942{
943 int rc = VINF_SUCCESS;
944 RTGCUINTPTR val;
945 X86EFLAGS eflags;
946
947 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
948 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
949 {
950#ifdef HWACCM_VMX_EMULATE_REALMODE
951 PGMMODE enmGuestMode = PGMGetGuestMode(pVM);
952 if (pVM->hwaccm.s.vmx.enmCurrGuestMode != enmGuestMode)
953 {
954# define VTX_CORRECT_PROT_SEL(reg) \
955 { \
956 if ( pCtx->reg##Hid.u64Base == (pVM->hwaccm.s.vmx.RealMode.reg##Hid.u64Base & 0xfffff) \
957 && pCtx->reg == ((pVM->hwaccm.s.vmx.RealMode.reg##Hid.u64Base >> 4) & ~X86_SEL_RPL)) \
958 { \
959 pCtx->reg##Hid = pVM->hwaccm.s.vmx.RealMode.reg##Hid; \
960 pCtx->reg = pVM->hwaccm.s.vmx.RealMode.reg; \
961 } \
962 }
963
964 /* Correct weird requirements for switching to protected mode. */
965 if ( pVM->hwaccm.s.vmx.enmCurrGuestMode == PGMMODE_REAL
966 && enmGuestMode >= PGMMODE_PROTECTED)
967 {
968 /* DPL of all hidden selector registers must match the current CPL (0). */
969 pCtx->csHid.Attr.n.u2Dpl = 0;
970 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
971
972 pCtx->dsHid.Attr.n.u2Dpl = 0;
973 pCtx->esHid.Attr.n.u2Dpl = 0;
974 pCtx->fsHid.Attr.n.u2Dpl = 0;
975 pCtx->gsHid.Attr.n.u2Dpl = 0;
976 pCtx->ssHid.Attr.n.u2Dpl = 0;
977
978 /* RPL of all selectors must match the current CPL (0). */
979 pCtx->cs &= ~X86_SEL_RPL;
980 pCtx->ds &= ~X86_SEL_RPL;
981 pCtx->es &= ~X86_SEL_RPL;
982 pCtx->fs &= ~X86_SEL_RPL;
983 pCtx->gs &= ~X86_SEL_RPL;
984 pCtx->ss &= ~X86_SEL_RPL;
985
986 if (pVM->hwaccm.s.vmx.RealMode.fValid)
987 {
988 VTX_CORRECT_PROT_SEL(ds);
989 VTX_CORRECT_PROT_SEL(es);
990 VTX_CORRECT_PROT_SEL(fs);
991 VTX_CORRECT_PROT_SEL(gs);
992 pVM->hwaccm.s.vmx.RealMode.fValid = false;
993 }
994 }
995 else
996 /* Switching from protected mode to real mode. */
997 if ( pVM->hwaccm.s.vmx.enmCurrGuestMode >= PGMMODE_PROTECTED
998 && enmGuestMode == PGMMODE_REAL)
999 {
1000 /* Save the original hidden selectors in case we need to restore them later on. */
1001 pVM->hwaccm.s.vmx.RealMode.ds = pCtx->ds;
1002 pVM->hwaccm.s.vmx.RealMode.dsHid = pCtx->dsHid;
1003 pVM->hwaccm.s.vmx.RealMode.es = pCtx->es;
1004 pVM->hwaccm.s.vmx.RealMode.esHid = pCtx->esHid;
1005 pVM->hwaccm.s.vmx.RealMode.fs = pCtx->fs;
1006 pVM->hwaccm.s.vmx.RealMode.fsHid = pCtx->fsHid;
1007 pVM->hwaccm.s.vmx.RealMode.gs = pCtx->gs;
1008 pVM->hwaccm.s.vmx.RealMode.gsHid = pCtx->gsHid;
1009 pVM->hwaccm.s.vmx.RealMode.ss = pCtx->ss;
1010 pVM->hwaccm.s.vmx.RealMode.ssHid = pCtx->ssHid;
1011 pVM->hwaccm.s.vmx.RealMode.fValid = true;
1012
1013 /* The selector value & base must be adjusted or else... */
1014 pCtx->cs = pCtx->csHid.u64Base >> 4;
1015 pCtx->ds = pCtx->dsHid.u64Base >> 4;
1016 pCtx->es = pCtx->esHid.u64Base >> 4;
1017 pCtx->fs = pCtx->fsHid.u64Base >> 4;
1018 pCtx->gs = pCtx->gsHid.u64Base >> 4;
1019 pCtx->ss = pCtx->ssHid.u64Base >> 4;
1020
1021 /* The limit must also be adjusted. */
1022 pCtx->csHid.u32Limit &= 0xffff;
1023 pCtx->dsHid.u32Limit &= 0xffff;
1024 pCtx->esHid.u32Limit &= 0xffff;
1025 pCtx->fsHid.u32Limit &= 0xffff;
1026 pCtx->gsHid.u32Limit &= 0xffff;
1027 pCtx->ssHid.u32Limit &= 0xffff;
1028
1029 Assert(pCtx->csHid.u64Base <= 0xfffff);
1030 Assert(pCtx->dsHid.u64Base <= 0xfffff);
1031 Assert(pCtx->esHid.u64Base <= 0xfffff);
1032 Assert(pCtx->fsHid.u64Base <= 0xfffff);
1033 Assert(pCtx->gsHid.u64Base <= 0xfffff);
1034 }
1035 pVM->hwaccm.s.vmx.enmCurrGuestMode = enmGuestMode;
1036 }
1037 else
1038 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1039 if ( CPUMIsGuestInRealModeEx(pCtx)
1040 && pCtx->csHid.u64Base == 0xffff0000)
1041 {
1042 pCtx->csHid.u64Base = 0xf0000;
1043 pCtx->cs = 0xf000;
1044 }
1045#endif /* HWACCM_VMX_EMULATE_REALMODE */
1046
1047 VMX_WRITE_SELREG(ES, es);
1048 AssertRC(rc);
1049
1050 VMX_WRITE_SELREG(CS, cs);
1051 AssertRC(rc);
1052
1053 VMX_WRITE_SELREG(SS, ss);
1054 AssertRC(rc);
1055
1056 VMX_WRITE_SELREG(DS, ds);
1057 AssertRC(rc);
1058
1059 /* The base values in the hidden fs & gs registers are not in sync with the msrs; they are cut to 32 bits. */
1060 VMX_WRITE_SELREG(FS, fs);
1061 AssertRC(rc);
1062
1063 VMX_WRITE_SELREG(GS, gs);
1064 AssertRC(rc);
1065 }
1066
1067 /* Guest CPU context: LDTR. */
1068 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1069 {
1070 if (pCtx->ldtr == 0)
1071 {
1072 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_LDTR, 0);
1073 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_LIMIT, 0);
1074 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_BASE, 0);
1075 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1076 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1077 }
1078 else
1079 {
1080 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_LDTR, pCtx->ldtr);
1081 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1082 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1083 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1084 }
1085 AssertRC(rc);
1086 }
1087 /* Guest CPU context: TR. */
1088 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1089 {
1090#ifdef HWACCM_VMX_EMULATE_REALMODE
1091 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1092 if (CPUMIsGuestInRealModeEx(pCtx))
1093 {
1094 RTGCPHYS GCPhys;
1095
1096 /* We convert it here every time as pci regions could be reconfigured. */
1097 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1098 AssertRC(rc);
1099
1100 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_TR, 0);
1101 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1102 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1103
1104 X86DESCATTR attr;
1105
1106 attr.u = 0;
1107 attr.n.u1Present = 1;
1108 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1109 val = attr.u;
1110 }
1111 else
1112#endif /* HWACCM_VMX_EMULATE_REALMODE */
1113 {
1114 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_TR, pCtx->tr);
1115 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1116 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_BASE, pCtx->trHid.u64Base);
1117
1118 val = pCtx->trHid.Attr.u;
1119
1120 /* The TSS selector must be busy. */
1121 if ((val & 0xF) == X86_SEL_TYPE_SYS_286_TSS_AVAIL)
1122 val = (val & ~0xF) | X86_SEL_TYPE_SYS_286_TSS_BUSY;
1123 else
1124 /* Default even if no TR selector has been set (otherwise vmlaunch will fail!) */
1125 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1126
1127 }
1128 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_TR_ACCESS_RIGHTS, val);
1129 AssertRC(rc);
1130 }
1131 /* Guest CPU context: GDTR. */
1132 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1133 {
1134 rc = VMXWriteVMCS(VMX_VMCS_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1135 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1136 AssertRC(rc);
1137 }
1138 /* Guest CPU context: IDTR. */
1139 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1140 {
1141 rc = VMXWriteVMCS(VMX_VMCS_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1142 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1143 AssertRC(rc);
1144 }
1145
1146 /*
1147 * Sysenter MSRs (unconditional)
1148 */
1149 rc = VMXWriteVMCS(VMX_VMCS_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1150 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1151 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1152 AssertRC(rc);
1153
1154 /* Control registers */
1155 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1156 {
1157 val = pCtx->cr0;
1158 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1159 Log2(("Guest CR0-shadow %08x\n", val));
1160 if (CPUMIsGuestFPUStateActive(pVM) == false)
1161 {
1162 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1163 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1164 }
1165 else
1166 {
1167 /** @todo check if we support the old style mess correctly. */
1168 if (!(val & X86_CR0_NE))
1169 Log(("Forcing X86_CR0_NE!!!\n"));
1170
1171 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1172 }
1173 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1174 val |= X86_CR0_PE | X86_CR0_PG;
1175 if (pVM->hwaccm.s.fNestedPaging)
1176 {
1177 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1178 {
1179 /* Disable cr3 read/write monitoring as we don't need it for EPT. */
1180 pVM->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1181 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1182 }
1183 else
1184 {
1185 /* Reenable cr3 read/write monitoring as our identity mapped page table is active. */
1186 pVM->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1187 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1188 }
1189 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVM->hwaccm.s.vmx.proc_ctls);
1190 AssertRC(rc);
1191 }
1192 else
1193 {
1194 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1195 val |= X86_CR0_WP;
1196 }
1197
1198 /* Always enable caching. */
1199 val &= ~(X86_CR0_CD|X86_CR0_NW);
1200
1201 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_CR0, val);
1202 Log2(("Guest CR0 %08x\n", val));
1203 /* CR0 flags owned by the host; if the guests attempts to change them, then
1204 * the VM will exit.
1205 */
1206 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1207 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1208 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1209 | X86_CR0_TS
1210 | X86_CR0_ET /* Bit not restored during VM-exit! */
1211 | X86_CR0_CD /* Bit not restored during VM-exit! */
1212 | X86_CR0_NW /* Bit not restored during VM-exit! */
1213 | X86_CR0_NE
1214 | X86_CR0_MP;
1215 pVM->hwaccm.s.vmx.cr0_mask = val;
1216
1217 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1218 Log2(("Guest CR0-mask %08x\n", val));
1219 AssertRC(rc);
1220 }
1221 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1222 {
1223 /* CR4 */
1224 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1225 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1226 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1227 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1228
1229 if (!pVM->hwaccm.s.fNestedPaging)
1230 {
1231 switch(pVM->hwaccm.s.enmShadowMode)
1232 {
1233 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1234 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1235 case PGMMODE_32_BIT: /* 32-bit paging. */
1236 break;
1237
1238 case PGMMODE_PAE: /* PAE paging. */
1239 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1240 /** @todo use normal 32 bits paging */
1241 val |= X86_CR4_PAE;
1242 break;
1243
1244 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1245 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1246#ifdef VBOX_ENABLE_64_BITS_GUESTS
1247 break;
1248#else
1249 AssertFailed();
1250 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1251#endif
1252 default: /* shut up gcc */
1253 AssertFailed();
1254 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1255 }
1256 }
1257 else
1258 if (!CPUMIsGuestInPagedProtectedModeEx(pCtx))
1259 {
1260 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1261 val |= X86_CR4_PSE;
1262 /* Our identity mapping is a 32 bits page directory. */
1263 val &= ~X86_CR4_PAE;
1264 }
1265
1266#ifdef HWACCM_VMX_EMULATE_REALMODE
1267 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1268 if (CPUMIsGuestInRealModeEx(pCtx))
1269 val |= X86_CR4_VME;
1270#endif /* HWACCM_VMX_EMULATE_REALMODE */
1271
1272 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_CR4, val);
1273 Log2(("Guest CR4 %08x\n", val));
1274 /* CR4 flags owned by the host; if the guests attempts to change them, then
1275 * the VM will exit.
1276 */
1277 val = 0
1278#ifdef HWACCM_VMX_EMULATE_REALMODE
1279 | X86_CR4_VME
1280#endif
1281 | X86_CR4_PAE
1282 | X86_CR4_PGE
1283 | X86_CR4_PSE
1284 | X86_CR4_VMXE;
1285 pVM->hwaccm.s.vmx.cr4_mask = val;
1286
1287 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1288 Log2(("Guest CR4-mask %08x\n", val));
1289 AssertRC(rc);
1290 }
1291
1292 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1293 {
1294 if (pVM->hwaccm.s.fNestedPaging)
1295 {
1296 AssertMsg(PGMGetEPTCR3(pVM) == PGMGetHyperCR3(pVM), ("%VHp vs %VHp\n", PGMGetEPTCR3(pVM), PGMGetHyperCR3(pVM)));
1297 pVM->hwaccm.s.vmx.GCPhysEPTP = PGMGetEPTCR3(pVM);
1298
1299 Assert(!(pVM->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1300 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
1301 pVM->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1302 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
1303
1304 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EPTP_FULL, pVM->hwaccm.s.vmx.GCPhysEPTP);
1305#if HC_ARCH_BITS == 32
1306 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EPTP_HIGH, (uint32_t)(pVM->hwaccm.s.vmx.GCPhysEPTP >> 32ULL));
1307#endif
1308 AssertRC(rc);
1309
1310 if (!CPUMIsGuestInPagedProtectedModeEx(pCtx))
1311 {
1312 RTGCPHYS GCPhys;
1313
1314 /* We convert it here every time as pci regions could be reconfigured. */
1315 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1316 AssertRC(rc);
1317
1318 /* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will
1319 * take care of the translation to host physical addresses.
1320 */
1321 val = GCPhys;
1322 }
1323 else
1324 {
1325 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1326 val = pCtx->cr3;
1327 /* Prefetch the four PDPT entries in PAE mode. */
1328 vmxR0PrefetchPAEPdptrs(pVM, pCtx);
1329 }
1330 }
1331 else
1332 {
1333 val = PGMGetHyperCR3(pVM);
1334 Assert(val);
1335 }
1336
1337 /* Save our shadow CR3 register. */
1338 rc = VMXWriteVMCS(VMX_VMCS_GUEST_CR3, val);
1339 AssertRC(rc);
1340 }
1341
1342 /* Debug registers. */
1343 if (pVM->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1344 {
1345 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
1346 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
1347
1348 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
1349 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
1350 pCtx->dr[7] |= 0x400; /* must be one */
1351
1352 /* Resync DR7 */
1353 rc = VMXWriteVMCS(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
1354 AssertRC(rc);
1355
1356 /* Sync the debug state now if any breakpoint is armed. */
1357 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
1358 && !CPUMIsGuestDebugStateActive(pVM)
1359 && !DBGFIsStepping(pVM))
1360 {
1361 STAM_COUNTER_INC(&pVM->hwaccm.s.StatDRxArmed);
1362
1363 /* Disable drx move intercepts. */
1364 pVM->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
1365 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVM->hwaccm.s.vmx.proc_ctls);
1366 AssertRC(rc);
1367
1368 /* Save the host and load the guest debug state. */
1369 rc = CPUMR0LoadGuestDebugState(pVM, pCtx, true /* include DR6 */);
1370 AssertRC(rc);
1371 }
1372
1373 /* IA32_DEBUGCTL MSR. */
1374 rc = VMXWriteVMCS(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
1375 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUGCTL_HIGH, 0);
1376 AssertRC(rc);
1377
1378 /** @todo do we really ever need this? */
1379 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
1380 AssertRC(rc);
1381 }
1382
1383 /* EIP, ESP and EFLAGS */
1384 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RIP, pCtx->rip);
1385 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_RSP, pCtx->rsp);
1386 AssertRC(rc);
1387
1388 /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */
1389 eflags = pCtx->eflags;
1390 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1391 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1392
1393#ifdef HWACCM_VMX_EMULATE_REALMODE
1394 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1395 if (CPUMIsGuestInRealModeEx(pCtx))
1396 {
1397 pVM->hwaccm.s.vmx.RealMode.eflags = eflags;
1398
1399 eflags.Bits.u1VM = 1;
1400 eflags.Bits.u2IOPL = 3;
1401 }
1402#endif /* HWACCM_VMX_EMULATE_REALMODE */
1403 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1404 AssertRC(rc);
1405
1406 /* TSC offset. */
1407 uint64_t u64TSCOffset;
1408
1409 if (TMCpuTickCanUseRealTSC(pVM, &u64TSCOffset))
1410 {
1411 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */
1412 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TSC_OFFSET_FULL, u64TSCOffset);
1413#if HC_ARCH_BITS == 32
1414 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_TSC_OFFSET_HIGH, (uint32_t)(u64TSCOffset >> 32ULL));
1415#endif
1416 AssertRC(rc);
1417
1418 pVM->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1419 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVM->hwaccm.s.vmx.proc_ctls);
1420 AssertRC(rc);
1421 STAM_COUNTER_INC(&pVM->hwaccm.s.StatTSCOffset);
1422 }
1423 else
1424 {
1425 pVM->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1426 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVM->hwaccm.s.vmx.proc_ctls);
1427 AssertRC(rc);
1428 STAM_COUNTER_INC(&pVM->hwaccm.s.StatTSCIntercept);
1429 }
1430
1431 /* VMX_VMCS_CTRL_ENTRY_CONTROLS
1432 * Set required bits to one and zero according to the MSR capabilities.
1433 */
1434 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1435 /* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1436 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1437
1438 /* 64 bits guest mode? */
1439 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1440 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1441 /* else Must be zero when AMD64 is not available. */
1442
1443 /* Mask away the bits that the CPU doesn't support */
1444 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1445 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1446 AssertRC(rc);
1447
1448 /* 64 bits guest mode? */
1449 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1450 {
1451#if !defined(VBOX_WITH_64_BITS_GUESTS) || HC_ARCH_BITS != 64
1452 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1453#else
1454 pVM->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
1455#endif
1456 /* Unconditionally update these as wrmsr might have changed them. */
1457 rc = VMXWriteVMCS(VMX_VMCS_GUEST_FS_BASE, pCtx->fsHid.u64Base);
1458 AssertRC(rc);
1459 rc = VMXWriteVMCS(VMX_VMCS_GUEST_GS_BASE, pCtx->gsHid.u64Base);
1460 AssertRC(rc);
1461 }
1462 else
1463 {
1464 pVM->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
1465 }
1466
1467 vmxR0UpdateExceptionBitmap(pVM, pCtx);
1468
1469 /* Done. */
1470 pVM->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
1471
1472 return rc;
1473}
1474
1475/**
1476 * Syncs back the guest state
1477 *
1478 * @returns VBox status code.
1479 * @param pVM The VM to operate on.
1480 * @param pCtx Guest context
1481 */
1482DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, CPUMCTX *pCtx)
1483{
1484 RTCCUINTREG val, valShadow;
1485 RTGCUINTPTR uInterruptState;
1486 int rc;
1487
1488 /* Let's first sync back eip, esp, and eflags. */
1489 rc = VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val);
1490 AssertRC(rc);
1491 pCtx->rip = val;
1492 rc = VMXReadVMCS(VMX_VMCS_GUEST_RSP, &val);
1493 AssertRC(rc);
1494 pCtx->rsp = val;
1495 rc = VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
1496 AssertRC(rc);
1497 pCtx->eflags.u32 = val;
1498
1499 /* Take care of instruction fusing (sti, mov ss) */
1500 rc |= VMXReadVMCS(VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE, &val);
1501 uInterruptState = val;
1502 if (uInterruptState != 0)
1503 {
1504 Assert(uInterruptState <= 2); /* only sti & mov ss */
1505 Log(("uInterruptState %x eip=%VGv\n", uInterruptState, pCtx->rip));
1506 EMSetInhibitInterruptsPC(pVM, pCtx->rip);
1507 }
1508 else
1509 VM_FF_CLEAR(pVM, VM_FF_INHIBIT_INTERRUPTS);
1510
1511 /* Control registers. */
1512 VMXReadVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
1513 VMXReadVMCS(VMX_VMCS_GUEST_CR0, &val);
1514 val = (valShadow & pVM->hwaccm.s.vmx.cr0_mask) | (val & ~pVM->hwaccm.s.vmx.cr0_mask);
1515 CPUMSetGuestCR0(pVM, val);
1516
1517 VMXReadVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
1518 VMXReadVMCS(VMX_VMCS_GUEST_CR4, &val);
1519 val = (valShadow & pVM->hwaccm.s.vmx.cr4_mask) | (val & ~pVM->hwaccm.s.vmx.cr4_mask);
1520 CPUMSetGuestCR4(pVM, val);
1521
1522 /* Note: no reason to sync back the CRx registers. They can't be changed by the guest. */
1523 /* Note: only in the nested paging case can CR3 & CR4 be changed by the guest. */
1524 if ( pVM->hwaccm.s.fNestedPaging
1525 && CPUMIsGuestInPagedProtectedModeEx(pCtx))
1526 {
1527 /* Can be updated behind our back in the nested paging case. */
1528 CPUMSetGuestCR2(pVM, ASMGetCR2());
1529
1530 VMXReadVMCS(VMX_VMCS_GUEST_CR3, &val);
1531
1532 if (val != pCtx->cr3)
1533 {
1534 CPUMSetGuestCR3(pVM, val);
1535 PGMUpdateCR3(pVM, val);
1536 }
1537 /* Prefetch the four PDPT entries in PAE mode. */
1538 vmxR0PrefetchPAEPdptrs(pVM, pCtx);
1539 }
1540
1541 /* Sync back DR7 here. */
1542 VMXReadVMCS(VMX_VMCS_GUEST_DR7, &val);
1543 pCtx->dr[7] = val;
1544
1545 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1546 VMX_READ_SELREG(ES, es);
1547 VMX_READ_SELREG(SS, ss);
1548 VMX_READ_SELREG(CS, cs);
1549 VMX_READ_SELREG(DS, ds);
1550 VMX_READ_SELREG(FS, fs);
1551 VMX_READ_SELREG(GS, gs);
1552
1553 /*
1554 * System MSRs
1555 */
1556 VMXReadVMCS(VMX_VMCS_GUEST_SYSENTER_CS, &val);
1557 pCtx->SysEnter.cs = val;
1558 VMXReadVMCS(VMX_VMCS_GUEST_SYSENTER_EIP, &val);
1559 pCtx->SysEnter.eip = val;
1560 VMXReadVMCS(VMX_VMCS_GUEST_SYSENTER_ESP, &val);
1561 pCtx->SysEnter.esp = val;
1562
1563 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
1564 VMX_READ_SELREG(LDTR, ldtr);
1565
1566 VMXReadVMCS(VMX_VMCS_GUEST_GDTR_LIMIT, &val);
1567 pCtx->gdtr.cbGdt = val;
1568 VMXReadVMCS(VMX_VMCS_GUEST_GDTR_BASE, &val);
1569 pCtx->gdtr.pGdt = val;
1570
1571 VMXReadVMCS(VMX_VMCS_GUEST_IDTR_LIMIT, &val);
1572 pCtx->idtr.cbIdt = val;
1573 VMXReadVMCS(VMX_VMCS_GUEST_IDTR_BASE, &val);
1574 pCtx->idtr.pIdt = val;
1575
1576#ifdef HWACCM_VMX_EMULATE_REALMODE
1577 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1578 if (CPUMIsGuestInRealModeEx(pCtx))
1579 {
1580 /* Hide our emulation flags */
1581 pCtx->eflags.Bits.u1VM = 0;
1582 pCtx->eflags.Bits.u2IOPL = pVM->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
1583
1584 /* Force a TR resync every time in case we switch modes. */
1585 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
1586 }
1587 else
1588#endif /* HWACCM_VMX_EMULATE_REALMODE */
1589 {
1590 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
1591 VMX_READ_SELREG(TR, tr);
1592 }
1593 return VINF_SUCCESS;
1594}
1595
1596/**
1597 * Dummy placeholder
1598 *
1599 * @param pVM The VM to operate on.
1600 */
1601static void vmxR0SetupTLBDummy(PVM pVM)
1602{
1603 return;
1604}
1605
1606/**
1607 * Setup the tagged TLB for EPT
1608 *
1609 * @returns VBox status code.
1610 * @param pVM The VM to operate on.
1611 */
1612static void vmxR0SetupTLBEPT(PVM pVM)
1613{
1614 PHWACCM_CPUINFO pCpu;
1615
1616 Assert(pVM->hwaccm.s.fNestedPaging);
1617 Assert(!pVM->hwaccm.s.vmx.fVPID);
1618
1619 /* Deal with tagged TLBs if VPID or EPT is supported. */
1620 pCpu = HWACCMR0GetCurrentCpu();
1621 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
1622 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
1623 if ( pVM->hwaccm.s.idLastCpu != pCpu->idCpu
1624 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
1625 || pVM->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1626 {
1627 /* Force a TLB flush on VM entry. */
1628 pVM->hwaccm.s.fForceTLBFlush = true;
1629 }
1630 else
1631 Assert(!pCpu->fFlushTLB);
1632
1633 pVM->hwaccm.s.idLastCpu = pCpu->idCpu;
1634 pCpu->fFlushTLB = false;
1635
1636 if (pVM->hwaccm.s.fForceTLBFlush)
1637 vmxR0FlushEPT(pVM, pVM->hwaccm.s.vmx.enmFlushContext, 0);
1638
1639#ifdef VBOX_WITH_STATISTICS
1640 if (pVM->hwaccm.s.fForceTLBFlush)
1641 STAM_COUNTER_INC(&pVM->hwaccm.s.StatFlushTLBWorldSwitch);
1642 else
1643 STAM_COUNTER_INC(&pVM->hwaccm.s.StatNoFlushTLBWorldSwitch);
1644#endif
1645}
1646
#ifdef HWACCM_VTX_WITH_VPID
/**
 * Setup the tagged TLB for VPID
 *
 * Picks the ASID (VPID) to run the VM with on the current cpu, writes it to the
 * VMCS and flushes the tagged TLB when a flush is still required afterwards.
 * Expects VPID to be enabled and nested paging to be disabled (asserted).
 *
 * @param   pVM         The VM to operate on.
 */
static void vmxR0SetupTLBVPID(PVM pVM)
{
    Assert(pVM->hwaccm.s.vmx.fVPID);
    Assert(!pVM->hwaccm.s.fNestedPaging);

    /* Deal with tagged TLBs if VPID or EPT is supported. */
    PHWACCM_CPUINFO pCpu = HWACCMR0GetCurrentCpu();

    /* The current ASID stays usable only when we are still on the cpu we ran on last (this can change
     * both for start and resume due to long jumps back to ring 3) and when the flush count of this cpu
     * is unchanged; a changed count means another VM has flushed the TLB of this cpu. */
    if (   pVM->hwaccm.s.idLastCpu   == pCpu->idCpu
        && pVM->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes)
    {
        Assert(!pCpu->fFlushTLB);
    }
    else
    {
        /* Force a TLB flush on VM entry. */
        pVM->hwaccm.s.fForceTLBFlush = true;
    }

    pVM->hwaccm.s.idLastCpu = pCpu->idCpu;

    if (!pVM->hwaccm.s.fForceTLBFlush)
    {
        Assert(!pCpu->fFlushTLB);

        /* Make sure both sides have a valid (non-zero) ASID. */
        if (!pCpu->uCurrentASID || !pVM->hwaccm.s.uCurrentASID)
            pVM->hwaccm.s.uCurrentASID = pCpu->uCurrentASID = 1;
    }
    else
    {
        /* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without
         * actually flushing the whole TLB (which is expensive). */
        pCpu->uCurrentASID++;
        if (   pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID
            || pCpu->fFlushTLB)
        {
            /* Out of ASIDs (or a flush was requested for this cpu): restart the numbering and keep the forced flush. */
            pCpu->fFlushTLB    = false;
            pCpu->uCurrentASID = 1;       /* start at 1; host uses 0 */
            pCpu->cTLBFlushes++;
        }
        else
        {
            /* A fresh ASID was available, so no real flush is needed. */
            STAM_COUNTER_INC(&pVM->hwaccm.s.StatFlushASID);
            pVM->hwaccm.s.fForceTLBFlush = false;
        }

        pVM->hwaccm.s.cTLBFlushes  = pCpu->cTLBFlushes;
        pVM->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
    }
    AssertMsg(pVM->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVM->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
    AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
    AssertMsg(pVM->hwaccm.s.uCurrentASID >= 1 && pVM->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVM->hwaccm.s.uCurrentASID));

    int rc = VMXWriteVMCS(VMX_VMCS_GUEST_FIELD_VPID, pVM->hwaccm.s.uCurrentASID);
    AssertRC(rc);

    if (pVM->hwaccm.s.fForceTLBFlush)
    {
        vmxR0FlushVPID(pVM, pVM->hwaccm.s.vmx.enmFlushContext, 0);
    }

#ifdef VBOX_WITH_STATISTICS
    if (!pVM->hwaccm.s.fForceTLBFlush)
        STAM_COUNTER_INC(&pVM->hwaccm.s.StatNoFlushTLBWorldSwitch);
    else
        STAM_COUNTER_INC(&pVM->hwaccm.s.StatFlushTLBWorldSwitch);
#endif
}
#endif /* HWACCM_VTX_WITH_VPID */
1721
1722/**
1723 * Runs guest code in a VT-x VM.
1724 *
1725 * @returns VBox status code.
1726 * @param pVM The VM to operate on.
1727 * @param pCtx Guest context
1728 */
1729VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, CPUMCTX *pCtx)
1730{
1731 int rc = VINF_SUCCESS;
1732 RTCCUINTREG val;
1733 RTCCUINTREG exitReason, instrError, cbInstr;
1734 RTGCUINTPTR exitQualification;
1735 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
1736 RTGCUINTPTR errCode, instrInfo;
1737 bool fSyncTPR = false;
1738 PHWACCM_CPUINFO pCpu = 0;
1739 unsigned cResume = 0;
1740#ifdef VBOX_STRICT
1741 RTCPUID idCpuCheck;
1742#endif
1743
1744 Log2(("\nE"));
1745
1746 STAM_PROFILE_ADV_START(&pVM->hwaccm.s.StatEntry, x);
1747
1748#ifdef VBOX_STRICT
1749 rc = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
1750 AssertRC(rc);
1751 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val));
1752
1753 /* allowed zero */
1754 if ((val & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
1755 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
1756
1757 /* allowed one */
1758 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
1759 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
1760
1761 rc = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
1762 AssertRC(rc);
1763 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val));
1764
1765 /* allowed zero */
1766 if ((val & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
1767 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
1768
1769 /* allowed one */
1770 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
1771 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
1772
1773 rc = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
1774 AssertRC(rc);
1775 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val));
1776
1777 /* allowed zero */
1778 if ((val & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
1779 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
1780
1781 /* allowed one */
1782 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
1783 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
1784
1785 rc = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
1786 AssertRC(rc);
1787 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val));
1788
1789 /* allowed zero */
1790 if ((val & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
1791 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
1792
1793 /* allowed one */
1794 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
1795 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
1796#endif
1797
1798 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
1799 */
1800ResumeExecution:
1801 AssertMsg(pVM->hwaccm.s.idEnteredCpu == RTMpCpuId(),
1802 ("Expected %d, I'm %d; cResume=%d exitReason=%RTreg exitQualification=%RTreg\n",
1803 (int)pVM->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
1804
1805 /* Safety precaution; looping for too long here can have a very bad effect on the host */
1806 if (++cResume > HWACCM_MAX_RESUME_LOOPS)
1807 {
1808 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitMaxResume);
1809 rc = VINF_EM_RAW_INTERRUPT;
1810 goto end;
1811 }
1812
1813 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
1814 if (VM_FF_ISSET(pVM, VM_FF_INHIBIT_INTERRUPTS))
1815 {
1816 Log(("VM_FF_INHIBIT_INTERRUPTS at %VGv successor %VGv\n", pCtx->rip, EMGetInhibitInterruptsPC(pVM)));
1817 if (pCtx->rip != EMGetInhibitInterruptsPC(pVM))
1818 {
1819 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
1820 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
1821 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
1822 * break the guest. Sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
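1823 * NOTE(review): the VM_FF_CLEAR call directly below does clear VM_FF_INHIBIT_INTERRUPTS, which contradicts the first sentence of this comment; confirm which is intended. */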
1824 VM_FF_CLEAR(pVM, VM_FF_INHIBIT_INTERRUPTS);
1825 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
1826 rc = VMXWriteVMCS(VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE, 0);
1827 AssertRC(rc);
1828 }
1829 }
1830 else
1831 {
1832 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
1833 rc = VMXWriteVMCS(VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE, 0);
1834 AssertRC(rc);
1835 }
1836
1837 /* Check for pending actions that force us to go back to ring 3. */
1838 if (VM_FF_ISPENDING(pVM, VM_FF_TO_R3 | VM_FF_TIMER))
1839 {
1840 VM_FF_CLEAR(pVM, VM_FF_TO_R3);
1841 STAM_COUNTER_INC(&pVM->hwaccm.s.StatSwitchToR3);
1842 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatEntry, x);
1843 rc = VINF_EM_RAW_TO_R3;
1844 goto end;
1845 }
1846 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
1847 if (VM_FF_ISPENDING(pVM, VM_FF_REQUEST))
1848 {
1849 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatEntry, x);
1850 rc = VINF_EM_PENDING_REQUEST;
1851 goto end;
1852 }
1853
1854 /* When external interrupts are pending, we should exit the VM when IF is set. */
1855 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
1856 rc = VMXR0CheckPendingInterrupt(pVM, pCtx);
1857 if (VBOX_FAILURE(rc))
1858 {
1859 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatEntry, x);
1860 goto end;
1861 }
1862
1863 /** @todo check timers?? */
1864
1865 /* TPR caching using CR8 is only available in 64 bits mode */
1866 /* Note the 32 bits exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), but that appears missing in Intel CPUs */
1867 /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!! */
1868 /**
1869 * @todo reduce overhead
1870 */
1871 if ( (pCtx->msrEFER & MSR_K6_EFER_LMA)
1872 && pVM->hwaccm.s.vmx.pAPIC)
1873 {
1874 /* TPR caching in CR8 */
1875 uint8_t u8TPR;
1876 bool fPending;
1877
1878 int rc = PDMApicGetTPR(pVM, &u8TPR, &fPending);
1879 AssertRC(rc);
1880 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
1881 pVM->hwaccm.s.vmx.pAPIC[0x80] = u8TPR << 4; /* bits 7-4 contain the task priority */
1882
1883 /* Two options here:
1884 * - external interrupt pending, but masked by the TPR value.
1885 * -> a CR8 update that lowers the current TPR value should cause an exit
1886 * - no pending interrupts
1887 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
1888 */
1889 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? u8TPR : 0);
1890 AssertRC(rc);
1891
1892 /* Always sync back the TPR; we should optimize this though */ /** @todo optimize TPR sync. */
1893 fSyncTPR = true;
1894 }
1895
1896#if defined(HWACCM_VTX_WITH_EPT) && defined(LOG_ENABLED)
1897 if ( pVM->hwaccm.s.fNestedPaging
1898# ifdef HWACCM_VTX_WITH_VPID
1899 || pVM->hwaccm.s.vmx.fVPID
1900# endif /* HWACCM_VTX_WITH_VPID */
1901 )
1902 {
1903 pCpu = HWACCMR0GetCurrentCpu();
1904 if ( pVM->hwaccm.s.idLastCpu != pCpu->idCpu
1905 || pVM->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1906 {
1907 if (pVM->hwaccm.s.idLastCpu != pCpu->idCpu)
1908 Log(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVM->hwaccm.s.idLastCpu, pCpu->idCpu));
1909 else
1910 Log(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVM->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
1911 }
1912 if (pCpu->fFlushTLB)
1913 Log(("Force TLB flush: first time cpu %d is used -> flush\n", pCpu->idCpu));
1914 }
1915#endif
1916
1917 /*
1918 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
1919 * (until the actual world switch)
1920 */
1921#ifdef VBOX_STRICT
1922 idCpuCheck = RTMpCpuId();
1923#endif
1924 /* Save the host state first. */
1925 rc = VMXR0SaveHostState(pVM);
1926 if (rc != VINF_SUCCESS)
1927 {
1928 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatEntry, x);
1929 goto end;
1930 }
1931 /* Load the guest state */
1932 rc = VMXR0LoadGuestState(pVM, pCtx);
1933 if (rc != VINF_SUCCESS)
1934 {
1935 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatEntry, x);
1936 goto end;
1937 }
1938
1939 /* Deal with tagged TLB setup and invalidation. */
1940 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM);
1941
1942 /* Non-register state Guest Context */
1943 /** @todo change me according to cpu state */
1944 rc = VMXWriteVMCS(VMX_VMCS_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
1945 AssertRC(rc);
1946
1947 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatEntry, x);
1948
1949 /* Manual save and restore:
1950 * - General purpose registers except RIP, RSP
1951 *
1952 * Trashed:
1953 * - CR2 (we don't care)
1954 * - LDTR (reset to 0)
1955 * - DRx (presumably not changed at all)
1956 * - DR7 (reset to 0x400)
1957 * - EFLAGS (reset to RT_BIT(1); not relevant)
1958 *
1959 */
1960
1961 /* All done! Let's start VM execution. */
1962 STAM_PROFILE_ADV_START(&pVM->hwaccm.s.StatInGC, x);
1963#ifdef VBOX_STRICT
1964 Assert(idCpuCheck == RTMpCpuId());
1965#endif
1966 TMNotifyStartOfExecution(pVM);
1967 rc = pVM->hwaccm.s.vmx.pfnStartVM(pVM->hwaccm.s.vmx.fResumeVM, pCtx);
1968 TMNotifyEndOfExecution(pVM);
1969
1970 /* In case we execute a goto ResumeExecution later on. */
1971 pVM->hwaccm.s.vmx.fResumeVM = true;
1972 pVM->hwaccm.s.fForceTLBFlush = false;
1973
1974 /*
1975 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1976 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
1977 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1978 */
1979
1980 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatInGC, x);
1981 STAM_PROFILE_ADV_START(&pVM->hwaccm.s.StatExit, x);
1982
1983 if (rc != VINF_SUCCESS)
1984 {
1985 VMXR0ReportWorldSwitchError(pVM, rc, pCtx);
1986 goto end;
1987 }
1988 /* Success. Query the guest state and figure out what has happened. */
1989
1990 /* Investigate why there was a VM-exit. */
1991 rc = VMXReadVMCS(VMX_VMCS_RO_EXIT_REASON, &exitReason);
1992 STAM_COUNTER_INC(&pVM->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
1993
1994 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
1995 rc |= VMXReadVMCS(VMX_VMCS_RO_VM_INSTR_ERROR, &instrError);
1996 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_INSTR_LENGTH, &cbInstr);
1997 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_INTERRUPTION_INFO, &val);
1998 intInfo = val;
1999 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_INTERRUPTION_ERRCODE, &val);
2000 errCode = val; /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
2001 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_INSTR_INFO, &val);
2002 instrInfo = val;
2003 rc |= VMXReadVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &val);
2004 exitQualification = val;
2005 AssertRC(rc);
2006
2007 /* Sync back the guest state */
2008 rc = VMXR0SaveGuestState(pVM, pCtx);
2009 AssertRC(rc);
2010
2011 /* Note! NOW IT'S SAFE FOR LOGGING! */
2012 Log2(("Raw exit reason %08x\n", exitReason));
2013
2014 /* Check if an injected event was interrupted prematurely. */
2015 rc = VMXReadVMCS(VMX_VMCS_RO_IDT_INFO, &val);
2016 AssertRC(rc);
2017 pVM->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
2018 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVM->hwaccm.s.Event.intInfo)
2019 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVM->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW)
2020 {
2021 pVM->hwaccm.s.Event.fPending = true;
2022 /* Error code present? */
2023 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVM->hwaccm.s.Event.intInfo))
2024 {
2025 rc = VMXReadVMCS(VMX_VMCS_RO_IDT_ERRCODE, &val);
2026 AssertRC(rc);
2027 pVM->hwaccm.s.Event.errCode = val;
2028 Log(("Pending inject %VX64 at %VGv exit=%08x intInfo=%08x exitQualification=%08x pending error=%RX64\n", pVM->hwaccm.s.Event.intInfo, pCtx->rip, exitReason, intInfo, exitQualification, val));
2029 }
2030 else
2031 {
2032 Log(("Pending inject %VX64 at %VGv exit=%08x intInfo=%08x exitQualification=%08x\n", pVM->hwaccm.s.Event.intInfo, pCtx->rip, exitReason, intInfo, exitQualification));
2033 pVM->hwaccm.s.Event.errCode = 0;
2034 }
2035 }
2036
2037#ifdef VBOX_STRICT
2038 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
2039 HWACCMDumpRegs(pVM, pCtx);
2040#endif
2041
2042 Log2(("E%d", exitReason));
2043 Log2(("Exit reason %d, exitQualification %08x\n", exitReason, exitQualification));
2044 Log2(("instrInfo=%d instrError=%d instr length=%d\n", instrInfo, instrError, cbInstr));
2045 Log2(("Interruption error code %d\n", errCode));
2046 Log2(("IntInfo = %08x\n", intInfo));
2047 Log2(("New EIP=%VGv\n", pCtx->rip));
2048
2049 if (fSyncTPR)
2050 {
2051 rc = PDMApicSetTPR(pVM, pVM->hwaccm.s.vmx.pAPIC[0x80] >> 4);
2052 AssertRC(rc);
2053 }
2054
2055 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
2056 switch (exitReason)
2057 {
2058 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2059 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2060 {
2061 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
2062
2063 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2064 {
2065 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
2066 /* External interrupt; leave to allow it to be dispatched again. */
2067 rc = VINF_EM_RAW_INTERRUPT;
2068 break;
2069 }
2070 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
2071 {
2072 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
2073 /* External interrupt; leave to allow it to be dispatched again. */
2074 rc = VINF_EM_RAW_INTERRUPT;
2075 break;
2076
2077 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
2078 AssertFailed(); /* can't come here; fails the first check. */
2079 break;
2080
2081 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
2082 Assert(vector == 3 || vector == 4);
2083 /* no break */
2084 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
2085 Log2(("Hardware/software interrupt %d\n", vector));
2086 switch (vector)
2087 {
2088 case X86_XCPT_NM:
2089 {
2090 Log(("#NM fault at %VGv error code %x\n", pCtx->rip, errCode));
2091
2092 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
2093 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
2094 rc = CPUMR0LoadGuestFPU(pVM, pCtx);
2095 if (rc == VINF_SUCCESS)
2096 {
2097 Assert(CPUMIsGuestFPUStateActive(pVM));
2098
2099 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitShadowNM);
2100
2101 /* Continue execution. */
2102 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2103 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2104
2105 goto ResumeExecution;
2106 }
2107
2108 Log(("Forward #NM fault to the guest\n"));
2109 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestNM);
2110 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
2111 AssertRC(rc);
2112 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2113 goto ResumeExecution;
2114 }
2115
2116 case X86_XCPT_PF: /* Page fault */
2117 {
2118#ifdef DEBUG
2119 if (pVM->hwaccm.s.fNestedPaging)
2120 { /* A genuine pagefault.
2121 * Forward the trap to the guest by injecting the exception and resuming execution.
2122 */
2123 Log(("Guest page fault at %VGv cr2=%VGv error code %x rsp=%VGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
2124
2125 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
2126
2127 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestPF);
2128
2129 /* Now we must update CR2. */
2130 pCtx->cr2 = exitQualification;
2131 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2132 AssertRC(rc);
2133
2134 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2135 goto ResumeExecution;
2136 }
2137#endif
2138 Assert(!pVM->hwaccm.s.fNestedPaging);
2139
2140 Log2(("Page fault at %VGv error code %x\n", exitQualification, errCode));
2141 /* Exit qualification contains the linear address of the page fault. */
2142 TRPMAssertTrap(pVM, X86_XCPT_PF, TRPM_TRAP);
2143 TRPMSetErrorCode(pVM, errCode);
2144 TRPMSetFaultAddress(pVM, exitQualification);
2145
2146 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
2147 rc = PGMTrap0eHandler(pVM, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
2148 Log2(("PGMTrap0eHandler %VGv returned %Vrc\n", pCtx->rip, rc));
2149 if (rc == VINF_SUCCESS)
2150 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2151 Log2(("Shadow page fault at %VGv cr2=%VGv error code %x\n", pCtx->rip, exitQualification ,errCode));
2152 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitShadowPF);
2153
2154 TRPMResetTrap(pVM);
2155
2156 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2157 goto ResumeExecution;
2158 }
2159 else
2160 if (rc == VINF_EM_RAW_GUEST_TRAP)
2161 { /* A genuine pagefault.
2162 * Forward the trap to the guest by injecting the exception and resuming execution.
2163 */
2164 Log2(("Forward page fault to the guest\n"));
2165
2166 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestPF);
2167 /* The error code might have been changed. */
2168 errCode = TRPMGetErrorCode(pVM);
2169
2170 TRPMResetTrap(pVM);
2171
2172 /* Now we must update CR2. */
2173 pCtx->cr2 = exitQualification;
2174 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2175 AssertRC(rc);
2176
2177 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2178 goto ResumeExecution;
2179 }
2180#ifdef VBOX_STRICT
2181 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2182 Log2(("PGMTrap0eHandler failed with %d\n", rc));
2183#endif
2184 /* Need to go back to the recompiler to emulate the instruction. */
2185 TRPMResetTrap(pVM);
2186 break;
2187 }
2188
2189 case X86_XCPT_MF: /* Floating point exception. */
2190 {
2191 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestMF);
2192 if (!(pCtx->cr0 & X86_CR0_NE))
2193 {
2194 /* old style FPU error reporting needs some extra work. */
2195 /** @todo don't fall back to the recompiler, but do it manually. */
2196 rc = VINF_EM_RAW_EMULATE_INSTR;
2197 break;
2198 }
2199 Log(("Trap %x at %VGv\n", vector, pCtx->rip));
2200 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2201 AssertRC(rc);
2202
2203 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2204 goto ResumeExecution;
2205 }
2206
2207 case X86_XCPT_DB: /* Debug exception. */
2208 {
2209 uint64_t uDR6;
2210
2211 /* DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
2212 *
2213 * Exit qualification bits:
2214 * 3:0 B0-B3 which breakpoint condition was met
2215 * 12:4 Reserved (0)
2216 * 13 BD - debug register access detected
2217 * 14 BS - single step execution or branch taken
2218 * 63:15 Reserved (0)
2219 */
2220 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestDB);
2221
2222 /* Note that we don't support guest and host-initiated debugging at the same time. */
2223 Assert(DBGFIsStepping(pVM) || CPUMIsGuestInRealModeEx(pCtx));
2224
2225 uDR6 = X86_DR6_INIT_VAL;
2226 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
2227 rc = DBGFR0Trap01Handler(pVM, CPUMCTX2CORE(pCtx), uDR6);
2228 if (rc == VINF_EM_RAW_GUEST_TRAP)
2229 {
2230 /** @todo this isn't working, but we'll never get here normally. */
2231
2232 /* Update DR6 here. */
2233 pCtx->dr[6] = uDR6;
2234
2235 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2236 pCtx->dr[7] &= ~X86_DR7_GD;
2237
2238 /* Paranoia. */
2239 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2240 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2241 pCtx->dr[7] |= 0x400; /* must be one */
2242
2243 /* Resync DR7 */
2244 rc = VMXWriteVMCS(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
2245 AssertRC(rc);
2246
2247 Log(("Trap %x (debug) at %VGv exit qualification %VX64\n", vector, pCtx->rip, exitQualification));
2248 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2249 AssertRC(rc);
2250
2251 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2252 goto ResumeExecution;
2253 }
2254 /* Return to ring 3 to deal with the debug exit code. */
2255 break;
2256 }
2257
2258 case X86_XCPT_GP: /* General protection failure exception.*/
2259 {
2260 uint32_t cbSize;
2261
2262 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestGP);
2263#ifdef VBOX_STRICT
2264 if (!CPUMIsGuestInRealModeEx(pCtx))
2265 {
2266 Log(("Trap %x at %VGv error code %x\n", vector, pCtx->rip, errCode));
2267 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2268 AssertRC(rc);
2269 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2270 goto ResumeExecution;
2271 }
2272#endif
2273 Assert(CPUMIsGuestInRealModeEx(pCtx));
2274
2275 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %VGv\n", pCtx->rip));
2276 rc = EMInterpretInstruction(pVM, CPUMCTX2CORE(pCtx), 0, &cbSize);
2277 if (rc == VINF_SUCCESS)
2278 {
2279 /* EIP has been updated already. */
2280
2281 /* lidt, lgdt can end up here. In the future crx changes as well. Just reload the whole context to be done with it. */
2282 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
2283
2284 /* Only resume if successful. */
2285 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2286 goto ResumeExecution;
2287 }
2288 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Vrc\n", rc));
2289 break;
2290 }
2291
2292#ifdef VBOX_STRICT
2293 case X86_XCPT_DE: /* Divide error. */
2294 case X86_XCPT_UD: /* Unknown opcode exception. */
2295 case X86_XCPT_SS: /* Stack segment exception. */
2296 case X86_XCPT_NP: /* Segment not present exception. */
2297 {
2298 switch(vector)
2299 {
2300 case X86_XCPT_DE:
2301 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestDE);
2302 break;
2303 case X86_XCPT_UD:
2304 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestUD);
2305 break;
2306 case X86_XCPT_SS:
2307 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestSS);
2308 break;
2309 case X86_XCPT_NP:
2310 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitGuestNP);
2311 break;
2312 }
2313
2314 Log(("Trap %x at %VGv error code %x\n", vector, pCtx->rip, errCode));
2315 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2316 AssertRC(rc);
2317
2318 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2319 goto ResumeExecution;
2320 }
2321#endif
2322 default:
2323#ifdef HWACCM_VMX_EMULATE_REALMODE
2324 if (CPUMIsGuestInRealModeEx(pCtx))
2325 {
2326 Log(("Real Mode Trap %x at %VGv error code %x\n", vector, pCtx->rip, errCode));
2327 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2328 AssertRC(rc);
2329
2330 /* Go back to ring 3 in case of a triple fault. */
2331 if ( vector == X86_XCPT_DF
2332 && rc == VINF_EM_RESET)
2333 break;
2334
2335 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2336 goto ResumeExecution;
2337 }
2338#endif
2339 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
2340 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
2341 break;
2342 } /* switch (vector) */
2343
2344 break;
2345
2346 default:
2347 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
2348 AssertFailed();
2349 break;
2350 }
2351
2352 break;
2353 }
2354
2355 case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
2356 {
2357 RTGCPHYS GCPhys;
2358
2359 Assert(pVM->hwaccm.s.fNestedPaging);
2360
2361#if HC_ARCH_BITS == 64
2362 rc = VMXReadVMCS(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
2363 AssertRC(rc);
2364#else
2365 uint32_t val_hi;
2366 rc = VMXReadVMCS(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &val);
2367 AssertRC(rc);
2368 rc = VMXReadVMCS(VMX_VMCS_EXIT_PHYS_ADDR_HIGH, &val_hi);
2369 AssertRC(rc);
2370 GCPhys = RT_MAKE_U64(val, val_hi);
2371#endif
2372
2373 Assert(((exitQualification >> 7) & 3) != 2);
2374
2375 /* Determine the kind of violation. */
2376 errCode = 0;
2377 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
2378 errCode |= X86_TRAP_PF_ID;
2379
2380 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
2381 errCode |= X86_TRAP_PF_RW;
2382
2383 /* If the page is present, then it's a page level protection fault. */
2384 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
2385 errCode |= X86_TRAP_PF_P;
2386
2387 Log(("EPT Page fault %x at %VGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
2388
2389 /* GCPhys contains the guest physical address of the page fault. */
2390 TRPMAssertTrap(pVM, X86_XCPT_PF, TRPM_TRAP);
2391 TRPMSetErrorCode(pVM, errCode);
2392 TRPMSetFaultAddress(pVM, GCPhys);
2393
2394 /* Handle the pagefault trap for the nested shadow table. */
2395 rc = PGMR0Trap0eHandlerNestedPaging(pVM, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
2396 Log2(("PGMR0Trap0eHandlerNestedPaging %VGv returned %Vrc\n", pCtx->rip, rc));
2397 if (rc == VINF_SUCCESS)
2398 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2399 Log2(("Shadow page fault at %VGv cr2=%VGp error code %x\n", pCtx->rip, exitQualification , errCode));
2400 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitShadowPF);
2401
2402 TRPMResetTrap(pVM);
2403
2404 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2405 goto ResumeExecution;
2406 }
2407
2408#ifdef VBOX_STRICT
2409 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2410 LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", rc));
2411#endif
2412 /* Need to go back to the recompiler to emulate the instruction. */
2413 TRPMResetTrap(pVM);
2414 break;
2415 }
2416
2417 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
2418 /* Clear VM-exit on IF=1 change. */
2419 LogFlow(("VMX_EXIT_IRQ_WINDOW %VGv pending=%d IF=%d\n", pCtx->rip, VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
2420 pVM->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
2421 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVM->hwaccm.s.vmx.proc_ctls);
2422 AssertRC(rc);
2423 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitIrqWindow);
2424 goto ResumeExecution; /* we check for pending guest interrupts there */
2425
2426 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
2427 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
2428 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitInvd);
2429 /* Skip instruction and continue directly. */
2430 pCtx->rip += cbInstr;
2431 /* Continue execution.*/
2432 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2433 goto ResumeExecution;
2434
2435 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
2436 {
2437 Log2(("VMX: Cpuid %x\n", pCtx->eax));
2438 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitCpuid);
2439 rc = EMInterpretCpuId(pVM, CPUMCTX2CORE(pCtx));
2440 if (rc == VINF_SUCCESS)
2441 {
2442 /* Update EIP and continue execution. */
2443 Assert(cbInstr == 2);
2444 pCtx->rip += cbInstr;
2445 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2446 goto ResumeExecution;
2447 }
2448 AssertMsgFailed(("EMU: cpuid failed with %Vrc\n", rc));
2449 rc = VINF_EM_RAW_EMULATE_INSTR;
2450 break;
2451 }
2452
2453 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
2454 {
2455 Log2(("VMX: Rdtsc\n"));
2456 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitRdtsc);
2457 rc = EMInterpretRdtsc(pVM, CPUMCTX2CORE(pCtx));
2458 if (rc == VINF_SUCCESS)
2459 {
2460 /* Update EIP and continue execution. */
2461 Assert(cbInstr == 2);
2462 pCtx->rip += cbInstr;
2463 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2464 goto ResumeExecution;
2465 }
2466 AssertMsgFailed(("EMU: rdtsc failed with %Vrc\n", rc));
2467 rc = VINF_EM_RAW_EMULATE_INSTR;
2468 break;
2469 }
2470
2471 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVPG. */
2472 {
2473 Log2(("VMX: invlpg\n"));
2474 Assert(!pVM->hwaccm.s.fNestedPaging);
2475
2476 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitInvpg);
2477 rc = EMInterpretInvlpg(pVM, CPUMCTX2CORE(pCtx), exitQualification);
2478 if (rc == VINF_SUCCESS)
2479 {
2480 /* Update EIP and continue execution. */
2481 pCtx->rip += cbInstr;
2482 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2483 goto ResumeExecution;
2484 }
2485 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %VGv failed with %Vrc\n", exitQualification, rc));
2486 break;
2487 }
2488
2489 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
2490 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
2491 {
2492 uint32_t cbSize;
2493
2494 /* Note: the intel manual claims there's a REX version of RDMSR that's slightly different, so we play safe by completely disassembling the instruction. */
2495 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
2496 rc = EMInterpretInstruction(pVM, CPUMCTX2CORE(pCtx), 0, &cbSize);
2497 if (rc == VINF_SUCCESS)
2498 {
2499 /* EIP has been updated already. */
2500
2501 /* Only resume if successful. */
2502 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2503 goto ResumeExecution;
2504 }
2505 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Vrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", rc));
2506 break;
2507 }
2508
2509 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
2510 {
2511 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
2512 {
2513 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
2514 Log2(("VMX: %VGv mov cr%d, x\n", pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
2515 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitCRxWrite);
2516 rc = EMInterpretCRxWrite(pVM, CPUMCTX2CORE(pCtx),
2517 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
2518 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
2519
2520 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
2521 {
2522 case 0:
2523 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
2524 break;
2525 case 2:
2526 break;
2527 case 3:
2528 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
2529 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
2530 break;
2531 case 4:
2532 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
2533 break;
2534 case 8:
2535 /* CR8 contains the APIC TPR */
2536 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
2537 break;
2538
2539 default:
2540 AssertFailed();
2541 break;
2542 }
2543 /* Check if a sync operation is pending. */
2544 if ( rc == VINF_SUCCESS /* don't bother if we are going to ring 3 anyway */
2545 && VM_FF_ISPENDING(pVM, VM_FF_PGM_SYNC_CR3 | VM_FF_PGM_SYNC_CR3_NON_GLOBAL))
2546 {
2547 rc = PGMSyncCR3(pVM, CPUMGetGuestCR0(pVM), CPUMGetGuestCR3(pVM), CPUMGetGuestCR4(pVM), VM_FF_ISSET(pVM, VM_FF_PGM_SYNC_CR3));
2548 AssertRC(rc);
2549 }
2550 break;
2551
2552 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
2553 Log2(("VMX: mov x, crx\n"));
2554 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitCRxRead);
2555
2556 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
2557
2558 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
2559 Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
2560
2561 rc = EMInterpretCRxRead(pVM, CPUMCTX2CORE(pCtx),
2562 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
2563 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
2564 break;
2565
2566 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
2567 Log2(("VMX: clts\n"));
2568 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitCLTS);
2569 rc = EMInterpretCLTS(pVM);
2570 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2571 break;
2572
2573 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
2574 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
2575 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitLMSW);
2576 rc = EMInterpretLMSW(pVM, VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
2577 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2578 break;
2579 }
2580
2581 /* Update EIP if no error occurred. */
2582 if (VBOX_SUCCESS(rc))
2583 pCtx->rip += cbInstr;
2584
2585 if (rc == VINF_SUCCESS)
2586 {
2587 /* Only resume if successful. */
2588 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2589 goto ResumeExecution;
2590 }
2591 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2592 break;
2593 }
2594
2595 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
2596 {
2597 if (!DBGFIsStepping(pVM))
2598 {
2599 /* Disable drx move intercepts. */
2600 pVM->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
2601 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVM->hwaccm.s.vmx.proc_ctls);
2602 AssertRC(rc);
2603
2604 /* Save the host and load the guest debug state. */
2605 rc = CPUMR0LoadGuestDebugState(pVM, pCtx, true /* include DR6 */);
2606 AssertRC(rc);
2607
2608#ifdef VBOX_WITH_STATISTICS
2609 STAM_COUNTER_INC(&pVM->hwaccm.s.StatDRxContextSwitch);
2610 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
2611 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitDRxWrite);
2612 else
2613 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitDRxRead);
2614#endif
2615
2616 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2617 goto ResumeExecution;
2618 }
2619
2620 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
2621 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
2622 {
2623 Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
2624 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitDRxWrite);
2625 rc = EMInterpretDRxWrite(pVM, CPUMCTX2CORE(pCtx),
2626 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
2627 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
2628 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
2629 Log2(("DR7=%08x\n", pCtx->dr[7]));
2630 }
2631 else
2632 {
2633 Log2(("VMX: mov x, drx\n"));
2634 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitDRxRead);
2635 rc = EMInterpretDRxRead(pVM, CPUMCTX2CORE(pCtx),
2636 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
2637 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
2638 }
2639 /* Update EIP if no error occurred. */
2640 if (VBOX_SUCCESS(rc))
2641 pCtx->rip += cbInstr;
2642
2643 if (rc == VINF_SUCCESS)
2644 {
2645 /* Only resume if successful. */
2646 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2647 goto ResumeExecution;
2648 }
2649 Assert(rc == VERR_EM_INTERPRETER);
2650 break;
2651 }
2652
2653 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
2654 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
2655 {
2656 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
2657 uint32_t uPort;
2658 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
2659
2660 /** @todo necessary to make the distinction? */
2661 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
2662 {
2663 uPort = pCtx->edx & 0xffff;
2664 }
2665 else
2666 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
2667
2668 /* paranoia */
2669 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4))
2670 {
2671 rc = fIOWrite ? VINF_IOM_HC_IOPORT_WRITE : VINF_IOM_HC_IOPORT_READ;
2672 break;
2673 }
2674
2675 uint32_t cbSize = g_aIOSize[uIOWidth];
2676
2677 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
2678 {
2679 /* ins/outs */
2680 uint32_t prefix = 0;
2681 if (VMX_EXIT_QUALIFICATION_IO_REP(exitQualification))
2682 prefix |= PREFIX_REP;
2683
2684 if (fIOWrite)
2685 {
2686 Log2(("IOMInterpretOUTSEx %VGv %x size=%d\n", pCtx->rip, uPort, cbSize));
2687 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitIOStringWrite);
2688 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, prefix, cbSize);
2689 }
2690 else
2691 {
2692 Log2(("IOMInterpretINSEx %VGv %x size=%d\n", pCtx->rip, uPort, cbSize));
2693 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitIOStringRead);
2694 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, prefix, cbSize);
2695 }
2696 }
2697 else
2698 {
2699 /* normal in/out */
2700 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
2701
2702 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
2703
2704 if (fIOWrite)
2705 {
2706 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitIOWrite);
2707 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
2708 }
2709 else
2710 {
2711 uint32_t u32Val = 0;
2712
2713 STAM_COUNTER_INC(&pVM->hwaccm.s.StatExitIORead);
2714 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
2715 if (IOM_SUCCESS(rc))
2716 {
2717 /* Write back to the EAX register. */
2718 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
2719 }
2720 }
2721 }
2722 /*
2723 * Handled the I/O return codes.
2724 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
2725 */
2726 if (IOM_SUCCESS(rc))
2727 {
2728 /* Update EIP and continue execution. */
2729 pCtx->rip += cbInstr;
2730 if (RT_LIKELY(rc == VINF_SUCCESS))
2731 {
2732 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
2733 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
2734 {
2735 STAM_COUNTER_INC(&pVM->hwaccm.s.StatDRxIOCheck);
2736 for (unsigned i=0;i<4;i++)
2737 {
2738 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
2739
2740 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
2741 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
2742 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
2743 {
2744 uint64_t uDR6;
2745
2746 Assert(CPUMIsGuestDebugStateActive(pVM));
2747
2748 uDR6 = ASMGetDR6();
2749
2750 /* Clear all breakpoint status flags and set the one we just hit. */
2751 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
2752 uDR6 |= (uint64_t)RT_BIT(i);
2753
2754 /* Note: AMD64 Architecture Programmer's Manual 13.1:
2755 * Bits 15:13 of the DR6 register is never cleared by the processor and must be cleared by software after
2756 * the contents have been read.
2757 */
2758 ASMSetDR6(uDR6);
2759
2760 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2761 pCtx->dr[7] &= ~X86_DR7_GD;
2762
2763 /* Paranoia. */
2764 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2765 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2766 pCtx->dr[7] |= 0x400; /* must be one */
2767
2768 /* Resync DR7 */
2769 rc = VMXWriteVMCS(VMX_VMCS_GUEST_DR7, pCtx->dr[7]);
2770 AssertRC(rc);
2771
2772 /* Construct inject info. */
2773 intInfo = X86_XCPT_DB;
2774 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
2775 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
2776
2777 Log(("Inject IO debug trap at %VGv\n", pCtx->rip));
2778 rc = VMXR0InjectEvent(pVM, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
2779 AssertRC(rc);
2780
2781 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2782 goto ResumeExecution;
2783 }
2784 }
2785 }
2786
2787 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2788 goto ResumeExecution;
2789 }
2790 break;
2791 }
2792
2793#ifdef VBOX_STRICT
2794 if (rc == VINF_IOM_HC_IOPORT_READ)
2795 Assert(!fIOWrite);
2796 else if (rc == VINF_IOM_HC_IOPORT_WRITE)
2797 Assert(fIOWrite);
2798 else
2799 AssertMsg(VBOX_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Vrc\n", rc));
2800#endif
2801 break;
2802 }
2803
2804 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
2805 LogFlow(("VMX_EXIT_TPR\n"));
2806 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
2807 goto ResumeExecution;
2808
2809 default:
2810 /* The rest is handled after syncing the entire CPU state. */
2811 break;
2812 }
2813
2814 /* Note: the guest state isn't entirely synced back at this stage. */
2815
2816 /* Investigate why there was a VM-exit. (part 2) */
2817 switch (exitReason)
2818 {
2819 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2820 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2821 case VMX_EXIT_EPT_VIOLATION:
2822 /* Already handled above. */
2823 break;
2824
2825 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
2826 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
2827 break;
2828
2829 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
2830 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
2831 rc = VINF_EM_RAW_INTERRUPT;
2832 AssertFailed(); /* Can't happen. Yet. */
2833 break;
2834
2835 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
2836 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
2837 rc = VINF_EM_RAW_INTERRUPT;
2838 AssertFailed(); /* Can't happen afaik. */
2839 break;
2840
2841 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch. */
2842 rc = VERR_EM_INTERPRETER;
2843 break;
2844
2845 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
2846 /** Check if external interrupts are pending; if so, don't switch back. */
2847 pCtx->rip++; /* skip hlt */
2848 if ( pCtx->eflags.Bits.u1IF
2849 && VM_FF_ISPENDING(pVM, (VM_FF_INTERRUPT_APIC|VM_FF_INTERRUPT_PIC)))
2850 goto ResumeExecution;
2851
2852 rc = VINF_EM_HALT;
2853 break;
2854
2855 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
2856 AssertFailed(); /* can't happen. */
2857 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
2858 break;
2859
2860 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
2861 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
2862 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
2863 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
2864 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
2865 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
2866 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
2867 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
2868 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
2869 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
2870 /** @todo inject #UD immediately */
2871 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
2872 break;
2873
2874 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
2875 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
2876 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVPG. */
2877 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
2878 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
2879 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
2880 /* already handled above */
2881 AssertMsg( rc == VINF_PGM_CHANGE_MODE
2882 || rc == VINF_EM_RAW_INTERRUPT
2883 || rc == VERR_EM_INTERPRETER
2884 || rc == VINF_EM_RAW_EMULATE_INSTR
2885 || rc == VINF_PGM_SYNC_CR3
2886 || rc == VINF_IOM_HC_IOPORT_READ
2887 || rc == VINF_IOM_HC_IOPORT_WRITE
2888 || rc == VINF_EM_RAW_GUEST_TRAP
2889 || rc == VINF_TRPM_XCPT_DISPATCHED
2890 || rc == VINF_EM_RESCHEDULE_REM,
2891 ("rc = %d\n", rc));
2892 break;
2893
2894 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
2895 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
2896 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
2897 /* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
2898 rc = VERR_EM_INTERPRETER;
2899 break;
2900
2901 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
2902 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
2903 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
2904 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
2905 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
2906 break;
2907
2908 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
2909 Assert(rc == VINF_EM_RAW_INTERRUPT);
2910 break;
2911
2912 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
2913 {
2914#ifdef VBOX_STRICT
2915 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
2916
2917 VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val);
2918 Log(("Old eip %VGv new %VGv\n", pCtx->rip, (RTGCPTR)val));
2919
2920 VMXReadVMCS(VMX_VMCS_GUEST_CR0, &val);
2921 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", val));
2922
2923 VMXReadVMCS(VMX_VMCS_GUEST_CR3, &val);
2924 Log(("VMX_VMCS_GUEST_CR3 %VGp\n", val));
2925
2926 VMXReadVMCS(VMX_VMCS_GUEST_CR4, &val);
2927 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", val));
2928
2929 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
2930 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
2931
2932 VMX_LOG_SELREG(CS, "CS");
2933 VMX_LOG_SELREG(DS, "DS");
2934 VMX_LOG_SELREG(ES, "ES");
2935 VMX_LOG_SELREG(FS, "FS");
2936 VMX_LOG_SELREG(GS, "GS");
2937 VMX_LOG_SELREG(SS, "SS");
2938 VMX_LOG_SELREG(TR, "TR");
2939 VMX_LOG_SELREG(LDTR, "LDTR");
2940
2941 VMXReadVMCS(VMX_VMCS_GUEST_GDTR_BASE, &val);
2942 Log(("VMX_VMCS_GUEST_GDTR_BASE %VGv\n", val));
2943 VMXReadVMCS(VMX_VMCS_GUEST_IDTR_BASE, &val);
2944 Log(("VMX_VMCS_GUEST_IDTR_BASE %VGv\n", val));
2945#endif /* VBOX_STRICT */
2946 rc = VERR_VMX_INVALID_GUEST_STATE;
2947 break;
2948 }
2949
2950 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
2951 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
2952 default:
2953 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
2954 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
2955 break;
2956
2957 }
2958end:
2959
2960 /* Signal changes for the recompiler. */
2961 CPUMSetChangedFlags(pVM, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
2962
2963 /* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
2964 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
2965 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2966 {
2967 STAM_COUNTER_INC(&pVM->hwaccm.s.StatPendingHostIrq);
2968 /* On the next entry we'll only sync the host context. */
2969 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
2970 }
2971 else
2972 {
2973 /* On the next entry we'll sync everything. */
2974 /** @todo we can do better than this */
2975 /* Not in the VINF_PGM_CHANGE_MODE though! */
2976 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
2977 }
2978
2979 /* translate into a less severe return code */
2980 if (rc == VERR_EM_INTERPRETER)
2981 rc = VINF_EM_RAW_EMULATE_INSTR;
2982 else
2983 /* Try to extract more information about what might have gone wrong here. */
2984 if (rc == VERR_VMX_INVALID_VMCS_PTR)
2985 {
2986 VMXGetActivateVMCS(&pVM->hwaccm.s.vmx.lasterror.u64VMCSPhys);
2987 pVM->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVM->hwaccm.s.vmx.pVMCS;
2988 }
2989
2990 STAM_PROFILE_ADV_STOP(&pVM->hwaccm.s.StatExit, x);
2991
2992 Log2(("X"));
2993 return rc;
2994}
2995
2996
2997/**
2998 * Enters the VT-x session
2999 *
3000 * @returns VBox status code.
3001 * @param pVM The VM to operate on.
3002 * @param pCpu CPU info struct
3003 */
3004VMMR0DECL(int) VMXR0Enter(PVM pVM, PHWACCM_CPUINFO pCpu)
3005{
3006 Assert(pVM->hwaccm.s.vmx.fSupported);
3007
3008 unsigned cr4 = ASMGetCR4();
3009 if (!(cr4 & X86_CR4_VMXE))
3010 {
3011 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
3012 return VERR_VMX_X86_CR4_VMXE_CLEARED;
3013 }
3014
3015 /* Activate the VM Control Structure. */
3016 int rc = VMXActivateVMCS(pVM->hwaccm.s.vmx.pVMCSPhys);
3017 if (VBOX_FAILURE(rc))
3018 return rc;
3019
3020 pVM->hwaccm.s.vmx.fResumeVM = false;
3021 return VINF_SUCCESS;
3022}
3023
3024
3025/**
3026 * Leaves the VT-x session
3027 *
3028 * @returns VBox status code.
3029 * @param pVM The VM to operate on.
3030 * @param pCtx CPU context
3031 */
3032VMMR0DECL(int) VMXR0Leave(PVM pVM, PCPUMCTX pCtx)
3033{
3034 Assert(pVM->hwaccm.s.vmx.fSupported);
3035
3036 /* Save the guest debug state if necessary. */
3037 if (CPUMIsGuestDebugStateActive(pVM))
3038 {
3039 CPUMR0SaveGuestDebugState(pVM, pCtx, true /* save DR6 */);
3040
3041 /* Enable drx move intercepts again. */
3042 pVM->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3043 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVM->hwaccm.s.vmx.proc_ctls);
3044 AssertRC(rc);
3045
3046 /* Resync the debug registers the next time. */
3047 pVM->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
3048 }
3049 else
3050 Assert(pVM->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
3051
3052 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
3053 int rc = VMXClearVMCS(pVM->hwaccm.s.vmx.pVMCSPhys);
3054 AssertRC(rc);
3055
3056 return VINF_SUCCESS;
3057}
3058
3059/**
3060 * Flush the TLB (EPT)
3061 *
3062 * @returns VBox status code.
3063 * @param pVM The VM to operate on.
3064 * @param enmFlush Type of flush
3065 * @param GCPhys Physical address of the page to flush
3066 */
3067static void vmxR0FlushEPT(PVM pVM, VMX_FLUSH enmFlush, RTGCPHYS GCPhys)
3068{
3069 uint64_t descriptor[2];
3070
3071 LogFlow(("vmxR0FlushEPT %d %VGv\n", enmFlush, GCPhys));
3072 Assert(pVM->hwaccm.s.fNestedPaging);
3073 descriptor[0] = pVM->hwaccm.s.vmx.GCPhysEPTP;
3074 descriptor[1] = GCPhys;
3075 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
3076 AssertRC(rc);
3077}
3078
#ifdef HWACCM_VTX_WITH_VPID
/**
 * Flush the TLB (VPID).
 *
 * Builds a two-quadword descriptor holding the current ASID and the given
 * guest-virtual address and passes it to VMXR0InvVPID. A failure is asserted
 * only; nothing is returned.
 *
 * @param   pVM         The VM to operate on.
 * @param   enmFlush    Type of flush.
 * @param   GCPtr       Virtual address of the page to flush.
 */
static void vmxR0FlushVPID(PVM pVM, VMX_FLUSH enmFlush, RTGCPTR GCPtr)
{
    Assert(pVM->hwaccm.s.vmx.fVPID);

    /* Descriptor layout: [0] = current ASID (used as the VPID), [1] = linear address. */
    uint64_t au64Desc[2];
    au64Desc[0] = pVM->hwaccm.s.uCurrentASID;
    au64Desc[1] = GCPtr;

    int rcInv = VMXR0InvVPID(enmFlush, &au64Desc[0]);
    AssertRC(rcInv);
}
#endif /* HWACCM_VTX_WITH_VPID */
3099
3100/**
3101 * Invalidates a guest page
3102 *
3103 * @returns VBox status code.
3104 * @param pVM The VM to operate on.
3105 * @param GCVirt Page to invalidate
3106 */
3107VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, RTGCPTR GCVirt)
3108{
3109 bool fFlushPending = pVM->hwaccm.s.fForceTLBFlush;
3110
3111 /* Only relevant if we want to use VPID.
3112 * In the nested paging case we still see such calls, but
3113 * can safely ignore them. (e.g. after cr3 updates)
3114 */
3115#ifdef HWACCM_VTX_WITH_VPID
3116 /* Skip it if a TLB flush is already pending. */
3117 if ( !fFlushPending
3118 && pVM->hwaccm.s.vmx.fVPID)
3119 vmxR0FlushVPID(pVM, pVM->hwaccm.s.vmx.enmFlushPage, GCVirt);
3120#endif /* HWACCM_VTX_WITH_VPID */
3121
3122 return VINF_SUCCESS;
3123}
3124
3125/**
3126 * Invalidates a guest page by physical address
3127 *
3128 * NOTE: Assumes the current instruction references this physical page though a virtual address!!
3129 *
3130 * @returns VBox status code.
3131 * @param pVM The VM to operate on.
3132 * @param GCPhys Page to invalidate
3133 */
3134VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, RTGCPHYS GCPhys)
3135{
3136 bool fFlushPending = pVM->hwaccm.s.fForceTLBFlush;
3137
3138 Assert(pVM->hwaccm.s.fNestedPaging);
3139
3140 /* Skip it if a TLB flush is already pending. */
3141 if (!fFlushPending)
3142 vmxR0FlushEPT(pVM, pVM->hwaccm.s.vmx.enmFlushPage, GCPhys);
3143
3144 return VINF_SUCCESS;
3145}
3146
#ifdef VBOX_STRICT
/**
 * Report world switch error and dump some useful debug info.
 *
 * Strict builds only. For a failed VM start/resume this logs the exit reason
 * and VM-instruction error, a selection of VMCS control and host-state fields,
 * and the GDT descriptor of every host selector register whose value is below
 * the GDT limit. On 64-bit hosts a few MSRs are logged as well.
 *
 * @param   pVM         The VM to operate on (not referenced by this function).
 * @param   rc          Return code of the failed world switch.
 * @param   pCtx        Current CPU context (not updated; only rip is logged).
 */
static void VMXR0ReportWorldSwitchError(PVM pVM, int rc, PCPUMCTX pCtx)
{
    switch (rc)
    {
        case VERR_VMX_INVALID_VMXON_PTR:
            AssertFailed();
            break;

        case VERR_VMX_UNABLE_TO_START_VM:
        case VERR_VMX_UNABLE_TO_RESUME_VM:
        {
            /* Named rc2 so that it does not shadow the 'rc' parameter being switched on. */
            int         rc2;
            RTCCUINTREG exitReason, instrError, val;

            rc2  = VMXReadVMCS(VMX_VMCS_RO_EXIT_REASON, &exitReason);
            rc2 |= VMXReadVMCS(VMX_VMCS_RO_VM_INSTR_ERROR, &instrError);
            AssertRC(rc2);
            if (rc2 == VINF_SUCCESS)
            {
                RTGDTR     gdtr;
                PX86DESCHC pDesc;

                ASMGetGDTR(&gdtr);

                Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
                /* The address of a local serves as an approximation of the current stack pointer. */
                Log(("Current stack %08x\n", &rc2));


                VMXReadVMCS(VMX_VMCS_GUEST_RIP, &val);
                Log(("Old eip %VGv new %VGv\n", pCtx->rip, (RTGCPTR)val));
                VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
                Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
                VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
                Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
                VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
                Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
                VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
                Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
                Log(("VMX_VMCS_HOST_CR0 %08x\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
                Log(("VMX_VMCS_HOST_CR3 %VHp\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
                Log(("VMX_VMCS_HOST_CR4 %08x\n", val));

                /*
                 * Host selector registers: log the selector and, when it lies
                 * below the GDT limit, dump the descriptor it refers to.
                 *
                 * The CS descriptor is dumped *before* GUEST_RFLAGS is read into
                 * 'val'; otherwise the RFLAGS value would be used as the selector.
                 */
                VMXReadVMCS(VMX_VMCS_HOST_FIELD_CS, &val);
                Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
                if (val < gdtr.cbGdt)
                {
                    pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
                    HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
                }

                VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
                Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_FIELD_DS, &val);
                Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
                if (val < gdtr.cbGdt)
                {
                    pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
                    HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
                }

                VMXReadVMCS(VMX_VMCS_HOST_FIELD_ES, &val);
                Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
                if (val < gdtr.cbGdt)
                {
                    pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
                    HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
                }

                VMXReadVMCS(VMX_VMCS_HOST_FIELD_FS, &val);
                Log(("VMX_VMCS_HOST_FIELD_FS %08x\n", val));
                if (val < gdtr.cbGdt)
                {
                    pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
                    HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
                }

                VMXReadVMCS(VMX_VMCS_HOST_FIELD_GS, &val);
                Log(("VMX_VMCS_HOST_FIELD_GS %08x\n", val));
                if (val < gdtr.cbGdt)
                {
                    pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
                    HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
                }

                VMXReadVMCS(VMX_VMCS_HOST_FIELD_SS, &val);
                Log(("VMX_VMCS_HOST_FIELD_SS %08x\n", val));
                if (val < gdtr.cbGdt)
                {
                    pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
                    HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
                }

                VMXReadVMCS(VMX_VMCS_HOST_FIELD_TR, &val);
                Log(("VMX_VMCS_HOST_FIELD_TR %08x\n", val));
                if (val < gdtr.cbGdt)
                {
                    pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
                    HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
                }

                /* Remaining host-state fields. */
                VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
                Log(("VMX_VMCS_HOST_TR_BASE %VHv\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
                Log(("VMX_VMCS_HOST_GDTR_BASE %VHv\n", val));
                VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
                Log(("VMX_VMCS_HOST_IDTR_BASE %VHv\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_CS, &val);
                Log(("VMX_VMCS_HOST_SYSENTER_CS  %08x\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
                Log(("VMX_VMCS_HOST_SYSENTER_EIP %VHv\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
                Log(("VMX_VMCS_HOST_SYSENTER_ESP %VHv\n", val));

                VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
                Log(("VMX_VMCS_HOST_RSP %VHv\n", val));
                VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
                Log(("VMX_VMCS_HOST_RIP %VHv\n", val));

#if HC_ARCH_BITS == 64
                /* MSRs read directly from the CPU on 64-bit hosts. */
                Log(("MSR_K6_EFER    = %VX64\n", ASMRdMsr(MSR_K6_EFER)));
                Log(("MSR_K6_STAR    = %VX64\n", ASMRdMsr(MSR_K6_STAR)));
                Log(("MSR_K8_LSTAR   = %VX64\n", ASMRdMsr(MSR_K8_LSTAR)));
                Log(("MSR_K8_CSTAR   = %VX64\n", ASMRdMsr(MSR_K8_CSTAR)));
                Log(("MSR_K8_SF_MASK = %VX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
#endif
            }
            break;
        }

        default:
            /* impossible */
            AssertFailed();
            break;
    }
}
#endif /* VBOX_STRICT */
Note: See TracBrowser for help on using the repository browser.

© 2025 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette