VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@ 29941

Last change on this file since 29941 was 29737, checked in by vboxsync, 15 years ago

Fixed dropping back to the recompiler too often when running unrestricted guest code that switches mode very often.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 194.1 KB
1/* $Id: HWVMXR0.cpp 29737 2010-05-21 14:07:52Z vboxsync $ */
2/** @file
3 * HWACCM VMX - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*******************************************************************************
20* Header Files *
21*******************************************************************************/
22#define LOG_GROUP LOG_GROUP_HWACCM
23#include <VBox/hwaccm.h>
24#include <VBox/pgm.h>
25#include <VBox/dbgf.h>
26#include <VBox/selm.h>
27#include <VBox/iom.h>
28#include <VBox/rem.h>
29#include <VBox/tm.h>
30#include "HWACCMInternal.h"
31#include <VBox/vm.h>
32#include <VBox/x86.h>
33#include <VBox/pdmapi.h>
34#include <VBox/err.h>
35#include <VBox/log.h>
36#include <iprt/asm-amd64-x86.h>
37#include <iprt/assert.h>
38#include <iprt/param.h>
39#include <iprt/string.h>
40#include <iprt/time.h>
41#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
42# include <iprt/thread.h>
43#endif
44#include "HWVMXR0.h"
45
46/*******************************************************************************
47* Defined Constants And Macros *
48*******************************************************************************/
49#if defined(RT_ARCH_AMD64)
50# define VMX_IS_64BIT_HOST_MODE() (true)
51#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
52# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
53#else
54# define VMX_IS_64BIT_HOST_MODE() (false)
55#endif
56
57/*******************************************************************************
58* Global Variables *
59*******************************************************************************/
60/* IO operation lookup arrays. */
61static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
62static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
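/* Illustrative note (not part of the original source): these tables are presumably
 * indexed by the access-size field of an I/O-instruction exit qualification, used
 * later in this file. E.g. a 16-bit OUT has size field 1, so g_aIOSize[1] = 2 bytes
 * and the value is masked with g_aIOOpAnd[1] = 0xffff; index 2 is not a valid size
 * encoding, hence the zero entries. */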
63
64#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
65/** See HWACCMR0A.asm. */
66extern "C" uint32_t g_fVMXIs64bitHost;
67#endif
68
69/*******************************************************************************
70* Local Functions *
71*******************************************************************************/
72static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx);
73static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu);
74static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu);
75static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu);
76static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys);
77static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr);
78static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
79#ifdef VBOX_STRICT
80static bool vmxR0IsValidReadField(uint32_t idxField);
81static bool vmxR0IsValidWriteField(uint32_t idxField);
82#endif
83static void vmxR0SetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
84
85static void VMXR0CheckError(PVM pVM, PVMCPU pVCpu, int rc)
86{
87 if (rc == VERR_VMX_GENERIC)
88 {
89 RTCCUINTREG instrError;
90
91 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
92 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
93 }
94 pVM->hwaccm.s.lLastError = rc;
95}
96
97/**
98 * Sets up and activates VT-x on the current CPU
99 *
100 * @returns VBox status code.
101 * @param pCpu CPU info struct
102 * @param pVM The VM to operate on. (can be NULL after a resume!!)
103 * @param pvPageCpu Pointer to the global cpu page
104 * @param pPageCpuPhys Physical address of the global cpu page
105 */
106VMMR0DECL(int) VMXR0EnableCpu(PHWACCM_CPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
107{
108 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
109 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
110
111 if (pVM)
112 {
113 /* Set revision dword at the beginning of the VMXON structure. */
114 *(uint32_t *)pvPageCpu = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
115 }
116
117 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
118 * (which can have very bad consequences!!!)
119 */
120
121 if (ASMGetCR4() & X86_CR4_VMXE)
122 return VERR_VMX_IN_VMX_ROOT_MODE;
123
124 /* Make sure the VMX instructions don't cause #UD faults. */
125 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
126
127 /* Enter VMX Root Mode */
128 int rc = VMXEnable(pPageCpuPhys);
129 if (RT_FAILURE(rc))
130 {
131 if (pVM)
132 VMXR0CheckError(pVM, &pVM->aCpus[0], rc);
133 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
134 return VERR_VMX_VMXON_FAILED;
135 }
136 return VINF_SUCCESS;
137}
138
139/**
140 * Deactivates VT-x on the current CPU
141 *
142 * @returns VBox status code.
143 * @param pCpu CPU info struct
144 * @param pvPageCpu Pointer to the global cpu page
145 * @param pPageCpuPhys Physical address of the global cpu page
146 */
147VMMR0DECL(int) VMXR0DisableCpu(PHWACCM_CPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
148{
149 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
150 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
151
152 /* Leave VMX Root Mode. */
153 VMXDisable();
154
155 /* And clear the X86_CR4_VMXE bit */
156 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
157 return VINF_SUCCESS;
158}
159
160/**
161 * Does Ring-0 per VM VT-x init.
162 *
163 * @returns VBox status code.
164 * @param pVM The VM to operate on.
165 */
166VMMR0DECL(int) VMXR0InitVM(PVM pVM)
167{
168 int rc;
169
170#ifdef LOG_ENABLED
171 SUPR0Printf("VMXR0InitVM %x\n", pVM);
172#endif
173
174 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
175
176 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
177 {
178 /* Allocate one page for the APIC-access page (used to filter APIC accesses). */
179 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
180 AssertRC(rc);
181 if (RT_FAILURE(rc))
182 return rc;
183
184 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
185 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
186 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
187 }
188 else
189 {
190 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
191 pVM->hwaccm.s.vmx.pAPIC = 0;
192 pVM->hwaccm.s.vmx.pAPICPhys = 0;
193 }
194
195#ifdef VBOX_WITH_CRASHDUMP_MAGIC
196 {
197 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
198 AssertRC(rc);
199 if (RT_FAILURE(rc))
200 return rc;
201
202 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
203 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
204
205 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
206 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
207 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
208 }
209#endif
210
211 /* Allocate VMCSs for all guest CPUs. */
212 for (VMCPUID i = 0; i < pVM->cCpus; i++)
213 {
214 PVMCPU pVCpu = &pVM->aCpus[i];
215
216 pVCpu->hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
217
218 /* Allocate one page for the VM control structure (VMCS). */
219 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjVMCS, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
220 AssertRC(rc);
221 if (RT_FAILURE(rc))
222 return rc;
223
224 pVCpu->hwaccm.s.vmx.pVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjVMCS);
225 pVCpu->hwaccm.s.vmx.pVMCSPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjVMCS, 0);
226 ASMMemZero32(pVCpu->hwaccm.s.vmx.pVMCS, PAGE_SIZE);
227
228 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
229 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
230
231 /* Allocate one page for the virtual APIC page for TPR caching. */
232 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjVAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
233 AssertRC(rc);
234 if (RT_FAILURE(rc))
235 return rc;
236
237 pVCpu->hwaccm.s.vmx.pVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjVAPIC);
238 pVCpu->hwaccm.s.vmx.pVAPICPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjVAPIC, 0);
239 ASMMemZero32(pVCpu->hwaccm.s.vmx.pVAPIC, PAGE_SIZE);
240
241 /* Allocate the MSR bitmap if this feature is supported. */
242 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
243 {
244 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
245 AssertRC(rc);
246 if (RT_FAILURE(rc))
247 return rc;
248
249 pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap);
250 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
251 memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
252 }
253
254#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
255 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
256 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
257 AssertRC(rc);
258 if (RT_FAILURE(rc))
259 return rc;
260
261 pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR);
262 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0);
263 memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE);
264
265 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
266 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
267 AssertRC(rc);
268 if (RT_FAILURE(rc))
269 return rc;
270
271 pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR);
272 pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0);
273 memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE);
274#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
275
276 /* Current guest paging mode. */
277 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
278
279#ifdef LOG_ENABLED
280 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.pVMCSPhys);
281#endif
282 }
283
284 return VINF_SUCCESS;
285}
286
287/**
288 * Does Ring-0 per VM VT-x termination.
289 *
290 * @returns VBox status code.
291 * @param pVM The VM to operate on.
292 */
293VMMR0DECL(int) VMXR0TermVM(PVM pVM)
294{
295 for (VMCPUID i = 0; i < pVM->cCpus; i++)
296 {
297 PVMCPU pVCpu = &pVM->aCpus[i];
298
299 if (pVCpu->hwaccm.s.vmx.pMemObjVMCS != NIL_RTR0MEMOBJ)
300 {
301 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjVMCS, false);
302 pVCpu->hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
303 pVCpu->hwaccm.s.vmx.pVMCS = 0;
304 pVCpu->hwaccm.s.vmx.pVMCSPhys = 0;
305 }
306 if (pVCpu->hwaccm.s.vmx.pMemObjVAPIC != NIL_RTR0MEMOBJ)
307 {
308 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjVAPIC, false);
309 pVCpu->hwaccm.s.vmx.pMemObjVAPIC = NIL_RTR0MEMOBJ;
310 pVCpu->hwaccm.s.vmx.pVAPIC = 0;
311 pVCpu->hwaccm.s.vmx.pVAPICPhys = 0;
312 }
313 if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
314 {
315 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false);
316 pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
317 pVCpu->hwaccm.s.vmx.pMSRBitmap = 0;
318 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0;
319 }
320#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
321 if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ)
322 {
323 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false);
324 pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ;
325 pVCpu->hwaccm.s.vmx.pHostMSR = 0;
326 pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0;
327 }
328 if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ)
329 {
330 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false);
331 pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ;
332 pVCpu->hwaccm.s.vmx.pGuestMSR = 0;
333 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0;
334 }
335#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
336 }
337 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
338 {
339 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
340 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
341 pVM->hwaccm.s.vmx.pAPIC = 0;
342 pVM->hwaccm.s.vmx.pAPICPhys = 0;
343 }
344#ifdef VBOX_WITH_CRASHDUMP_MAGIC
345 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
346 {
347 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
348 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
349 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
350 pVM->hwaccm.s.vmx.pScratch = 0;
351 pVM->hwaccm.s.vmx.pScratchPhys = 0;
352 }
353#endif
354 return VINF_SUCCESS;
355}
356
357/**
358 * Sets up VT-x for the specified VM
359 *
360 * @returns VBox status code.
361 * @param pVM The VM to operate on.
362 */
363VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
364{
365 int rc = VINF_SUCCESS;
366 uint32_t val;
367
368 AssertReturn(pVM, VERR_INVALID_PARAMETER);
369
370 for (VMCPUID i = 0; i < pVM->cCpus; i++)
371 {
372 PVMCPU pVCpu = &pVM->aCpus[i];
373
374 Assert(pVCpu->hwaccm.s.vmx.pVMCS);
375
376 /* Set revision dword at the beginning of the VMCS structure. */
377 *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
378
379 /* Clear VM Control Structure. */
380 Log(("pVMCSPhys = %RHp\n", pVCpu->hwaccm.s.vmx.pVMCSPhys));
381 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
382 if (RT_FAILURE(rc))
383 goto vmx_end;
384
385 /* Activate the VM Control Structure. */
386 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
387 if (RT_FAILURE(rc))
388 goto vmx_end;
389
390 /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
391 * Set required bits to one and zero according to the MSR capabilities.
392 */
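 /* Sketch of the constraint applied here (illustrative, not original source text):
  * for each VMX control field the capability MSR yields a must-be-one mask
  * (n.disallowed0) and a may-be-one mask (n.allowed1), so the value programmed is
  * effectively (disallowed0 | desired_bits) & allowed1 -- which is what the three
  * statements below compute. */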
393 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
394 /* External and non-maskable interrupts cause VM-exits. */
395 val = val | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
396 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
397
398 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
399 AssertRC(rc);
400
401 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
402 * Set required bits to one and zero according to the MSR capabilities.
403 */
404 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
405 /* Program which events cause VM-exits and which features we want to use. */
406 val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
407 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
408 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
409 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
410 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
411 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
412 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
413
414 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
415 if (!pVM->hwaccm.s.fNestedPaging)
416 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
417 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
418 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
419
420 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error. (combined with some other exit reasons) */
421 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
422 {
423 /* CR8 reads come from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold. */
424 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
425 Assert(pVM->hwaccm.s.vmx.pAPIC);
426 }
427 else
428 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
429 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
430
431 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
432 {
433 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
434 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
435 }
436
437 /* We will use the secondary control if it's present. */
438 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
439
440 /* Mask away the bits that the CPU doesn't support */
441 /** @todo make sure they don't conflict with the above requirements. */
442 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
443 pVCpu->hwaccm.s.vmx.proc_ctls = val;
444
445 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
446 AssertRC(rc);
447
448 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
449 {
450 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
451 * Set required bits to one and zero according to the MSR capabilities.
452 */
453 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
454 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
455
456#ifdef HWACCM_VTX_WITH_EPT
457 if (pVM->hwaccm.s.fNestedPaging)
458 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
459#endif /* HWACCM_VTX_WITH_EPT */
460#ifdef HWACCM_VTX_WITH_VPID
461 else
462 if (pVM->hwaccm.s.vmx.fVPID)
463 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
464#endif /* HWACCM_VTX_WITH_VPID */
465
466 if (pVM->hwaccm.s.fHasIoApic)
467 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
468
469 if (pVM->hwaccm.s.vmx.fUnrestrictedGuest)
470 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
471
472 /* Mask away the bits that the CPU doesn't support */
473 /** @todo make sure they don't conflict with the above requirements. */
474 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
475 pVCpu->hwaccm.s.vmx.proc_ctls2 = val;
476 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
477 AssertRC(rc);
478 }
479
480 /* VMX_VMCS_CTRL_CR3_TARGET_COUNT
481 * Set required bits to one and zero according to the MSR capabilities.
482 */
483 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
484 AssertRC(rc);
485
486 /* Forward all exceptions except #NM & #PF to the guest.
487 * We always need to check pagefaults since our shadow page table can be out of sync.
488 * And we always lazily sync the FPU & XMM state.
489 */
490
491 /** @todo Possible optimization:
492 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
493 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
494 * registers ourselves of course.
495 *
496 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
497 */
498
499 /* Don't filter page faults; all of them should cause a switch. */
500 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
501 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
502 AssertRC(rc);
503
504 /* Init TSC offset to zero. */
505 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
506 AssertRC(rc);
507
508 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
509 AssertRC(rc);
510
511 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
512 AssertRC(rc);
513
514 /* Set the MSR bitmap address. */
515 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
516 {
517 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
518
519 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
520 AssertRC(rc);
521
522 /* Allow the guest to directly modify these MSRs; they are restored and saved automatically. */
523 vmxR0SetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
524 vmxR0SetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
525 vmxR0SetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
526 vmxR0SetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
527 vmxR0SetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
528 vmxR0SetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
529 vmxR0SetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
530 vmxR0SetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
531 vmxR0SetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
532 }
533
534#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
535 /* Set the guest & host MSR load/store physical addresses. */
536 Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
537 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
538 AssertRC(rc);
539 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
540 AssertRC(rc);
541
542 Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys);
543 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys);
544 AssertRC(rc);
545#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
546
547 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
548 AssertRC(rc);
549
550 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
551 AssertRC(rc);
552
553 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
554 {
555 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
556 /* Optional */
557 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
558 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.pVAPICPhys);
559
560 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
561 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
562
563 AssertRC(rc);
564 }
565
566 /* Set link pointer to -1. Not currently used. */
567 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
568 AssertRC(rc);
569
570 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
571 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
572 AssertRC(rc);
573
574 /* Configure the VMCS read cache. */
575 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
576
577 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
578 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
579 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
580 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
581 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
582 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
583 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
584 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
585 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
586 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
587 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
588 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
589 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
590 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
591 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
592 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
593
594 VMX_SETUP_SELREG(ES, pCache);
595 VMX_SETUP_SELREG(SS, pCache);
596 VMX_SETUP_SELREG(CS, pCache);
597 VMX_SETUP_SELREG(DS, pCache);
598 VMX_SETUP_SELREG(FS, pCache);
599 VMX_SETUP_SELREG(GS, pCache);
600 VMX_SETUP_SELREG(LDTR, pCache);
601 VMX_SETUP_SELREG(TR, pCache);
602
603 /* Status code VMCS reads. */
604 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
605 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
606 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
607 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
608 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
609 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
610 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
611 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
612 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
613
614 if (pVM->hwaccm.s.fNestedPaging)
615 {
616 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
617 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
618 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
619 }
620 else
621 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
622 } /* for each VMCPU */
623
624 /* Choose the right TLB setup function. */
625 if (pVM->hwaccm.s.fNestedPaging)
626 {
627 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBEPT;
628
629 /* Default values for flushing. */
630 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
631 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
632
633 /* If the capabilities specify we can do more, then make use of it. */
634 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_INDIV)
635 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
636 else
637 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
638 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
639
640 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
641 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
642 }
643#ifdef HWACCM_VTX_WITH_VPID
644 else
645 if (pVM->hwaccm.s.vmx.fVPID)
646 {
647 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBVPID;
648
649 /* Default values for flushing. */
650 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
651 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
652
653 /* If the capabilities specify we can do more, then make use of it. */
654 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV)
655 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
656 else
657 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
658 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
659
660 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
661 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
662 }
663#endif /* HWACCM_VTX_WITH_VPID */
664 else
665 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBDummy;
666
667vmx_end:
668 VMXR0CheckError(pVM, &pVM->aCpus[0], rc);
669 return rc;
670}
671
672/**
673 * Sets the permission bits for the specified MSR
674 *
675 * @param pVCpu The VMCPU to operate on.
676 * @param ulMSR MSR value
677 * @param fRead Reading allowed/disallowed
678 * @param fWrite Writing allowed/disallowed
679 */
680static void vmxR0SetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
681{
682 unsigned ulBit;
683 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap;
684
685 /* Layout:
686 * 0x000 - 0x3ff - Low MSR read bits
687 * 0x400 - 0x7ff - High MSR read bits
688 * 0x800 - 0xbff - Low MSR write bits
689 * 0xc00 - 0xfff - High MSR write bits
690 */
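 /* Worked example (illustrative only, not part of the original source): for
  * MSR_K8_LSTAR (0xC0000082) the code below selects ulBit = 0x82 and advances
  * pMSRBitmap by 0x400, so the read-permission bit lives at offset 0x400 + bit 0x82
  * and the write-permission bit at 0x400 + 0x800 = 0xC00 + bit 0x82, matching the
  * layout above. A cleared bit means the access does not cause a VM-exit. */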
691 if (ulMSR <= 0x00001FFF)
692 {
693 /* Pentium-compatible MSRs */
694 ulBit = ulMSR;
695 }
696 else
697 if ( ulMSR >= 0xC0000000
698 && ulMSR <= 0xC0001FFF)
699 {
700 /* AMD Sixth Generation x86 Processor MSRs */
701 ulBit = (ulMSR - 0xC0000000);
702 pMSRBitmap += 0x400;
703 }
704 else
705 {
706 AssertFailed();
707 return;
708 }
709
710 Assert(ulBit <= 0x1fff);
711 if (fRead)
712 ASMBitClear(pMSRBitmap, ulBit);
713 else
714 ASMBitSet(pMSRBitmap, ulBit);
715
716 if (fWrite)
717 ASMBitClear(pMSRBitmap + 0x800, ulBit);
718 else
719 ASMBitSet(pMSRBitmap + 0x800, ulBit);
720}
721
722
723/**
724 * Injects an event (trap or external interrupt)
725 *
726 * @returns VBox status code.
727 * @param pVM The VM to operate on.
728 * @param pVCpu The VMCPU to operate on.
729 * @param pCtx CPU Context
730 * @param intInfo VMX interrupt info
731 * @param cbInstr Opcode length of faulting instruction
732 * @param errCode Error code (optional)
733 */
734static int VMXR0InjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
735{
736 int rc;
737 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
738
739#ifdef VBOX_WITH_STATISTICS
740 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
741#endif
742
743#ifdef VBOX_STRICT
744 if (iGate == 0xE)
745 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
746 else
747 if (iGate < 0x20)
748 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode));
749 else
750 {
751 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
752 Assert(VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
753 Assert(VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW || pCtx->eflags.u32 & X86_EFL_IF);
754 }
755#endif
756
757 if ( CPUMIsGuestInRealModeEx(pCtx)
758 && pVM->hwaccm.s.vmx.pRealModeTSS)
759 {
760 RTGCPHYS GCPhysHandler;
761 uint16_t offset, ip;
762 RTSEL sel;
763
764 /* Injecting events doesn't work right with real mode emulation.
765 * (#GP if we try to inject external hardware interrupts)
766 * Inject the interrupt or trap directly instead.
767 *
768 * ASSUMES no access handlers for the bits we read or write below (should be safe).
769 */
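 /* Illustrative note (layout recap, not original source text): in real mode the IVT
  * entry for vector n is the 4-byte pair at linear address idtr.pIdt + n * 4 --
  * offset in the low word, segment selector in the high word -- which is why the
  * code below reads 2 bytes at GCPhysHandler and 2 more at GCPhysHandler + 2, and
  * why the presence check requires iGate * 4 + 3 to fit within cbIdt. */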
770 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
771
772 /* Check if the interrupt handler is present. */
773 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
774 {
775 Log(("IDT cbIdt violation\n"));
776 if (iGate != X86_XCPT_DF)
777 {
778 uint32_t intInfo2;
779
780 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
781 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
782 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
783 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
784
785 return VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
786 }
787 Log(("Triple fault -> reset the VM!\n"));
788 return VINF_EM_RESET;
789 }
790 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
791 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
792 || iGate == 4)
793 {
794 ip = pCtx->ip + cbInstr;
795 }
796 else
797 ip = pCtx->ip;
798
799 /* Read the selector:offset pair of the interrupt handler. */
800 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
801 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
802 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
803
804 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
805
806 /* Construct the stack frame. */
807 /** @todo should check stack limit. */
808 pCtx->sp -= 2;
809 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
810 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
811 pCtx->sp -= 2;
812 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
813 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
814 pCtx->sp -= 2;
815 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
816 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
817
818 /* Update the CPU state for executing the handler. */
819 pCtx->rip = offset;
820 pCtx->cs = sel;
821 pCtx->csHid.u64Base = sel << 4;
822 pCtx->eflags.u &= ~(X86_EFL_IF|X86_EFL_TF|X86_EFL_RF|X86_EFL_AC);
823
824 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
825 return VINF_SUCCESS;
826 }
827
828 /* Set event injection state. */
829 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
830
831 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
832 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
833
834 AssertRC(rc);
835 return rc;
836}
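/* Illustrative note (not part of the original source): the VM-entry interruption
 * information field written above packs the vector into bits 7:0, the event type
 * into bits 10:8, the "deliver error code" flag into bit 11 and the "valid" flag
 * into bit 31 -- which is what the VMX_EXIT_INTERRUPTION_INFO_* shift macros used
 * throughout this file encode. */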
837
838
839/**
840 * Checks for pending guest interrupts and injects them
841 *
842 * @returns VBox status code.
843 * @param pVM The VM to operate on.
844 * @param pVCpu The VMCPU to operate on.
845 * @param pCtx CPU Context
846 */
847static int VMXR0CheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
848{
849 int rc;
850
851 /* Dispatch any pending interrupts. (injected before, but a VM exit occurred prematurely) */
852 if (pVCpu->hwaccm.s.Event.fPending)
853 {
854 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
855 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
856 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
857 AssertRC(rc);
858
859 pVCpu->hwaccm.s.Event.fPending = false;
860 return VINF_SUCCESS;
861 }
862
863 /* If an active trap is already pending, then we must forward it first! */
864 if (!TRPMHasTrap(pVCpu))
865 {
866 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
867 {
868 RTGCUINTPTR intInfo;
869
870 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
871
872 intInfo = X86_XCPT_NMI;
873 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
874 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
875
876 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
877 AssertRC(rc);
878
879 return VINF_SUCCESS;
880 }
881
882 /** @todo SMI interrupts. */
883
884 /* When external interrupts are pending, we should exit the VM when IF is set. */
885 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
886 {
887 if (!(pCtx->eflags.u32 & X86_EFL_IF))
888 {
889 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
890 {
891 LogFlow(("Enable irq window exit!\n"));
892 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
893 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
894 AssertRC(rc);
895 }
896 /* else nothing to do but wait */
897 }
898 else
899 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
900 {
901 uint8_t u8Interrupt;
902
903 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
904 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu, u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
905 if (RT_SUCCESS(rc))
906 {
907 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
908 AssertRC(rc);
909 }
910 else
911 {
912 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
913 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
914 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
915 /* Just continue */
916 }
917 }
918 else
919 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
920 }
921 }
922
923#ifdef VBOX_STRICT
924 if (TRPMHasTrap(pVCpu))
925 {
926 uint8_t u8Vector;
927 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
928 AssertRC(rc);
929 }
930#endif
931
932 if ( (pCtx->eflags.u32 & X86_EFL_IF)
933 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
934 && TRPMHasTrap(pVCpu)
935 )
936 {
937 uint8_t u8Vector;
938 TRPMEVENT enmType;
939 RTGCUINTPTR intInfo;
940 RTGCUINT errCode;
941
942 /* If a new event is pending, then dispatch it now. */
943 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
944 AssertRC(rc);
945 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
946 Assert(enmType != TRPM_SOFTWARE_INT);
947
948 /* Clear the pending trap. */
949 rc = TRPMResetTrap(pVCpu);
950 AssertRC(rc);
951
952 intInfo = u8Vector;
953 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
954
955 if (enmType == TRPM_TRAP)
956 {
957 switch (u8Vector) {
958 case 8:
959 case 10:
960 case 11:
961 case 12:
962 case 13:
963 case 14:
964 case 17:
965 /* Valid error codes. */
966 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
967 break;
968 default:
969 break;
970 }
971 if (u8Vector == X86_XCPT_BP || u8Vector == X86_XCPT_OF)
972 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
973 else
974 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
975 }
976 else
977 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
978
979 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
980 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
981 AssertRC(rc);
982 } /* if (interrupts can be dispatched) */
983
984 return VINF_SUCCESS;
985}
986
987/**
988 * Save the host state
989 *
990 * @returns VBox status code.
991 * @param pVM The VM to operate on.
992 * @param pVCpu The VMCPU to operate on.
993 */
994VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
995{
996 int rc = VINF_SUCCESS;
997
998 /*
999 * Host CPU Context
1000 */
1001 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
1002 {
1003 RTIDTR idtr;
1004 RTGDTR gdtr;
1005 RTSEL SelTR;
1006 PCX86DESCHC pDesc;
1007 uintptr_t trBase;
1008 RTSEL cs;
1009 RTSEL ss;
1010 uint64_t cr3;
1011
1012 /* Control registers */
1013 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1014#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1015 if (VMX_IS_64BIT_HOST_MODE())
1016 {
1017 cr3 = hwaccmR0Get64bitCR3();
1018 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1019 }
1020 else
1021#endif
1022 {
1023 cr3 = ASMGetCR3();
1024 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1025 }
1026 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1027 AssertRC(rc);
1028 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1029 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1030 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1031
1032 /* Selector registers. */
1033#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1034 if (VMX_IS_64BIT_HOST_MODE())
1035 {
1036 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1037 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1038 }
1039 else
1040 {
1041 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1042 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1043 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1044 }
1045#else
1046 cs = ASMGetCS();
1047 ss = ASMGetSS();
1048#endif
1049 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1050 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1051 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1052 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1053 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1054 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1055#if HC_ARCH_BITS == 32
1056 if (!VMX_IS_64BIT_HOST_MODE())
1057 {
1058 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1059 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1060 }
1061#endif
1062 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1063 SelTR = ASMGetTR();
1064 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1065 AssertRC(rc);
1066 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
1067 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1068 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1069 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1070 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1071 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1072 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1073
1074 /* GDTR & IDTR */
1075#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1076 if (VMX_IS_64BIT_HOST_MODE())
1077 {
1078 X86XDTR64 gdtr64, idtr64;
1079 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
1080 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1081 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1082 AssertRC(rc);
1083 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1084 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1085 gdtr.cbGdt = gdtr64.cb;
1086 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1087 }
1088 else
1089#endif
1090 {
1091 ASMGetGDTR(&gdtr);
1092 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1093 ASMGetIDTR(&idtr);
1094 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1095 AssertRC(rc);
1096 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1097 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1098 }
1099
1100 /* Save the base address of the TR selector. */
1101 if (SelTR > gdtr.cbGdt)
1102 {
1103 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1104 return VERR_VMX_INVALID_HOST_STATE;
1105 }
1106
1107 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1108#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1109 if (VMX_IS_64BIT_HOST_MODE())
1110 {
1111 uint64_t trBase64 = X86DESC64_BASE(*(PX86DESC64)pDesc);
1112 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1113 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1114 AssertRC(rc);
1115 }
1116 else
1117#endif
1118 {
1119#if HC_ARCH_BITS == 64
1120 trBase = X86DESC64_BASE(*pDesc);
1121#else
1122 trBase = X86DESC_BASE(*pDesc);
1123#endif
1124 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1125 AssertRC(rc);
1126 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1127 }
1128
1129 /* FS and GS base. */
1130#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1131 if (VMX_IS_64BIT_HOST_MODE())
1132 {
1133 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1134 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1135 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1136 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1137 }
1138#endif
1139 AssertRC(rc);
1140
1141 /* Sysenter MSRs. */
1142 /** @todo expensive!! */
1143 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1144 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1145#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1146 if (VMX_IS_64BIT_HOST_MODE())
1147 {
1148 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1149 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1150 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1151 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1152 }
1153 else
1154 {
1155 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1156 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1157 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1158 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1159 }
1160#elif HC_ARCH_BITS == 32
1161 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1162 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1163 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1164 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1165#else
1166 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1167 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1168 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1169 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1170#endif
1171 AssertRC(rc);
1172
1173#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1174 /* Store all host MSRs in the VM-Exit load area, so they will be reloaded after the world switch back to the host. */
1175 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR;
1176 unsigned idxMsr = 0;
1177
1178 /* EFER MSR present? */
1179 if (ASMCpuId_EDX(0x80000001) & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1180 {
1181 if (ASMCpuId_EDX(0x80000001) & X86_CPUID_AMD_FEATURE_EDX_SEP)
1182 {
1183 pMsr->u32IndexMSR = MSR_K6_STAR;
1184 pMsr->u32Reserved = 0;
1185 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1186 pMsr++; idxMsr++;
1187 }
1188
1189 pMsr->u32IndexMSR = MSR_K6_EFER;
1190 pMsr->u32Reserved = 0;
1191# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1192 if (CPUMIsGuestInLongMode(pVCpu))
1193 {
1194 /* Must match the efer value in our 64 bits switcher. */
1195 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1196 }
1197 else
1198# endif
1199 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1200 pMsr++; idxMsr++;
1201 }
1202
1203# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1204 if (VMX_IS_64BIT_HOST_MODE())
1205 {
1206 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1207 pMsr->u32Reserved = 0;
1208 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1209 pMsr++; idxMsr++;
1210 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1211 pMsr->u32Reserved = 0;
1212 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1213 pMsr++; idxMsr++;
1214 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1215 pMsr->u32Reserved = 0;
1216 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1217 pMsr++; idxMsr++;
1218 }
1219# endif
1220 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1221 AssertRC(rc);
1222#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1223
1224 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1225 }
1226 return rc;
1227}
1228
1229/**
1230 * Prefetch the 4 PDPT pointers (PAE and nested paging only)
1231 *
1232 * @param pVM The VM to operate on.
1233 * @param pVCpu The VMCPU to operate on.
1234 * @param pCtx Guest context
1235 */
1236static void vmxR0PrefetchPAEPdptrs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1237{
1238 if (CPUMIsGuestInPAEModeEx(pCtx))
1239 {
1240 X86PDPE Pdpe;
1241
1242 for (unsigned i=0;i<4;i++)
1243 {
1244 Pdpe = PGMGstGetPaePDPtr(pVCpu, i);
1245 int rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL + i*2, Pdpe.u);
1246 AssertRC(rc);
1247 }
1248 }
1249}
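/* Note (illustrative, not part of the original source): the PDPTR VMCS fields are
 * 64-bit fields, and 64-bit VMCS field encodings come in FULL/HIGH pairs, so the
 * encodings of PDPTR0..PDPTR3 are two apart -- hence the "+ i*2" above when writing
 * VMX_VMCS_GUEST_PDPTR0_FULL through PDPTR3. */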
1250
1251/**
1252 * Update the exception bitmap according to the current CPU state
1253 *
1254 * @param pVM The VM to operate on.
1255 * @param pVCpu The VMCPU to operate on.
1256 * @param pCtx Guest context
1257 */
1258static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1259{
1260 uint32_t u32TrapMask;
1261 Assert(pCtx);
1262
1263 u32TrapMask = HWACCM_VMX_TRAP_MASK;
1264#ifndef DEBUG
1265 if (pVM->hwaccm.s.fNestedPaging)
1266 u32TrapMask &= ~RT_BIT(X86_XCPT_PF); /* no longer need to intercept #PF. */
1267#endif
1268
1269 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
1270 if ( CPUMIsGuestFPUStateActive(pVCpu) == true
1271 && !(pCtx->cr0 & X86_CR0_NE)
1272 && !pVCpu->hwaccm.s.fFPUOldStyleOverride)
1273 {
1274 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1275 pVCpu->hwaccm.s.fFPUOldStyleOverride = true;
1276 }
1277
1278#ifdef VBOX_STRICT
1279 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1280#endif
1281
1282 /* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
1283 if ( CPUMIsGuestInRealModeEx(pCtx)
1284 && pVM->hwaccm.s.vmx.pRealModeTSS)
1285 u32TrapMask |= HWACCM_VMX_TRAP_MASK_REALMODE;
1286
1287 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1288 AssertRC(rc);
1289}
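/* Illustrative note (not part of the original source): VMX_VMCS_CTRL_EXCEPTION_BITMAP
 * is a 32-bit mask with one bit per exception vector; a set bit makes that exception
 * cause a VM-exit. E.g. with nested paging (non-debug builds) the code above clears
 * RT_BIT(X86_XCPT_PF) (bit 14), so guest page faults are handled entirely by the
 * hardware/EPT path. */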
1290
1291/**
1292 * Loads the guest state
1293 *
1294 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1295 *
1296 * @returns VBox status code.
1297 * @param pVM The VM to operate on.
1298 * @param pVCpu The VMCPU to operate on.
1299 * @param pCtx Guest context
1300 */
1301VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1302{
1303 int rc = VINF_SUCCESS;
1304 RTGCUINTPTR val;
1305 X86EFLAGS eflags;
1306
1307 /* VMX_VMCS_CTRL_ENTRY_CONTROLS
1308 * Set required bits to one and zero according to the MSR capabilities.
1309 */
1310 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1311 /* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1312 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1313 /* 64 bits guest mode? */
1314 if (CPUMIsGuestInLongModeEx(pCtx))
1315 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1316 /* else Must be zero when AMD64 is not available. */
1317
1318 /* Mask away the bits that the CPU doesn't support */
1319 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1320 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1321 AssertRC(rc);
1322
1323 /* VMX_VMCS_CTRL_EXIT_CONTROLS
1324 * Set required bits to one and zero according to the MSR capabilities.
1325 */
1326 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1327
1328 /* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1329 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1330
1331#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1332 if (VMX_IS_64BIT_HOST_MODE())
1333 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1334 /* else: Must be zero when AMD64 is not available. */
1335#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1336 if (CPUMIsGuestInLongModeEx(pCtx))
1337 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1338 else
1339 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1340#endif
1341 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1342 /* Don't acknowledge external interrupts on VM-exit. */
1343 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1344 AssertRC(rc);
1345
1346 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1347 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1348 {
1349 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1350 {
1351 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1352 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1353 {
1354 /* Correct weird requirements for switching to protected mode. */
1355 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1356 && enmGuestMode >= PGMMODE_PROTECTED)
1357 {
1358 /* Flush the recompiler code cache as it's not unlikely
1359 * the guest will rewrite code it will later execute in real
1360 * mode (OpenBSD 4.0 is one such example)
1361 */
1362 REMFlushTBs(pVM);
1363
1364 /* DPL of all hidden selector registers must match the current CPL (0). */
1365 pCtx->csHid.Attr.n.u2Dpl = 0;
1366 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1367
1368 pCtx->dsHid.Attr.n.u2Dpl = 0;
1369 pCtx->esHid.Attr.n.u2Dpl = 0;
1370 pCtx->fsHid.Attr.n.u2Dpl = 0;
1371 pCtx->gsHid.Attr.n.u2Dpl = 0;
1372 pCtx->ssHid.Attr.n.u2Dpl = 0;
1373
1374 /* The limit must correspond to the 32 bits setting. */
1375 if (!pCtx->csHid.Attr.n.u1DefBig)
1376 pCtx->csHid.u32Limit &= 0xffff;
1377 if (!pCtx->dsHid.Attr.n.u1DefBig)
1378 pCtx->dsHid.u32Limit &= 0xffff;
1379 if (!pCtx->esHid.Attr.n.u1DefBig)
1380 pCtx->esHid.u32Limit &= 0xffff;
1381 if (!pCtx->fsHid.Attr.n.u1DefBig)
1382 pCtx->fsHid.u32Limit &= 0xffff;
1383 if (!pCtx->gsHid.Attr.n.u1DefBig)
1384 pCtx->gsHid.u32Limit &= 0xffff;
1385 if (!pCtx->ssHid.Attr.n.u1DefBig)
1386 pCtx->ssHid.u32Limit &= 0xffff;
1387 }
1388 else
1389 /* Switching from protected mode to real mode. */
1390 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode >= PGMMODE_PROTECTED
1391 && enmGuestMode == PGMMODE_REAL)
1392 {
1393 /* The limit must also be set to 0xffff. */
1394 pCtx->csHid.u32Limit = 0xffff;
1395 pCtx->dsHid.u32Limit = 0xffff;
1396 pCtx->esHid.u32Limit = 0xffff;
1397 pCtx->fsHid.u32Limit = 0xffff;
1398 pCtx->gsHid.u32Limit = 0xffff;
1399 pCtx->ssHid.u32Limit = 0xffff;
1400
1401 Assert(pCtx->csHid.u64Base <= 0xfffff);
1402 Assert(pCtx->dsHid.u64Base <= 0xfffff);
1403 Assert(pCtx->esHid.u64Base <= 0xfffff);
1404 Assert(pCtx->fsHid.u64Base <= 0xfffff);
1405 Assert(pCtx->gsHid.u64Base <= 0xfffff);
1406 }
1407 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1408 }
1409 else
1410 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1411 if ( CPUMIsGuestInRealModeEx(pCtx)
1412 && pCtx->csHid.u64Base == 0xffff0000)
1413 {
1414 pCtx->csHid.u64Base = 0xf0000;
1415 pCtx->cs = 0xf000;
1416 }
1417 }
1418
1419 VMX_WRITE_SELREG(ES, es);
1420 AssertRC(rc);
1421
1422 VMX_WRITE_SELREG(CS, cs);
1423 AssertRC(rc);
1424
1425 VMX_WRITE_SELREG(SS, ss);
1426 AssertRC(rc);
1427
1428 VMX_WRITE_SELREG(DS, ds);
1429 AssertRC(rc);
1430
1431 VMX_WRITE_SELREG(FS, fs);
1432 AssertRC(rc);
1433
1434 VMX_WRITE_SELREG(GS, gs);
1435 AssertRC(rc);
1436 }
1437
1438 /* Guest CPU context: LDTR. */
1439 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1440 {
1441 if (pCtx->ldtr == 0)
1442 {
1443 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1444 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1445 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1446 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1447 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1448 }
1449 else
1450 {
1451 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr);
1452 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1453 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1454 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1455 }
1456 AssertRC(rc);
1457 }
1458 /* Guest CPU context: TR. */
1459 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1460 {
1461 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1462 if ( CPUMIsGuestInRealModeEx(pCtx)
1463 && pVM->hwaccm.s.vmx.pRealModeTSS)
1464 {
1465 RTGCPHYS GCPhys;
1466
1467 /* We convert it here every time as pci regions could be reconfigured. */
1468 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1469 AssertRC(rc);
1470
1471 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1472 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1473 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1474
1475 X86DESCATTR attr;
1476
1477 attr.u = 0;
1478 attr.n.u1Present = 1;
1479 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1480 val = attr.u;
1481 }
1482 else
1483 {
1484 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr);
1485 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1486 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->trHid.u64Base);
1487
1488 val = pCtx->trHid.Attr.u;
1489
1490 /* The TSS selector must be busy. */
1491 if ((val & 0xF) == X86_SEL_TYPE_SYS_286_TSS_AVAIL)
1492 val = (val & ~0xF) | X86_SEL_TYPE_SYS_286_TSS_BUSY;
1493 else
1494 /* Default even if no TR selector has been set (otherwise vmlaunch will fail!) */
1495 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1496
1497 }
1498 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1499 AssertRC(rc);
1500 }
1501 /* Guest CPU context: GDTR. */
1502 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1503 {
1504 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1505 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1506 AssertRC(rc);
1507 }
1508 /* Guest CPU context: IDTR. */
1509 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1510 {
1511 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1512 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1513 AssertRC(rc);
1514 }
1515
1516 /*
1517 * Sysenter MSRs (unconditional)
1518 */
1519 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1520 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1521 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1522 AssertRC(rc);
1523
1524 /* Control registers */
1525 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1526 {
1527 val = pCtx->cr0;
1528 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1529 Log2(("Guest CR0-shadow %08x\n", val));
1530 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1531 {
1532 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1533 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1534 }
1535 else
1536 {
1537 /** @todo check if we support the old style mess correctly. */
1538 if (!(val & X86_CR0_NE))
1539 Log(("Forcing X86_CR0_NE!!!\n"));
1540
1541 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1542 }
1543 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1544 if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1545 val |= X86_CR0_PE | X86_CR0_PG;
1546
1547 if (pVM->hwaccm.s.fNestedPaging)
1548 {
1549 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1550 {
1551 /* Disable cr3 read/write monitoring as we don't need it for EPT. */
1552 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1553 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1554 }
1555 else
1556 {
1557 /* Reenable cr3 read/write monitoring as our identity mapped page table is active. */
1558 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1559 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1560 }
1561 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1562 AssertRC(rc);
1563 }
1564 else
1565 {
1566 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1567 val |= X86_CR0_WP;
1568 }
1569
1570 /* Always enable caching. */
1571 val &= ~(X86_CR0_CD|X86_CR0_NW);
1572
1573 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1574 Log2(("Guest CR0 %08x\n", val));
1575        /* CR0 flags owned by the host; if the guest attempts to change them, then
1576 * the VM will exit.
1577 */
1578 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1579 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1580 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1581 | X86_CR0_TS
1582 | X86_CR0_ET /* Bit not restored during VM-exit! */
1583 | X86_CR0_CD /* Bit not restored during VM-exit! */
1584 | X86_CR0_NW /* Bit not restored during VM-exit! */
1585 | X86_CR0_NE
1586 | X86_CR0_MP;
1587 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1588
1589 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1590 Log2(("Guest CR0-mask %08x\n", val));
1591 AssertRC(rc);
1592 }
1593 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1594 {
1595 /* CR4 */
1596 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1597 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1598 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1599 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1600
1601 if (!pVM->hwaccm.s.fNestedPaging)
1602 {
1603 switch(pVCpu->hwaccm.s.enmShadowMode)
1604 {
1605 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1606 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1607 case PGMMODE_32_BIT: /* 32-bit paging. */
1608 val &= ~X86_CR4_PAE;
1609 break;
1610
1611 case PGMMODE_PAE: /* PAE paging. */
1612 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1613 /** @todo use normal 32 bits paging */
1614 val |= X86_CR4_PAE;
1615 break;
1616
1617 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1618 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1619#ifdef VBOX_ENABLE_64_BITS_GUESTS
1620 break;
1621#else
1622 AssertFailed();
1623 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1624#endif
1625 default: /* shut up gcc */
1626 AssertFailed();
1627 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1628 }
1629 }
1630 else
1631 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1632 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1633 {
1634 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1635 val |= X86_CR4_PSE;
1636 /* Our identity mapping is a 32 bits page directory. */
1637 val &= ~X86_CR4_PAE;
1638 }
1639
1640 /* Turn off VME if we're in emulated real mode. */
1641 if ( CPUMIsGuestInRealModeEx(pCtx)
1642 && pVM->hwaccm.s.vmx.pRealModeTSS)
1643 val &= ~X86_CR4_VME;
1644
1645 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1646 Log2(("Guest CR4 %08x\n", val));
1647        /* CR4 flags owned by the host; if the guest attempts to change them, then
1648 * the VM will exit.
1649 */
1650 val = 0
1651 | X86_CR4_VME
1652 | X86_CR4_PAE
1653 | X86_CR4_PGE
1654 | X86_CR4_PSE
1655 | X86_CR4_VMXE;
1656 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1657
1658 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1659 Log2(("Guest CR4-mask %08x\n", val));
1660 AssertRC(rc);
1661 }
1662
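    /* Note: with nested paging the VMCS guest CR3 holds what the guest expects to see (or our
     *  identity-mapping table while it still runs without paging); without nested paging it holds
     *  the shadow page table root returned by PGMGetHyperCR3. */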
1663 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1664 {
1665 if (pVM->hwaccm.s.fNestedPaging)
1666 {
1667 Assert(PGMGetHyperCR3(pVCpu));
1668 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
1669
1670 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1671 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
1672 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1673 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
1674
1675 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1676 AssertRC(rc);
1677
1678 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1679 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1680 {
1681 RTGCPHYS GCPhys;
1682
1683                /* We convert it here every time as PCI regions could be reconfigured. */
1684 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1685 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable));
1686
1687 /* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will
1688 * take care of the translation to host physical addresses.
1689 */
1690 val = GCPhys;
1691 }
1692 else
1693 {
1694 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1695 val = pCtx->cr3;
1696 /* Prefetch the four PDPT entries in PAE mode. */
1697 vmxR0PrefetchPAEPdptrs(pVM, pVCpu, pCtx);
1698 }
1699 }
1700 else
1701 {
1702 val = PGMGetHyperCR3(pVCpu);
1703 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
1704 }
1705
1706 /* Save our shadow CR3 register. */
1707 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
1708 AssertRC(rc);
1709 }
1710
1711 /* Debug registers. */
1712 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1713 {
1714 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
1715 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
1716
1717 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
1718 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
1719 pCtx->dr[7] |= 0x400; /* must be one */
1720
1721 /* Resync DR7 */
1722 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
1723 AssertRC(rc);
1724
1725#ifdef DEBUG
1726 /* Sync the hypervisor debug state now if any breakpoint is armed. */
1727 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
1728 && !CPUMIsHyperDebugStateActive(pVCpu)
1729 && !DBGFIsStepping(pVCpu))
1730 {
1731 /* Save the host and load the hypervisor debug state. */
1732 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1733 AssertRC(rc);
1734
1735 /* DRx intercepts remain enabled. */
1736
1737 /* Override dr7 with the hypervisor value. */
1738 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
1739 AssertRC(rc);
1740 }
1741 else
1742#endif
1743 /* Sync the debug state now if any breakpoint is armed. */
1744 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
1745 && !CPUMIsGuestDebugStateActive(pVCpu)
1746 && !DBGFIsStepping(pVCpu))
1747 {
1748 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
1749
1750 /* Disable drx move intercepts. */
1751 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
1752 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1753 AssertRC(rc);
1754
1755 /* Save the host and load the guest debug state. */
1756 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1757 AssertRC(rc);
1758 }
1759
1760 /* IA32_DEBUGCTL MSR. */
1761 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
1762 AssertRC(rc);
1763
1764 /** @todo do we really ever need this? */
1765 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
1766 AssertRC(rc);
1767 }
1768
1769 /* EIP, ESP and EFLAGS */
1770 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1771 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1772 AssertRC(rc);
1773
1774 /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */
1775 eflags = pCtx->eflags;
1776 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1777 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1778
1779 /* Real mode emulation using v86 mode. */
1780 if ( CPUMIsGuestInRealModeEx(pCtx)
1781 && pVM->hwaccm.s.vmx.pRealModeTSS)
1782 {
1783 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1784
1785 eflags.Bits.u1VM = 1;
1786 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1787 }
1788 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1789 AssertRC(rc);
1790
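    /* TSC offsetting: while RDTSC is not intercepted the CPU returns host TSC + the offset
     *  programmed below, so we only use the offset when it cannot make the guest TSC appear to
     *  go backwards; otherwise we fall back to intercepting RDTSC. */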
1791 if (TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset))
1792 {
1793 uint64_t u64CurTSC = ASMReadTSC();
1794 if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
1795 {
1796 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */
1797 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset);
1798 AssertRC(rc);
1799
1800 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1801 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1802 AssertRC(rc);
1803 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
1804 }
1805 else
1806 {
1807 /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */
1808 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGet(pVCpu)));
1809 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1810 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1811 AssertRC(rc);
1812 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
1813 }
1814 }
1815 else
1816 {
1817 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1818 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1819 AssertRC(rc);
1820 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
1821 }
1822
1823 /* 64 bits guest mode? */
1824 if (CPUMIsGuestInLongModeEx(pCtx))
1825 {
1826#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
1827 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1828#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1829 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
1830#else
1831# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1832 if (!pVM->hwaccm.s.fAllow64BitGuests)
1833 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1834# endif
1835 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
1836#endif
1837 /* Unconditionally update these as wrmsr might have changed them. */
1838 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base);
1839 AssertRC(rc);
1840 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base);
1841 AssertRC(rc);
1842 }
1843 else
1844 {
1845 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
1846 }
1847
1848 vmxR0UpdateExceptionBitmap(pVM, pVCpu, pCtx);
1849
1850#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1851 /* Store all guest MSRs in the VM-Entry load area, so they will be loaded during the world switch. */
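    /* Each entry in that area is a VMXMSR record (MSR index, reserved field, 64-bit value); the
     *  same count is used for the VM-entry load and VM-exit store lists below, so the values the
     *  CPU stores on exit can be read back in VMXR0SaveGuestState. */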
1852 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
1853 unsigned idxMsr = 0;
1854
1855 uint32_t ulEdx;
1856 uint32_t ulTemp;
1857 CPUMGetGuestCpuId(pVCpu, 0x80000001, &ulTemp, &ulTemp, &ulTemp, &ulEdx);
1858 /* EFER MSR present? */
1859 if (ulEdx & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1860 {
1861 pMsr->u32IndexMSR = MSR_K6_EFER;
1862 pMsr->u32Reserved = 0;
1863 pMsr->u64Value = pCtx->msrEFER;
1864 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
1865 if (!CPUMIsGuestInLongModeEx(pCtx))
1866 pMsr->u64Value &= ~(MSR_K6_EFER_LMA|MSR_K6_EFER_LME);
1867 pMsr++; idxMsr++;
1868
1869 if (ulEdx & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE)
1870 {
1871 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1872 pMsr->u32Reserved = 0;
1873 pMsr->u64Value = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
1874 pMsr++; idxMsr++;
1875 pMsr->u32IndexMSR = MSR_K6_STAR;
1876 pMsr->u32Reserved = 0;
1877 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
1878 pMsr++; idxMsr++;
1879 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1880 pMsr->u32Reserved = 0;
1881 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
1882 pMsr++; idxMsr++;
1883 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1884 pMsr->u32Reserved = 0;
1885 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
1886 pMsr++; idxMsr++;
1887 }
1888 }
1889 pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr;
1890
1891 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
1892 AssertRC(rc);
1893
1894 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
1895 AssertRC(rc);
1896#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1897
1898 /* Done. */
1899 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
1900
1901 return rc;
1902}
1903
1904/**
1905 * Syncs back the guest state
1906 *
1907 * @returns VBox status code.
1908 * @param pVM The VM to operate on.
1909 * @param pVCpu The VMCPU to operate on.
1910 * @param pCtx Guest context
1911 */
1912DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1913{
1914 RTGCUINTREG val, valShadow;
1915 RTGCUINTPTR uInterruptState;
1916 int rc;
1917
1918 /* Let's first sync back eip, esp, and eflags. */
1919 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
1920 AssertRC(rc);
1921 pCtx->rip = val;
1922 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
1923 AssertRC(rc);
1924 pCtx->rsp = val;
1925 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
1926 AssertRC(rc);
1927 pCtx->eflags.u32 = val;
1928
1929 /* Take care of instruction fusing (sti, mov ss) */
1930 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
1931 uInterruptState = val;
1932 if (uInterruptState != 0)
1933 {
1934 Assert(uInterruptState <= 2); /* only sti & mov ss */
1935 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
1936 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
1937 }
1938 else
1939 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
1940
1941 /* Control registers. */
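    /* For every CR0/CR4 bit we own (the mask) the guest only ever saw the read shadow, so merge:
     *  guest value = (shadow & mask) | (real VMCS value & ~mask). */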
1942 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
1943 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
1944 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
1945 CPUMSetGuestCR0(pVCpu, val);
1946
1947 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
1948 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
1949 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
1950 CPUMSetGuestCR4(pVCpu, val);
1951
1952 /* Note: no reason to sync back the CRx registers. They can't be changed by the guest. */
1953    /* Note: only in the nested paging case can CR2 & CR3 be changed by the guest behind our back. */
1954 if ( pVM->hwaccm.s.fNestedPaging
1955 && CPUMIsGuestInPagedProtectedModeEx(pCtx))
1956 {
1957 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
1958
1959 /* Can be updated behind our back in the nested paging case. */
1960 CPUMSetGuestCR2(pVCpu, pCache->cr2);
1961
1962 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
1963
1964 if (val != pCtx->cr3)
1965 {
1966 CPUMSetGuestCR3(pVCpu, val);
1967 PGMUpdateCR3(pVCpu, val);
1968 }
1969 /* Prefetch the four PDPT entries in PAE mode. */
1970 vmxR0PrefetchPAEPdptrs(pVM, pVCpu, pCtx);
1971 }
1972
1973 /* Sync back DR7 here. */
1974 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
1975 pCtx->dr[7] = val;
1976
1977 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1978 VMX_READ_SELREG(ES, es);
1979 VMX_READ_SELREG(SS, ss);
1980 VMX_READ_SELREG(CS, cs);
1981 VMX_READ_SELREG(DS, ds);
1982 VMX_READ_SELREG(FS, fs);
1983 VMX_READ_SELREG(GS, gs);
1984
1985 /*
1986 * System MSRs
1987 */
1988 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
1989 pCtx->SysEnter.cs = val;
1990 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
1991 pCtx->SysEnter.eip = val;
1992 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
1993 pCtx->SysEnter.esp = val;
1994
1995 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
1996 VMX_READ_SELREG(LDTR, ldtr);
1997
1998 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
1999 pCtx->gdtr.cbGdt = val;
2000 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
2001 pCtx->gdtr.pGdt = val;
2002
2003 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2004 pCtx->idtr.cbIdt = val;
2005 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
2006 pCtx->idtr.pIdt = val;
2007
2008 /* Real mode emulation using v86 mode. */
2009 if ( CPUMIsGuestInRealModeEx(pCtx)
2010 && pVM->hwaccm.s.vmx.pRealModeTSS)
2011 {
2012 /* Hide our emulation flags */
2013 pCtx->eflags.Bits.u1VM = 0;
2014
2015 /* Restore original IOPL setting as we always use 0. */
2016 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2017
2018 /* Force a TR resync every time in case we switch modes. */
2019 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
2020 }
2021 else
2022 {
2023 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2024 VMX_READ_SELREG(TR, tr);
2025 }
2026
2027#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2028 /* Save the possibly changed MSRs that we automatically restore and save during a world switch. */
2029 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++)
2030 {
2031 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2032 pMsr += i;
2033
2034 switch (pMsr->u32IndexMSR)
2035 {
2036 case MSR_K8_LSTAR:
2037 pCtx->msrLSTAR = pMsr->u64Value;
2038 break;
2039 case MSR_K6_STAR:
2040 pCtx->msrSTAR = pMsr->u64Value;
2041 break;
2042 case MSR_K8_SF_MASK:
2043 pCtx->msrSFMASK = pMsr->u64Value;
2044 break;
2045 case MSR_K8_KERNEL_GS_BASE:
2046 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2047 break;
2048 case MSR_K6_EFER:
2049 /* EFER can't be changed without causing a VM-exit. */
2050// Assert(pCtx->msrEFER == pMsr->u64Value);
2051 break;
2052 default:
2053 AssertFailed();
2054 return VERR_INTERNAL_ERROR;
2055 }
2056 }
2057#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2058 return VINF_SUCCESS;
2059}
2060
2061/**
2062 * Dummy placeholder
2063 *
2064 * @param pVM The VM to operate on.
2065 * @param pVCpu The VMCPU to operate on.
2066 */
2067static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2068{
2069 NOREF(pVM);
2070 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2071 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2072 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2073 return;
2074}
2075
2076/**
2077 * Setup the tagged TLB for EPT
2078 *
2080 * @param pVM The VM to operate on.
2081 * @param pVCpu The VMCPU to operate on.
2082 */
2083static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2084{
2085 PHWACCM_CPUINFO pCpu;
2086
2087 Assert(pVM->hwaccm.s.fNestedPaging);
2088 Assert(!pVM->hwaccm.s.vmx.fVPID);
2089
2090 /* Deal with tagged TLBs if VPID or EPT is supported. */
2091 pCpu = HWACCMR0GetCurrentCpu();
2092 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
2093 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
2094 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2095 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
2096 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2097 {
2098 /* Force a TLB flush on VM entry. */
2099 pVCpu->hwaccm.s.fForceTLBFlush = true;
2100 }
2101 else
2102 Assert(!pCpu->fFlushTLB);
2103
2104 /* Check for tlb shootdown flushes. */
2105 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2106 pVCpu->hwaccm.s.fForceTLBFlush = true;
2107
2108 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2109 pCpu->fFlushTLB = false;
2110
2111 if (pVCpu->hwaccm.s.fForceTLBFlush)
2112 {
2113 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
2114 }
2115 else
2116 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2117 {
2118 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
2119 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2120
2121 for (unsigned i=0;i<pVCpu->hwaccm.s.TlbShootdown.cPages;i++)
2122 {
2123 /* aTlbShootdownPages contains physical addresses in this case. */
2124 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2125 }
2126 }
2127    pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2128 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2129
2130#ifdef VBOX_WITH_STATISTICS
2131 if (pVCpu->hwaccm.s.fForceTLBFlush)
2132 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2133 else
2134 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2135#endif
2136}
2137
2138#ifdef HWACCM_VTX_WITH_VPID
2139/**
2140 * Setup the tagged TLB for VPID
2141 *
2143 * @param pVM The VM to operate on.
2144 * @param pVCpu The VMCPU to operate on.
2145 */
2146static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2147{
2148 PHWACCM_CPUINFO pCpu;
2149
2150 Assert(pVM->hwaccm.s.vmx.fVPID);
2151 Assert(!pVM->hwaccm.s.fNestedPaging);
2152
2153 /* Deal with tagged TLBs if VPID or EPT is supported. */
2154 pCpu = HWACCMR0GetCurrentCpu();
2155 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
2156 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
2157 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2158 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
2159 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2160 {
2161 /* Force a TLB flush on VM entry. */
2162 pVCpu->hwaccm.s.fForceTLBFlush = true;
2163 }
2164 else
2165 Assert(!pCpu->fFlushTLB);
2166
2167 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2168
2169 /* Check for tlb shootdown flushes. */
2170 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2171 pVCpu->hwaccm.s.fForceTLBFlush = true;
2172
2173 /* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without actually flushing the whole TLB (which is expensive). */
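    /* (Each ASID tags TLB entries for one guest address space on this host CPU; handing out a
     *  fresh ASID from the per-CPU pool gives this VCPU a clean set of TLB tags, and only when
     *  the pool wraps around (uCurrentASID >= uMaxASID) do we pay for a full VPID flush.) */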
2174 if (pVCpu->hwaccm.s.fForceTLBFlush)
2175 {
2176 if ( ++pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID
2177 || pCpu->fFlushTLB)
2178 {
2179 pCpu->fFlushTLB = false;
2180 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2181 pCpu->cTLBFlushes++;
2182 vmxR0FlushVPID(pVM, pVCpu, VMX_FLUSH_ALL_CONTEXTS, 0);
2183 }
2184 else
2185 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2186
2187 pVCpu->hwaccm.s.fForceTLBFlush = false;
2188 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2189 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2190 }
2191 else
2192 {
2193 Assert(!pCpu->fFlushTLB);
2194 Assert(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID);
2195
2196 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2197 {
2198 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
2199 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2200 for (unsigned i=0;i<pVCpu->hwaccm.s.TlbShootdown.cPages;i++)
2201 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2202 }
2203 }
2204 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2205 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2206
2207 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2208 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2209 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2210
2211 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2212 AssertRC(rc);
2213
2214 if (pVCpu->hwaccm.s.fForceTLBFlush)
2215 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
2216
2217#ifdef VBOX_WITH_STATISTICS
2218 if (pVCpu->hwaccm.s.fForceTLBFlush)
2219 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2220 else
2221 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2222#endif
2223}
2224#endif /* HWACCM_VTX_WITH_VPID */
2225
2226/**
2227 * Runs guest code in a VT-x VM.
2228 *
2229 * @returns VBox status code.
2230 * @param pVM The VM to operate on.
2231 * @param pVCpu The VMCPU to operate on.
2232 * @param pCtx Guest context
2233 */
2234VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2235{
2236 int rc = VINF_SUCCESS;
2237 RTGCUINTREG val;
2238 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2239 RTGCUINTREG instrError, cbInstr;
2240 RTGCUINTPTR exitQualification = 0;
2241 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2242 RTGCUINTPTR errCode, instrInfo;
2243 bool fSetupTPRCaching = false;
2244 uint64_t u64OldLSTAR = 0;
2245 uint8_t u8LastTPR = 0;
2246 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2247 unsigned cResume = 0;
2248#ifdef VBOX_STRICT
2249 RTCPUID idCpuCheck;
2250 bool fWasInLongMode = false;
2251#endif
2252#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2253 uint64_t u64LastTime = RTTimeMilliTS();
2254#endif
2255#ifdef VBOX_WITH_STATISTICS
2256 bool fStatEntryStarted = true;
2257 bool fStatExit2Started = false;
2258#endif
2259
2260 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || (pVCpu->hwaccm.s.vmx.pVAPIC && pVM->hwaccm.s.vmx.pAPIC));
2261
2262 /* Check if we need to use TPR shadowing. */
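    /* (TPR shadowing keeps the guest's task priority in a VM-private virtual-APIC page so most
     *  TPR reads/writes complete without a VM-exit; see the CR8/LSTAR handling further down.) */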
2263 if ( CPUMIsGuestInLongModeEx(pCtx)
2264 || ( ((pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || pVM->hwaccm.s.fTRPPatchingAllowed)
2265 && pVM->hwaccm.s.fHasIoApic)
2266 )
2267 {
2268 fSetupTPRCaching = true;
2269 }
2270
2271 Log2(("\nE"));
2272
2273 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
2274
2275#ifdef VBOX_STRICT
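    /* Sanity: re-read the pin/proc/entry/exit controls and check them against the allowed-0 and
     *  allowed-1 bits reported by the VMX capability MSRs; a violation here would make
     *  VMLAUNCH/VMRESUME fail with an invalid-control-field error. */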
2276 {
2277 RTCCUINTREG val2;
2278
2279 rc = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2);
2280 AssertRC(rc);
2281 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2282
2283 /* allowed zero */
2284 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2285 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2286
2287 /* allowed one */
2288 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2289 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2290
2291 rc = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2);
2292 AssertRC(rc);
2293 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2294
2295 /* Must be set according to the MSR, but can be cleared in case of EPT. */
2296 if (pVM->hwaccm.s.fNestedPaging)
2297 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2298 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2299 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2300
2301 /* allowed zero */
2302 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2303 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2304
2305 /* allowed one */
2306 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2307 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2308
2309 rc = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2);
2310 AssertRC(rc);
2311 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2312
2313 /* allowed zero */
2314 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
2315 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2316
2317 /* allowed one */
2318 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2319 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2320
2321 rc = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2);
2322 AssertRC(rc);
2323 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2324
2325 /* allowed zero */
2326 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2327 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2328
2329 /* allowed one */
2330 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2331 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2332 }
2333 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2334#endif /* VBOX_STRICT */
2335
2336#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2337 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2338#endif
2339
2340 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
2341 */
2342ResumeExecution:
2343 STAM_STATS({
2344 if (fStatExit2Started) { STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = false; }
2345 if (!fStatEntryStarted) { STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = true; }
2346 });
2347 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2348 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2349 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2350 Assert(!HWACCMR0SuspendPending());
2351 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2352 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2353
2354 /* Safety precaution; looping for too long here can have a very bad effect on the host */
2355 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
2356 {
2357 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2358 rc = VINF_EM_RAW_INTERRUPT;
2359 goto end;
2360 }
2361
2362 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
2363 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2364 {
2365 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2366 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2367 {
2368 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2369 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2370 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
2371 * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
2372 */
2373 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2374 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2375 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2376 AssertRC(rc);
2377 }
2378 }
2379 else
2380 {
2381 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2382 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2383 AssertRC(rc);
2384 }
2385
2386#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2387    if (RT_UNLIKELY((cResume & 0xf) == 0))
2388 {
2389 uint64_t u64CurTime = RTTimeMilliTS();
2390
2391 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2392 {
2393 u64LastTime = u64CurTime;
2394 TMTimerPollVoid(pVM, pVCpu);
2395 }
2396 }
2397#endif
2398
2399 /* Check for pending actions that force us to go back to ring 3. */
2400 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING)
2401 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2402 {
2403 /* Check if a sync operation is pending. */
2404 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2405 {
2406 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2407 AssertRC(rc);
2408 if (rc != VINF_SUCCESS)
2409 {
2410 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", rc));
2411 goto end;
2412 }
2413 }
2414
2415#ifdef DEBUG
2416 /* Intercept X86_XCPT_DB if stepping is enabled */
2417 if (!DBGFIsStepping(pVCpu))
2418#endif
2419 {
2420 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2421 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2422 {
2423 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
2424 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2425 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
2426 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2427 goto end;
2428 }
2429 }
2430
2431 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2432 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
2433 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
2434 {
2435 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
2436 rc = VINF_EM_PENDING_REQUEST;
2437 goto end;
2438 }
2439
2440 /* Check if a pgm pool flush is in progress. */
2441 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2442 {
2443 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
2444 rc = VINF_PGM_POOL_FLUSH_PENDING;
2445 goto end;
2446 }
2447 }
2448
2449#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2450 /*
2451     * Exit to ring-3 if preemption or other work is pending.
2452 *
2453 * Interrupts are disabled before the call to make sure we don't miss any interrupt
2454 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
2455 * further down, but VMXR0CheckPendingInterrupt makes that impossible.)
2456 *
2457     * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
2458 * shootdowns rely on this.
2459 */
2460 uOldEFlags = ASMIntDisableFlags();
2461 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
2462 {
2463 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
2464 rc = VINF_EM_RAW_INTERRUPT;
2465 goto end;
2466 }
2467 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2468#endif
2469
2470 /* When external interrupts are pending, we should exit the VM when IF is set. */
2471 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
2472 rc = VMXR0CheckPendingInterrupt(pVM, pVCpu, pCtx);
2473 if (RT_FAILURE(rc))
2474 goto end;
2475
2476 /** @todo check timers?? */
2477
2478    /* TPR caching using CR8 is only available in 64-bit mode. */
2479    /* Note the 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), which appears to be missing on Intel CPUs. */
2480 /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!! (no longer true) */
2481 /**
2482 * @todo query and update the TPR only when it could have been changed (mmio access & wrmsr (x2apic))
2483 */
2484 if (fSetupTPRCaching)
2485 {
2486 /* TPR caching in CR8 */
2487 bool fPending;
2488
2489 int rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
2490 AssertRC(rc2);
2491 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
2492 pVCpu->hwaccm.s.vmx.pVAPIC[0x80] = u8LastTPR;
2493
2494 /* Two options here:
2495 * - external interrupt pending, but masked by the TPR value.
2496         *    -> a CR8 update that lowers the current TPR value should cause an exit
2497         * - no pending interrupts
2498         *    -> we don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
2499 */
2500 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
2501 AssertRC(rc);
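        /* (With a non-zero threshold the CPU raises a TPR-below-threshold VM-exit once the guest
         *  lowers its TPR below the priority of the pending interrupt, which is exactly when we
         *  need to regain control and dispatch it.) */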
2502
2503 if (pVM->hwaccm.s.fTPRPatchingActive)
2504 {
2505 Assert(!CPUMIsGuestInLongModeEx(pCtx));
2506 /* Our patch code uses LSTAR for TPR caching. */
2507 pCtx->msrLSTAR = u8LastTPR;
2508
2509 if (fPending)
2510 {
2511 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
2512 vmxR0SetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
2513 }
2514 else
2515 {
2516                /* No interrupts are pending, so we don't need to be explicitly notified.
2517 * There are enough world switches for detecting pending interrupts.
2518 */
2519 vmxR0SetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
2520 }
2521 }
2522 }
2523
2524#if defined(HWACCM_VTX_WITH_EPT) && defined(LOG_ENABLED)
2525 if ( pVM->hwaccm.s.fNestedPaging
2526# ifdef HWACCM_VTX_WITH_VPID
2527 || pVM->hwaccm.s.vmx.fVPID
2528# endif /* HWACCM_VTX_WITH_VPID */
2529 )
2530 {
2531 PHWACCM_CPUINFO pCpu;
2532
2533 pCpu = HWACCMR0GetCurrentCpu();
2534 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2535 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2536 {
2537 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
2538 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
2539 else
2540 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2541 }
2542 if (pCpu->fFlushTLB)
2543 LogFlow(("Force TLB flush: first time cpu %d is used -> flush\n", pCpu->idCpu));
2544 else
2545 if (pVCpu->hwaccm.s.fForceTLBFlush)
2546 LogFlow(("Manual TLB flush\n"));
2547 }
2548#endif
2549#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2550 PGMDynMapFlushAutoSet(pVCpu);
2551#endif
2552
2553 /*
2554 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
2555 * (until the actual world switch)
2556 */
2557#ifdef VBOX_STRICT
2558 idCpuCheck = RTMpCpuId();
2559#endif
2560#ifdef LOG_ENABLED
2561 VMMR0LogFlushDisable(pVCpu);
2562#endif
2563 /* Save the host state first. */
2564 rc = VMXR0SaveHostState(pVM, pVCpu);
2565 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2566 {
2567 VMMR0LogFlushEnable(pVCpu);
2568 goto end;
2569 }
2570 /* Load the guest state */
2571 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
2572 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2573 {
2574 VMMR0LogFlushEnable(pVCpu);
2575 goto end;
2576 }
2577
2578#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2579 /* Disable interrupts to make sure a poke will interrupt execution.
2580 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
2581 */
2582 uOldEFlags = ASMIntDisableFlags();
2583 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2584#endif
2585
2586 /* Non-register state Guest Context */
2587 /** @todo change me according to cpu state */
2588 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
2589 AssertRC(rc);
2590
2591    /* Set TLB flush state as checked until we return from the world switch. */
2592 ASMAtomicWriteU8(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
2593 /* Deal with tagged TLB setup and invalidation. */
2594 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
2595
2596 STAM_STATS({ STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = false; });
2597
2598 /* Manual save and restore:
2599 * - General purpose registers except RIP, RSP
2600 *
2601 * Trashed:
2602 * - CR2 (we don't care)
2603 * - LDTR (reset to 0)
2604 * - DRx (presumably not changed at all)
2605 * - DR7 (reset to 0x400)
2606 * - EFLAGS (reset to RT_BIT(1); not relevant)
2607 *
2608 */
2609
2610 /* All done! Let's start VM execution. */
2611 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatInGC, z);
2612 Assert(idCpuCheck == RTMpCpuId());
2613
2614#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2615 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
2616 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
2617#endif
2618
2619 /* Save the current TPR value in the LSTAR msr so our patches can access it. */
2620 if (pVM->hwaccm.s.fTPRPatchingActive)
2621 {
2622 Assert(pVM->hwaccm.s.fTPRPatchingActive);
2623 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
2624 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
2625 }
2626
2627 TMNotifyStartOfExecution(pVCpu);
2628#ifdef VBOX_WITH_KERNEL_USING_XMM
2629 rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM);
2630#else
2631 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
2632#endif
2633 ASMAtomicWriteU8(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
2634 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExit);
2635 /* Possibly the last TSC value seen by the guest (too high) (only when we're in tsc offset mode). */
2636 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
2637 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
2638
2639 TMNotifyEndOfExecution(pVCpu);
2640 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
2641 Assert(!(ASMGetFlags() & X86_EFL_IF));
2642
2643 /* Restore the host LSTAR msr if the guest could have changed it. */
2644 if (pVM->hwaccm.s.fTPRPatchingActive)
2645 {
2646 Assert(pVM->hwaccm.s.fTPRPatchingActive);
2647 pVCpu->hwaccm.s.vmx.pVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
2648 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
2649 }
2650
2651 ASMSetFlags(uOldEFlags);
2652#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2653 uOldEFlags = ~(RTCCUINTREG)0;
2654#endif
2655
2656 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
2657
2658 /* In case we execute a goto ResumeExecution later on. */
2659 pVCpu->hwaccm.s.fResumeVM = true;
2660 pVCpu->hwaccm.s.fForceTLBFlush = false;
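    /* (fResumeVM makes the assembly switcher use VMRESUME instead of VMLAUNCH next time, as the
     *  VMCS we just ran with is now in the launched state.) */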
2661
2662 /*
2663 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2664 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
2665 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2666 */
2667 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatInGC, z);
2668 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit1, v);
2669
2670 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2671 {
2672 VMXR0ReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
2673 VMMR0LogFlushEnable(pVCpu);
2674 goto end;
2675 }
2676
2677 /* Success. Query the guest state and figure out what has happened. */
2678
2679 /* Investigate why there was a VM-exit. */
2680 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
2681 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
2682
2683 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
2684 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
2685 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
2686 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
2687 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
2688 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
2689 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
2690 rc |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
2691 AssertRC(rc);
2692
2693 /* Sync back the guest state */
2694 rc = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
2695 AssertRC(rc);
2696
2697 /* Note! NOW IT'S SAFE FOR LOGGING! */
2698 VMMR0LogFlushEnable(pVCpu);
2699 Log2(("Raw exit reason %08x\n", exitReason));
2700
2701 /* Check if an injected event was interrupted prematurely. */
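    /* (The IDT-vectoring info field is set when the exit occurred while delivering an event; we
     *  must remember such an event and re-inject it later, or the guest would lose the
     *  interrupt or exception.) */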
2702 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
2703 AssertRC(rc);
2704 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
2705 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2706 /* Ignore 'int xx' as they'll be restarted anyway. */
2707 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
2708 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2709 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2710 {
2711 Assert(!pVCpu->hwaccm.s.Event.fPending);
2712 pVCpu->hwaccm.s.Event.fPending = true;
2713 /* Error code present? */
2714 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
2715 {
2716 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
2717 AssertRC(rc);
2718 pVCpu->hwaccm.s.Event.errCode = val;
2719 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
2720 }
2721 else
2722 {
2723 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2724 pVCpu->hwaccm.s.Event.errCode = 0;
2725 }
2726 }
2727#ifdef VBOX_STRICT
2728 else
2729 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2730        /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2731 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2732 {
2733 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2734 }
2735
2736 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
2737 HWACCMDumpRegs(pVM, pVCpu, pCtx);
2738#endif
2739
2740 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs, (RTGCPTR)pCtx->rip));
2741 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
2742 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
2743 Log2(("Interruption error code %d\n", (uint32_t)errCode));
2744 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
2745
2746 /* Sync back the TPR if it was changed. */
2747 if ( fSetupTPRCaching
2748 && u8LastTPR != pVCpu->hwaccm.s.vmx.pVAPIC[0x80])
2749 {
2750 rc = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pVAPIC[0x80]);
2751 AssertRC(rc);
2752 }
2753
2754 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, v);
2755 STAM_STATS({ STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = true; });
2756
2757 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
2758 switch (exitReason)
2759 {
2760 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2761 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2762 {
2763 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
2764
2765 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2766 {
2767 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
2768 /* External interrupt; leave to allow it to be dispatched again. */
2769 rc = VINF_EM_RAW_INTERRUPT;
2770 break;
2771 }
2772 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2773 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
2774 {
2775 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
2776 /* External interrupt; leave to allow it to be dispatched again. */
2777 rc = VINF_EM_RAW_INTERRUPT;
2778 break;
2779
2780 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
2781 AssertFailed(); /* can't come here; fails the first check. */
2782 break;
2783
2784 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
2785 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
2786 Assert(vector == 1 || vector == 3 || vector == 4);
2787 /* no break */
2788 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
2789 Log2(("Hardware/software interrupt %d\n", vector));
2790 switch (vector)
2791 {
2792 case X86_XCPT_NM:
2793 {
2794 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
2795
2796 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
2797 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
2798 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
2799 if (rc == VINF_SUCCESS)
2800 {
2801 Assert(CPUMIsGuestFPUStateActive(pVCpu));
2802
2803 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
2804
2805 /* Continue execution. */
2806 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2807
2808 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2809 goto ResumeExecution;
2810 }
2811
2812 Log(("Forward #NM fault to the guest\n"));
2813 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
2814 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
2815 AssertRC(rc);
2816 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2817 goto ResumeExecution;
2818 }
2819
2820 case X86_XCPT_PF: /* Page fault */
2821 {
2822#ifdef DEBUG
2823 if (pVM->hwaccm.s.fNestedPaging)
2824 { /* A genuine pagefault.
2825 * Forward the trap to the guest by injecting the exception and resuming execution.
2826 */
2827 Log(("Guest page fault at %RGv cr2=%RGv error code %x rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
2828
2829 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
2830
2831 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2832
2833 /* Now we must update CR2. */
2834 pCtx->cr2 = exitQualification;
2835 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2836 AssertRC(rc);
2837
2838 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2839 goto ResumeExecution;
2840 }
2841#endif
2842 Assert(!pVM->hwaccm.s.fNestedPaging);
2843
2844#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
2845 /* Shortcut for APIC TPR reads and writes; 32 bits guests only */
2846 if ( pVM->hwaccm.s.fTRPPatchingAllowed
2847 && pVM->hwaccm.s.pGuestPatchMem
2848 && (exitQualification & 0xfff) == 0x080
2849 && !(errCode & X86_TRAP_PF_P) /* not present */
2850 && CPUMGetGuestCPL(pVCpu, CPUMCTX2CORE(pCtx)) == 0
2851 && !CPUMIsGuestInLongModeEx(pCtx)
2852 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
2853 {
2854 RTGCPHYS GCPhysApicBase, GCPhys;
2855 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
2856 GCPhysApicBase &= PAGE_BASE_GC_MASK;
2857
2858 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
2859 if ( rc == VINF_SUCCESS
2860 && GCPhys == GCPhysApicBase)
2861 {
2862 /* Only attempt to patch the instruction once. */
2863 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
2864 if (!pPatch)
2865 {
2866 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
2867 break;
2868 }
2869 }
2870 }
2871#endif
2872
2873 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
2874 /* Exit qualification contains the linear address of the page fault. */
2875 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
2876 TRPMSetErrorCode(pVCpu, errCode);
2877 TRPMSetFaultAddress(pVCpu, exitQualification);
2878
2879 /* Shortcut for APIC TPR reads and writes. */
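                    /* (A not-present fault at offset 0x80 into the APIC page is most likely a TPR
                     *  access; mapping the APIC-access page over the guest's APIC base lets the CPU
                     *  virtualize those accesses instead of faulting into us every time.) */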
2880 if ( (exitQualification & 0xfff) == 0x080
2881 && !(errCode & X86_TRAP_PF_P) /* not present */
2882 && fSetupTPRCaching
2883 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
2884 {
2885 RTGCPHYS GCPhysApicBase, GCPhys;
2886 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
2887 GCPhysApicBase &= PAGE_BASE_GC_MASK;
2888
2889 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
2890 if ( rc == VINF_SUCCESS
2891 && GCPhys == GCPhysApicBase)
2892 {
2893 Log(("Enable VT-x virtual APIC access filtering\n"));
2894 rc = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
2895 AssertRC(rc);
2896 }
2897 }
2898
2899 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
2900 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
2901 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
2902
2903 if (rc == VINF_SUCCESS)
2904 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2905                        Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
2906 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
2907
2908 TRPMResetTrap(pVCpu);
2909 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2910 goto ResumeExecution;
2911 }
2912 else
2913 if (rc == VINF_EM_RAW_GUEST_TRAP)
2914 { /* A genuine pagefault.
2915 * Forward the trap to the guest by injecting the exception and resuming execution.
2916 */
2917 Log2(("Forward page fault to the guest\n"));
2918
2919 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2920 /* The error code might have been changed. */
2921 errCode = TRPMGetErrorCode(pVCpu);
2922
2923 TRPMResetTrap(pVCpu);
2924
2925 /* Now we must update CR2. */
2926 pCtx->cr2 = exitQualification;
2927 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2928 AssertRC(rc);
2929
2930 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2931 goto ResumeExecution;
2932 }
2933#ifdef VBOX_STRICT
2934 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
2935 Log2(("PGMTrap0eHandler failed with %d\n", rc));
2936#endif
2937 /* Need to go back to the recompiler to emulate the instruction. */
2938 TRPMResetTrap(pVCpu);
2939 break;
2940 }
2941
2942 case X86_XCPT_MF: /* Floating point exception. */
2943 {
2944 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
2945 if (!(pCtx->cr0 & X86_CR0_NE))
2946 {
2947 /* old style FPU error reporting needs some extra work. */
2948 /** @todo don't fall back to the recompiler, but do it manually. */
2949 rc = VINF_EM_RAW_EMULATE_INSTR;
2950 break;
2951 }
2952 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
2953 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2954 AssertRC(rc);
2955
2956 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2957 goto ResumeExecution;
2958 }
2959
2960 case X86_XCPT_DB: /* Debug exception. */
2961 {
2962 uint64_t uDR6;
2963
2964 /* DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
2965 *
2966 * Exit qualification bits:
2967 * 3:0 B0-B3 which breakpoint condition was met
2968 * 12:4 Reserved (0)
2969 * 13 BD - debug register access detected
2970 * 14 BS - single step execution or branch taken
2971 * 63:15 Reserved (0)
2972 */
2973 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
2974
2975 /* Note that we don't support guest and host-initiated debugging at the same time. */
2976
2977 uDR6 = X86_DR6_INIT_VAL;
2978 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
2979 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
2980 if (rc == VINF_EM_RAW_GUEST_TRAP)
2981 {
2982 /* Update DR6 here. */
2983 pCtx->dr[6] = uDR6;
2984
2985 /* Resync DR6 if the debug state is active. */
2986 if (CPUMIsGuestDebugStateActive(pVCpu))
2987 ASMSetDR6(pCtx->dr[6]);
2988
2989 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2990 pCtx->dr[7] &= ~X86_DR7_GD;
2991
2992 /* Paranoia. */
2993 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2994 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2995 pCtx->dr[7] |= 0x400; /* must be one */
2996
2997 /* Resync DR7 */
2998 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
2999 AssertRC(rc);
3000
3001 Log(("Trap %x (debug) at %RGv exit qualification %RX64 dr6=%x dr7=%x\n", vector, (RTGCPTR)pCtx->rip, exitQualification, (uint32_t)pCtx->dr[6], (uint32_t)pCtx->dr[7]));
3002 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3003 AssertRC(rc);
3004
3005 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3006 goto ResumeExecution;
3007 }
3008 /* Return to ring 3 to deal with the debug exit code. */
3009 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, rc));
3010 break;
3011 }
3012
3013 case X86_XCPT_BP: /* Breakpoint. */
3014 {
3015 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3016 if (rc == VINF_EM_RAW_GUEST_TRAP)
3017 {
3018 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs, pCtx->rip));
3019 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3020 AssertRC(rc);
3021 goto ResumeExecution;
3022 }
3023 if (rc == VINF_SUCCESS)
3024 goto ResumeExecution;
3025 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, rc));
3026 break;
3027 }
3028
3029 case X86_XCPT_GP: /* General protection fault exception. */
3030 {
3031 uint32_t cbOp;
3032 uint32_t cbSize;
3033 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3034
3035 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
3036#ifdef VBOX_STRICT
3037 if ( !CPUMIsGuestInRealModeEx(pCtx)
3038 || !pVM->hwaccm.s.vmx.pRealModeTSS)
3039 {
3040 Log(("Trap %x at %04X:%RGv errorCode=%x\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode));
3041 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3042 AssertRC(rc);
3043 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3044 goto ResumeExecution;
3045 }
3046#endif
3047 Assert(CPUMIsGuestInRealModeEx(pCtx));
3048
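/* Without unrestricted guest execution, real mode is run as virtual-8086 code using the special
 * real-mode TSS, so IOPL-sensitive instructions (CLI, STI, PUSHF, POPF, INT, IRET) fault with #GP
 * and are disassembled and emulated below.
 */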
3049 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip));
3050
3051 rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, &cbOp);
3052 if (RT_SUCCESS(rc))
3053 {
3054 bool fUpdateRIP = true;
3055
3056 Assert(cbOp == pDis->opsize);
3057 switch (pDis->pCurInstr->opcode)
3058 {
3059 case OP_CLI:
3060 pCtx->eflags.Bits.u1IF = 0;
3061 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
3062 break;
3063
3064 case OP_STI:
3065 pCtx->eflags.Bits.u1IF = 1;
3066 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->opsize);
3067 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3068 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3069 AssertRC(rc);
3070 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
3071 break;
3072
3073 case OP_HLT:
3074 fUpdateRIP = false;
3075 rc = VINF_EM_HALT;
3076 pCtx->rip += pDis->opsize;
3077 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3078 break;
3079
3080 case OP_POPF:
3081 {
3082 RTGCPTR GCPtrStack;
3083 uint32_t cbParm;
3084 uint32_t uMask;
3085 X86EFLAGS eflags;
3086
3087 if (pDis->prefix & PREFIX_OPSIZE)
3088 {
3089 cbParm = 4;
3090 uMask = 0xffffffff;
3091 }
3092 else
3093 {
3094 cbParm = 2;
3095 uMask = 0xffff;
3096 }
3097
3098 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3099 if (RT_FAILURE(rc))
3100 {
3101 rc = VERR_EM_INTERPRETER;
3102 break;
3103 }
3104 eflags.u = 0;
3105 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3106 if (RT_FAILURE(rc))
3107 {
3108 rc = VERR_EM_INTERPRETER;
3109 break;
3110 }
3111 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3112 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (eflags.u & X86_EFL_POPF_BITS & uMask);
3113 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3114 pCtx->eflags.Bits.u1RF = 0;
3115 pCtx->esp += cbParm;
3116 pCtx->esp &= uMask;
3117
3118 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
3119 break;
3120 }
3121
3122 case OP_PUSHF:
3123 {
3124 RTGCPTR GCPtrStack;
3125 uint32_t cbParm;
3126 uint32_t uMask;
3127 X86EFLAGS eflags;
3128
3129 if (pDis->prefix & PREFIX_OPSIZE)
3130 {
3131 cbParm = 4;
3132 uMask = 0xffffffff;
3133 }
3134 else
3135 {
3136 cbParm = 2;
3137 uMask = 0xffff;
3138 }
3139
3140 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0, &GCPtrStack);
3141 if (RT_FAILURE(rc))
3142 {
3143 rc = VERR_EM_INTERPRETER;
3144 break;
3145 }
3146 eflags = pCtx->eflags;
3147 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3148 eflags.Bits.u1RF = 0;
3149 eflags.Bits.u1VM = 0;
3150
3151 rc = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3152 if (RT_FAILURE(rc))
3153 {
3154 rc = VERR_EM_INTERPRETER;
3155 break;
3156 }
3157 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3158 pCtx->esp -= cbParm;
3159 pCtx->esp &= uMask;
3160 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
3161 break;
3162 }
3163
3164 case OP_IRET:
3165 {
3166 RTGCPTR GCPtrStack;
3167 uint32_t uMask = 0xffff;
3168 uint16_t aIretFrame[3];
3169
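/* A real-mode IRET pops IP, CS and FLAGS as three consecutive 16-bit words; read them as one frame. */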
3170 if (pDis->prefix & (PREFIX_OPSIZE | PREFIX_ADDRSIZE))
3171 {
3172 rc = VERR_EM_INTERPRETER;
3173 break;
3174 }
3175
3176 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3177 if (RT_FAILURE(rc))
3178 {
3179 rc = VERR_EM_INTERPRETER;
3180 break;
3181 }
3182 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3183 if (RT_FAILURE(rc))
3184 {
3185 rc = VERR_EM_INTERPRETER;
3186 break;
3187 }
3188 pCtx->ip = aIretFrame[0];
3189 pCtx->cs = aIretFrame[1];
3190 pCtx->csHid.u64Base = pCtx->cs << 4;
3191 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3192 pCtx->sp += sizeof(aIretFrame);
3193
3194 LogFlow(("iret to %04x:%x\n", pCtx->cs, pCtx->ip));
3195 fUpdateRIP = false;
3196 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
3197 break;
3198 }
3199
3200 case OP_INT:
3201 {
3202 uint32_t intInfo2;
3203
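/* The software interrupt isn't emulated here; it is reflected back into the guest as an
 * injected event and delivered on the next VM entry, so RIP must not be advanced.
 */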
3204 LogFlow(("Realmode: INT %x\n", pDis->param1.parval & 0xff));
3205 intInfo2 = pDis->param1.parval & 0xff;
3206 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3207 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3208
3209 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3210 AssertRC(rc);
3211 fUpdateRIP = false;
3212 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3213 break;
3214 }
3215
3216 case OP_INTO:
3217 {
3218 if (pCtx->eflags.Bits.u1OF)
3219 {
3220 uint32_t intInfo2;
3221
3222 LogFlow(("Realmode: INTO\n"));
3223 intInfo2 = X86_XCPT_OF;
3224 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3225 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3226
3227 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3228 AssertRC(rc);
3229 fUpdateRIP = false;
3230 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3231 }
3232 break;
3233 }
3234
3235 case OP_INT3:
3236 {
3237 uint32_t intInfo2;
3238
3239 LogFlow(("Realmode: INT 3\n"));
3240 intInfo2 = 3;
3241 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3242 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3243
3244 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3245 AssertRC(rc);
3246 fUpdateRIP = false;
3247 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3248 break;
3249 }
3250
3251 default:
3252 rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, &cbSize);
3253 break;
3254 }
3255
3256 if (rc == VINF_SUCCESS)
3257 {
3258 if (fUpdateRIP)
3259 pCtx->rip += cbOp; /* Move on to the next instruction. */
3260
3261 /* lidt, lgdt can end up here. In the future crx changes as well. Just reload the whole context to be done with it. */
3262 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3263
3264 /* Only resume if successful. */
3265 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3266 goto ResumeExecution;
3267 }
3268 }
3269 else
3270 rc = VERR_EM_INTERPRETER;
3271
3272 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", rc));
3273 break;
3274 }
3275
3276#ifdef VBOX_STRICT
3277 case X86_XCPT_XF: /* SIMD exception. */
3278 case X86_XCPT_DE: /* Divide error. */
3279 case X86_XCPT_UD: /* Invalid opcode exception. */
3280 case X86_XCPT_SS: /* Stack segment exception. */
3281 case X86_XCPT_NP: /* Segment not present exception. */
3282 {
3283 switch(vector)
3284 {
3285 case X86_XCPT_DE:
3286 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
3287 break;
3288 case X86_XCPT_UD:
3289 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
3290 break;
3291 case X86_XCPT_SS:
3292 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
3293 break;
3294 case X86_XCPT_NP:
3295 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
3296 break;
3297 }
3298
3299 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
3300 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3301 AssertRC(rc);
3302
3303 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3304 goto ResumeExecution;
3305 }
3306#endif
3307 default:
3308 if ( CPUMIsGuestInRealModeEx(pCtx)
3309 && pVM->hwaccm.s.vmx.pRealModeTSS)
3310 {
3311 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode));
3312 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3313 AssertRC(rc);
3314
3315 /* Go back to ring 3 in case of a triple fault. */
3316 if ( vector == X86_XCPT_DF
3317 && rc == VINF_EM_RESET)
3318 break;
3319
3320 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3321 goto ResumeExecution;
3322 }
3323 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3324 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3325 break;
3326 } /* switch (vector) */
3327
3328 break;
3329
3330 default:
3331 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3332 AssertMsgFailed(("Unexpected interuption code %x\n", intInfo));
3333 break;
3334 }
3335
3336 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3337 break;
3338 }
3339
3340 case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
3341 {
3342 RTGCPHYS GCPhys;
3343
3344 Assert(pVM->hwaccm.s.fNestedPaging);
3345
3346 rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3347 AssertRC(rc);
3348 Assert(((exitQualification >> 7) & 3) != 2);
3349
3350 /* Determine the kind of violation. */
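/* Translate the EPT exit qualification bits into a synthetic #PF error code so the common
 * nested-paging handler (PGMR0Trap0eHandlerNestedPaging) can be reused below.
 */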
3351 errCode = 0;
3352 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
3353 errCode |= X86_TRAP_PF_ID;
3354
3355 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
3356 errCode |= X86_TRAP_PF_RW;
3357
3358 /* If the page is present, then it's a page level protection fault. */
3359 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
3360 {
3361 errCode |= X86_TRAP_PF_P;
3362 }
3363 else
3364 {
3365 /* Shortcut for APIC TPR reads and writes. */
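/* Offset 0x80 within the APIC page is the TPR register. */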
3366 if ( (GCPhys & 0xfff) == 0x080
3367 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3368 && fSetupTPRCaching
3369 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3370 {
3371 RTGCPHYS GCPhysApicBase;
3372 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
3373 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3374 if (GCPhys == GCPhysApicBase + 0x80)
3375 {
3376 Log(("Enable VT-x virtual APIC access filtering\n"));
3377 rc = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3378 AssertRC(rc);
3379 }
3380 }
3381 }
3382 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
3383
3384 /* GCPhys contains the guest physical address of the page fault. */
3385 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3386 TRPMSetErrorCode(pVCpu, errCode);
3387 TRPMSetFaultAddress(pVCpu, GCPhys);
3388
3389 /* Handle the pagefault trap for the nested shadow table. */
3390 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
3391 Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
3392 if (rc == VINF_SUCCESS)
3393 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3394 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification , errCode));
3395 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
3396
3397 TRPMResetTrap(pVCpu);
3398 goto ResumeExecution;
3399 }
3400
3401#ifdef VBOX_STRICT
3402 if (rc != VINF_EM_RAW_EMULATE_INSTR)
3403 LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", rc));
3404#endif
3405 /* Need to go back to the recompiler to emulate the instruction. */
3406 TRPMResetTrap(pVCpu);
3407 break;
3408 }
3409
3410 case VMX_EXIT_EPT_MISCONFIG:
3411 {
3412 RTGCPHYS GCPhys;
3413
3414 Assert(pVM->hwaccm.s.fNestedPaging);
3415
3416 rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3417 AssertRC(rc);
3418
3419 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
3420 break;
3421 }
3422
3423 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
3424 /* Clear VM-exit on IF=1 change. */
3425 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
3426 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
3427 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3428 AssertRC(rc);
3429 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
3430 goto ResumeExecution; /* we check for pending guest interrupts there */
3431
3432 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
3433 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
3434 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
3435 /* Skip instruction and continue directly. */
3436 pCtx->rip += cbInstr;
3437 /* Continue execution. */
3438 goto ResumeExecution;
3439
3440 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
3441 {
3442 Log2(("VMX: Cpuid %x\n", pCtx->eax));
3443 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
3444 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3445 if (rc == VINF_SUCCESS)
3446 {
3447 /* Update EIP and continue execution. */
3448 Assert(cbInstr == 2);
3449 pCtx->rip += cbInstr;
3450 goto ResumeExecution;
3451 }
3452 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", rc));
3453 rc = VINF_EM_RAW_EMULATE_INSTR;
3454 break;
3455 }
3456
3457 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
3458 {
3459 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
3460 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
3461 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3462 if (rc == VINF_SUCCESS)
3463 {
3464 /* Update EIP and continue execution. */
3465 Assert(cbInstr == 2);
3466 pCtx->rip += cbInstr;
3467 goto ResumeExecution;
3468 }
3469 rc = VINF_EM_RAW_EMULATE_INSTR;
3470 break;
3471 }
3472
3473 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
3474 {
3475 Log2(("VMX: Rdtsc\n"));
3476 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
3477 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3478 if (rc == VINF_SUCCESS)
3479 {
3480 /* Update EIP and continue execution. */
3481 Assert(cbInstr == 2);
3482 pCtx->rip += cbInstr;
3483 goto ResumeExecution;
3484 }
3485 rc = VINF_EM_RAW_EMULATE_INSTR;
3486 break;
3487 }
3488
3489 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVLPG. */
3490 {
3491 Log2(("VMX: invlpg\n"));
3492 Assert(!pVM->hwaccm.s.fNestedPaging);
3493
3494 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
3495 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
3496 if (rc == VINF_SUCCESS)
3497 {
3498 /* Update EIP and continue execution. */
3499 pCtx->rip += cbInstr;
3500 goto ResumeExecution;
3501 }
3502 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, rc));
3503 break;
3504 }
3505
3506 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
3507 {
3508 Log2(("VMX: monitor\n"));
3509
3510 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
3511 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3512 if (rc == VINF_SUCCESS)
3513 {
3514 /* Update EIP and continue execution. */
3515 pCtx->rip += cbInstr;
3516 goto ResumeExecution;
3517 }
3518 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", rc));
3519 break;
3520 }
3521
3522 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
3523 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
3524 if ( pVM->hwaccm.s.fTPRPatchingActive
3525 && pCtx->ecx == MSR_K8_LSTAR)
3526 {
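/* With TPR patching active, the (32-bit) guest writes its TPR value to the otherwise unused
 * MSR_K8_LSTAR; forward that value to the virtual APIC instead of treating it as a real MSR write.
 */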
3527 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3528 if ((pCtx->eax & 0xff) != u8LastTPR)
3529 {
3530 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
3531
3532 /* Our patch code uses LSTAR for TPR caching. */
3533 rc = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
3534 AssertRC(rc);
3535 }
3536
3537 /* Skip the instruction and continue. */
3538 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
3539
3540 /* Only resume if successful. */
3541 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
3542 goto ResumeExecution;
3543 }
3544 /* no break */
3545 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
3546 {
3547 uint32_t cbSize;
3548
3549 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
3550
3551 /* Note: the Intel manual claims there's a REX version of RDMSR that's slightly different, so we play safe by completely disassembling the instruction. */
3552 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
3553 rc = EMInterpretInstruction(pVM, pVCpu, CPUMCTX2CORE(pCtx), 0, &cbSize);
3554 if (rc == VINF_SUCCESS)
3555 {
3556 /* EIP has been updated already. */
3557
3558 /* Only resume if successful. */
3559 goto ResumeExecution;
3560 }
3561 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", rc));
3562 break;
3563 }
3564
3565 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
3566 {
3567 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3568
3569 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
3570 {
3571 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
3572 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
3573 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3574 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3575 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
3576 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
3577
3578 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
3579 {
3580 case 0:
3581 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
3582 break;
3583 case 2:
3584 break;
3585 case 3:
3586 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
3587 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
3588 break;
3589 case 4:
3590 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
3591 break;
3592 case 8:
3593 /* CR8 contains the APIC TPR */
3594 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3595 break;
3596
3597 default:
3598 AssertFailed();
3599 break;
3600 }
3601 break;
3602
3603 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
3604 Log2(("VMX: mov x, crx\n"));
3605 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3606
3607 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
3608
3609 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
3610 Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3611
3612 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3613 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
3614 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
3615 break;
3616
3617 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
3618 Log2(("VMX: clts\n"));
3619 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
3620 rc = EMInterpretCLTS(pVM, pVCpu);
3621 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3622 break;
3623
3624 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
3625 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
3626 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
3627 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
3628 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3629 break;
3630 }
3631
3632 /* Update EIP if no error occurred. */
3633 if (RT_SUCCESS(rc))
3634 pCtx->rip += cbInstr;
3635
3636 if (rc == VINF_SUCCESS)
3637 {
3638 /* Only resume if successful. */
3639 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3640 goto ResumeExecution;
3641 }
3642 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
3643 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3644 break;
3645 }
3646
3647 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
3648 {
3649 if ( !DBGFIsStepping(pVCpu)
3650 && !CPUMIsHyperDebugStateActive(pVCpu))
3651 {
3652 /* Disable drx move intercepts. */
3653 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3654 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3655 AssertRC(rc);
3656
3657 /* Save the host and load the guest debug state. */
3658 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
3659 AssertRC(rc);
3660
3661#ifdef LOG_ENABLED
3662 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3663 Log(("VMX_EXIT_DRX_MOVE: write DR%d genreg %d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
3664 else
3665 Log(("VMX_EXIT_DRX_MOVE: read DR%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification)));
3666#endif
3667
3668#ifdef VBOX_WITH_STATISTICS
3669 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
3670 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3671 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3672 else
3673 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3674#endif
3675
3676 goto ResumeExecution;
3677 }
3678
3679 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
3680 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3681 {
3682 Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
3683 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3684 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3685 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
3686 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
3687 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
3688 Log2(("DR7=%08x\n", pCtx->dr[7]));
3689 }
3690 else
3691 {
3692 Log2(("VMX: mov x, drx\n"));
3693 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3694 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3695 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
3696 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
3697 }
3698 /* Update EIP if no error occurred. */
3699 if (RT_SUCCESS(rc))
3700 pCtx->rip += cbInstr;
3701
3702 if (rc == VINF_SUCCESS)
3703 {
3704 /* Only resume if successful. */
3705 goto ResumeExecution;
3706 }
3707 Assert(rc == VERR_EM_INTERPRETER);
3708 break;
3709 }
3710
3711 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
3712 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
3713 {
3714 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3715 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
3716 uint32_t uPort;
3717 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
3718
3719 /** @todo necessary to make the distinction? */
3720 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
3721 {
3722 uPort = pCtx->edx & 0xffff;
3723 }
3724 else
3725 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
3726
3727 /* paranoia */
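/* The exit qualification encodes the access width as size-1 bytes, so only 0 (byte),
 * 1 (word) and 3 (dword) are valid here.
 */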
3728 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4))
3729 {
3730 rc = fIOWrite ? VINF_IOM_HC_IOPORT_WRITE : VINF_IOM_HC_IOPORT_READ;
3731 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3732 break;
3733 }
3734
3735 uint32_t cbSize = g_aIOSize[uIOWidth];
3736
3737 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
3738 {
3739 /* ins/outs */
3740 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3741
3742 /* Disassemble manually to deal with segment prefixes. */
3743 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
3744 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
3745 rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL);
3746 if (rc == VINF_SUCCESS)
3747 {
3748 if (fIOWrite)
3749 {
3750 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
3751 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
3752 rc = VBOXSTRICTRC_TODO(IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, cbSize));
3753 }
3754 else
3755 {
3756 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
3757 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
3758 rc = VBOXSTRICTRC_TODO(IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, cbSize));
3759 }
3760 }
3761 else
3762 rc = VINF_EM_RAW_EMULATE_INSTR;
3763 }
3764 else
3765 {
3766 /* normal in/out */
3767 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
3768
3769 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
3770
3771 if (fIOWrite)
3772 {
3773 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
3774 rc = VBOXSTRICTRC_TODO(IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize));
3775 if (rc == VINF_IOM_HC_IOPORT_WRITE)
3776 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
3777 }
3778 else
3779 {
3780 uint32_t u32Val = 0;
3781
3782 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
3783 rc = VBOXSTRICTRC_TODO(IOMIOPortRead(pVM, uPort, &u32Val, cbSize));
3784 if (IOM_SUCCESS(rc))
3785 {
3786 /* Write back to the EAX register. */
3787 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
3788 }
3789 else
3790 if (rc == VINF_IOM_HC_IOPORT_READ)
3791 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
3792 }
3793 }
3794 /*
3795 * Handle the I/O return codes.
3796 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
3797 */
3798 if (IOM_SUCCESS(rc))
3799 {
3800 /* Update EIP and continue execution. */
3801 pCtx->rip += cbInstr;
3802 if (RT_LIKELY(rc == VINF_SUCCESS))
3803 {
3804 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
3805 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
3806 {
3807 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
3808 for (unsigned i=0;i<4;i++)
3809 {
3810 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
3811
3812 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
3813 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
3814 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
3815 {
3816 uint64_t uDR6;
3817
3818 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3819
3820 uDR6 = ASMGetDR6();
3821
3822 /* Clear all breakpoint status flags and set the one we just hit. */
3823 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
3824 uDR6 |= (uint64_t)RT_BIT(i);
3825
3826 /* Note: AMD64 Architecture Programmer's Manual 13.1:
3827 * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared by software after
3828 * the contents have been read.
3829 */
3830 ASMSetDR6(uDR6);
3831
3832 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
3833 pCtx->dr[7] &= ~X86_DR7_GD;
3834
3835 /* Paranoia. */
3836 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3837 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3838 pCtx->dr[7] |= 0x400; /* must be one */
3839
3840 /* Resync DR7 */
3841 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3842 AssertRC(rc);
3843
3844 /* Construct inject info. */
3845 intInfo = X86_XCPT_DB;
3846 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3847 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3848
3849 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
3850 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
3851 AssertRC(rc);
3852
3853 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3854 goto ResumeExecution;
3855 }
3856 }
3857 }
3858
3859 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3860 goto ResumeExecution;
3861 }
3862 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3863 break;
3864 }
3865
3866#ifdef VBOX_STRICT
3867 if (rc == VINF_IOM_HC_IOPORT_READ)
3868 Assert(!fIOWrite);
3869 else if (rc == VINF_IOM_HC_IOPORT_WRITE)
3870 Assert(fIOWrite);
3871 else
3872 AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc));
3873#endif
3874 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3875 break;
3876 }
3877
3878 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
3879 LogFlow(("VMX_EXIT_TPR\n"));
3880 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
3881 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3882 goto ResumeExecution;
3883
3884 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */
3885 {
3886 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
3887 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
3888
3889 switch(uAccessType)
3890 {
3891 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
3892 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
3893 {
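/* Reconstruct the guest-physical address within the APIC page and hand the access to the
 * MMIO handler so the virtual APIC sees it.
 */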
3894 RTGCPHYS GCPhys;
3895 PDMApicGetBase(pVM, &GCPhys);
3896 GCPhys &= PAGE_BASE_GC_MASK;
3897 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
3898
3899 LogFlow(("Apic access at %RGp\n", GCPhys));
3900 rc = VBOXSTRICTRC_TODO(IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, CPUMCTX2CORE(pCtx), GCPhys));
3901 if (rc == VINF_SUCCESS)
3902 {
3903 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3904 goto ResumeExecution; /* rip already updated */
3905 }
3906 break;
3907 }
3908
3909 default:
3910 rc = VINF_EM_RAW_EMULATE_INSTR;
3911 break;
3912 }
3913 break;
3914 }
3915
3916 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
3917 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3918 goto ResumeExecution;
3919
3920 default:
3921 /* The rest is handled after syncing the entire CPU state. */
3922 break;
3923 }
3924
3925 /* Note: the guest state isn't entirely synced back at this stage. */
3926
3927 /* Investigate why there was a VM-exit. (part 2) */
3928 switch (exitReason)
3929 {
3930 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3931 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3932 case VMX_EXIT_EPT_VIOLATION:
3933 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
3934 /* Already handled above. */
3935 break;
3936
3937 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
3938 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
3939 break;
3940
3941 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
3942 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
3943 rc = VINF_EM_RAW_INTERRUPT;
3944 AssertFailed(); /* Can't happen. Yet. */
3945 break;
3946
3947 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
3948 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
3949 rc = VINF_EM_RAW_INTERRUPT;
3950 AssertFailed(); /* Can't happen afaik. */
3951 break;
3952
3953 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
3954 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
3955 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
3956 && pVCpu->hwaccm.s.Event.fPending)
3957 {
3958 /* Caused by an injected interrupt. */
3959 pVCpu->hwaccm.s.Event.fPending = false;
3960
3961 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo)));
3962 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo));
3963 rc = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT);
3964 AssertRC(rc);
3965 }
3966 /* else Exceptions and software interrupts can just be restarted. */
3967 rc = VERR_EM_INTERPRETER;
3968 break;
3969
3970 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
3971 /* Check if external interrupts are pending; if so, don't halt but resume guest execution. */
3972 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3973 pCtx->rip++; /* skip hlt */
3974 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
3975 goto ResumeExecution;
3976
3977 rc = VINF_EM_HALT;
3978 break;
3979
3980 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
3981 Log2(("VMX: mwait\n"));
3982 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
3983 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3984 if ( rc == VINF_EM_HALT
3985 || rc == VINF_SUCCESS)
3986 {
3987 /* Update EIP and continue execution. */
3988 pCtx->rip += cbInstr;
3989
3990 /* Check if external interrupts are pending; if so, don't halt but resume guest execution. */
3991 if ( rc == VINF_SUCCESS
3992 || ( rc == VINF_EM_HALT
3993 && EMShouldContinueAfterHalt(pVCpu, pCtx))
3994 )
3995 goto ResumeExecution;
3996 }
3997 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", rc));
3998 break;
3999
4000 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4001 AssertFailed(); /* can't happen. */
4002 rc = VERR_EM_INTERPRETER;
4003 break;
4004
4005 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4006 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4007 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4008 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4009 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4010 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4011 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4012 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4013 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4014 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4015 /** @todo inject #UD immediately */
4016 rc = VERR_EM_INTERPRETER;
4017 break;
4018
4019 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4020 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4021 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVLPG. */
4022 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4023 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4024 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4025 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4026 /* already handled above */
4027 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4028 || rc == VINF_EM_RAW_INTERRUPT
4029 || rc == VERR_EM_INTERPRETER
4030 || rc == VINF_EM_RAW_EMULATE_INSTR
4031 || rc == VINF_PGM_SYNC_CR3
4032 || rc == VINF_IOM_HC_IOPORT_READ
4033 || rc == VINF_IOM_HC_IOPORT_WRITE
4034 || rc == VINF_EM_RAW_GUEST_TRAP
4035 || rc == VINF_TRPM_XCPT_DISPATCHED
4036 || rc == VINF_EM_RESCHEDULE_REM,
4037 ("rc = %d\n", rc));
4038 break;
4039
4040 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4041 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */
4042 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4043 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4044 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4045 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4046 /* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
4047 rc = VERR_EM_INTERPRETER;
4048 break;
4049
4050 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4051 Assert(rc == VINF_EM_RAW_INTERRUPT);
4052 break;
4053
4054 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4055 {
4056#ifdef VBOX_STRICT
4057 RTCCUINTREG val2 = 0;
4058
4059 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4060
4061 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2);
4062 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4063
4064 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2);
4065 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4066
4067 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2);
4068 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4069
4070 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2);
4071 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4072
4073 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4074 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4075
4076 VMX_LOG_SELREG(CS, "CS", val2);
4077 VMX_LOG_SELREG(DS, "DS", val2);
4078 VMX_LOG_SELREG(ES, "ES", val2);
4079 VMX_LOG_SELREG(FS, "FS", val2);
4080 VMX_LOG_SELREG(GS, "GS", val2);
4081 VMX_LOG_SELREG(SS, "SS", val2);
4082 VMX_LOG_SELREG(TR, "TR", val2);
4083 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4084
4085 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2);
4086 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4087 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2);
4088 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4089#endif /* VBOX_STRICT */
4090 rc = VERR_VMX_INVALID_GUEST_STATE;
4091 break;
4092 }
4093
4094 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4095 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4096 default:
4097 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4098 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4099 break;
4100
4101 }
4102end:
4103
4104 /* Signal changes for the recompiler. */
4105 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
4106
4107 /* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
4108 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4109 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4110 {
4111 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
4112 /* On the next entry we'll only sync the host context. */
4113 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
4114 }
4115 else
4116 {
4117 /* On the next entry we'll sync everything. */
4118 /** @todo we can do better than this */
4119 /* Not in the VINF_PGM_CHANGE_MODE though! */
4120 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
4121 }
4122
4123 /* translate into a less severe return code */
4124 if (rc == VERR_EM_INTERPRETER)
4125 rc = VINF_EM_RAW_EMULATE_INSTR;
4126 else
4127 /* Try to extract more information about what might have gone wrong here. */
4128 if (rc == VERR_VMX_INVALID_VMCS_PTR)
4129 {
4130 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
4131 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS;
4132 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
4133 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4134 }
4135
4136 /* Just set the correct state here instead of trying to catch every goto above. */
4137 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4138
4139#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4140 /* Restore interrupts if we exited after disabling them. */
4141 if (uOldEFlags != ~(RTCCUINTREG)0)
4142 ASMSetFlags(uOldEFlags);
4143#endif
4144
4145 STAM_STATS({
4146 if (fStatExit2Started) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y);
4147 else if (fStatEntryStarted) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
4148 });
4149 Log2(("X"));
4150 return rc;
4151}
4152
4153
4154/**
4155 * Enters the VT-x session
4156 *
4157 * @returns VBox status code.
4158 * @param pVM The VM to operate on.
4159 * @param pVCpu The VMCPU to operate on.
4160 * @param pCpu CPU info struct
4161 */
4162VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHWACCM_CPUINFO pCpu)
4163{
4164 Assert(pVM->hwaccm.s.vmx.fSupported);
4165
4166 unsigned cr4 = ASMGetCR4();
4167 if (!(cr4 & X86_CR4_VMXE))
4168 {
4169 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4170 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4171 }
4172
4173 /* Activate the VM Control Structure. */
4174 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
4175 if (RT_FAILURE(rc))
4176 return rc;
4177
4178 pVCpu->hwaccm.s.fResumeVM = false;
4179 return VINF_SUCCESS;
4180}
4181
4182
4183/**
4184 * Leaves the VT-x session
4185 *
4186 * @returns VBox status code.
4187 * @param pVM The VM to operate on.
4188 * @param pVCpu The VMCPU to operate on.
4189 * @param pCtx CPU context
4190 */
4191VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4192{
4193 Assert(pVM->hwaccm.s.vmx.fSupported);
4194
4195#ifdef DEBUG
4196 if (CPUMIsHyperDebugStateActive(pVCpu))
4197 {
4198 CPUMR0LoadHostDebugState(pVM, pVCpu);
4199 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4200 }
4201 else
4202#endif
4203 /* Save the guest debug state if necessary. */
4204 if (CPUMIsGuestDebugStateActive(pVCpu))
4205 {
4206 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4207
4208 /* Enable drx move intercepts again. */
4209 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4210 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4211 AssertRC(rc);
4212
4213 /* Resync the debug registers the next time. */
4214 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4215 }
4216 else
4217 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4218
4219 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
4220 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
4221 AssertRC(rc);
4222
4223 return VINF_SUCCESS;
4224}
4225
4226/**
4227 * Flush the TLB (EPT).
4228 *
4229 * @remarks Requires nested paging (EPT) to be active.
4230 * @param pVM The VM to operate on.
4231 * @param pVCpu The VM CPU to operate on.
4232 * @param enmFlush Type of flush
4233 * @param GCPhys Physical address of the page to flush
4234 */
4235static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys)
4236{
4237 uint64_t descriptor[2];
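    /* INVEPT takes a 128-bit descriptor; the code puts the EPT pointer in the first quadword
     * and the guest-physical address in the second for page-level flush requests.
     */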
4238
4239 LogFlow(("vmxR0FlushEPT %d %RGv\n", enmFlush, GCPhys));
4240 Assert(pVM->hwaccm.s.fNestedPaging);
4241 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
4242 descriptor[1] = GCPhys;
4243 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
4244 AssertRC(rc);
4245}
4246
4247#ifdef HWACCM_VTX_WITH_VPID
4248/**
4249 * Flush the TLB (VPID).
4250 *
4251 * @remarks Requires VPID to be enabled.
4252 * @param pVM The VM to operate on.
4253 * @param pVCpu The VM CPU to operate on.
4254 * @param enmFlush Type of flush
4255 * @param GCPtr Virtual address of the page to flush
4256 */
4257static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr)
4258{
4259#if HC_ARCH_BITS == 32
4260 /* If we get a flush in 64-bit guest mode, then force a full TLB flush. INVVPID probably takes only 32-bit addresses. (@todo) */
4261 if ( CPUMIsGuestInLongMode(pVCpu)
4262 && !VMX_IS_64BIT_HOST_MODE())
4263 {
4264 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4265 }
4266 else
4267#endif
4268 {
4269 uint64_t descriptor[2];
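        /* The INVVPID descriptor: the VPID in bits 15:0 of the first quadword (remaining bits zero)
         * and the linear address to flush in the second.
         */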
4270
4271 Assert(pVM->hwaccm.s.vmx.fVPID);
4272 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
4273 descriptor[1] = GCPtr;
4274 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]);
4275 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.uCurrentASID, GCPtr, rc));
4276 }
4277}
4278#endif /* HWACCM_VTX_WITH_VPID */
4279
4280/**
4281 * Invalidates a guest page
4282 *
4283 * @returns VBox status code.
4284 * @param pVM The VM to operate on.
4285 * @param pVCpu The VM CPU to operate on.
4286 * @param GCVirt Page to invalidate
4287 */
4288VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
4289{
4290 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4291
4292 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
4293
4294 /* Only relevant if we want to use VPID.
4295 * In the nested paging case we still see such calls, but
4296 * can safely ignore them. (e.g. after cr3 updates)
4297 */
4298#ifdef HWACCM_VTX_WITH_VPID
4299 /* Skip it if a TLB flush is already pending. */
4300 if ( !fFlushPending
4301 && pVM->hwaccm.s.vmx.fVPID)
4302 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCVirt);
4303#endif /* HWACCM_VTX_WITH_VPID */
4304
4305 return VINF_SUCCESS;
4306}
4307
4308/**
4309 * Invalidates a guest page by physical address
4310 *
4311 * NOTE: Assumes the current instruction references this physical page through a virtual address!!
4312 *
4313 * @returns VBox status code.
4314 * @param pVM The VM to operate on.
4315 * @param pVCpu The VM CPU to operate on.
4316 * @param GCPhys Page to invalidate
4317 */
4318VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
4319{
4320 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4321
4322 Assert(pVM->hwaccm.s.fNestedPaging);
4323
4324 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
4325
4326 /* Skip it if a TLB flush is already pending. */
4327 if (!fFlushPending)
4328 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCPhys);
4329
4330 return VINF_SUCCESS;
4331}
4332
4333/**
4334 * Report world switch error and dump some useful debug info
4335 *
4336 * @param pVM The VM to operate on.
4337 * @param pVCpu The VMCPU to operate on.
4338 * @param rc Return code
4339 * @param pCtx Current CPU context (not updated)
4340 */
4341static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx)
4342{
4343 switch (rc)
4344 {
4345 case VERR_VMX_INVALID_VMXON_PTR:
4346 AssertFailed();
4347 break;
4348
4349 case VERR_VMX_UNABLE_TO_START_VM:
4350 case VERR_VMX_UNABLE_TO_RESUME_VM:
4351 {
4352 int rc2;
4353 RTCCUINTREG exitReason, instrError;
4354
4355 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
4356 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
4357 AssertRC(rc2);
4358 if (rc2 == VINF_SUCCESS)
4359 {
4360 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
4361 Log(("Current stack %08x\n", &rc2));
4362
4363 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
4364 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
4365
4366#ifdef VBOX_STRICT
4367 RTGDTR gdtr;
4368 PCX86DESCHC pDesc;
4369 RTCCUINTREG val;
4370
4371 ASMGetGDTR(&gdtr);
4372
4373 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
4374 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
4375 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
4376 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
4377 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
4378 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
4379 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
4380 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
4381 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
4382 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
4383
4384 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
4385 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
4386
4387 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
4388 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
4389
4390 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
4391 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
4392
4393 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
4394 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
4395
4396 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
4397 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
4398
4399 if (val < gdtr.cbGdt)
4400 {
4401 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4402 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
4403 }
4404
4405 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
4406 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
4407 if (val < gdtr.cbGdt)
4408 {
4409 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4410 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
4411 }
4412
4413 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
4414 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
4415 if (val < gdtr.cbGdt)
4416 {
4417 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4418 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
4419 }
4420
4421 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
4422 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
4423 if (val < gdtr.cbGdt)
4424 {
4425 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4426 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
4427 }
4428
4429 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
4430 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
4431 if (val < gdtr.cbGdt)
4432 {
4433 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4434 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
4435 }
4436
4437 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
4438 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
4439 if (val < gdtr.cbGdt)
4440 {
4441 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4442 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
4443 }
4444
4445 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
4446 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
4447 if (val < gdtr.cbGdt)
4448 {
4449 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4450 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
4451 }
4452
4453 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
4454 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
4455
4456 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
4457 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
4458 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
4459 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
4460
4461 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
4462 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
4463
4464 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
4465 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
4466
4467 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
4468 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
4469
4470 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
4471 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
4472 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
4473 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
4474
4475# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4476 if (VMX_IS_64BIT_HOST_MODE())
4477 {
4478 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
4479 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
4480 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4481 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4482 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4483 }
4484# endif
4485#endif /* VBOX_STRICT */
4486 }
4487 break;
4488 }
4489
4490 default:
4491 /* impossible */
4492 AssertMsgFailed(("%Rrc (%#x)\n", rc, rc));
4493 break;
4494 }
4495}
4496
4497#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4498/**
4499 * Prepares for and executes VMLAUNCH/VMRESUME (64-bit guest mode).
4500 *
4501 * @returns VBox status code.
4502 * @param fResume Whether to use VMRESUME (true) or VMLAUNCH (false).
4503 * @param pCtx Guest context
4504 * @param pCache VMCS cache
4505 * @param pVM The VM to operate on.
4506 * @param pVCpu The VMCPU to operate on.
4507 */
4508DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
4509{
4510 uint32_t aParam[6];
4511 PHWACCM_CPUINFO pCpu;
4512 RTHCPHYS pPageCpuPhys;
4513 int rc;
4514
4515 pCpu = HWACCMR0GetCurrentCpu();
4516 pPageCpuPhys = RTR0MemObjGetPagePhysAddr(pCpu->pMemObj, 0);
4517
4518#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4519 pCache->uPos = 1;
4520 pCache->interPD = PGMGetInterPaeCR3(pVM);
4521 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
4522#endif
4523
4524#ifdef DEBUG
4525 pCache->TestIn.pPageCpuPhys = 0;
4526 pCache->TestIn.pVMCSPhys = 0;
4527 pCache->TestIn.pCache = 0;
4528 pCache->TestOut.pVMCSPhys = 0;
4529 pCache->TestOut.pCache = 0;
4530 pCache->TestOut.pCtx = 0;
4531 pCache->TestOut.eflags = 0;
4532#endif
4533
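    /* Parameters for the 64-bit switcher; the VMXON and VMCS physical addresses are passed as
     * two 32-bit halves each because the switcher is entered from 32-bit code.
     */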
4534 aParam[0] = (uint32_t)(pPageCpuPhys); /* Param 1: VMXON physical address - Lo. */
4535 aParam[1] = (uint32_t)(pPageCpuPhys >> 32); /* Param 1: VMXON physical address - Hi. */
4536 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys); /* Param 2: VMCS physical address - Lo. */
4537 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys >> 32); /* Param 2: VMCS physical address - Hi. */
4538 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
4539 aParam[5] = 0;
4540
4541#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4542 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
4543 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
4544#endif
4545 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
4546
4547#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4548 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
4549 Assert(pCtx->dr[4] == 10);
4550 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
4551#endif
4552
4553#ifdef DEBUG
4554 AssertMsg(pCache->TestIn.pPageCpuPhys == pPageCpuPhys, ("%RHp vs %RHp\n", pCache->TestIn.pPageCpuPhys, pPageCpuPhys));
4555 AssertMsg(pCache->TestIn.pVMCSPhys == pVCpu->hwaccm.s.vmx.pVMCSPhys, ("%RHp vs %RHp\n", pCache->TestIn.pVMCSPhys, pVCpu->hwaccm.s.vmx.pVMCSPhys));
4556 AssertMsg(pCache->TestIn.pVMCSPhys == pCache->TestOut.pVMCSPhys, ("%RHp vs %RHp\n", pCache->TestIn.pVMCSPhys, pCache->TestOut.pVMCSPhys));
4557 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, pCache->TestOut.pCache));
4558 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache), ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
4559 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, pCache->TestOut.pCtx));
4560 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
4561#endif
4562 return rc;
4563}
4564
4565/**
4566 * Executes the specified handler in 64-bit mode.
4567 *
4568 * @returns VBox status code.
4569 * @param pVM The VM to operate on.
4570 * @param pVCpu The VMCPU to operate on.
4571 * @param pCtx Guest context
4572 * @param pfnHandler RC handler
4573 * @param cbParam Number of parameters.
4574 * @param paParam Array of 32-bit parameters.
4575 */
VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
{
    int             rc, rc2;
    PHWACCM_CPUINFO pCpu;
    RTHCPHYS        pPageCpuPhys;
    RTHCUINTREG     uOldEFlags;

    /* @todo This code is not guest SMP safe (hyper stack and switchers) */
    AssertReturn(pVM->cCpus == 1, VERR_TOO_MANY_CPUS);
    AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_INTERNAL_ERROR);
    Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
    Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));

#ifdef VBOX_STRICT
    for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries;i++)
        Assert(vmxR0IsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));

    for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries;i++)
        Assert(vmxR0IsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
#endif

    /* Disable interrupts. */
    uOldEFlags = ASMIntDisableFlags();
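    /* Interrupts stay disabled for the entire sequence below: the code leaves VMX
       root mode and runs on the hypervisor stack, and an interrupt taken in that
       window would run with the CPU in an inconsistent state. EFlags are only
       restored (from uOldEFlags) once the VMCS has been re-activated at the end
       of this function. */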

    pCpu = HWACCMR0GetCurrentCpu();
    pPageCpuPhys = RTR0MemObjGetPagePhysAddr(pCpu->pMemObj, 0);

    /* Clear the VM control structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
    VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);

    /* Leave VMX Root Mode. */
    VMXDisable();

    ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
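    /* At this point VMXOFF has been executed (VMXDisable above) and CR4.VMXE is
       clear, so the CPU is completely outside VMX operation while the world
       switcher below runs. */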

    CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVM));
    CPUMSetHyperEIP(pVCpu, pfnHandler);
    for (int i=(int)cbParam-1;i>=0;i--)
        CPUMPushHyper(pVCpu, paParam[i]);
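    /* The parameters are pushed in reverse order so that paParam[0] ends up on top
       of the hypervisor stack and is the first one seen by the handler. */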

    STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
    /* Call switcher. */
    rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM);
    STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);

    /* Make sure the VMX instructions don't cause #UD faults. */
    ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);

    /* Enter VMX Root Mode */
    rc2 = VMXEnable(pPageCpuPhys);
    if (RT_FAILURE(rc2))
    {
        if (pVM)
            VMXR0CheckError(pVM, pVCpu, rc2);
        ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
        ASMSetFlags(uOldEFlags);
        return VERR_VMX_VMXON_FAILED;
    }

    rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
    AssertRC(rc2);
    Assert(!(ASMGetFlags() & X86_EFL_IF));
    ASMSetFlags(uOldEFlags);
    return rc;
}

#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */


#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
/**
 * Executes VMWRITE (with special handling for 64-bit fields on 32-bit hosts).
 *
 * @returns VBox status code.
 * @param   pVCpu       The VMCPU to operate on.
 * @param   idxField    VMCS index.
 * @param   u64Val      16, 32 or 64-bit value to write.
 */
VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
{
    int rc;

    switch (idxField)
    {
    case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
    case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
    case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
    case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
    case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
    case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
    case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
    case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
    case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL:
    case VMX_VMCS_GUEST_LINK_PTR_FULL:
    case VMX_VMCS_GUEST_PDPTR0_FULL:
    case VMX_VMCS_GUEST_PDPTR1_FULL:
    case VMX_VMCS_GUEST_PDPTR2_FULL:
    case VMX_VMCS_GUEST_PDPTR3_FULL:
    case VMX_VMCS_GUEST_DEBUGCTL_FULL:
    case VMX_VMCS_GUEST_EFER_FULL:
    case VMX_VMCS_CTRL_EPTP_FULL:
        /* These fields consist of two 32-bit parts, which are both writable in 32-bit mode. */
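        /* (Per the VMX encoding rules, the "high" access encoding of a 64-bit field
           is the "full" encoding + 1, hence the write to idxField + 1 below for the
           upper 32 bits.) */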
        rc  = VMXWriteVMCS32(idxField, u64Val);
        rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
        AssertRC(rc);
        return rc;

    case VMX_VMCS64_GUEST_LDTR_BASE:
    case VMX_VMCS64_GUEST_TR_BASE:
    case VMX_VMCS64_GUEST_GDTR_BASE:
    case VMX_VMCS64_GUEST_IDTR_BASE:
    case VMX_VMCS64_GUEST_SYSENTER_EIP:
    case VMX_VMCS64_GUEST_SYSENTER_ESP:
    case VMX_VMCS64_GUEST_CR0:
    case VMX_VMCS64_GUEST_CR4:
    case VMX_VMCS64_GUEST_CR3:
    case VMX_VMCS64_GUEST_DR7:
    case VMX_VMCS64_GUEST_RIP:
    case VMX_VMCS64_GUEST_RSP:
    case VMX_VMCS64_GUEST_CS_BASE:
    case VMX_VMCS64_GUEST_DS_BASE:
    case VMX_VMCS64_GUEST_ES_BASE:
    case VMX_VMCS64_GUEST_FS_BASE:
    case VMX_VMCS64_GUEST_GS_BASE:
    case VMX_VMCS64_GUEST_SS_BASE:
        /* Queue a 64-bit value, as we can't set it in 32-bit host mode. */
        if (u64Val >> 32ULL)
            rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
        else
            rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);

        return rc;

    default:
        AssertMsgFailed(("Unexpected field %x\n", idxField));
        return VERR_INVALID_PARAMETER;
    }
}
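
/* Illustrative usage (hypothetical call site, not part of the original source):
 * a 64-bit guest field is handed to VMXWriteVMCS64Ex, which writes it directly
 * when the upper 32 bits are zero and otherwise defers it to the VMCS write cache
 * (the cache itself is handed to the 64-bit switcher code, see aParam[4] in
 * VMXR0SwitcherStartVM64 above).
 *
 *     uint64_t u64FSBase = UINT64_C(0xffffff8000000000);   // example value only
 *     int      rc        = VMXWriteVMCS64Ex(pVCpu, VMX_VMCS64_GUEST_FS_BASE, u64FSBase);
 *     AssertRC(rc);
 */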

/**
 * Caches VMCS writes, both for performance reasons (Darwin) and for running 64-bit guests on 32-bit hosts.
 *
 * @returns VBox status code.
 * @param   pVCpu       The VMCPU to operate on.
 * @param   idxField    VMCS field.
 * @param   u64Val      Value to write.
 */
VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
{
    PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;

    AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);

    /* Make sure there are no duplicates. */
    for (unsigned i=0;i<pCache->Write.cValidEntries;i++)
    {
        if (pCache->Write.aField[i] == idxField)
        {
            pCache->Write.aFieldVal[i] = u64Val;
            return VINF_SUCCESS;
        }
    }

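    /* Not in the cache yet; append a new entry. */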
    pCache->Write.aField[pCache->Write.cValidEntries]    = idxField;
    pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
    pCache->Write.cValidEntries++;
    return VINF_SUCCESS;
}

#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */

#ifdef VBOX_STRICT
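/* Strict-build helper: the set of VMCS fields that may legitimately appear in the
   read cache; see for example the VBOX_STRICT asserts in VMXR0Execute64BitsHandler
   above. */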
static bool vmxR0IsValidReadField(uint32_t idxField)
{
    switch(idxField)
    {
    case VMX_VMCS64_GUEST_RIP:
    case VMX_VMCS64_GUEST_RSP:
    case VMX_VMCS_GUEST_RFLAGS:
    case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
    case VMX_VMCS_CTRL_CR0_READ_SHADOW:
    case VMX_VMCS64_GUEST_CR0:
    case VMX_VMCS_CTRL_CR4_READ_SHADOW:
    case VMX_VMCS64_GUEST_CR4:
    case VMX_VMCS64_GUEST_DR7:
    case VMX_VMCS32_GUEST_SYSENTER_CS:
    case VMX_VMCS64_GUEST_SYSENTER_EIP:
    case VMX_VMCS64_GUEST_SYSENTER_ESP:
    case VMX_VMCS32_GUEST_GDTR_LIMIT:
    case VMX_VMCS64_GUEST_GDTR_BASE:
    case VMX_VMCS32_GUEST_IDTR_LIMIT:
    case VMX_VMCS64_GUEST_IDTR_BASE:
    case VMX_VMCS16_GUEST_FIELD_CS:
    case VMX_VMCS32_GUEST_CS_LIMIT:
    case VMX_VMCS64_GUEST_CS_BASE:
    case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
    case VMX_VMCS16_GUEST_FIELD_DS:
    case VMX_VMCS32_GUEST_DS_LIMIT:
    case VMX_VMCS64_GUEST_DS_BASE:
    case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
    case VMX_VMCS16_GUEST_FIELD_ES:
    case VMX_VMCS32_GUEST_ES_LIMIT:
    case VMX_VMCS64_GUEST_ES_BASE:
    case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
    case VMX_VMCS16_GUEST_FIELD_FS:
    case VMX_VMCS32_GUEST_FS_LIMIT:
    case VMX_VMCS64_GUEST_FS_BASE:
    case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
    case VMX_VMCS16_GUEST_FIELD_GS:
    case VMX_VMCS32_GUEST_GS_LIMIT:
    case VMX_VMCS64_GUEST_GS_BASE:
    case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
    case VMX_VMCS16_GUEST_FIELD_SS:
    case VMX_VMCS32_GUEST_SS_LIMIT:
    case VMX_VMCS64_GUEST_SS_BASE:
    case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
    case VMX_VMCS16_GUEST_FIELD_LDTR:
    case VMX_VMCS32_GUEST_LDTR_LIMIT:
    case VMX_VMCS64_GUEST_LDTR_BASE:
    case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
    case VMX_VMCS16_GUEST_FIELD_TR:
    case VMX_VMCS32_GUEST_TR_LIMIT:
    case VMX_VMCS64_GUEST_TR_BASE:
    case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
    case VMX_VMCS32_RO_EXIT_REASON:
    case VMX_VMCS32_RO_VM_INSTR_ERROR:
    case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
    case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
    case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
    case VMX_VMCS32_RO_EXIT_INSTR_INFO:
    case VMX_VMCS_RO_EXIT_QUALIFICATION:
    case VMX_VMCS32_RO_IDT_INFO:
    case VMX_VMCS32_RO_IDT_ERRCODE:
    case VMX_VMCS64_GUEST_CR3:
    case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
        return true;
    }
    return false;
}

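/* Strict-build helper: the set of VMCS fields that may legitimately be queued in
   the write cache (cf. VMXWriteCachedVMCSEx above). */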
static bool vmxR0IsValidWriteField(uint32_t idxField)
{
    switch(idxField)
    {
    case VMX_VMCS64_GUEST_LDTR_BASE:
    case VMX_VMCS64_GUEST_TR_BASE:
    case VMX_VMCS64_GUEST_GDTR_BASE:
    case VMX_VMCS64_GUEST_IDTR_BASE:
    case VMX_VMCS64_GUEST_SYSENTER_EIP:
    case VMX_VMCS64_GUEST_SYSENTER_ESP:
    case VMX_VMCS64_GUEST_CR0:
    case VMX_VMCS64_GUEST_CR4:
    case VMX_VMCS64_GUEST_CR3:
    case VMX_VMCS64_GUEST_DR7:
    case VMX_VMCS64_GUEST_RIP:
    case VMX_VMCS64_GUEST_RSP:
    case VMX_VMCS64_GUEST_CS_BASE:
    case VMX_VMCS64_GUEST_DS_BASE:
    case VMX_VMCS64_GUEST_ES_BASE:
    case VMX_VMCS64_GUEST_FS_BASE:
    case VMX_VMCS64_GUEST_GS_BASE:
    case VMX_VMCS64_GUEST_SS_BASE:
        return true;
    }
    return false;
}

#endif