VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp @ 27257

Last change on this file since 27257 was 27231, checked in by vboxsync, 15 years ago

Implemented mwait extension for breaking on external interrupt when IF=0; completely untested

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 194.6 KB
1/* $Id: HWVMXR0.cpp 27231 2010-03-09 20:16:59Z vboxsync $ */
2/** @file
3 * HWACCM VMX - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_HWACCM
27#include <VBox/hwaccm.h>
28#include <VBox/pgm.h>
29#include <VBox/dbgf.h>
30#include <VBox/selm.h>
31#include <VBox/iom.h>
32#include <VBox/rem.h>
33#include <VBox/tm.h>
34#include "HWACCMInternal.h"
35#include <VBox/vm.h>
36#include <VBox/x86.h>
37#include <VBox/pdmapi.h>
38#include <VBox/err.h>
39#include <VBox/log.h>
40#include <iprt/asm.h>
41#include <iprt/assert.h>
42#include <iprt/param.h>
43#include <iprt/string.h>
44#include <iprt/time.h>
45#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
46# include <iprt/thread.h>
47#endif
48#include "HWVMXR0.h"
49
50/*******************************************************************************
51* Defined Constants And Macros *
52*******************************************************************************/
53#if defined(RT_ARCH_AMD64)
54# define VMX_IS_64BIT_HOST_MODE() (true)
55#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
56# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
57#else
58# define VMX_IS_64BIT_HOST_MODE() (false)
59#endif
60
61/*******************************************************************************
62* Global Variables *
63*******************************************************************************/
64/* IO operation lookup arrays. */
65static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
66static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
67
68#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
69/** See HWACCMR0A.asm. */
70extern "C" uint32_t g_fVMXIs64bitHost;
71#endif
72
73/*******************************************************************************
74* Local Functions *
75*******************************************************************************/
76static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx);
77static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu);
78static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu);
79static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu);
80static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys);
81static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr);
82static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
83#ifdef VBOX_STRICT
84static bool vmxR0IsValidReadField(uint32_t idxField);
85static bool vmxR0IsValidWriteField(uint32_t idxField);
86#endif
87static void vmxR0SetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
88
89static void VMXR0CheckError(PVM pVM, PVMCPU pVCpu, int rc)
90{
91 if (rc == VERR_VMX_GENERIC)
92 {
93 RTCCUINTREG instrError;
94
95 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
96 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
97 }
98 pVM->hwaccm.s.lLastError = rc;
99}
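/* The VM-instruction error captured above narrows down why a VMX instruction
 * failed; per the Intel SDM, for example, error 7 is "VM entry with invalid
 * control field(s)" and error 8 is "VM entry with invalid host-state
 * field(s)".
 */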
100
101/**
102 * Sets up and activates VT-x on the current CPU
103 *
104 * @returns VBox status code.
105 * @param pCpu CPU info struct
106 * @param pVM The VM to operate on. (can be NULL after a resume!!)
107 * @param pvPageCpu Pointer to the global cpu page
108 * @param pPageCpuPhys Physical address of the global cpu page
109 */
110VMMR0DECL(int) VMXR0EnableCpu(PHWACCM_CPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
111{
112 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
113 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
114
115 if (pVM)
116 {
117 /* Set revision dword at the beginning of the VMXON structure. */
118 *(uint32_t *)pvPageCpu = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
119 }
120
121 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
122 * (which can have very bad consequences!!!)
123 */
124
125 if (ASMGetCR4() & X86_CR4_VMXE)
126 return VERR_VMX_IN_VMX_ROOT_MODE;
127
128 /* Make sure the VMX instructions don't cause #UD faults. */
129 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
130
131 /* Enter VMX Root Mode */
132 int rc = VMXEnable(pPageCpuPhys);
133 if (RT_FAILURE(rc))
134 {
135 if (pVM)
136 VMXR0CheckError(pVM, &pVM->aCpus[0], rc);
137 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
138 return VERR_VMX_VMXON_FAILED;
139 }
140 return VINF_SUCCESS;
141}
142
143/**
144 * Deactivates VT-x on the current CPU
145 *
146 * @returns VBox status code.
147 * @param pCpu CPU info struct
148 * @param pvPageCpu Pointer to the global cpu page
149 * @param pPageCpuPhys Physical address of the global cpu page
150 */
151VMMR0DECL(int) VMXR0DisableCpu(PHWACCM_CPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
152{
153 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
154 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
155
156 /* Leave VMX Root Mode. */
157 VMXDisable();
158
159 /* And clear the X86_CR4_VMXE bit */
160 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
161 return VINF_SUCCESS;
162}
163
164/**
165 * Does Ring-0 per VM VT-x init.
166 *
167 * @returns VBox status code.
168 * @param pVM The VM to operate on.
169 */
170VMMR0DECL(int) VMXR0InitVM(PVM pVM)
171{
172 int rc;
173
174#ifdef LOG_ENABLED
175 SUPR0Printf("VMXR0InitVM %x\n", pVM);
176#endif
177
178 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
179
180 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
181 {
182 /* Allocate one page for the APIC physical page (serves for filtering accesses). */
183 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
184 AssertRC(rc);
185 if (RT_FAILURE(rc))
186 return rc;
187
188 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
189 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
190 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
191 }
192 else
193 {
194 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
195 pVM->hwaccm.s.vmx.pAPIC = 0;
196 pVM->hwaccm.s.vmx.pAPICPhys = 0;
197 }
198
199#ifdef VBOX_WITH_CRASHDUMP_MAGIC
200 {
201 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
202 AssertRC(rc);
203 if (RT_FAILURE(rc))
204 return rc;
205
206 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
207 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
208
209 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
210 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
211 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
212 }
213#endif
214
215 /* Allocate VMCSs for all guest CPUs. */
216 for (VMCPUID i = 0; i < pVM->cCpus; i++)
217 {
218 PVMCPU pVCpu = &pVM->aCpus[i];
219
220 pVCpu->hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
221
222 /* Allocate one page for the VM control structure (VMCS). */
223 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjVMCS, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
224 AssertRC(rc);
225 if (RT_FAILURE(rc))
226 return rc;
227
228 pVCpu->hwaccm.s.vmx.pVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjVMCS);
229 pVCpu->hwaccm.s.vmx.pVMCSPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjVMCS, 0);
230 ASMMemZero32(pVCpu->hwaccm.s.vmx.pVMCS, PAGE_SIZE);
231
232 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
233 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
234
235 /* Allocate one page for the virtual APIC page for TPR caching. */
236 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjVAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
237 AssertRC(rc);
238 if (RT_FAILURE(rc))
239 return rc;
240
241 pVCpu->hwaccm.s.vmx.pVAPIC = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjVAPIC);
242 pVCpu->hwaccm.s.vmx.pVAPICPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjVAPIC, 0);
243 ASMMemZero32(pVCpu->hwaccm.s.vmx.pVAPIC, PAGE_SIZE);
244
245 /* Allocate the MSR bitmap if this feature is supported. */
246 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
247 {
248 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
249 AssertRC(rc);
250 if (RT_FAILURE(rc))
251 return rc;
252
253 pVCpu->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap);
254 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
255 memset(pVCpu->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
256 }
257
258#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
259 /* Allocate one page for the guest MSR load area (for preloading guest MSRs during the world switch). */
260 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
261 AssertRC(rc);
262 if (RT_FAILURE(rc))
263 return rc;
264
265 pVCpu->hwaccm.s.vmx.pGuestMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR);
266 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, 0);
267 memset(pVCpu->hwaccm.s.vmx.pGuestMSR, 0, PAGE_SIZE);
268
269 /* Allocate one page for the host MSR load area (for restoring host MSRs after the world switch back). */
270 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
271 AssertRC(rc);
272 if (RT_FAILURE(rc))
273 return rc;
274
275 pVCpu->hwaccm.s.vmx.pHostMSR = (uint8_t *)RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjHostMSR);
276 pVCpu->hwaccm.s.vmx.pHostMSRPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, 0);
277 memset(pVCpu->hwaccm.s.vmx.pHostMSR, 0, PAGE_SIZE);
278#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
279
280 /* Current guest paging mode. */
281 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
282
283#ifdef LOG_ENABLED
284 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.pVMCSPhys);
285#endif
286 }
287
288 return VINF_SUCCESS;
289}
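/* Every per-VM/per-VCPU page above follows the same four-step pattern:
 * allocate one physically contiguous page, fetch its ring-0 address and its
 * physical address, then zero it. A hypothetical helper (not part of this
 * file) would look roughly like:
 *
 *   static int vmxR0AllocPage(PRTR0MEMOBJ phMemObj, void **ppv, PRTHCPHYS pHCPhys)
 *   {
 *       int rc = RTR0MemObjAllocCont(phMemObj, PAGE_SIZE, true);
 *       if (RT_FAILURE(rc))
 *           return rc;
 *       *ppv     = RTR0MemObjAddress(*phMemObj);
 *       *pHCPhys = RTR0MemObjGetPagePhysAddr(*phMemObj, 0);
 *       ASMMemZero32(*ppv, PAGE_SIZE);
 *       return VINF_SUCCESS;
 *   }
 */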
290
291/**
292 * Does Ring-0 per VM VT-x termination.
293 *
294 * @returns VBox status code.
295 * @param pVM The VM to operate on.
296 */
297VMMR0DECL(int) VMXR0TermVM(PVM pVM)
298{
299 for (VMCPUID i = 0; i < pVM->cCpus; i++)
300 {
301 PVMCPU pVCpu = &pVM->aCpus[i];
302
303 if (pVCpu->hwaccm.s.vmx.pMemObjVMCS != NIL_RTR0MEMOBJ)
304 {
305 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjVMCS, false);
306 pVCpu->hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
307 pVCpu->hwaccm.s.vmx.pVMCS = 0;
308 pVCpu->hwaccm.s.vmx.pVMCSPhys = 0;
309 }
310 if (pVCpu->hwaccm.s.vmx.pMemObjVAPIC != NIL_RTR0MEMOBJ)
311 {
312 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjVAPIC, false);
313 pVCpu->hwaccm.s.vmx.pMemObjVAPIC = NIL_RTR0MEMOBJ;
314 pVCpu->hwaccm.s.vmx.pVAPIC = 0;
315 pVCpu->hwaccm.s.vmx.pVAPICPhys = 0;
316 }
317 if (pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
318 {
319 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap, false);
320 pVCpu->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
321 pVCpu->hwaccm.s.vmx.pMSRBitmap = 0;
322 pVCpu->hwaccm.s.vmx.pMSRBitmapPhys = 0;
323 }
324#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
325 if (pVCpu->hwaccm.s.vmx.pMemObjHostMSR != NIL_RTR0MEMOBJ)
326 {
327 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjHostMSR, false);
328 pVCpu->hwaccm.s.vmx.pMemObjHostMSR = NIL_RTR0MEMOBJ;
329 pVCpu->hwaccm.s.vmx.pHostMSR = 0;
330 pVCpu->hwaccm.s.vmx.pHostMSRPhys = 0;
331 }
332 if (pVCpu->hwaccm.s.vmx.pMemObjGuestMSR != NIL_RTR0MEMOBJ)
333 {
334 RTR0MemObjFree(pVCpu->hwaccm.s.vmx.pMemObjGuestMSR, false);
335 pVCpu->hwaccm.s.vmx.pMemObjGuestMSR = NIL_RTR0MEMOBJ;
336 pVCpu->hwaccm.s.vmx.pGuestMSR = 0;
337 pVCpu->hwaccm.s.vmx.pGuestMSRPhys = 0;
338 }
339#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
340 }
341 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
342 {
343 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
344 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
345 pVM->hwaccm.s.vmx.pAPIC = 0;
346 pVM->hwaccm.s.vmx.pAPICPhys = 0;
347 }
348#ifdef VBOX_WITH_CRASHDUMP_MAGIC
349 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
350 {
351 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
352 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
353 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
354 pVM->hwaccm.s.vmx.pScratch = 0;
355 pVM->hwaccm.s.vmx.pScratchPhys = 0;
356 }
357#endif
358 return VINF_SUCCESS;
359}
360
361/**
362 * Sets up VT-x for the specified VM
363 *
364 * @returns VBox status code.
365 * @param pVM The VM to operate on.
366 */
367VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
368{
369 int rc = VINF_SUCCESS;
370 uint32_t val;
371
372 AssertReturn(pVM, VERR_INVALID_PARAMETER);
373
374 for (VMCPUID i = 0; i < pVM->cCpus; i++)
375 {
376 PVMCPU pVCpu = &pVM->aCpus[i];
377
378 Assert(pVCpu->hwaccm.s.vmx.pVMCS);
379
380 /* Set revision dword at the beginning of the VMCS structure. */
381 *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
382
383 /* Clear VM Control Structure. */
384 Log(("pVMCSPhys = %RHp\n", pVCpu->hwaccm.s.vmx.pVMCSPhys));
385 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
386 if (RT_FAILURE(rc))
387 goto vmx_end;
388
389 /* Activate the VM Control Structure. */
390 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
391 if (RT_FAILURE(rc))
392 goto vmx_end;
393
394 /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
395 * Set required bits to one and zero according to the MSR capabilities.
396 */
397 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
398 /* External and non-maskable interrupts cause VM-exits. */
399 val = val | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
400 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
401
402 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
403 AssertRC(rc);
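/* The clamp pattern above boils down to:
 *     final = (requested | must_be_one) & may_be_one
 * 'disallowed0' (bits of the capability MSR that may not be 0) seeds the
 * must-be-one bits and 'allowed1' masks off anything the CPU cannot set to 1.
 * The same pattern repeats for the processor-based, entry and exit controls
 * below.
 */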
404
405 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
406 * Set required bits to one and zero according to the MSR capabilities.
407 */
408 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
409 /* Program which events cause VM-exits and which features we want to use. */
410 val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
411 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
412 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
413 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
414 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
415 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MONITOR_EXIT
416 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
417
418 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
419 if (!pVM->hwaccm.s.fNestedPaging)
420 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
421 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
422 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
423
424 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error. (combined with some other exit reasons) */
425 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
426 {
427 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold */
428 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
429 Assert(pVM->hwaccm.s.vmx.pAPIC);
430 }
431 else
432 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
433 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
434
435 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
436 {
437 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
438 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
439 }
440
441 /* We will use the secondary control if it's present. */
442 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
443
444 /* Mask away the bits that the CPU doesn't support */
445 /** @todo make sure they don't conflict with the above requirements. */
446 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
447 pVCpu->hwaccm.s.vmx.proc_ctls = val;
448
449 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
450 AssertRC(rc);
451
452 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
453 {
454 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
455 * Set required bits to one and zero according to the MSR capabilities.
456 */
457 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
458 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
459
460#ifdef HWACCM_VTX_WITH_EPT
461 if (pVM->hwaccm.s.fNestedPaging)
462 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
463#endif /* HWACCM_VTX_WITH_EPT */
464#ifdef HWACCM_VTX_WITH_VPID
465 else
466 if (pVM->hwaccm.s.vmx.fVPID)
467 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
468#endif /* HWACCM_VTX_WITH_VPID */
469
470 if (pVM->hwaccm.s.fHasIoApic)
471 val |= VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC;
472
473 if (pVM->hwaccm.s.vmx.fUnrestrictedGuest)
474 val |= VMX_VMCS_CTRL_PROC_EXEC2_REAL_MODE;
475
476 /* Mask away the bits that the CPU doesn't support */
477 /** @todo make sure they don't conflict with the above requirements. */
478 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
479 pVCpu->hwaccm.s.vmx.proc_ctls2 = val;
480 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
481 AssertRC(rc);
482 }
483
484 /* VMX_VMCS_CTRL_CR3_TARGET_COUNT
485 * Set required bits to one and zero according to the MSR capabilities.
486 */
487 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
488 AssertRC(rc);
489
490 /* Forward all exceptions except #NM & #PF to the guest.
491 * We always need to check page faults since our shadow page table can be out of sync.
492 * And we always lazily sync the FPU & XMM state.
493 */
494
495 /** @todo Possible optimization:
496 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
497 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
498 * registers ourselves of course.
499 *
500 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
501 */
502
503 /* Don't filter page faults; all of them should cause a switch. */
504 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
505 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
506 AssertRC(rc);
507
508 /* Init TSC offset to zero. */
509 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
510 AssertRC(rc);
511
512 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
513 AssertRC(rc);
514
515 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
516 AssertRC(rc);
517
518 /* Set the MSR bitmap address. */
519 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
520 {
521 Assert(pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
522
523 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVCpu->hwaccm.s.vmx.pMSRBitmapPhys);
524 AssertRC(rc);
525
526 /* Allow the guest to directly modify these MSRs; they are restored and saved automatically. */
527 vmxR0SetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
528 vmxR0SetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
529 vmxR0SetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
530 vmxR0SetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
531 vmxR0SetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
532 vmxR0SetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
533 vmxR0SetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
534 vmxR0SetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
535 vmxR0SetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
536 }
537
538#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
539 /* Set the guest & host MSR load/store physical addresses. */
540 Assert(pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
541 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
542 AssertRC(rc);
543 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, pVCpu->hwaccm.s.vmx.pGuestMSRPhys);
544 AssertRC(rc);
545
546 Assert(pVCpu->hwaccm.s.vmx.pHostMSRPhys);
547 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, pVCpu->hwaccm.s.vmx.pHostMSRPhys);
548 AssertRC(rc);
549#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
550
551 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, 0);
552 AssertRC(rc);
553
554 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
555 AssertRC(rc);
556
557 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
558 {
559 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
560 /* Optional */
561 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
562 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVCpu->hwaccm.s.vmx.pVAPICPhys);
563
564 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC)
565 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
566
567 AssertRC(rc);
568 }
569
570 /* Set link pointer to -1. Not currently used. */
571 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
572 AssertRC(rc);
573
574 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
575 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
576 AssertRC(rc);
577
578 /* Configure the VMCS read cache. */
579 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
580
581 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
582 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
583 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
584 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
585 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
586 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
587 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
588 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
589 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
590 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
591 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
592 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
593 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
594 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
595 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
596 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
597
598 VMX_SETUP_SELREG(ES, pCache);
599 VMX_SETUP_SELREG(SS, pCache);
600 VMX_SETUP_SELREG(CS, pCache);
601 VMX_SETUP_SELREG(DS, pCache);
602 VMX_SETUP_SELREG(FS, pCache);
603 VMX_SETUP_SELREG(GS, pCache);
604 VMX_SETUP_SELREG(LDTR, pCache);
605 VMX_SETUP_SELREG(TR, pCache);
606
607 /* Status code VMCS reads. */
608 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
609 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
610 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
611 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
612 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
613 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
614 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
615 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
616 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
617
618 if (pVM->hwaccm.s.fNestedPaging)
619 {
620 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
621 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
622 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
623 }
624 else
625 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
626 } /* for each VMCPU */
627
628 /* Choose the right TLB setup function. */
629 if (pVM->hwaccm.s.fNestedPaging)
630 {
631 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBEPT;
632
633 /* Default values for flushing. */
634 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
635 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
636
637 /* If the capabilities specify we can do more, then make use of it. */
638 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_INDIV)
639 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
640 else
641 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
642 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
643
644 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
645 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
646 }
647#ifdef HWACCM_VTX_WITH_VPID
648 else
649 if (pVM->hwaccm.s.vmx.fVPID)
650 {
651 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBVPID;
652
653 /* Default values for flushing. */
654 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
655 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
656
657 /* If the capabilities specify we can do more, then make use of it. */
658 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV)
659 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
660 else
661 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
662 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
663
664 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
665 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
666 }
667#endif /* HWACCM_VTX_WITH_VPID */
668 else
669 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBDummy;
670
671vmx_end:
672 VMXR0CheckError(pVM, &pVM->aCpus[0], rc);
673 return rc;
674}
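/* Assumed call order from the generic HWACCM ring-0 code (based only on the
 * entry points in this file): VMXR0EnableCpu and VMXR0InitVM/VMXR0SetupVM at
 * start-up, VMXR0SaveHostState and VMXR0LoadGuestState before each guest run,
 * and VMXR0TermVM/VMXR0DisableCpu at teardown.
 */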
675
676/**
677 * Sets the permission bits for the specified MSR
678 *
679 * @param pVCpu The VMCPU to operate on.
680 * @param ulMSR MSR value
681 * @param fRead Reading allowed/disallowed
682 * @param fWrite Writing allowed/disallowed
683 */
684static void vmxR0SetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
685{
686 unsigned ulBit;
687 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.vmx.pMSRBitmap;
688
689 /* Layout:
690 * 0x000 - 0x3ff - Low MSR read bits
691 * 0x400 - 0x7ff - High MSR read bits
692 * 0x800 - 0xbff - Low MSR write bits
693 * 0xc00 - 0xfff - High MSR write bits
694 */
695 if (ulMSR <= 0x00001FFF)
696 {
697 /* Pentium-compatible MSRs */
698 ulBit = ulMSR;
699 }
700 else
701 if ( ulMSR >= 0xC0000000
702 && ulMSR <= 0xC0001FFF)
703 {
704 /* AMD Sixth Generation x86 Processor MSRs */
705 ulBit = (ulMSR - 0xC0000000);
706 pMSRBitmap += 0x400;
707 }
708 else
709 {
710 AssertFailed();
711 return;
712 }
713
714 Assert(ulBit <= 0x1fff);
715 if (fRead)
716 ASMBitClear(pMSRBitmap, ulBit);
717 else
718 ASMBitSet(pMSRBitmap, ulBit);
719
720 if (fWrite)
721 ASMBitClear(pMSRBitmap + 0x800, ulBit);
722 else
723 ASMBitSet(pMSRBitmap + 0x800, ulBit);
724}
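/* Worked example for the layout above: MSR_K8_LSTAR is 0xC0000082, so it
 * falls in the high range and ulBit = 0x82. Its read-intercept bit lives at
 * byte 0x400 + (0x82 >> 3) = 0x410, bit 2, and its write-intercept bit at
 * byte 0xC00 + 0x10 = 0xC10, bit 2; clearing both (fRead = fWrite = true)
 * means RDMSR/WRMSR of LSTAR no longer cause a VM-exit.
 */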
725
726
727/**
728 * Injects an event (trap or external interrupt)
729 *
730 * @returns VBox status code.
731 * @param pVM The VM to operate on.
732 * @param pVCpu The VMCPU to operate on.
733 * @param pCtx CPU Context
734 * @param intInfo VMX interrupt info
735 * @param cbInstr Opcode length of faulting instruction
736 * @param errCode Error code (optional)
737 */
738static int VMXR0InjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
739{
740 int rc;
741 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
742
743#ifdef VBOX_WITH_STATISTICS
744 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[iGate & MASK_INJECT_IRQ_STAT]);
745#endif
746
747#ifdef VBOX_STRICT
748 if (iGate == 0xE)
749 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
750 else
751 if (iGate < 0x20)
752 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode));
753 else
754 {
755 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
756 Assert(VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
757 Assert(VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW || pCtx->eflags.u32 & X86_EFL_IF);
758 }
759#endif
760
761#ifdef HWACCM_VMX_EMULATE_REALMODE
762 if ( CPUMIsGuestInRealModeEx(pCtx)
763 && pVM->hwaccm.s.vmx.pRealModeTSS)
764 {
765 RTGCPHYS GCPhysHandler;
766 uint16_t offset, ip;
767 RTSEL sel;
768
769 /* Injecting events doesn't work right with real mode emulation.
770 * (#GP if we try to inject external hardware interrupts)
771 * Inject the interrupt or trap directly instead.
772 *
773 * ASSUMES no access handlers for the bits we read or write below (should be safe).
774 */
775 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
776
777 /* Check if the interrupt handler is present. */
778 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
779 {
780 Log(("IDT cbIdt violation\n"));
781 if (iGate != X86_XCPT_DF)
782 {
783 uint32_t intInfo2;
784
785 intInfo2 = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
786 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
787 intInfo2 |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
788 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
789
790 return VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, 0, 0 /* no error code according to the Intel docs */);
791 }
792 Log(("Triple fault -> reset the VM!\n"));
793 return VINF_EM_RESET;
794 }
795 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
796 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
797 || iGate == 4)
798 {
799 ip = pCtx->ip + cbInstr;
800 }
801 else
802 ip = pCtx->ip;
803
804 /* Read the selector:offset pair of the interrupt handler. */
805 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
806 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
807 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
808
809 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
810
811 /* Construct the stack frame. */
812 /** @todo should check stack limit. */
813 pCtx->sp -= 2;
814 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
815 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
816 pCtx->sp -= 2;
817 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
818 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
819 pCtx->sp -= 2;
820 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
821 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
822
823 /* Update the CPU state for executing the handler. */
824 pCtx->rip = offset;
825 pCtx->cs = sel;
826 pCtx->csHid.u64Base = sel << 4;
827 pCtx->eflags.u &= ~(X86_EFL_IF|X86_EFL_TF|X86_EFL_RF|X86_EFL_AC);
828
829 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
830 return VINF_SUCCESS;
831 }
832#endif /* HWACCM_VMX_EMULATE_REALMODE */
833
834 /* Set event injection state. */
835 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
836
837 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
838 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
839
840 AssertRC(rc);
841 return rc;
842}
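/* The intInfo value written to VMX_VMCS_CTRL_ENTRY_IRQ_INFO follows the
 * VM-entry interruption-information format from the Intel SDM: bits 7:0 are
 * the vector, bits 10:8 the type, bit 11 "deliver error code" and bit 31
 * "valid". A page fault injected as a hardware exception with an error code
 * therefore encodes as 0x80000B0E, an NMI as 0x80000202.
 */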
843
844
845/**
846 * Checks for pending guest interrupts and injects them
847 *
848 * @returns VBox status code.
849 * @param pVM The VM to operate on.
850 * @param pVCpu The VMCPU to operate on.
851 * @param pCtx CPU Context
852 */
853static int VMXR0CheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
854{
855 int rc;
856
857 /* Dispatch any pending interrupts. (injected before, but a VM exit occurred prematurely) */
858 if (pVCpu->hwaccm.s.Event.fPending)
859 {
860 Log(("CPU%d: Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->idCpu, pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
861 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
862 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
863 AssertRC(rc);
864
865 pVCpu->hwaccm.s.Event.fPending = false;
866 return VINF_SUCCESS;
867 }
868
869 /* If an active trap is already pending, then we must forward it first! */
870 if (!TRPMHasTrap(pVCpu))
871 {
872 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
873 {
874 RTGCUINTPTR intInfo;
875
876 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
877
878 intInfo = X86_XCPT_NMI;
879 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
880 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
881
882 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
883 AssertRC(rc);
884
885 return VINF_SUCCESS;
886 }
887
888 /* @todo SMI interrupts. */
889
890 /* When external interrupts are pending, we should exit the VM when IF is set. */
891 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
892 {
893 if (!(pCtx->eflags.u32 & X86_EFL_IF))
894 {
895 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
896 {
897 LogFlow(("Enable irq window exit!\n"));
898 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
899 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
900 AssertRC(rc);
901 }
902 /* else nothing to do but wait */
903 }
904 else
905 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
906 {
907 uint8_t u8Interrupt;
908
909 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
910 Log(("CPU%d: Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", pVCpu->idCpu, u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
911 if (RT_SUCCESS(rc))
912 {
913 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
914 AssertRC(rc);
915 }
916 else
917 {
918 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
919 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
920 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
921 /* Just continue */
922 }
923 }
924 else
925 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
926 }
927 }
928
929#ifdef VBOX_STRICT
930 if (TRPMHasTrap(pVCpu))
931 {
932 uint8_t u8Vector;
933 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
934 AssertRC(rc);
935 }
936#endif
937
938 if ( (pCtx->eflags.u32 & X86_EFL_IF)
939 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
940 && TRPMHasTrap(pVCpu)
941 )
942 {
943 uint8_t u8Vector;
944 TRPMEVENT enmType;
945 RTGCUINTPTR intInfo;
946 RTGCUINT errCode;
947
948 /* If a new event is pending, then dispatch it now. */
949 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
950 AssertRC(rc);
951 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
952 Assert(enmType != TRPM_SOFTWARE_INT);
953
954 /* Clear the pending trap. */
955 rc = TRPMResetTrap(pVCpu);
956 AssertRC(rc);
957
958 intInfo = u8Vector;
959 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
960
961 if (enmType == TRPM_TRAP)
962 {
963 switch (u8Vector) {
964 case 8:
965 case 10:
966 case 11:
967 case 12:
968 case 13:
969 case 14:
970 case 17:
971 /* Valid error codes. */
972 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
973 break;
974 default:
975 break;
976 }
977 if (u8Vector == X86_XCPT_BP || u8Vector == X86_XCPT_OF)
978 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
979 else
980 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
981 }
982 else
983 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
984
985 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
986 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
987 AssertRC(rc);
988 } /* if (interrupts can be dispatched) */
989
990 return VINF_SUCCESS;
991}
992
993/**
994 * Save the host state
995 *
996 * @returns VBox status code.
997 * @param pVM The VM to operate on.
998 * @param pVCpu The VMCPU to operate on.
999 */
1000VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1001{
1002 int rc = VINF_SUCCESS;
1003
1004 /*
1005 * Host CPU Context
1006 */
1007 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
1008 {
1009 RTIDTR idtr;
1010 RTGDTR gdtr;
1011 RTSEL SelTR;
1012 PCX86DESCHC pDesc;
1013 uintptr_t trBase;
1014 RTSEL cs;
1015 RTSEL ss;
1016 uint64_t cr3;
1017
1018 /* Control registers */
1019 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
1020#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1021 if (VMX_IS_64BIT_HOST_MODE())
1022 {
1023 cr3 = hwaccmR0Get64bitCR3();
1024 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
1025 }
1026 else
1027#endif
1028 {
1029 cr3 = ASMGetCR3();
1030 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
1031 }
1032 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
1033 AssertRC(rc);
1034 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
1035 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
1036 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
1037
1038 /* Selector registers. */
1039#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1040 if (VMX_IS_64BIT_HOST_MODE())
1041 {
1042 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
1043 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
1044 }
1045 else
1046 {
1047 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
1048 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
1049 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
1050 }
1051#else
1052 cs = ASMGetCS();
1053 ss = ASMGetSS();
1054#endif
1055 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
1056 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
1057 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
1058 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
1059 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
1060 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
1061#if HC_ARCH_BITS == 32
1062 if (!VMX_IS_64BIT_HOST_MODE())
1063 {
1064 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
1065 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
1066 }
1067#endif
1068 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
1069 SelTR = ASMGetTR();
1070 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
1071 AssertRC(rc);
1072 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetCS()));
1073 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
1074 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
1075 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
1076 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
1077 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
1078 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
1079
1080 /* GDTR & IDTR */
1081#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1082 if (VMX_IS_64BIT_HOST_MODE())
1083 {
1084 X86XDTR64 gdtr64, idtr64;
1085 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
1086 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
1087 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
1088 AssertRC(rc);
1089 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
1090 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
1091 gdtr.cbGdt = gdtr64.cb;
1092 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
1093 }
1094 else
1095#endif
1096 {
1097 ASMGetGDTR(&gdtr);
1098 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
1099 ASMGetIDTR(&idtr);
1100 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
1101 AssertRC(rc);
1102 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
1103 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
1104 }
1105
1106 /* Save the base address of the TR selector. */
1107 if (SelTR > gdtr.cbGdt)
1108 {
1109 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
1110 return VERR_VMX_INVALID_HOST_STATE;
1111 }
1112
1113 pDesc = (PCX86DESCHC)(gdtr.pGdt + (SelTR & X86_SEL_MASK));
1114#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1115 if (VMX_IS_64BIT_HOST_MODE())
1116 {
1117 uint64_t trBase64 = X86DESC64_BASE(*(PX86DESC64)pDesc);
1118 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
1119 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
1120 AssertRC(rc);
1121 }
1122 else
1123#endif
1124 {
1125#if HC_ARCH_BITS == 64
1126 trBase = X86DESC64_BASE(*pDesc);
1127#else
1128 trBase = X86DESC_BASE(*pDesc);
1129#endif
1130 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
1131 AssertRC(rc);
1132 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
1133 }
1134
1135 /* FS and GS base. */
1136#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1137 if (VMX_IS_64BIT_HOST_MODE())
1138 {
1139 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
1140 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
1141 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
1142 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
1143 }
1144#endif
1145 AssertRC(rc);
1146
1147 /* Sysenter MSRs. */
1148 /** @todo expensive!! */
1149 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1150 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1151#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1152 if (VMX_IS_64BIT_HOST_MODE())
1153 {
1154 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1155 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1156 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1157 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1158 }
1159 else
1160 {
1161 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1162 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1163 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1164 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1165 }
1166#elif HC_ARCH_BITS == 32
1167 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1168 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1169 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1170 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1171#else
1172 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1173 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1174 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1175 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1176#endif
1177 AssertRC(rc);
1178
1179#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1180 /* Store all host MSRs in the VM-Exit load area, so they will be reloaded after the world switch back to the host. */
1181 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pHostMSR;
1182 unsigned idxMsr = 0;
1183
1184 /* EFER MSR present? */
1185 if (ASMCpuId_EDX(0x80000001) & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1186 {
1187 if (ASMCpuId_EDX(0x80000001) & X86_CPUID_AMD_FEATURE_EDX_SEP)
1188 {
1189 pMsr->u32IndexMSR = MSR_K6_STAR;
1190 pMsr->u32Reserved = 0;
1191 pMsr->u64Value = ASMRdMsr(MSR_K6_STAR); /* legacy syscall eip, cs & ss */
1192 pMsr++; idxMsr++;
1193 }
1194
1195 pMsr->u32IndexMSR = MSR_K6_EFER;
1196 pMsr->u32Reserved = 0;
1197# if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1198 if (CPUMIsGuestInLongMode(pVCpu))
1199 {
1200 /* Must match the efer value in our 64 bits switcher. */
1201 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER) | MSR_K6_EFER_LME | MSR_K6_EFER_SCE | MSR_K6_EFER_NXE;
1202 }
1203 else
1204# endif
1205 pMsr->u64Value = ASMRdMsr(MSR_K6_EFER);
1206 pMsr++; idxMsr++;
1207 }
1208
1209# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1210 if (VMX_IS_64BIT_HOST_MODE())
1211 {
1212 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1213 pMsr->u32Reserved = 0;
1214 pMsr->u64Value = ASMRdMsr(MSR_K8_LSTAR); /* 64 bits mode syscall rip */
1215 pMsr++; idxMsr++;
1216 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1217 pMsr->u32Reserved = 0;
1218 pMsr->u64Value = ASMRdMsr(MSR_K8_SF_MASK); /* syscall flag mask */
1219 pMsr++; idxMsr++;
1220 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1221 pMsr->u32Reserved = 0;
1222 pMsr->u64Value = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); /* swapgs exchange value */
1223 pMsr++; idxMsr++;
1224 }
1225# endif
1226 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, idxMsr);
1227 AssertRC(rc);
1228#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
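/* Each auto-load/store entry above is 16 bytes (u32IndexMSR, u32Reserved,
 * u64Value), matching the hardware's 128-bit MSR-area entry format, so a
 * single 4 KB page holds up to 256 entries; the idxMsr count written to
 * VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT tells the CPU how many are valid.
 */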
1229
1230 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1231 }
1232 return rc;
1233}
1234
1235/**
1236 * Prefetch the 4 PDPT pointers (PAE and nested paging only)
1237 *
1238 * @param pVM The VM to operate on.
1239 * @param pVCpu The VMCPU to operate on.
1240 * @param pCtx Guest context
1241 */
1242static void vmxR0PrefetchPAEPdptrs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1243{
1244 if (CPUMIsGuestInPAEModeEx(pCtx))
1245 {
1246 X86PDPE Pdpe;
1247
1248 for (unsigned i=0;i<4;i++)
1249 {
1250 Pdpe = PGMGstGetPaePDPtr(pVCpu, i);
1251 int rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL + i*2, Pdpe.u);
1252 AssertRC(rc);
1253 }
1254 }
1255}
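/* The '+ i*2' above relies on 64-bit VMCS fields occupying two consecutive
 * encodings (the 'FULL' field at an even encoding, its 'HIGH' half at the
 * next odd one), so PDPTR0..PDPTR3 are spaced two encodings apart.
 */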
1256
1257/**
1258 * Update the exception bitmap according to the current CPU state
1259 *
1260 * @param pVM The VM to operate on.
1261 * @param pVCpu The VMCPU to operate on.
1262 * @param pCtx Guest context
1263 */
1264static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1265{
1266 uint32_t u32TrapMask;
1267 Assert(pCtx);
1268
1269 u32TrapMask = HWACCM_VMX_TRAP_MASK;
1270#ifndef DEBUG
1271 if (pVM->hwaccm.s.fNestedPaging)
1272 u32TrapMask &= ~RT_BIT(X86_XCPT_PF); /* no longer need to intercept #PF. */
1273#endif
1274
1275 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
1276 if ( CPUMIsGuestFPUStateActive(pVCpu) == true
1277 && !(pCtx->cr0 & X86_CR0_NE)
1278 && !pVCpu->hwaccm.s.fFPUOldStyleOverride)
1279 {
1280 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1281 pVCpu->hwaccm.s.fFPUOldStyleOverride = true;
1282 }
1283
1284#ifdef DEBUG /* till after branching, enable it by default then. */
1285 /* Intercept X86_XCPT_DB if stepping is enabled */
1286 if ( DBGFIsStepping(pVCpu)
1287 || CPUMIsHyperDebugStateActive(pVCpu))
1288 u32TrapMask |= RT_BIT(X86_XCPT_DB);
1289 /** @todo Don't trap it unless the debugger has armed breakpoints. */
1290 u32TrapMask |= RT_BIT(X86_XCPT_BP);
1291#endif
1292
1293#ifdef VBOX_STRICT
1294 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1295#endif
1296
1297# ifdef HWACCM_VMX_EMULATE_REALMODE
1298 /* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
1299 if ( CPUMIsGuestInRealModeEx(pCtx)
1300 && pVM->hwaccm.s.vmx.pRealModeTSS)
1301 u32TrapMask |= HWACCM_VMX_TRAP_MASK_REALMODE;
1302# endif /* HWACCM_VMX_EMULATE_REALMODE */
1303
1304 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1305 AssertRC(rc);
1306}
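/* In the exception bitmap written above, bit n set means exception vector n
 * causes a VM-exit; e.g. RT_BIT(X86_XCPT_PF) is bit 14 and RT_BIT(X86_XCPT_MF)
 * is bit 16, which is why nested paging can clear bit 14 while the old-style
 * FPU error path adds bit 16.
 */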
1307
1308/**
1309 * Loads the guest state
1310 *
1311 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1312 *
1313 * @returns VBox status code.
1314 * @param pVM The VM to operate on.
1315 * @param pVCpu The VMCPU to operate on.
1316 * @param pCtx Guest context
1317 */
1318VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1319{
1320 int rc = VINF_SUCCESS;
1321 RTGCUINTPTR val;
1322 X86EFLAGS eflags;
1323
1324 /* VMX_VMCS_CTRL_ENTRY_CONTROLS
1325 * Set required bits to one and zero according to the MSR capabilities.
1326 */
1327 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1328 /* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1329 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1330 /* 64 bits guest mode? */
1331 if (CPUMIsGuestInLongModeEx(pCtx))
1332 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1333 /* else Must be zero when AMD64 is not available. */
1334
1335 /* Mask away the bits that the CPU doesn't support */
1336 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1337 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1338 AssertRC(rc);
1339
1340 /* VMX_VMCS_CTRL_EXIT_CONTROLS
1341 * Set required bits to one and zero according to the MSR capabilities.
1342 */
1343 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1344
1345 /* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1346 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1347
1348#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1349 if (VMX_IS_64BIT_HOST_MODE())
1350 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1351 /* else: Must be zero when AMD64 is not available. */
1352#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1353 if (CPUMIsGuestInLongModeEx(pCtx))
1354 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1355 else
1356 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1357#endif
1358 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1359 /* Don't acknowledge external interrupts on VM-exit. */
1360 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1361 AssertRC(rc);
1362
1363 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1364 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1365 {
1366#ifdef HWACCM_VMX_EMULATE_REALMODE
1367 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1368 {
1369 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1370 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1371 {
1372 /* Correct weird requirements for switching to protected mode. */
1373 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1374 && enmGuestMode >= PGMMODE_PROTECTED)
1375 {
1376 /* Flush the recompiler code cache as it's not unlikely
1377 * the guest will rewrite code it will later execute in real
1378 * mode (OpenBSD 4.0 is one such example)
1379 */
1380 REMFlushTBs(pVM);
1381
1382 /* DPL of all hidden selector registers must match the current CPL (0). */
1383 pCtx->csHid.Attr.n.u2Dpl = 0;
1384 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1385
1386 pCtx->dsHid.Attr.n.u2Dpl = 0;
1387 pCtx->esHid.Attr.n.u2Dpl = 0;
1388 pCtx->fsHid.Attr.n.u2Dpl = 0;
1389 pCtx->gsHid.Attr.n.u2Dpl = 0;
1390 pCtx->ssHid.Attr.n.u2Dpl = 0;
1391
1392 /* The limit must correspond to the 32 bits setting. */
1393 if (!pCtx->csHid.Attr.n.u1DefBig)
1394 pCtx->csHid.u32Limit &= 0xffff;
1395 if (!pCtx->dsHid.Attr.n.u1DefBig)
1396 pCtx->dsHid.u32Limit &= 0xffff;
1397 if (!pCtx->esHid.Attr.n.u1DefBig)
1398 pCtx->esHid.u32Limit &= 0xffff;
1399 if (!pCtx->fsHid.Attr.n.u1DefBig)
1400 pCtx->fsHid.u32Limit &= 0xffff;
1401 if (!pCtx->gsHid.Attr.n.u1DefBig)
1402 pCtx->gsHid.u32Limit &= 0xffff;
1403 if (!pCtx->ssHid.Attr.n.u1DefBig)
1404 pCtx->ssHid.u32Limit &= 0xffff;
1405 }
1406 else
1407 /* Switching from protected mode to real mode. */
1408 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode >= PGMMODE_PROTECTED
1409 && enmGuestMode == PGMMODE_REAL)
1410 {
1411 /* The limit must also be set to 0xffff. */
1412 pCtx->csHid.u32Limit = 0xffff;
1413 pCtx->dsHid.u32Limit = 0xffff;
1414 pCtx->esHid.u32Limit = 0xffff;
1415 pCtx->fsHid.u32Limit = 0xffff;
1416 pCtx->gsHid.u32Limit = 0xffff;
1417 pCtx->ssHid.u32Limit = 0xffff;
1418
1419 Assert(pCtx->csHid.u64Base <= 0xfffff);
1420 Assert(pCtx->dsHid.u64Base <= 0xfffff);
1421 Assert(pCtx->esHid.u64Base <= 0xfffff);
1422 Assert(pCtx->fsHid.u64Base <= 0xfffff);
1423 Assert(pCtx->gsHid.u64Base <= 0xfffff);
1424 }
1425 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1426 }
1427 else
1428 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1429 if ( CPUMIsGuestInRealModeEx(pCtx)
1430 && pCtx->csHid.u64Base == 0xffff0000)
1431 {
1432 pCtx->csHid.u64Base = 0xf0000;
1433 pCtx->cs = 0xf000;
1434 }
1435 }
1436#endif /* HWACCM_VMX_EMULATE_REALMODE */
1437
1438 VMX_WRITE_SELREG(ES, es);
1439 AssertRC(rc);
1440
1441 VMX_WRITE_SELREG(CS, cs);
1442 AssertRC(rc);
1443
1444 VMX_WRITE_SELREG(SS, ss);
1445 AssertRC(rc);
1446
1447 VMX_WRITE_SELREG(DS, ds);
1448 AssertRC(rc);
1449
1450 VMX_WRITE_SELREG(FS, fs);
1451 AssertRC(rc);
1452
1453 VMX_WRITE_SELREG(GS, gs);
1454 AssertRC(rc);
1455 }
1456
1457 /* Guest CPU context: LDTR. */
1458 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1459 {
1460 if (pCtx->ldtr == 0)
1461 {
1462 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1463 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1464 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1465 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1466 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1467 }
1468 else
1469 {
1470 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr);
1471 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1472 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1473 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1474 }
1475 AssertRC(rc);
1476 }
1477 /* Guest CPU context: TR. */
1478 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1479 {
1480#ifdef HWACCM_VMX_EMULATE_REALMODE
1481 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1482 if ( CPUMIsGuestInRealModeEx(pCtx)
1483 && pVM->hwaccm.s.vmx.pRealModeTSS)
1484 {
1485 RTGCPHYS GCPhys;
1486
1487 /* We convert it here every time as pci regions could be reconfigured. */
1488 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1489 AssertRC(rc);
1490
1491 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1492 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1493 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1494
1495 X86DESCATTR attr;
1496
1497 attr.u = 0;
1498 attr.n.u1Present = 1;
1499 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1500 val = attr.u;
1501 }
1502 else
1503#endif /* HWACCM_VMX_EMULATE_REALMODE */
1504 {
1505 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr);
1506 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1507 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->trHid.u64Base);
1508
1509 val = pCtx->trHid.Attr.u;
1510
1511 /* The TSS selector must be busy. */
1512 if ((val & 0xF) == X86_SEL_TYPE_SYS_286_TSS_AVAIL)
1513 val = (val & ~0xF) | X86_SEL_TYPE_SYS_286_TSS_BUSY;
1514 else
1515 /* Default even if no TR selector has been set (otherwise vmlaunch will fail!) */
1516 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1517
1518 }
1519 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1520 AssertRC(rc);
1521 }
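/* With u1Present = 1 and u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY (11) the
 * access-rights value programmed above works out to 0x8B (0x83 for the
 * 286-style busy TSS), in the same spirit as the 0x82 'present, LDT' value
 * used for a null LDTR earlier.
 */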
1522 /* Guest CPU context: GDTR. */
1523 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1524 {
1525 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1526 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1527 AssertRC(rc);
1528 }
1529 /* Guest CPU context: IDTR. */
1530 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1531 {
1532 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1533 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1534 AssertRC(rc);
1535 }
1536
1537 /*
1538 * Sysenter MSRs (unconditional)
1539 */
1540 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1541 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1542 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1543 AssertRC(rc);
1544
1545 /* Control registers */
1546 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1547 {
1548 val = pCtx->cr0;
1549 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1550 Log2(("Guest CR0-shadow %08x\n", val));
1551 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1552 {
1553 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1554 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1555 }
1556 else
1557 {
1558 /** @todo check if we support the old style mess correctly. */
1559 if (!(val & X86_CR0_NE))
1560 Log(("Forcing X86_CR0_NE!!!\n"));
1561
1562 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1563 }
1564 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1565 if (!pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1566 val |= X86_CR0_PE | X86_CR0_PG;
1567
1568 if (pVM->hwaccm.s.fNestedPaging)
1569 {
1570 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1571 {
1572 /* Disable cr3 read/write monitoring as we don't need it for EPT. */
1573 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1574 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1575 }
1576 else
1577 {
1578 /* Reenable cr3 read/write monitoring as our identity mapped page table is active. */
1579 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1580 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1581 }
1582 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1583 AssertRC(rc);
1584 }
1585 else
1586 {
1587 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1588 val |= X86_CR0_WP;
1589 }
1590
1591 /* Always enable caching. */
1592 val &= ~(X86_CR0_CD|X86_CR0_NW);
1593
1594 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1595 Log2(("Guest CR0 %08x\n", val));
1596 /* CR0 flags owned by the host; if the guest attempts to change them, then
1597 * the VM will exit.
1598 */
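/* How the guest/host mask works: for every CR0 bit set in the mask, guest reads of CR0 return the
 * read-shadow value written above and guest writes that would change such a bit cause a VM-exit;
 * bits clear in the mask are read from and written to the real guest CR0 directly. */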
1599 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1600 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1601 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1602 | X86_CR0_TS
1603 | X86_CR0_ET /* Bit not restored during VM-exit! */
1604 | X86_CR0_CD /* Bit not restored during VM-exit! */
1605 | X86_CR0_NW /* Bit not restored during VM-exit! */
1606 | X86_CR0_NE
1607 | X86_CR0_MP;
1608 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1609
1610 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1611 Log2(("Guest CR0-mask %08x\n", val));
1612 AssertRC(rc);
1613 }
1614 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1615 {
1616 /* CR4 */
1617 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1618 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1619 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1620 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1621
1622 if (!pVM->hwaccm.s.fNestedPaging)
1623 {
1624 switch(pVCpu->hwaccm.s.enmShadowMode)
1625 {
1626 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1627 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1628 case PGMMODE_32_BIT: /* 32-bit paging. */
1629 val &= ~X86_CR4_PAE;
1630 break;
1631
1632 case PGMMODE_PAE: /* PAE paging. */
1633 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1634 /** @todo use normal 32 bits paging */
1635 val |= X86_CR4_PAE;
1636 break;
1637
1638 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1639 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1640#ifdef VBOX_ENABLE_64_BITS_GUESTS
1641 break;
1642#else
1643 AssertFailed();
1644 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1645#endif
1646 default: /* shut up gcc */
1647 AssertFailed();
1648 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1649 }
1650 }
1651 else
1652 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1653 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1654 {
1655 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1656 val |= X86_CR4_PSE;
1657 /* Our identity mapping is a 32-bit page directory. */
1658 val &= ~X86_CR4_PAE;
1659 }
1660
1661#ifdef HWACCM_VMX_EMULATE_REALMODE
1662 /* Turn off VME if we're in emulated real mode. */
1663 if ( CPUMIsGuestInRealModeEx(pCtx)
1664 && pVM->hwaccm.s.vmx.pRealModeTSS)
1665 val &= ~X86_CR4_VME;
1666#endif /* HWACCM_VMX_EMULATE_REALMODE */
1667
1668 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1669 Log2(("Guest CR4 %08x\n", val));
1670 /* CR4 flags owned by the host; if the guest attempts to change them, then
1671 * the VM will exit.
1672 */
1673 val = 0
1674 | X86_CR4_VME
1675 | X86_CR4_PAE
1676 | X86_CR4_PGE
1677 | X86_CR4_PSE
1678 | X86_CR4_VMXE;
1679 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1680
1681 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1682 Log2(("Guest CR4-mask %08x\n", val));
1683 AssertRC(rc);
1684 }
1685
1686 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1687 {
1688 if (pVM->hwaccm.s.fNestedPaging)
1689 {
1690 Assert(PGMGetHyperCR3(pVCpu));
1691 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
1692
1693 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1694 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
1695 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1696 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
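/* The low EPTP bits encode attributes rather than address bits: bits 2:0 hold the memory type used for
 * accessing the EPT paging structures (6 = write-back) and bits 5:3 hold the page-walk length minus one
 * (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT is presumably 3, i.e. a 4-level walk). */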
1697
1698 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1699 AssertRC(rc);
1700
1701 if ( !CPUMIsGuestInPagedProtectedModeEx(pCtx)
1702 && !pVM->hwaccm.s.vmx.fUnrestrictedGuest)
1703 {
1704 RTGCPHYS GCPhys;
1705
1706 /* We convert it here every time as pci regions could be reconfigured. */
1707 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1708 AssertMsgRC(rc, ("pNonPagingModeEPTPageTable = %RGv\n", pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable));
1709
1710 /* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will
1711 * take care of the translation to host physical addresses.
1712 */
1713 val = GCPhys;
1714 }
1715 else
1716 {
1717 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1718 val = pCtx->cr3;
1719 /* Prefetch the four PDPT entries in PAE mode. */
1720 vmxR0PrefetchPAEPdptrs(pVM, pVCpu, pCtx);
1721 }
1722 }
1723 else
1724 {
1725 val = PGMGetHyperCR3(pVCpu);
1726 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
1727 }
1728
1729 /* Save our shadow CR3 register. */
1730 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
1731 AssertRC(rc);
1732 }
1733
1734 /* Debug registers. */
1735 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1736 {
1737 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
1738 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
1739
1740 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
1741 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
1742 pCtx->dr[7] |= 0x400; /* must be one */
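/* 0x400 is DR7 bit 10, which is architecturally reserved and always reads as 1. */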
1743
1744 /* Resync DR7 */
1745 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
1746 AssertRC(rc);
1747
1748#ifdef DEBUG
1749 /* Sync the hypervisor debug state now if any breakpoint is armed. */
1750 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
1751 && !CPUMIsHyperDebugStateActive(pVCpu)
1752 && !DBGFIsStepping(pVCpu))
1753 {
1754 /* Save the host and load the hypervisor debug state. */
1755 rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1756 AssertRC(rc);
1757
1758 /* DRx intercepts remain enabled. */
1759
1760 /* Override dr7 with the hypervisor value. */
1761 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, CPUMGetHyperDR7(pVCpu));
1762 AssertRC(rc);
1763 }
1764 else
1765#endif
1766 /* Sync the debug state now if any breakpoint is armed. */
1767 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
1768 && !CPUMIsGuestDebugStateActive(pVCpu)
1769 && !DBGFIsStepping(pVCpu))
1770 {
1771 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
1772
1773 /* Disable drx move intercepts. */
1774 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
1775 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1776 AssertRC(rc);
1777
1778 /* Save the host and load the guest debug state. */
1779 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1780 AssertRC(rc);
1781 }
1782
1783 /* IA32_DEBUGCTL MSR. */
1784 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
1785 AssertRC(rc);
1786
1787 /** @todo do we really ever need this? */
1788 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
1789 AssertRC(rc);
1790 }
1791
1792 /* EIP, ESP and EFLAGS */
1793 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1794 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1795 AssertRC(rc);
1796
1797 /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */
1798 eflags = pCtx->eflags;
1799 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1800 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
1801
1802#ifdef HWACCM_VMX_EMULATE_REALMODE
1803 /* Real mode emulation using v86 mode. */
1804 if ( CPUMIsGuestInRealModeEx(pCtx)
1805 && pVM->hwaccm.s.vmx.pRealModeTSS)
1806 {
1807 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1808
1809 eflags.Bits.u1VM = 1;
1810 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1811 }
1812#endif /* HWACCM_VMX_EMULATE_REALMODE */
1813 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1814 AssertRC(rc);
1815
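/* TSC handling: with offsetting enabled, guest RDTSC returns host TSC + u64TSCOffset. TM supplies an
 * offset that keeps the virtual TSC consistent; it is only safe to use if host TSC + offset never drops
 * below the last TSC value the guest has already seen, otherwise we intercept RDTSC and emulate it. */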
1816 if (TMCpuTickCanUseRealTSC(pVCpu, &pVCpu->hwaccm.s.vmx.u64TSCOffset))
1817 {
1818 uint64_t u64CurTSC = ASMReadTSC();
1819 if (u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
1820 {
1821 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */
1822 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, pVCpu->hwaccm.s.vmx.u64TSCOffset);
1823 AssertRC(rc);
1824
1825 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1826 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1827 AssertRC(rc);
1828 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
1829 }
1830 else
1831 {
1832 /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */
1833 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, pVCpu->hwaccm.s.vmx.u64TSCOffset, u64CurTSC + pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVCpu->hwaccm.s.vmx.u64TSCOffset, TMCpuTickGet(pVCpu)));
1834 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1835 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1836 AssertRC(rc);
1837 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
1838 }
1839 }
1840 else
1841 {
1842 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1843 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1844 AssertRC(rc);
1845 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
1846 }
1847
1848 /* 64-bit guest mode? */
1849 if (CPUMIsGuestInLongModeEx(pCtx))
1850 {
1851#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
1852 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1853#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1854 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
1855#else
1856# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1857 if (!pVM->hwaccm.s.fAllow64BitGuests)
1858 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1859# endif
1860 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
1861#endif
1862 /* Unconditionally update these as wrmsr might have changed them. */
1863 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base);
1864 AssertRC(rc);
1865 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base);
1866 AssertRC(rc);
1867 }
1868 else
1869 {
1870 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
1871 }
1872
1873 vmxR0UpdateExceptionBitmap(pVM, pVCpu, pCtx);
1874
1875#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
1876 /* Store all guest MSRs in the VM-Entry load area, so they will be loaded during the world switch. */
1877 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
1878 unsigned idxMsr = 0;
1879
1880 uint32_t ulEdx;
1881 uint32_t ulTemp;
1882 CPUMGetGuestCpuId(pVCpu, 0x80000001, &ulTemp, &ulTemp, &ulTemp, &ulEdx);
1883 /* EFER MSR present? */
1884 if (ulEdx & (X86_CPUID_AMD_FEATURE_EDX_NX|X86_CPUID_AMD_FEATURE_EDX_LONG_MODE))
1885 {
1886 pMsr->u32IndexMSR = MSR_K6_EFER;
1887 pMsr->u32Reserved = 0;
1888 pMsr->u64Value = pCtx->msrEFER;
1889 /* VT-x will complain if only MSR_K6_EFER_LME is set. */
1890 if (!CPUMIsGuestInLongModeEx(pCtx))
1891 pMsr->u64Value &= ~(MSR_K6_EFER_LMA|MSR_K6_EFER_LME);
1892 pMsr++; idxMsr++;
1893
1894 if (ulEdx & X86_CPUID_AMD_FEATURE_EDX_LONG_MODE)
1895 {
1896 pMsr->u32IndexMSR = MSR_K8_LSTAR;
1897 pMsr->u32Reserved = 0;
1898 pMsr->u64Value = pCtx->msrLSTAR; /* 64-bit mode syscall rip */
1899 pMsr++; idxMsr++;
1900 pMsr->u32IndexMSR = MSR_K6_STAR;
1901 pMsr->u32Reserved = 0;
1902 pMsr->u64Value = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
1903 pMsr++; idxMsr++;
1904 pMsr->u32IndexMSR = MSR_K8_SF_MASK;
1905 pMsr->u32Reserved = 0;
1906 pMsr->u64Value = pCtx->msrSFMASK; /* syscall flag mask */
1907 pMsr++; idxMsr++;
1908 pMsr->u32IndexMSR = MSR_K8_KERNEL_GS_BASE;
1909 pMsr->u32Reserved = 0;
1910 pMsr->u64Value = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
1911 pMsr++; idxMsr++;
1912 }
1913 }
1914 pVCpu->hwaccm.s.vmx.cCachedMSRs = idxMsr;
1915
1916 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_MSR_LOAD_COUNT, idxMsr);
1917 AssertRC(rc);
1918
1919 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, idxMsr);
1920 AssertRC(rc);
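/* Each entry in the area is a {32-bit MSR index, 32-bit reserved, 64-bit value} triplet. The CPU loads
 * the listed MSRs from the VM-entry load area on every entry and stores them into the VM-exit store area
 * on every exit; presumably the same buffer (pGuestMSR) backs both areas here, which is why a single
 * count is written to both the entry-load and exit-store count fields. */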
1921#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
1922
1923 /* Done. */
1924 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
1925
1926 return rc;
1927}
1928
1929/**
1930 * Syncs back the guest state
1931 *
1932 * @returns VBox status code.
1933 * @param pVM The VM to operate on.
1934 * @param pVCpu The VMCPU to operate on.
1935 * @param pCtx Guest context
1936 */
1937DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1938{
1939 RTGCUINTREG val, valShadow;
1940 RTGCUINTPTR uInterruptState;
1941 int rc;
1942
1943 /* Let's first sync back eip, esp, and eflags. */
1944 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
1945 AssertRC(rc);
1946 pCtx->rip = val;
1947 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
1948 AssertRC(rc);
1949 pCtx->rsp = val;
1950 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
1951 AssertRC(rc);
1952 pCtx->eflags.u32 = val;
1953
1954 /* Take care of instruction fusing (sti, mov ss) */
1955 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
1956 uInterruptState = val;
1957 if (uInterruptState != 0)
1958 {
1959 Assert(uInterruptState <= 2); /* only sti & mov ss */
1960 Log(("uInterruptState %x eip=%RGv\n", (uint32_t)uInterruptState, pCtx->rip));
1961 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
1962 }
1963 else
1964 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
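/* Interruptibility-state values seen here: bit 0 (1) = blocking by STI, bit 1 (2) = blocking by MOV SS;
 * both inhibit interrupt delivery for one instruction, which EM tracks via the inhibit-interrupts PC. */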
1965
1966 /* Control registers. */
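/* Reconstruct the guest-visible CR0/CR4: host-owned bits (set in the mask) come from the read shadow,
 * i.e. what the guest believes it wrote, while guest-owned bits come from the real VMCS value. */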
1967 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
1968 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
1969 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
1970 CPUMSetGuestCR0(pVCpu, val);
1971
1972 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
1973 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
1974 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
1975 CPUMSetGuestCR4(pVCpu, val);
1976
1977 /* Note: no reason to sync back the CRx registers. They can't be changed by the guest. */
1978 /* Note: only in the nested paging case can CR3 & CR4 be changed by the guest. */
1979 if ( pVM->hwaccm.s.fNestedPaging
1980 && CPUMIsGuestInPagedProtectedModeEx(pCtx))
1981 {
1982 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
1983
1984 /* Can be updated behind our back in the nested paging case. */
1985 CPUMSetGuestCR2(pVCpu, pCache->cr2);
1986
1987 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
1988
1989 if (val != pCtx->cr3)
1990 {
1991 CPUMSetGuestCR3(pVCpu, val);
1992 PGMUpdateCR3(pVCpu, val);
1993 }
1994 /* Prefetch the four PDPT entries in PAE mode. */
1995 vmxR0PrefetchPAEPdptrs(pVM, pVCpu, pCtx);
1996 }
1997
1998 /* Sync back DR7 here. */
1999 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
2000 pCtx->dr[7] = val;
2001
2002 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
2003 VMX_READ_SELREG(ES, es);
2004 VMX_READ_SELREG(SS, ss);
2005 VMX_READ_SELREG(CS, cs);
2006 VMX_READ_SELREG(DS, ds);
2007 VMX_READ_SELREG(FS, fs);
2008 VMX_READ_SELREG(GS, gs);
2009
2010 /*
2011 * System MSRs
2012 */
2013 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
2014 pCtx->SysEnter.cs = val;
2015 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
2016 pCtx->SysEnter.eip = val;
2017 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
2018 pCtx->SysEnter.esp = val;
2019
2020 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
2021 VMX_READ_SELREG(LDTR, ldtr);
2022
2023 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
2024 pCtx->gdtr.cbGdt = val;
2025 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
2026 pCtx->gdtr.pGdt = val;
2027
2028 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
2029 pCtx->idtr.cbIdt = val;
2030 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
2031 pCtx->idtr.pIdt = val;
2032
2033#ifdef HWACCM_VMX_EMULATE_REALMODE
2034 /* Real mode emulation using v86 mode. */
2035 if ( CPUMIsGuestInRealModeEx(pCtx)
2036 && pVM->hwaccm.s.vmx.pRealModeTSS)
2037 {
2038 /* Hide our emulation flags */
2039 pCtx->eflags.Bits.u1VM = 0;
2040
2041 /* Restore original IOPL setting as we always use 0. */
2042 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
2043
2044 /* Force a TR resync every time in case we switch modes. */
2045 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
2046 }
2047 else
2048#endif /* HWACCM_VMX_EMULATE_REALMODE */
2049 {
2050 /* In real mode we have a fake TSS, so only sync it back when it's supposed to be valid. */
2051 VMX_READ_SELREG(TR, tr);
2052 }
2053
2054#ifdef VBOX_WITH_AUTO_MSR_LOAD_RESTORE
2055 /* Save the possibly changed MSRs that we automatically restore and save during a world switch. */
2056 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.cCachedMSRs; i++)
2057 {
2058 PVMXMSR pMsr = (PVMXMSR)pVCpu->hwaccm.s.vmx.pGuestMSR;
2059 pMsr += i;
2060
2061 switch (pMsr->u32IndexMSR)
2062 {
2063 case MSR_K8_LSTAR:
2064 pCtx->msrLSTAR = pMsr->u64Value;
2065 break;
2066 case MSR_K6_STAR:
2067 pCtx->msrSTAR = pMsr->u64Value;
2068 break;
2069 case MSR_K8_SF_MASK:
2070 pCtx->msrSFMASK = pMsr->u64Value;
2071 break;
2072 case MSR_K8_KERNEL_GS_BASE:
2073 pCtx->msrKERNELGSBASE = pMsr->u64Value;
2074 break;
2075 case MSR_K6_EFER:
2076 /* EFER can't be changed without causing a VM-exit. */
2077// Assert(pCtx->msrEFER == pMsr->u64Value);
2078 break;
2079 default:
2080 AssertFailed();
2081 return VERR_INTERNAL_ERROR;
2082 }
2083 }
2084#endif /* VBOX_WITH_AUTO_MSR_LOAD_RESTORE */
2085 return VINF_SUCCESS;
2086}
2087
2088/**
2089 * Dummy placeholder
2090 *
2091 * @param pVM The VM to operate on.
2092 * @param pVCpu The VMCPU to operate on.
2093 */
2094static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu)
2095{
2096 NOREF(pVM);
2097 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH);
2098 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2099 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2100 return;
2101}
2102
2103/**
2104 * Setup the tagged TLB for EPT
2105 *
2106 * @returns VBox status code.
2107 * @param pVM The VM to operate on.
2108 * @param pVCpu The VMCPU to operate on.
2109 */
2110static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu)
2111{
2112 PHWACCM_CPUINFO pCpu;
2113
2114 Assert(pVM->hwaccm.s.fNestedPaging);
2115 Assert(!pVM->hwaccm.s.vmx.fVPID);
2116
2117 /* Deal with tagged TLBs if VPID or EPT is supported. */
2118 pCpu = HWACCMR0GetCurrentCpu();
2119 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
2120 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
2121 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2122 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
2123 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2124 {
2125 /* Force a TLB flush on VM entry. */
2126 pVCpu->hwaccm.s.fForceTLBFlush = true;
2127 }
2128 else
2129 Assert(!pCpu->fFlushTLB);
2130
2131 /* Check for tlb shootdown flushes. */
2132 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2133 pVCpu->hwaccm.s.fForceTLBFlush = true;
2134
2135 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2136 pCpu->fFlushTLB = false;
2137
2138 if (pVCpu->hwaccm.s.fForceTLBFlush)
2139 {
2140 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
2141 }
2142 else
2143 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2144 {
2145 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
2146 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2147
2148 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2149 {
2150 /* TlbShootdown.aPages contains physical addresses in this case. */
2151 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2152 }
2153 }
2154 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2155 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2156
2157#ifdef VBOX_WITH_STATISTICS
2158 if (pVCpu->hwaccm.s.fForceTLBFlush)
2159 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2160 else
2161 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2162#endif
2163}
2164
2165#ifdef HWACCM_VTX_WITH_VPID
2166/**
2167 * Setup the tagged TLB for VPID
2168 *
2169 * @returns VBox status code.
2170 * @param pVM The VM to operate on.
2171 * @param pVCpu The VMCPU to operate on.
2172 */
2173static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu)
2174{
2175 PHWACCM_CPUINFO pCpu;
2176
2177 Assert(pVM->hwaccm.s.vmx.fVPID);
2178 Assert(!pVM->hwaccm.s.fNestedPaging);
2179
2180 /* Deal with tagged TLBs if VPID or EPT is supported. */
2181 pCpu = HWACCMR0GetCurrentCpu();
2182 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
2183 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
2184 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2185 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
2186 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2187 {
2188 /* Force a TLB flush on VM entry. */
2189 pVCpu->hwaccm.s.fForceTLBFlush = true;
2190 }
2191 else
2192 Assert(!pCpu->fFlushTLB);
2193
2194 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
2195
2196 /* Check for tlb shootdown flushes. */
2197 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
2198 pVCpu->hwaccm.s.fForceTLBFlush = true;
2199
2200 /* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without actually flushing the whole TLB (which is expensive). */
2201 if (pVCpu->hwaccm.s.fForceTLBFlush)
2202 {
2203 if ( ++pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID
2204 || pCpu->fFlushTLB)
2205 {
2206 pCpu->fFlushTLB = false;
2207 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
2208 pCpu->cTLBFlushes++;
2209 vmxR0FlushVPID(pVM, pVCpu, VMX_FLUSH_ALL_CONTEXTS, 0);
2210 }
2211 else
2212 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
2213
2214 pVCpu->hwaccm.s.fForceTLBFlush = false;
2215 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
2216 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
2217 }
2218 else
2219 {
2220 Assert(!pCpu->fFlushTLB);
2221 Assert(pVCpu->hwaccm.s.uCurrentASID && pCpu->uCurrentASID);
2222
2223 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
2224 {
2225 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
2226 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
2227 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
2228 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, pVCpu->hwaccm.s.TlbShootdown.aPages[i]);
2229 }
2230 }
2231 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
2232 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
2233
2234 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2235 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
2236 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
2237
2238 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
2239 AssertRC(rc);
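/* With VPID, TLB entries created while the guest runs are tagged with this ASID, so world switches no
 * longer need to flush them; VPID 0 is reserved for the host, which is why the ASIDs handed out above
 * start at 1. */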
2240
2241 if (pVCpu->hwaccm.s.fForceTLBFlush)
2242 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
2243
2244#ifdef VBOX_WITH_STATISTICS
2245 if (pVCpu->hwaccm.s.fForceTLBFlush)
2246 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
2247 else
2248 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
2249#endif
2250}
2251#endif /* HWACCM_VTX_WITH_VPID */
2252
2253/**
2254 * Runs guest code in a VT-x VM.
2255 *
2256 * @returns VBox status code.
2257 * @param pVM The VM to operate on.
2258 * @param pVCpu The VMCPU to operate on.
2259 * @param pCtx Guest context
2260 */
2261VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2262{
2263 int rc = VINF_SUCCESS;
2264 RTGCUINTREG val;
2265 RTGCUINTREG exitReason = (RTGCUINTREG)VMX_EXIT_INVALID;
2266 RTGCUINTREG instrError, cbInstr;
2267 RTGCUINTPTR exitQualification = 0;
2268 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
2269 RTGCUINTPTR errCode, instrInfo;
2270 bool fSetupTPRCaching = false;
2271 uint64_t u64OldLSTAR = 0;
2272 uint8_t u8LastTPR = 0;
2273 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
2274 unsigned cResume = 0;
2275#ifdef VBOX_STRICT
2276 RTCPUID idCpuCheck;
2277 bool fWasInLongMode = false;
2278#endif
2279#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2280 uint64_t u64LastTime = RTTimeMilliTS();
2281#endif
2282#ifdef VBOX_WITH_STATISTICS
2283 bool fStatEntryStarted = true;
2284 bool fStatExit2Started = false;
2285#endif
2286
2287 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || (pVCpu->hwaccm.s.vmx.pVAPIC && pVM->hwaccm.s.vmx.pAPIC));
2288
2289 /* Check if we need to use TPR shadowing. */
2290 if ( CPUMIsGuestInLongModeEx(pCtx)
2291 || ( ((pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC) || pVM->hwaccm.s.fTRPPatchingAllowed)
2292 && pVM->hwaccm.s.fHasIoApic)
2293 )
2294 {
2295 fSetupTPRCaching = true;
2296 }
2297
2298 Log2(("\nE"));
2299
2300 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
2301
2302#ifdef VBOX_STRICT
2303 {
2304 RTCCUINTREG val2;
2305
2306 rc = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val2);
2307 AssertRC(rc);
2308 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val2));
2309
2310 /* allowed zero */
2311 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
2312 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
2313
2314 /* allowed one */
2315 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
2316 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
2317
2318 rc = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val2);
2319 AssertRC(rc);
2320 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val2));
2321
2322 /* Must be set according to the MSR, but can be cleared in case of EPT. */
2323 if (pVM->hwaccm.s.fNestedPaging)
2324 val2 |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
2325 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
2326 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
2327
2328 /* allowed zero */
2329 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
2330 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
2331
2332 /* allowed one */
2333 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
2334 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
2335
2336 rc = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val2);
2337 AssertRC(rc);
2338 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val2));
2339
2340 /* allowed zero */
2341 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
2342 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
2343
2344 /* allowed one */
2345 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
2346 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
2347
2348 rc = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val2);
2349 AssertRC(rc);
2350 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val2));
2351
2352 /* allowed zero */
2353 if ((val2 & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2354 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2355
2356 /* allowed one */
2357 if ((val2 & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2358 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2359 }
2360 fWasInLongMode = CPUMIsGuestInLongModeEx(pCtx);
2361#endif /* VBOX_STRICT */
2362
2363#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2364 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2365#endif
2366
2367 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
2368 */
2369ResumeExecution:
2370 STAM_STATS({
2371 if (fStatExit2Started) { STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = false; }
2372 if (!fStatEntryStarted) { STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = true; }
2373 });
2374 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2375 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2376 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2377 Assert(!HWACCMR0SuspendPending());
2378 /* Not allowed to switch modes without reloading the host state (32->64 switcher)!! */
2379 Assert(fWasInLongMode == CPUMIsGuestInLongModeEx(pCtx));
2380
2381 /* Safety precaution; looping for too long here can have a very bad effect on the host */
2382 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
2383 {
2384 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2385 rc = VINF_EM_RAW_INTERRUPT;
2386 goto end;
2387 }
2388
2389 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
2390 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2391 {
2392 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2393 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2394 {
2395 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2396 * Before we are able to execute this instruction in raw mode (iret to guest code), an external interrupt might
2397 * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process; this could
2398 * break the guest. It sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
2399 */
2400 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2401 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2402 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2403 AssertRC(rc);
2404 }
2405 }
2406 else
2407 {
2408 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2409 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2410 AssertRC(rc);
2411 }
2412
2413#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
2414 if (RT_UNLIKELY((cResume & 0xf) == 0))
2415 {
2416 uint64_t u64CurTime = RTTimeMilliTS();
2417
2418 if (RT_UNLIKELY(u64CurTime > u64LastTime))
2419 {
2420 u64LastTime = u64CurTime;
2421 TMTimerPollVoid(pVM, pVCpu);
2422 }
2423 }
2424#endif
2425
2426 /* Check for pending actions that force us to go back to ring 3. */
2427 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING)
2428 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
2429 {
2430 /* Check if a sync operation is pending. */
2431 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2432 {
2433 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2434 AssertRC(rc);
2435 if (rc != VINF_SUCCESS)
2436 {
2437 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", rc));
2438 goto end;
2439 }
2440 }
2441
2442#ifdef DEBUG
2443 /* Intercept X86_XCPT_DB if stepping is enabled */
2444 if (!DBGFIsStepping(pVCpu))
2445#endif
2446 {
2447 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2448 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2449 {
2450 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
2451 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2452 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
2453 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2454 goto end;
2455 }
2456 }
2457
2458 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2459 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
2460 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
2461 {
2462 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
2463 rc = VINF_EM_PENDING_REQUEST;
2464 goto end;
2465 }
2466
2467 /* Check if a pgm pool flush is in progress. */
2468 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2469 {
2470 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
2471 rc = VINF_PGM_POOL_FLUSH_PENDING;
2472 goto end;
2473 }
2474 }
2475
2476#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2477 /*
2478 * Exit to ring-3 preemption/work is pending.
2479 *
2480 * Interrupts are disabled before the call to make sure we don't miss any interrupt
2481 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
2482 * further down, but VMXR0CheckPendingInterrupt makes that impossible.)
2483 *
2484 * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
2485 * shootdowns rely on this.
2486 */
2487 uOldEFlags = ASMIntDisableFlags();
2488 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
2489 {
2490 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
2491 rc = VINF_EM_RAW_INTERRUPT;
2492 goto end;
2493 }
2494 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2495#endif
2496
2497 /* When external interrupts are pending, we should exit the VM when IF is set. */
2498 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
2499 rc = VMXR0CheckPendingInterrupt(pVM, pVCpu, pCtx);
2500 if (RT_FAILURE(rc))
2501 goto end;
2502
2503 /** @todo check timers?? */
2504
2505 /* TPR caching using CR8 is only available in 64-bit mode. */
2506 /* Note the 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L), which appears to be missing in Intel CPUs. */
2507 /* Note: we used to be unable to do this in LoadGuestState as PDMApicGetTPR could jump back to ring 3 (lock); that is no longer true. */
2508 /**
2509 * @todo query and update the TPR only when it could have been changed (mmio access & wrmsr (x2apic))
2510 */
2511 if (fSetupTPRCaching)
2512 {
2513 /* TPR caching in CR8 */
2514 bool fPending;
2515
2516 int rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
2517 AssertRC(rc2);
2518 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
2519 pVCpu->hwaccm.s.vmx.pVAPIC[0x80] = u8LastTPR;
2520
2521 /* Two options here:
2522 * - external interrupt pending, but masked by the TPR value.
2523 * -> a CR8 update that lowers the current TPR value should cause an exit
2524 * - no pending interrupts
2525 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
2526 */
2527 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? (u8LastTPR >> 4) : 0); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
2528 AssertRC(rc);
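/* With the virtual-APIC page active, the CPU raises a TPR-below-threshold exit whenever the guest lowers
 * its task priority (VTPR bits 7:4) below this threshold. Programming the threshold to the current
 * priority class only while an interrupt is pending means we are notified exactly when the guest unmasks
 * that interrupt, and take no extra exits otherwise. */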
2529
2530 if (pVM->hwaccm.s.fTPRPatchingActive)
2531 {
2532 Assert(!CPUMIsGuestInLongModeEx(pCtx));
2533 /* Our patch code uses LSTAR for TPR caching. */
2534 pCtx->msrLSTAR = u8LastTPR;
2535
2536 if (fPending)
2537 {
2538 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
2539 vmxR0SetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
2540 }
2541 else
2542 {
2543 /* No interrupts are pending, so we don't need to be explicitly notified.
2544 * There are enough world switches for detecting pending interrupts.
2545 */
2546 vmxR0SetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
2547 }
2548 }
2549 }
2550
2551#if defined(HWACCM_VTX_WITH_EPT) && defined(LOG_ENABLED)
2552 if ( pVM->hwaccm.s.fNestedPaging
2553# ifdef HWACCM_VTX_WITH_VPID
2554 || pVM->hwaccm.s.vmx.fVPID
2555# endif /* HWACCM_VTX_WITH_VPID */
2556 )
2557 {
2558 PHWACCM_CPUINFO pCpu;
2559
2560 pCpu = HWACCMR0GetCurrentCpu();
2561 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2562 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2563 {
2564 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
2565 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
2566 else
2567 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2568 }
2569 if (pCpu->fFlushTLB)
2570 LogFlow(("Force TLB flush: first time cpu %d is used -> flush\n", pCpu->idCpu));
2571 else
2572 if (pVCpu->hwaccm.s.fForceTLBFlush)
2573 LogFlow(("Manual TLB flush\n"));
2574 }
2575#endif
2576#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2577 PGMDynMapFlushAutoSet(pVCpu);
2578#endif
2579
2580 /*
2581 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
2582 * (until the actual world switch)
2583 */
2584#ifdef VBOX_STRICT
2585 idCpuCheck = RTMpCpuId();
2586#endif
2587#ifdef LOG_ENABLED
2588 VMMR0LogFlushDisable(pVCpu);
2589#endif
2590 /* Save the host state first. */
2591 rc = VMXR0SaveHostState(pVM, pVCpu);
2592 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2593 {
2594 VMMR0LogFlushEnable(pVCpu);
2595 goto end;
2596 }
2597 /* Load the guest state */
2598 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
2599 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2600 {
2601 VMMR0LogFlushEnable(pVCpu);
2602 goto end;
2603 }
2604
2605#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2606 /* Disable interrupts to make sure a poke will interrupt execution.
2607 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
2608 */
2609 uOldEFlags = ASMIntDisableFlags();
2610 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2611#endif
2612
2613 /* Non-register state Guest Context */
2614 /** @todo change me according to cpu state */
2615 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
2616 AssertRC(rc);
2617
2618 /* Set TLB flush state as checked until we return from the world switch. */
2619 ASMAtomicWriteU8(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
2620 /* Deal with tagged TLB setup and invalidation. */
2621 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
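/* pfnSetupTaggedTLB points at one of the routines above (vmxR0SetupTLBEPT, vmxR0SetupTLBVPID or
 * vmxR0SetupTLBDummy), chosen during setup depending on whether nested paging, VPID or neither is active. */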
2622
2623 STAM_STATS({ STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = false; });
2624
2625 /* Manual save and restore:
2626 * - General purpose registers except RIP, RSP
2627 *
2628 * Trashed:
2629 * - CR2 (we don't care)
2630 * - LDTR (reset to 0)
2631 * - DRx (presumably not changed at all)
2632 * - DR7 (reset to 0x400)
2633 * - EFLAGS (reset to RT_BIT(1); not relevant)
2634 *
2635 */
2636
2637 /* All done! Let's start VM execution. */
2638 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatInGC, z);
2639 Assert(idCpuCheck == RTMpCpuId());
2640
2641#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2642 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
2643 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
2644#endif
2645
2646 /* Save the current TPR value in the LSTAR msr so our patches can access it. */
2647 if (pVM->hwaccm.s.fTPRPatchingActive)
2648 {
2649 Assert(pVM->hwaccm.s.fTPRPatchingActive);
2650 u64OldLSTAR = ASMRdMsr(MSR_K8_LSTAR);
2651 ASMWrMsr(MSR_K8_LSTAR, u8LastTPR);
2652 }
2653
2654 TMNotifyStartOfExecution(pVCpu);
2655#ifdef VBOX_WITH_KERNEL_USING_XMM
2656 rc = hwaccmR0VMXStartVMWrapXMM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hwaccm.s.vmx.pfnStartVM);
2657#else
2658 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
2659#endif
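/* The start-VM worker presumably executes VMLAUNCH on the first run and VMRESUME afterwards, selected by
 * the fResumeVM flag passed above; rc now holds the raw world-switch status. */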
2660 ASMAtomicWriteU8(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
2661 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExit);
2662 /* Record what is possibly the last TSC value seen by the guest (may be slightly too high); only relevant in TSC offset mode. */
2663 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT))
2664 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVCpu->hwaccm.s.vmx.u64TSCOffset - 0x400 /* guesstimate of world switch overhead in clock ticks */);
2665
2666 TMNotifyEndOfExecution(pVCpu);
2667 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
2668 Assert(!(ASMGetFlags() & X86_EFL_IF));
2669
2670 /* Restore the host LSTAR msr if the guest could have changed it. */
2671 if (pVM->hwaccm.s.fTPRPatchingActive)
2672 {
2673 Assert(pVM->hwaccm.s.fTPRPatchingActive);
2674 pVCpu->hwaccm.s.vmx.pVAPIC[0x80] = pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR);
2675 ASMWrMsr(MSR_K8_LSTAR, u64OldLSTAR);
2676 }
2677
2678 ASMSetFlags(uOldEFlags);
2679#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2680 uOldEFlags = ~(RTCCUINTREG)0;
2681#endif
2682
2683 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
2684
2685 /* In case we execute a goto ResumeExecution later on. */
2686 pVCpu->hwaccm.s.fResumeVM = true;
2687 pVCpu->hwaccm.s.fForceTLBFlush = false;
2688
2689 /*
2690 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2691 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
2692 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2693 */
2694 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatInGC, z);
2695 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit1, v);
2696
2697 if (RT_UNLIKELY(rc != VINF_SUCCESS))
2698 {
2699 VMXR0ReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
2700 VMMR0LogFlushEnable(pVCpu);
2701 goto end;
2702 }
2703
2704 /* Success. Query the guest state and figure out what has happened. */
2705
2706 /* Investigate why there was a VM-exit. */
2707 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
2708 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
2709
2710 exitReason &= 0xffff; /* bit 0-15 contain the exit code. */
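/* Only bits 15:0 hold the basic exit reason; the upper bits carry flags (e.g. the VM-entry failure
 * indicator in bit 31), hence the masking before the reason is used below. */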
2711 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
2712 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
2713 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
2714 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
2715 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
2716 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
2717 rc |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
2718 AssertRC(rc);
2719
2720 /* Sync back the guest state */
2721 rc = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
2722 AssertRC(rc);
2723
2724 /* Note! NOW IT'S SAFE FOR LOGGING! */
2725 VMMR0LogFlushEnable(pVCpu);
2726 Log2(("Raw exit reason %08x\n", exitReason));
2727
2728 /* Check if an injected event was interrupted prematurely. */
2729 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
2730 AssertRC(rc);
2731 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
2732 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2733 /* Ignore 'int xx' as they'll be restarted anyway. */
2734 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
2735 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2736 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2737 {
2738 Assert(!pVCpu->hwaccm.s.Event.fPending);
2739 pVCpu->hwaccm.s.Event.fPending = true;
2740 /* Error code present? */
2741 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
2742 {
2743 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
2744 AssertRC(rc);
2745 pVCpu->hwaccm.s.Event.errCode = val;
2746 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
2747 }
2748 else
2749 {
2750 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2751 pVCpu->hwaccm.s.Event.errCode = 0;
2752 }
2753 }
2754#ifdef VBOX_STRICT
2755 else
2756 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2757 /* Ignore software exceptions (such as int3) as they'll reoccur when we restart the instruction anyway. */
2758 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2759 {
2760 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2761 }
2762
2763 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
2764 HWACCMDumpRegs(pVM, pVCpu, pCtx);
2765#endif
2766
2767 Log2(("E%d: New EIP=%x:%RGv\n", (uint32_t)exitReason, pCtx->cs, (RTGCPTR)pCtx->rip));
2768 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
2769 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
2770 Log2(("Interruption error code %d\n", (uint32_t)errCode));
2771 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
2772
2773 /* Sync back the TPR if it was changed. */
2774 if ( fSetupTPRCaching
2775 && u8LastTPR != pVCpu->hwaccm.s.vmx.pVAPIC[0x80])
2776 {
2777 rc = PDMApicSetTPR(pVCpu, pVCpu->hwaccm.s.vmx.pVAPIC[0x80]);
2778 AssertRC(rc);
2779 }
2780
2781 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, v);
2782 STAM_STATS({ STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = true; });
2783
2784 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
2785 switch (exitReason)
2786 {
2787 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2788 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2789 {
2790 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
2791
2792 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2793 {
2794 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
2795 /* External interrupt; leave to allow it to be dispatched again. */
2796 rc = VINF_EM_RAW_INTERRUPT;
2797 break;
2798 }
2799 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2800 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
2801 {
2802 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
2803 /* External interrupt; leave to allow it to be dispatched again. */
2804 rc = VINF_EM_RAW_INTERRUPT;
2805 break;
2806
2807 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
2808 AssertFailed(); /* can't come here; fails the first check. */
2809 break;
2810
2811 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
2812 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
2813 Assert(vector == 1 || vector == 3 || vector == 4);
2814 /* no break */
2815 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
2816 Log2(("Hardware/software interrupt %d\n", vector));
2817 switch (vector)
2818 {
2819 case X86_XCPT_NM:
2820 {
2821 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
2822
2823 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
2824 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
2825 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
2826 if (rc == VINF_SUCCESS)
2827 {
2828 Assert(CPUMIsGuestFPUStateActive(pVCpu));
2829
2830 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
2831
2832 /* Continue execution. */
2833 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2834
2835 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2836 goto ResumeExecution;
2837 }
2838
2839 Log(("Forward #NM fault to the guest\n"));
2840 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
2841 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
2842 AssertRC(rc);
2843 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2844 goto ResumeExecution;
2845 }
2846
2847 case X86_XCPT_PF: /* Page fault */
2848 {
2849#ifdef DEBUG
2850 if (pVM->hwaccm.s.fNestedPaging)
2851 { /* A genuine page fault.
2852 * Forward the trap to the guest by injecting the exception and resuming execution.
2853 */
2854 Log(("Guest page fault at %RGv cr2=%RGv error code %x rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
2855
2856 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
2857
2858 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2859
2860 /* Now we must update CR2. */
2861 pCtx->cr2 = exitQualification;
2862 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2863 AssertRC(rc);
2864
2865 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2866 goto ResumeExecution;
2867 }
2868#endif
2869 Assert(!pVM->hwaccm.s.fNestedPaging);
2870
2871#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
2872 /* Shortcut for APIC TPR reads and writes; 32-bit guests only */
2873 if ( pVM->hwaccm.s.fTRPPatchingAllowed
2874 && pVM->hwaccm.s.pGuestPatchMem
2875 && (exitQualification & 0xfff) == 0x080
2876 && !(errCode & X86_TRAP_PF_P) /* not present */
2877 && CPUMGetGuestCPL(pVCpu, CPUMCTX2CORE(pCtx)) == 0
2878 && !CPUMIsGuestInLongModeEx(pCtx)
2879 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
2880 {
2881 RTGCPHYS GCPhysApicBase, GCPhys;
2882 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
2883 GCPhysApicBase &= PAGE_BASE_GC_MASK;
2884
2885 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
2886 if ( rc == VINF_SUCCESS
2887 && GCPhys == GCPhysApicBase)
2888 {
2889 /* Only attempt to patch the instruction once. */
2890 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
2891 if (!pPatch)
2892 {
2893 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
2894 break;
2895 }
2896 }
2897 }
2898#endif
2899
2900 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
2901 /* Exit qualification contains the linear address of the page fault. */
2902 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
2903 TRPMSetErrorCode(pVCpu, errCode);
2904 TRPMSetFaultAddress(pVCpu, exitQualification);
2905
2906 /* Shortcut for APIC TPR reads and writes. */
2907 if ( (exitQualification & 0xfff) == 0x080
2908 && !(errCode & X86_TRAP_PF_P) /* not present */
2909 && fSetupTPRCaching
2910 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
2911 {
2912 RTGCPHYS GCPhysApicBase, GCPhys;
2913 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
2914 GCPhysApicBase &= PAGE_BASE_GC_MASK;
2915
2916 rc = PGMGstGetPage(pVCpu, (RTGCPTR)exitQualification, NULL, &GCPhys);
2917 if ( rc == VINF_SUCCESS
2918 && GCPhys == GCPhysApicBase)
2919 {
2920 Log(("Enable VT-x virtual APIC access filtering\n"));
2921 rc = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
2922 AssertRC(rc);
2923 }
2924 }
2925
2926 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
2927 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
2928 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
2929
2930 if (rc == VINF_SUCCESS)
2931 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2932 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification ,errCode));
2933 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
2934
2935 TRPMResetTrap(pVCpu);
2936 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2937 goto ResumeExecution;
2938 }
2939 else
2940 if (rc == VINF_EM_RAW_GUEST_TRAP)
2941 { /* A genuine page fault.
2942 * Forward the trap to the guest by injecting the exception and resuming execution.
2943 */
2944 Log2(("Forward page fault to the guest\n"));
2945
2946 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2947 /* The error code might have been changed. */
2948 errCode = TRPMGetErrorCode(pVCpu);
2949
2950 TRPMResetTrap(pVCpu);
2951
2952 /* Now we must update CR2. */
2953 pCtx->cr2 = exitQualification;
2954 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2955 AssertRC(rc);
2956
2957 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2958 goto ResumeExecution;
2959 }
2960#ifdef VBOX_STRICT
2961 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
2962 Log2(("PGMTrap0eHandler failed with %d\n", rc));
2963#endif
2964 /* Need to go back to the recompiler to emulate the instruction. */
2965 TRPMResetTrap(pVCpu);
2966 break;
2967 }
2968
2969 case X86_XCPT_MF: /* Floating point exception. */
2970 {
2971 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
2972 if (!(pCtx->cr0 & X86_CR0_NE))
2973 {
2974 /* old style FPU error reporting needs some extra work. */
2975 /** @todo don't fall back to the recompiler, but do it manually. */
2976 rc = VINF_EM_RAW_EMULATE_INSTR;
2977 break;
2978 }
2979 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
2980 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2981 AssertRC(rc);
2982
2983 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2984 goto ResumeExecution;
2985 }
2986
2987 case X86_XCPT_DB: /* Debug exception. */
2988 {
2989 uint64_t uDR6;
2990
2991 /* DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
2992 *
2993 * Exit qualification bits:
2994 * 3:0 B0-B3 which breakpoint condition was met
2995 * 12:4 Reserved (0)
2996 * 13 BD - debug register access detected
2997 * 14 BS - single step execution or branch taken
2998 * 63:15 Reserved (0)
2999 */
3000 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
3001
3002 /* Note that we don't support guest and host-initiated debugging at the same time. */
3003 Assert(DBGFIsStepping(pVCpu) || CPUMIsGuestInRealModeEx(pCtx) || CPUMIsHyperDebugStateActive(pVCpu));
3004
3005 uDR6 = X86_DR6_INIT_VAL;
3006 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
3007 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), uDR6);
3008 if (rc == VINF_EM_RAW_GUEST_TRAP)
3009 {
3010 /** @todo this isn't working, but we'll never get here normally. */
3011
3012 /* Update DR6 here. */
3013 pCtx->dr[6] = uDR6;
3014
3015 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
3016 pCtx->dr[7] &= ~X86_DR7_GD;
3017
3018 /* Paranoia. */
3019 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3020 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3021 pCtx->dr[7] |= 0x400; /* must be one */
3022
3023 /* Resync DR7 */
3024 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3025 AssertRC(rc);
3026
3027 Log(("Trap %x (debug) at %RGv exit qualification %RX64\n", vector, (RTGCPTR)pCtx->rip, exitQualification));
3028 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3029 AssertRC(rc);
3030
3031 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3032 goto ResumeExecution;
3033 }
3034 /* Return to ring 3 to deal with the debug exit code. */
3035 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, rc));
3036 break;
3037 }
3038
3039 case X86_XCPT_BP: /* Breakpoint. */
3040 {
3041 rc = DBGFRZTrap03Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3042 if (rc == VINF_EM_RAW_GUEST_TRAP)
3043 {
3044 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs, pCtx->rip));
3045 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3046 AssertRC(rc);
3047 goto ResumeExecution;
3048 }
3049 if (rc == VINF_SUCCESS)
3050 goto ResumeExecution;
3051 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, rc));
3052 break;
3053 }
3054
3055        case X86_XCPT_GP:   /* General protection fault exception. */
3056 {
3057 uint32_t cbOp;
3058 uint32_t cbSize;
3059 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3060
3061 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
3062#ifdef VBOX_STRICT
3063 if ( !CPUMIsGuestInRealModeEx(pCtx)
3064 || !pVM->hwaccm.s.vmx.pRealModeTSS)
3065 {
3066 Log(("Trap %x at %04X:%RGv errorCode=%x\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode));
3067 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3068 AssertRC(rc);
3069 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3070 goto ResumeExecution;
3071 }
3072#endif
3073 Assert(CPUMIsGuestInRealModeEx(pCtx));
3074
3075 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %x:%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip));
3076
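            /* Without unrestricted guest execution, real mode is run in a V86-style
             * setup (see pRealModeTSS) in which IOPL-sensitive and privileged
             * instructions such as CLI, STI, HLT, POPF, PUSHF, IRET and INT raise #GP.
             * Disassemble the faulting instruction and emulate the common cases below;
             * everything else is handed to the generic interpreter.
             */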
3077 rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, &cbOp);
3078 if (RT_SUCCESS(rc))
3079 {
3080 bool fUpdateRIP = true;
3081
3082 Assert(cbOp == pDis->opsize);
3083 switch (pDis->pCurInstr->opcode)
3084 {
3085 case OP_CLI:
3086 pCtx->eflags.Bits.u1IF = 0;
3087 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
3088 break;
3089
3090 case OP_STI:
3091 pCtx->eflags.Bits.u1IF = 1;
3092 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip + pDis->opsize);
3093 Assert(VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
3094 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, VMX_VMCS_GUEST_INTERRUPTIBILITY_STATE_BLOCK_STI);
3095 AssertRC(rc);
3096 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
3097 break;
3098
3099 case OP_HLT:
3100 fUpdateRIP = false;
3101 rc = VINF_EM_HALT;
3102 pCtx->rip += pDis->opsize;
3103 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3104 break;
3105
3106 case OP_POPF:
3107 {
3108 RTGCPTR GCPtrStack;
3109 uint32_t cbParm;
3110 uint32_t uMask;
3111 X86EFLAGS eflags;
3112
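                        /* An operand size prefix turns this into a 32-bit POPFD; the
                         * real mode default is a 16-bit POPF, hence the 2/4 byte
                         * parameter size and the matching EFLAGS merge mask below.
                         */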
3113 if (pDis->prefix & PREFIX_OPSIZE)
3114 {
3115 cbParm = 4;
3116 uMask = 0xffffffff;
3117 }
3118 else
3119 {
3120 cbParm = 2;
3121 uMask = 0xffff;
3122 }
3123
3124 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3125 if (RT_FAILURE(rc))
3126 {
3127 rc = VERR_EM_INTERPRETER;
3128 break;
3129 }
3130 eflags.u = 0;
3131 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3132 if (RT_FAILURE(rc))
3133 {
3134 rc = VERR_EM_INTERPRETER;
3135 break;
3136 }
3137 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
3138 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (eflags.u & X86_EFL_POPF_BITS & uMask);
3139 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
3140 pCtx->eflags.Bits.u1RF = 0;
3141 pCtx->esp += cbParm;
3142 pCtx->esp &= uMask;
3143
3144 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
3145 break;
3146 }
3147
3148 case OP_PUSHF:
3149 {
3150 RTGCPTR GCPtrStack;
3151 uint32_t cbParm;
3152 uint32_t uMask;
3153 X86EFLAGS eflags;
3154
3155 if (pDis->prefix & PREFIX_OPSIZE)
3156 {
3157 cbParm = 4;
3158 uMask = 0xffffffff;
3159 }
3160 else
3161 {
3162 cbParm = 2;
3163 uMask = 0xffff;
3164 }
3165
3166 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0, &GCPtrStack);
3167 if (RT_FAILURE(rc))
3168 {
3169 rc = VERR_EM_INTERPRETER;
3170 break;
3171 }
3172 eflags = pCtx->eflags;
3173 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
3174 eflags.Bits.u1RF = 0;
3175 eflags.Bits.u1VM = 0;
3176
3177 rc = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
3178 if (RT_FAILURE(rc))
3179 {
3180 rc = VERR_EM_INTERPRETER;
3181 break;
3182 }
3183 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
3184 pCtx->esp -= cbParm;
3185 pCtx->esp &= uMask;
3186 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
3187 break;
3188 }
3189
3190 case OP_IRET:
3191 {
3192 RTGCPTR GCPtrStack;
3193 uint32_t uMask = 0xffff;
3194 uint16_t aIretFrame[3];
3195
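                        /* A real mode IRET pops three 16-bit words: IP, CS and FLAGS;
                         * that is what aIretFrame receives below, and CS also supplies
                         * the new hidden base as CS << 4.
                         */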
3196 if (pDis->prefix & (PREFIX_OPSIZE | PREFIX_ADDRSIZE))
3197 {
3198 rc = VERR_EM_INTERPRETER;
3199 break;
3200 }
3201
3202 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
3203 if (RT_FAILURE(rc))
3204 {
3205 rc = VERR_EM_INTERPRETER;
3206 break;
3207 }
3208 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
3209 if (RT_FAILURE(rc))
3210 {
3211 rc = VERR_EM_INTERPRETER;
3212 break;
3213 }
3214 pCtx->ip = aIretFrame[0];
3215 pCtx->cs = aIretFrame[1];
3216 pCtx->csHid.u64Base = pCtx->cs << 4;
3217 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
3218 pCtx->sp += sizeof(aIretFrame);
3219
3220 LogFlow(("iret to %04x:%x\n", pCtx->cs, pCtx->ip));
3221 fUpdateRIP = false;
3222 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
3223 break;
3224 }
3225
3226 case OP_INT:
3227 {
3228 uint32_t intInfo2;
3229
3230 LogFlow(("Realmode: INT %x\n", pDis->param1.parval & 0xff));
3231 intInfo2 = pDis->param1.parval & 0xff;
3232 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3233 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3234
3235 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3236 AssertRC(rc);
3237 fUpdateRIP = false;
3238 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3239 break;
3240 }
3241
3242 case OP_INTO:
3243 {
3244 if (pCtx->eflags.Bits.u1OF)
3245 {
3246 uint32_t intInfo2;
3247
3248 LogFlow(("Realmode: INTO\n"));
3249 intInfo2 = X86_XCPT_OF;
3250 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3251 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3252
3253 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3254 AssertRC(rc);
3255 fUpdateRIP = false;
3256 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3257 }
3258 break;
3259 }
3260
3261 case OP_INT3:
3262 {
3263 uint32_t intInfo2;
3264
3265 LogFlow(("Realmode: INT 3\n"));
3266 intInfo2 = 3;
3267 intInfo2 |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3268 intInfo2 |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3269
3270 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo2, cbOp, 0);
3271 AssertRC(rc);
3272 fUpdateRIP = false;
3273 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
3274 break;
3275 }
3276
3277 default:
3278 rc = EMInterpretInstructionCPU(pVM, pVCpu, pDis, CPUMCTX2CORE(pCtx), 0, &cbSize);
3279 break;
3280 }
3281
3282 if (rc == VINF_SUCCESS)
3283 {
3284 if (fUpdateRIP)
3285 pCtx->rip += cbOp; /* Move on to the next instruction. */
3286
3287 /* lidt, lgdt can end up here. In the future crx changes as well. Just reload the whole context to be done with it. */
3288 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3289
3290 /* Only resume if successful. */
3291 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3292 goto ResumeExecution;
3293 }
3294 }
3295 else
3296 rc = VERR_EM_INTERPRETER;
3297
3298 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", rc));
3299 break;
3300 }
3301
3302#ifdef VBOX_STRICT
3303 case X86_XCPT_XF: /* SIMD exception. */
3304 case X86_XCPT_DE: /* Divide error. */
3305        case X86_XCPT_UD:   /* Invalid opcode exception. */
3306 case X86_XCPT_SS: /* Stack segment exception. */
3307 case X86_XCPT_NP: /* Segment not present exception. */
3308 {
3309 switch(vector)
3310 {
3311 case X86_XCPT_DE:
3312 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
3313 break;
3314 case X86_XCPT_UD:
3315 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
3316 break;
3317 case X86_XCPT_SS:
3318 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
3319 break;
3320 case X86_XCPT_NP:
3321 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
3322 break;
3323 }
3324
3325 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
3326 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3327 AssertRC(rc);
3328
3329 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3330 goto ResumeExecution;
3331 }
3332#endif
3333 default:
3334#ifdef HWACCM_VMX_EMULATE_REALMODE
3335 if ( CPUMIsGuestInRealModeEx(pCtx)
3336 && pVM->hwaccm.s.vmx.pRealModeTSS)
3337 {
3338 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode));
3339 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
3340 AssertRC(rc);
3341
3342 /* Go back to ring 3 in case of a triple fault. */
3343 if ( vector == X86_XCPT_DF
3344 && rc == VINF_EM_RESET)
3345 break;
3346
3347 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3348 goto ResumeExecution;
3349 }
3350#endif
3351 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
3352 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
3353 break;
3354 } /* switch (vector) */
3355
3356 break;
3357
3358 default:
3359 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
3360        AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
3361 break;
3362 }
3363
3364 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
3365 break;
3366 }
3367
3368 case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
3369 {
3370 RTGCPHYS GCPhys;
3371
3372 Assert(pVM->hwaccm.s.fNestedPaging);
3373
3374 rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3375 AssertRC(rc);
3376 Assert(((exitQualification >> 7) & 3) != 2);
3377
3378 /* Determine the kind of violation. */
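            /* Translate the EPT exit qualification bits into x86 #PF-style error code
             * bits (ID/RW/P) so the violation can be handed to TRPM and the nested
             * paging handler below exactly like an ordinary page fault.
             */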
3379 errCode = 0;
3380 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
3381 errCode |= X86_TRAP_PF_ID;
3382
3383 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
3384 errCode |= X86_TRAP_PF_RW;
3385
3386 /* If the page is present, then it's a page level protection fault. */
3387 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
3388 {
3389 errCode |= X86_TRAP_PF_P;
3390 }
3391 else {
3392 /* Shortcut for APIC TPR reads and writes. */
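                /* Offset 0x80 in the xAPIC MMIO page is the TPR register; if the fault
                 * hit the TPR of the guest APIC base, map in the virtual APIC page so
                 * later TPR accesses no longer cause EPT violations.
                 */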
3393 if ( (GCPhys & 0xfff) == 0x080
3394 && GCPhys > 0x1000000 /* to skip VGA frame buffer accesses */
3395 && fSetupTPRCaching
3396 && (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC2_VIRT_APIC))
3397 {
3398 RTGCPHYS GCPhysApicBase;
3399 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
3400 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3401 if (GCPhys == GCPhysApicBase + 0x80)
3402 {
3403 Log(("Enable VT-x virtual APIC access filtering\n"));
3404 rc = IOMMMIOMapMMIOHCPage(pVM, GCPhysApicBase, pVM->hwaccm.s.vmx.pAPICPhys, X86_PTE_RW | X86_PTE_P);
3405 AssertRC(rc);
3406 }
3407 }
3408 }
3409 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
3410
3411 /* GCPhys contains the guest physical address of the page fault. */
3412 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
3413 TRPMSetErrorCode(pVCpu, errCode);
3414 TRPMSetFaultAddress(pVCpu, GCPhys);
3415
3416 /* Handle the pagefault trap for the nested shadow table. */
3417 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
3418 Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
3419 if (rc == VINF_SUCCESS)
3420 { /* We've successfully synced our shadow pages, so let's just continue execution. */
3421            Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification, errCode));
3422 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
3423
3424 TRPMResetTrap(pVCpu);
3425 goto ResumeExecution;
3426 }
3427
3428#ifdef VBOX_STRICT
3429 if (rc != VINF_EM_RAW_EMULATE_INSTR)
3430            LogFlow(("PGMR0Trap0eHandlerNestedPaging failed with %d\n", rc));
3431#endif
3432 /* Need to go back to the recompiler to emulate the instruction. */
3433 TRPMResetTrap(pVCpu);
3434 break;
3435 }
3436
3437 case VMX_EXIT_EPT_MISCONFIG:
3438 {
3439 RTGCPHYS GCPhys;
3440
3441 Assert(pVM->hwaccm.s.fNestedPaging);
3442
3443 rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
3444 AssertRC(rc);
3445
3446 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
3447 break;
3448 }
3449
3450 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
3451 /* Clear VM-exit on IF=1 change. */
3452 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
3453 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
3454 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3455 AssertRC(rc);
3456 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
3457 goto ResumeExecution; /* we check for pending guest interrupts there */
3458
3459 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
3460 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
3461 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
3462 /* Skip instruction and continue directly. */
3463 pCtx->rip += cbInstr;
3464        /* Continue execution. */
3465 goto ResumeExecution;
3466
3467 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
3468 {
3469 Log2(("VMX: Cpuid %x\n", pCtx->eax));
3470 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
3471 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3472 if (rc == VINF_SUCCESS)
3473 {
3474 /* Update EIP and continue execution. */
3475 Assert(cbInstr == 2);
3476 pCtx->rip += cbInstr;
3477 goto ResumeExecution;
3478 }
3479 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", rc));
3480 rc = VINF_EM_RAW_EMULATE_INSTR;
3481 break;
3482 }
3483
3484 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
3485 {
3486 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
3487 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
3488 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3489 if (rc == VINF_SUCCESS)
3490 {
3491 /* Update EIP and continue execution. */
3492 Assert(cbInstr == 2);
3493 pCtx->rip += cbInstr;
3494 goto ResumeExecution;
3495 }
3496 rc = VINF_EM_RAW_EMULATE_INSTR;
3497 break;
3498 }
3499
3500 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
3501 {
3502 Log2(("VMX: Rdtsc\n"));
3503 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
3504 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3505 if (rc == VINF_SUCCESS)
3506 {
3507 /* Update EIP and continue execution. */
3508 Assert(cbInstr == 2);
3509 pCtx->rip += cbInstr;
3510 goto ResumeExecution;
3511 }
3512 rc = VINF_EM_RAW_EMULATE_INSTR;
3513 break;
3514 }
3515
3516    case VMX_EXIT_INVPG:                /* 14 Guest software attempted to execute INVLPG. */
3517 {
3518 Log2(("VMX: invlpg\n"));
3519 Assert(!pVM->hwaccm.s.fNestedPaging);
3520
3521 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
3522 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
3523 if (rc == VINF_SUCCESS)
3524 {
3525 /* Update EIP and continue execution. */
3526 pCtx->rip += cbInstr;
3527 goto ResumeExecution;
3528 }
3529 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, rc));
3530 break;
3531 }
3532
3533 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
3534 {
3535 Log2(("VMX: monitor\n"));
3536
3537 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
3538 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3539 if (rc == VINF_SUCCESS)
3540 {
3541 /* Update EIP and continue execution. */
3542 pCtx->rip += cbInstr;
3543 goto ResumeExecution;
3544 }
3545 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", rc));
3546 break;
3547 }
3548
3549 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
3550 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
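        /* The 32-bit TPR patching scheme reuses the LSTAR MSR (unused outside long
         * mode) as a TPR cache, so a trapped write to it is treated as a TPR update
         * and forwarded to the virtual APIC rather than emulated as a normal WRMSR.
         */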
3551 if ( pVM->hwaccm.s.fTPRPatchingActive
3552 && pCtx->ecx == MSR_K8_LSTAR)
3553 {
3554 Assert(!CPUMIsGuestInLongModeEx(pCtx));
3555 if ((pCtx->eax & 0xff) != u8LastTPR)
3556 {
3557 Log(("VMX: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
3558
3559 /* Our patch code uses LSTAR for TPR caching. */
3560 rc = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
3561 AssertRC(rc);
3562 }
3563
3564 /* Skip the instruction and continue. */
3565 pCtx->rip += cbInstr; /* wrmsr = [0F 30] */
3566
3567 /* Only resume if successful. */
3568 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
3569 goto ResumeExecution;
3570 }
3571 /* no break */
3572 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
3573 {
3574 uint32_t cbSize;
3575
3576 STAM_COUNTER_INC((exitReason == VMX_EXIT_RDMSR) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
3577
3578        /* Note: the Intel manual claims there's a REX version of RDMSR that's slightly different, so we play it safe by completely disassembling the instruction. */
3579 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
3580 rc = EMInterpretInstruction(pVM, pVCpu, CPUMCTX2CORE(pCtx), 0, &cbSize);
3581 if (rc == VINF_SUCCESS)
3582 {
3583 /* EIP has been updated already. */
3584
3585 /* Only resume if successful. */
3586 goto ResumeExecution;
3587 }
3588 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", rc));
3589 break;
3590 }
3591
3592 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
3593 {
3594 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3595
3596 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
3597 {
3598 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
3599 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
3600 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3601 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3602 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
3603 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
3604
3605 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
3606 {
3607 case 0:
3608 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
3609 break;
3610 case 2:
3611 break;
3612 case 3:
3613 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
3614 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
3615 break;
3616 case 4:
3617 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
3618 break;
3619 case 8:
3620 /* CR8 contains the APIC TPR */
3621 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3622 break;
3623
3624 default:
3625 AssertFailed();
3626 break;
3627 }
3628 break;
3629
3630 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
3631 Log2(("VMX: mov x, crx\n"));
3632 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3633
3634 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
3635
3636 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
3637 Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3638
3639 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3640 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
3641 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
3642 break;
3643
3644 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
3645 Log2(("VMX: clts\n"));
3646 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
3647 rc = EMInterpretCLTS(pVM, pVCpu);
3648 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3649 break;
3650
3651 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
3652 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
3653 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
3654 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
3655 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3656 break;
3657 }
3658
3659 /* Update EIP if no error occurred. */
3660 if (RT_SUCCESS(rc))
3661 pCtx->rip += cbInstr;
3662
3663 if (rc == VINF_SUCCESS)
3664 {
3665 /* Only resume if successful. */
3666 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3667 goto ResumeExecution;
3668 }
3669 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
3670 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3671 break;
3672 }
3673
3674 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
3675 {
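        /* Lazy debug register switching: on the first guest DRx access we drop the
         * MOV DRx intercept and load the guest debug state so later accesses run
         * natively; the intercept is re-armed in VMXR0Leave().
         */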
3676 if ( !DBGFIsStepping(pVCpu)
3677 && !CPUMIsHyperDebugStateActive(pVCpu))
3678 {
3679 /* Disable drx move intercepts. */
3680 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3681 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3682 AssertRC(rc);
3683
3684 /* Save the host and load the guest debug state. */
3685 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
3686 AssertRC(rc);
3687
3688#ifdef VBOX_WITH_STATISTICS
3689 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
3690 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3691 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3692 else
3693 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3694#endif
3695
3696 goto ResumeExecution;
3697 }
3698
3699 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
3700 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3701 {
3702 Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
3703 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3704 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3705 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
3706 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
3707 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
3708 Log2(("DR7=%08x\n", pCtx->dr[7]));
3709 }
3710 else
3711 {
3712 Log2(("VMX: mov x, drx\n"));
3713 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3714 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3715 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
3716 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
3717 }
3718 /* Update EIP if no error occurred. */
3719 if (RT_SUCCESS(rc))
3720 pCtx->rip += cbInstr;
3721
3722 if (rc == VINF_SUCCESS)
3723 {
3724 /* Only resume if successful. */
3725 goto ResumeExecution;
3726 }
3727 Assert(rc == VERR_EM_INTERPRETER);
3728 break;
3729 }
3730
3731 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
3732 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
3733 {
3734 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3735 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
3736 uint32_t uPort;
3737 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
3738
3739 /** @todo necessary to make the distinction? */
3740 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
3741 {
3742 uPort = pCtx->edx & 0xffff;
3743 }
3744 else
3745 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
3746
3747 /* paranoia */
3748 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4))
3749 {
3750 rc = fIOWrite ? VINF_IOM_HC_IOPORT_WRITE : VINF_IOM_HC_IOPORT_READ;
3751 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3752 break;
3753 }
3754
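        /* The exit qualification width field encodes the access size minus one
         * (0/1/3 for 1/2/4 bytes; 2 is invalid, hence the paranoia check above).
         * g_aIOSize turns it into a byte count and g_aIOOpAnd into the mask used
         * further down to merge an IN result into EAX.
         */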
3755 uint32_t cbSize = g_aIOSize[uIOWidth];
3756
3757 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
3758 {
3759 /* ins/outs */
3760 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
3761
3762 /* Disassemble manually to deal with segment prefixes. */
3763 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
3764 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
3765 rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL);
3766 if (rc == VINF_SUCCESS)
3767 {
3768 if (fIOWrite)
3769 {
3770 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
3771 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
3772 rc = VBOXSTRICTRC_TODO(IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, cbSize));
3773 }
3774 else
3775 {
3776 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
3777 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
3778 rc = VBOXSTRICTRC_TODO(IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, pDis->prefix, cbSize));
3779 }
3780 }
3781 else
3782 rc = VINF_EM_RAW_EMULATE_INSTR;
3783 }
3784 else
3785 {
3786 /* normal in/out */
3787 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
3788
3789 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
3790
3791 if (fIOWrite)
3792 {
3793 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
3794 rc = VBOXSTRICTRC_TODO(IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize));
3795 if (rc == VINF_IOM_HC_IOPORT_WRITE)
3796 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
3797 }
3798 else
3799 {
3800 uint32_t u32Val = 0;
3801
3802 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
3803 rc = VBOXSTRICTRC_TODO(IOMIOPortRead(pVM, uPort, &u32Val, cbSize));
3804 if (IOM_SUCCESS(rc))
3805 {
3806 /* Write back to the EAX register. */
3807 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
3808 }
3809 else
3810 if (rc == VINF_IOM_HC_IOPORT_READ)
3811 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pCtx->rip + cbInstr, uPort, uAndVal, cbSize);
3812 }
3813 }
3814 /*
3815             * Handle the I/O return codes.
3816 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
3817 */
3818 if (IOM_SUCCESS(rc))
3819 {
3820 /* Update EIP and continue execution. */
3821 pCtx->rip += cbInstr;
3822 if (RT_LIKELY(rc == VINF_SUCCESS))
3823 {
3824 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
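                    /* A DR7 R/W field of 2 (X86_DR7_RW_IO, only effective with CR4.DE
                     * set) selects an I/O breakpoint; the loop below checks whether the
                     * accessed port falls within any enabled DR0-DR3 range.
                     */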
3825 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
3826 {
3827 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
3828 for (unsigned i=0;i<4;i++)
3829 {
3830 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
3831
3832 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
3833 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
3834 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
3835 {
3836 uint64_t uDR6;
3837
3838 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3839
3840 uDR6 = ASMGetDR6();
3841
3842 /* Clear all breakpoint status flags and set the one we just hit. */
3843 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
3844 uDR6 |= (uint64_t)RT_BIT(i);
3845
3846 /* Note: AMD64 Architecture Programmer's Manual 13.1:
3847                             * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared by software after
3848 * the contents have been read.
3849 */
3850 ASMSetDR6(uDR6);
3851
3852 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
3853 pCtx->dr[7] &= ~X86_DR7_GD;
3854
3855 /* Paranoia. */
3856 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3857 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3858 pCtx->dr[7] |= 0x400; /* must be one */
3859
3860 /* Resync DR7 */
3861 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3862 AssertRC(rc);
3863
3864 /* Construct inject info. */
3865 intInfo = X86_XCPT_DB;
3866 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3867 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3868
3869 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
3870 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
3871 AssertRC(rc);
3872
3873 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3874 goto ResumeExecution;
3875 }
3876 }
3877 }
3878
3879 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3880 goto ResumeExecution;
3881 }
3882 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3883 break;
3884 }
3885
3886#ifdef VBOX_STRICT
3887 if (rc == VINF_IOM_HC_IOPORT_READ)
3888 Assert(!fIOWrite);
3889 else if (rc == VINF_IOM_HC_IOPORT_WRITE)
3890 Assert(fIOWrite);
3891 else
3892 AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc));
3893#endif
3894 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3895 break;
3896 }
3897
3898 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
3899 LogFlow(("VMX_EXIT_TPR\n"));
3900 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
3901 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3902 goto ResumeExecution;
3903
3904 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */
3905 {
3906 LogFlow(("VMX_EXIT_APIC_ACCESS\n"));
3907 unsigned uAccessType = VMX_EXIT_QUALIFICATION_APIC_ACCESS_TYPE(exitQualification);
3908
3909 switch(uAccessType)
3910 {
3911 case VMX_APIC_ACCESS_TYPE_LINEAR_READ:
3912 case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE:
3913 {
3914 RTGCPHYS GCPhys;
3915 PDMApicGetBase(pVM, &GCPhys);
3916 GCPhys &= PAGE_BASE_GC_MASK;
3917 GCPhys += VMX_EXIT_QUALIFICATION_APIC_ACCESS_OFFSET(exitQualification);
3918
3919 LogFlow(("Apic access at %RGp\n", GCPhys));
3920 rc = VBOXSTRICTRC_TODO(IOMMMIOPhysHandler(pVM, (uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ) ? 0 : X86_TRAP_PF_RW, CPUMCTX2CORE(pCtx), GCPhys));
3921 if (rc == VINF_SUCCESS)
3922 {
3923 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3924 goto ResumeExecution; /* rip already updated */
3925 }
3926 break;
3927 }
3928
3929 default:
3930 rc = VINF_EM_RAW_EMULATE_INSTR;
3931 break;
3932 }
3933 break;
3934 }
3935
3936 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
3937 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3938 goto ResumeExecution;
3939
3940 default:
3941 /* The rest is handled after syncing the entire CPU state. */
3942 break;
3943 }
3944
3945 /* Note: the guest state isn't entirely synced back at this stage. */
3946
3947 /* Investigate why there was a VM-exit. (part 2) */
3948 switch (exitReason)
3949 {
3950 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3951 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3952 case VMX_EXIT_EPT_VIOLATION:
3953 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
3954 /* Already handled above. */
3955 break;
3956
3957 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
3958 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
3959 break;
3960
3961 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
3962 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
3963 rc = VINF_EM_RAW_INTERRUPT;
3964 AssertFailed(); /* Can't happen. Yet. */
3965 break;
3966
3967 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
3968 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
3969 rc = VINF_EM_RAW_INTERRUPT;
3970 AssertFailed(); /* Can't happen afaik. */
3971 break;
3972
3973 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch: too complicated to emulate, so fall back to the recompiler */
3974 Log(("VMX_EXIT_TASK_SWITCH: exit=%RX64\n", exitQualification));
3975 if ( (VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE(exitQualification) == VMX_EXIT_QUALIFICATION_TASK_SWITCH_TYPE_IDT)
3976 && pVCpu->hwaccm.s.Event.fPending)
3977 {
3978 /* Caused by an injected interrupt. */
3979 pVCpu->hwaccm.s.Event.fPending = false;
3980
3981 Log(("VMX_EXIT_TASK_SWITCH: reassert trap %d\n", VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo)));
3982 Assert(!VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo));
3983 rc = TRPMAssertTrap(pVCpu, VMX_EXIT_INTERRUPTION_INFO_VECTOR(pVCpu->hwaccm.s.Event.intInfo), TRPM_HARDWARE_INT);
3984 AssertRC(rc);
3985 }
3986 /* else Exceptions and software interrupts can just be restarted. */
3987 rc = VERR_EM_INTERPRETER;
3988 break;
3989
3990 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
3991        /* Check if external interrupts are pending; if so, don't switch back. */
3992 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
3993 pCtx->rip++; /* skip hlt */
3994 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
3995 goto ResumeExecution;
3996
3997 rc = VINF_EM_HALT;
3998 break;
3999
4000 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
4001 Log2(("VMX: mwait\n"));
4002 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
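        /* EMInterpretMWait returns VINF_EM_HALT when the guest really enters a wait
         * state and VINF_SUCCESS when execution can continue right away (presumably
         * when a wakeup condition is already pending); in both cases RIP is advanced
         * and we only drop back to ring-3 if the halt cannot be continued here.
         */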
4003 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
4004 if ( rc == VINF_EM_HALT
4005 || rc == VINF_SUCCESS)
4006 {
4007 /* Update EIP and continue execution. */
4008 pCtx->rip += cbInstr;
4009
4010            /* Check if external interrupts are pending; if so, don't switch back. */
4011 if ( rc == VINF_SUCCESS
4012 || ( rc == VINF_EM_HALT
4013 && EMShouldContinueAfterHalt(pVCpu, pCtx))
4014 )
4015 goto ResumeExecution;
4016 }
4017 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", rc));
4018 break;
4019
4020 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
4021 AssertFailed(); /* can't happen. */
4022 rc = VERR_EM_INTERPRETER;
4023 break;
4024
4025 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
4026 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
4027 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
4028 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
4029 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
4030 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
4031 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
4032 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
4033 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
4034 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
4035 /** @todo inject #UD immediately */
4036 rc = VERR_EM_INTERPRETER;
4037 break;
4038
4039 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
4040 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
4041    case VMX_EXIT_INVPG:                /* 14 Guest software attempted to execute INVLPG. */
4042 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
4043 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
4044 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
4045 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
4046 /* already handled above */
4047 AssertMsg( rc == VINF_PGM_CHANGE_MODE
4048 || rc == VINF_EM_RAW_INTERRUPT
4049 || rc == VERR_EM_INTERPRETER
4050 || rc == VINF_EM_RAW_EMULATE_INSTR
4051 || rc == VINF_PGM_SYNC_CR3
4052 || rc == VINF_IOM_HC_IOPORT_READ
4053 || rc == VINF_IOM_HC_IOPORT_WRITE
4054 || rc == VINF_EM_RAW_GUEST_TRAP
4055 || rc == VINF_TRPM_XCPT_DISPATCHED
4056 || rc == VINF_EM_RESCHEDULE_REM,
4057 ("rc = %d\n", rc));
4058 break;
4059
4060 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
4061 case VMX_EXIT_APIC_ACCESS: /* 44 APIC access. Guest software attempted to access memory at a physical address on the APIC-access page. */
4062 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
4063 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
4064 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
4065 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
4066 /* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
4067 rc = VERR_EM_INTERPRETER;
4068 break;
4069
4070 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
4071 Assert(rc == VINF_EM_RAW_INTERRUPT);
4072 break;
4073
4074 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
4075 {
4076#ifdef VBOX_STRICT
4077 RTCCUINTREG val2 = 0;
4078
4079 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
4080
4081 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val2);
4082 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val2));
4083
4084 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val2);
4085 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val2));
4086
4087 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val2);
4088 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val2));
4089
4090 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val2);
4091 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val2));
4092
4093 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val2);
4094 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val2));
4095
4096 VMX_LOG_SELREG(CS, "CS", val2);
4097 VMX_LOG_SELREG(DS, "DS", val2);
4098 VMX_LOG_SELREG(ES, "ES", val2);
4099 VMX_LOG_SELREG(FS, "FS", val2);
4100 VMX_LOG_SELREG(GS, "GS", val2);
4101 VMX_LOG_SELREG(SS, "SS", val2);
4102 VMX_LOG_SELREG(TR, "TR", val2);
4103 VMX_LOG_SELREG(LDTR, "LDTR", val2);
4104
4105 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val2);
4106 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val2));
4107 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val2);
4108 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val2));
4109#endif /* VBOX_STRICT */
4110 rc = VERR_VMX_INVALID_GUEST_STATE;
4111 break;
4112 }
4113
4114 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
4115 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
4116 default:
4117 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
4118 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
4119 break;
4120
4121 }
4122end:
4123
4124 /* Signal changes for the recompiler. */
4125 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
4126
4127 /* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
4128 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
4129 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
4130 {
4131 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
4132 /* On the next entry we'll only sync the host context. */
4133 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
4134 }
4135 else
4136 {
4137 /* On the next entry we'll sync everything. */
4138 /** @todo we can do better than this */
4139 /* Not in the VINF_PGM_CHANGE_MODE though! */
4140 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
4141 }
4142
4143 /* translate into a less severe return code */
4144 if (rc == VERR_EM_INTERPRETER)
4145 rc = VINF_EM_RAW_EMULATE_INSTR;
4146 else
4147 /* Try to extract more information about what might have gone wrong here. */
4148 if (rc == VERR_VMX_INVALID_VMCS_PTR)
4149 {
4150 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
4151 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS;
4152 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
4153 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
4154 }
4155
4156 /* Just set the correct state here instead of trying to catch every goto above. */
4157 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
4158
4159#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
4160    /* Restore interrupts if we exited after disabling them. */
4161 if (uOldEFlags != ~(RTCCUINTREG)0)
4162 ASMSetFlags(uOldEFlags);
4163#endif
4164
4165 STAM_STATS({
4166 if (fStatExit2Started) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y);
4167 else if (fStatEntryStarted) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
4168 });
4169 Log2(("X"));
4170 return rc;
4171}
4172
4173
4174/**
4175 * Enters the VT-x session
4176 *
4177 * @returns VBox status code.
4178 * @param pVM The VM to operate on.
4179 * @param pVCpu The VMCPU to operate on.
4180 * @param pCpu CPU info struct
4181 */
4182VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHWACCM_CPUINFO pCpu)
4183{
4184 Assert(pVM->hwaccm.s.vmx.fSupported);
4185
4186 unsigned cr4 = ASMGetCR4();
4187 if (!(cr4 & X86_CR4_VMXE))
4188 {
4189 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
4190 return VERR_VMX_X86_CR4_VMXE_CLEARED;
4191 }
4192
4193 /* Activate the VM Control Structure. */
4194 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
4195 if (RT_FAILURE(rc))
4196 return rc;
4197
4198 pVCpu->hwaccm.s.fResumeVM = false;
4199 return VINF_SUCCESS;
4200}
4201
4202
4203/**
4204 * Leaves the VT-x session
4205 *
4206 * @returns VBox status code.
4207 * @param pVM The VM to operate on.
4208 * @param pVCpu The VMCPU to operate on.
4209 * @param pCtx CPU context
4210 */
4211VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
4212{
4213 Assert(pVM->hwaccm.s.vmx.fSupported);
4214
4215#ifdef DEBUG
4216 if (CPUMIsHyperDebugStateActive(pVCpu))
4217 {
4218 CPUMR0LoadHostDebugState(pVM, pVCpu);
4219 }
4220 else
4221#endif
4222 /* Save the guest debug state if necessary. */
4223 if (CPUMIsGuestDebugStateActive(pVCpu))
4224 {
4225 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
4226
4227 /* Enable drx move intercepts again. */
4228 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
4229 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
4230 AssertRC(rc);
4231
4232 /* Resync the debug registers the next time. */
4233 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
4234 }
4235 else
4236 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
4237
4238    /* Clear the VM control structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
4239 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
4240 AssertRC(rc);
4241
4242 return VINF_SUCCESS;
4243}
4244
4245/**
4246 * Flush the TLB (EPT)
4247 *
4248 * @returns VBox status code.
4249 * @param pVM The VM to operate on.
4250 * @param pVCpu The VM CPU to operate on.
4251 * @param enmFlush Type of flush
4252 * @param GCPhys Physical address of the page to flush
4253 */
4254static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys)
4255{
4256 uint64_t descriptor[2];
4257
4258 LogFlow(("vmxR0FlushEPT %d %RGv\n", enmFlush, GCPhys));
4259 Assert(pVM->hwaccm.s.fNestedPaging);
4260 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
4261 descriptor[1] = GCPhys;
4262 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
4263 AssertRC(rc);
4264}
4265
4266#ifdef HWACCM_VTX_WITH_VPID
4267/**
4268 * Flush the TLB (VPID)
4269 *
4270 * @returns VBox status code.
4271 * @param pVM The VM to operate on.
4272 * @param pVCpu The VM CPU to operate on.
4273 * @param enmFlush Type of flush
4274 * @param GCPtr Virtual address of the page to flush
4275 */
4276static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr)
4277{
4278#if HC_ARCH_BITS == 32
4279    /* If we get a flush in 64-bit guest mode, then force a full TLB flush. INVVPID probably takes only 32-bit addresses. (@todo) */
4280 if ( CPUMIsGuestInLongMode(pVCpu)
4281 && !VMX_IS_64BIT_HOST_MODE())
4282 {
4283 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
4284 }
4285 else
4286#endif
4287 {
4288 uint64_t descriptor[2];
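        /* INVVPID descriptor layout: the first quadword holds the VPID in its low
         * 16 bits (the rest is reserved), the second quadword the linear address
         * used by single-address invalidations.
         */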
4289
4290 Assert(pVM->hwaccm.s.vmx.fVPID);
4291 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
4292 descriptor[1] = GCPtr;
4293 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]);
4294 AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvVPID %x %x %RGv failed with %d\n", enmFlush, pVCpu->hwaccm.s.uCurrentASID, GCPtr, rc));
4295 }
4296}
4297#endif /* HWACCM_VTX_WITH_VPID */
4298
4299/**
4300 * Invalidates a guest page
4301 *
4302 * @returns VBox status code.
4303 * @param pVM The VM to operate on.
4304 * @param pVCpu The VM CPU to operate on.
4305 * @param GCVirt Page to invalidate
4306 */
4307VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
4308{
4309 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4310
4311 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
4312
4313 /* Only relevant if we want to use VPID.
4314 * In the nested paging case we still see such calls, but
4315 * can safely ignore them. (e.g. after cr3 updates)
4316 */
4317#ifdef HWACCM_VTX_WITH_VPID
4318 /* Skip it if a TLB flush is already pending. */
4319 if ( !fFlushPending
4320 && pVM->hwaccm.s.vmx.fVPID)
4321 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCVirt);
4322#endif /* HWACCM_VTX_WITH_VPID */
4323
4324 return VINF_SUCCESS;
4325}
4326
4327/**
4328 * Invalidates a guest page by physical address
4329 *
4330 * NOTE: Assumes the current instruction references this physical page through a virtual address!!
4331 *
4332 * @returns VBox status code.
4333 * @param pVM The VM to operate on.
4334 * @param pVCpu The VM CPU to operate on.
4335 * @param GCPhys Page to invalidate
4336 */
4337VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
4338{
4339 bool fFlushPending = VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
4340
4341 Assert(pVM->hwaccm.s.fNestedPaging);
4342
4343 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
4344
4345 /* Skip it if a TLB flush is already pending. */
4346 if (!fFlushPending)
4347 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCPhys);
4348
4349 return VINF_SUCCESS;
4350}
4351
4352/**
4353 * Report world switch error and dump some useful debug info
4354 *
4355 * @param pVM The VM to operate on.
4356 * @param pVCpu The VMCPU to operate on.
4357 * @param rc Return code
4358 * @param pCtx Current CPU context (not updated)
4359 */
4360static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx)
4361{
4362 switch (rc)
4363 {
4364 case VERR_VMX_INVALID_VMXON_PTR:
4365 AssertFailed();
4366 break;
4367
4368 case VERR_VMX_UNABLE_TO_START_VM:
4369 case VERR_VMX_UNABLE_TO_RESUME_VM:
4370 {
4371 int rc2;
4372 RTCCUINTREG exitReason, instrError;
4373
4374 rc2 = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
4375 rc2 |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
4376 AssertRC(rc2);
4377 if (rc2 == VINF_SUCCESS)
4378 {
4379 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
4380 Log(("Current stack %08x\n", &rc2));
4381
4382 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
4383 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
4384
4385#ifdef VBOX_STRICT
4386 RTGDTR gdtr;
4387 PCX86DESCHC pDesc;
4388 RTCCUINTREG val;
4389
4390 ASMGetGDTR(&gdtr);
4391
4392 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
4393 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
4394 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
4395 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
4396 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
4397 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
4398 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
4399 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
4400 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
4401 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
4402
4403 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
4404 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
4405
4406 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
4407 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
4408
4409 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
4410 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
4411
4412 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
4413 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
4414
4415 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
4416 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
4417
4418 if (val < gdtr.cbGdt)
4419 {
4420 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4421 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
4422 }
4423
4424 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
4425 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
4426 if (val < gdtr.cbGdt)
4427 {
4428 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4429 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
4430 }
4431
4432 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
4433 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
4434 if (val < gdtr.cbGdt)
4435 {
4436 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4437 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
4438 }
4439
4440 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
4441 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
4442 if (val < gdtr.cbGdt)
4443 {
4444 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4445 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
4446 }
4447
4448 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
4449 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
4450 if (val < gdtr.cbGdt)
4451 {
4452 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4453 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
4454 }
4455
4456 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
4457 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
4458 if (val < gdtr.cbGdt)
4459 {
4460 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4461 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
4462 }
4463
4464 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
4465 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
4466 if (val < gdtr.cbGdt)
4467 {
4468 pDesc = (PCX86DESCHC)(gdtr.pGdt + (val & X86_SEL_MASK));
4469 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
4470 }
4471
4472 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
4473 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
4474
4475 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
4476 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
4477 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
4478 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
4479
4480 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
4481 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
4482
4483 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
4484 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
4485
4486 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
4487 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
4488
4489 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
4490 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
4491 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
4492 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
4493
4494# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4495 if (VMX_IS_64BIT_HOST_MODE())
4496 {
4497 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
4498 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
4499 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
4500 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
4501 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
4502 }
4503# endif
4504#endif /* VBOX_STRICT */
4505 }
4506 break;
4507 }
4508
4509 default:
4510 /* impossible */
4511 AssertMsgFailed(("%Rrc (%#x)\n", rc, rc));
4512 break;
4513 }
4514}
4515
4516#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
4517/**
4518 * Prepares for and executes VMLAUNCH (64-bit guest mode)
4519 *
4520 * @returns VBox status code
4521 * @param   fResume     vmlaunch/vmresume
4522 * @param pCtx Guest context
4523 * @param pCache VMCS cache
4524 * @param pVM The VM to operate on.
4525 * @param pVCpu The VMCPU to operate on.
4526 */
4527DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
4528{
4529 uint32_t aParam[6];
4530 PHWACCM_CPUINFO pCpu;
4531 RTHCPHYS pPageCpuPhys;
4532 int rc;
4533
4534 pCpu = HWACCMR0GetCurrentCpu();
4535 pPageCpuPhys = RTR0MemObjGetPagePhysAddr(pCpu->pMemObj, 0);
4536
4537#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4538 pCache->uPos = 1;
4539 pCache->interPD = PGMGetInterPaeCR3(pVM);
4540 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
4541#endif
4542
4543#ifdef DEBUG
4544 pCache->TestIn.pPageCpuPhys = 0;
4545 pCache->TestIn.pVMCSPhys = 0;
4546 pCache->TestIn.pCache = 0;
4547 pCache->TestOut.pVMCSPhys = 0;
4548 pCache->TestOut.pCache = 0;
4549 pCache->TestOut.pCtx = 0;
4550 pCache->TestOut.eflags = 0;
4551#endif
4552
4553 aParam[0] = (uint32_t)(pPageCpuPhys); /* Param 1: VMXON physical address - Lo. */
4554 aParam[1] = (uint32_t)(pPageCpuPhys >> 32); /* Param 1: VMXON physical address - Hi. */
4555 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys); /* Param 2: VMCS physical address - Lo. */
4556 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys >> 32); /* Param 2: VMCS physical address - Hi. */
4557 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
4558 aParam[5] = 0;
4559
4560#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4561 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
4562 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
4563#endif
4564 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
4565
4566#ifdef VBOX_WITH_CRASHDUMP_MAGIC
4567 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
4568 Assert(pCtx->dr[4] == 10);
4569 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
4570#endif
4571
4572#ifdef DEBUG
4573 AssertMsg(pCache->TestIn.pPageCpuPhys == pPageCpuPhys, ("%RHp vs %RHp\n", pCache->TestIn.pPageCpuPhys, pPageCpuPhys));
4574 AssertMsg(pCache->TestIn.pVMCSPhys == pVCpu->hwaccm.s.vmx.pVMCSPhys, ("%RHp vs %RHp\n", pCache->TestIn.pVMCSPhys, pVCpu->hwaccm.s.vmx.pVMCSPhys));
4575 AssertMsg(pCache->TestIn.pVMCSPhys == pCache->TestOut.pVMCSPhys, ("%RHp vs %RHp\n", pCache->TestIn.pVMCSPhys, pCache->TestOut.pVMCSPhys));
4576 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, pCache->TestOut.pCache));
4577 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache), ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
4578 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, pCache->TestOut.pCtx));
4579 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
4580#endif
4581 return rc;
4582}
4583
4584/**
4585 * Executes the specified handler in 64-bit mode
4586 *
4587 * @returns VBox status code.
4588 * @param pVM The VM to operate on.
4589 * @param pVCpu The VMCPU to operate on.
4590 * @param pCtx Guest context
4591 * @param pfnHandler RC handler
4592 * @param cbParam Number of parameters
4593 * @param paParam Array of 32-bit parameters.
4594 */
4595VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
4596{
4597 int rc, rc2;
4598 PHWACCM_CPUINFO pCpu;
4599 RTHCPHYS pPageCpuPhys;
4600 RTHCUINTREG uOldEFlags;
4601
4602 /** @todo This code is not guest-SMP safe (hyper stack and switchers). */
4603 AssertReturn(pVM->cCpus == 1, VERR_TOO_MANY_CPUS);
4604 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_INTERNAL_ERROR);
4605 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
4606 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
4607
4608#ifdef VBOX_STRICT
4609 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries; i++)
4610 Assert(vmxR0IsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
4611
4612 for (unsigned i = 0; i < pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries; i++)
4613 Assert(vmxR0IsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
4614#endif
4615
4616 /* Disable interrupts. */
4617 uOldEFlags = ASMIntDisableFlags();
4618
4619 pCpu = HWACCMR0GetCurrentCpu();
4620 pPageCpuPhys = RTR0MemObjGetPagePhysAddr(pCpu->pMemObj, 0);
4621
4622 /* Clear the VM control structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
4623 VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
4624
4625 /* Leave VMX Root Mode. */
4626 VMXDisable();
4627
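    /* Also drop CR4.VMXE so the CPU is in a completely non-VMX state before
       the world switch; it is set again further down, right before we
       re-enter VMX root mode. */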
4628 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
4629
4630 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVM));
4631 CPUMSetHyperEIP(pVCpu, pfnHandler);
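    /* Push the parameters in reverse order so that paParam[0] ends up on top
       of the hypervisor stack the 64-bit handler starts with. */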
4632 for (int i = (int)cbParam - 1; i >= 0; i--)
4633 CPUMPushHyper(pVCpu, paParam[i]);
4634
4635 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
4636 /* Call switcher. */
4637 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM);
4638 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
4639
4640 /* Make sure the VMX instructions don't cause #UD faults. */
4641 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
4642
4643 /* Enter VMX Root Mode */
4644 rc2 = VMXEnable(pPageCpuPhys);
4645 if (RT_FAILURE(rc2))
4646 {
4647 if (pVM)
4648 VMXR0CheckError(pVM, pVCpu, rc2);
4649 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
4650 ASMSetFlags(uOldEFlags);
4651 return VERR_VMX_VMXON_FAILED;
4652 }
4653
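    /* Make the VMCS active and current again after the VMCLEAR above; its
       data was written back to memory by VMCLEAR and is reloaded from there. */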
4654 rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
4655 AssertRC(rc2);
4656 Assert(!(ASMGetFlags() & X86_EFL_IF));
4657 ASMSetFlags(uOldEFlags);
4658 return rc;
4659}
4660
4661#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
4662
4663
4664#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
4665/**
4666 * Executes VMWRITE
4667 *
4668 * @returns VBox status code
4669 * @param pVCpu The VMCPU to operate on.
4670 * @param idxField VMCS index
4671 * @param u64Val A 16, 32 or 64-bit value.
4672 */
4673VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4674{
4675 int rc;
4676
4677 switch (idxField)
4678 {
4679 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
4680 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
4681 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
4682 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
4683 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
4684 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
4685 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
4686 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
4687 case VMX_VMCS_CTRL_APIC_ACCESSADDR_FULL:
4688 case VMX_VMCS_GUEST_LINK_PTR_FULL:
4689 case VMX_VMCS_GUEST_PDPTR0_FULL:
4690 case VMX_VMCS_GUEST_PDPTR1_FULL:
4691 case VMX_VMCS_GUEST_PDPTR2_FULL:
4692 case VMX_VMCS_GUEST_PDPTR3_FULL:
4693 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
4694 case VMX_VMCS_GUEST_EFER_FULL:
4695 case VMX_VMCS_CTRL_EPTP_FULL:
4696 /* These fields consist of two parts (low and high dwords), both writable in 32-bit mode; the high dword is written via the encoding idxField + 1. */
4697 rc = VMXWriteVMCS32(idxField, u64Val);
4698 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
4699 AssertRC(rc);
4700 return rc;
4701
4702 case VMX_VMCS64_GUEST_LDTR_BASE:
4703 case VMX_VMCS64_GUEST_TR_BASE:
4704 case VMX_VMCS64_GUEST_GDTR_BASE:
4705 case VMX_VMCS64_GUEST_IDTR_BASE:
4706 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4707 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4708 case VMX_VMCS64_GUEST_CR0:
4709 case VMX_VMCS64_GUEST_CR4:
4710 case VMX_VMCS64_GUEST_CR3:
4711 case VMX_VMCS64_GUEST_DR7:
4712 case VMX_VMCS64_GUEST_RIP:
4713 case VMX_VMCS64_GUEST_RSP:
4714 case VMX_VMCS64_GUEST_CS_BASE:
4715 case VMX_VMCS64_GUEST_DS_BASE:
4716 case VMX_VMCS64_GUEST_ES_BASE:
4717 case VMX_VMCS64_GUEST_FS_BASE:
4718 case VMX_VMCS64_GUEST_GS_BASE:
4719 case VMX_VMCS64_GUEST_SS_BASE:
4720 /* Queue the 64-bit value, as we cannot write it directly in 32-bit host mode; values that fit in 32 bits are written immediately. */
4721 if (u64Val >> 32ULL)
4722 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
4723 else
4724 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
4725
4726 return rc;
4727
4728 default:
4729 AssertMsgFailed(("Unexpected field %x\n", idxField));
4730 return VERR_INVALID_PARAMETER;
4731 }
4732}
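/*
 * Usage sketch (illustrative only; the value variable is hypothetical): on a
 * 32-bit host, callers write full-width guest fields through this wrapper and
 * let it pick between an immediate split 32-bit write and the write cache:
 *
 *     rc = VMXWriteVMCS64Ex(pVCpu, VMX_VMCS64_GUEST_FS_BASE, u64GuestFsBase);
 *     AssertRC(rc);
 *
 * Cached entries are presumably flushed once the CPU is executing in a mode
 * where the full 64-bit VMWRITE can be issued (see VMXWriteCachedVMCSEx below).
 */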
4733
4734/**
4735 * Caches VMCS writes, both for performance reasons (Darwin) and for running 64-bit guests on 32-bit hosts.
4736 *
4737 * @param pVCpu The VMCPU to operate on.
4738 * @param idxField VMCS field
4739 * @param u64Val Value
4740 */
4741VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4742{
4743 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
4744
4745 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
4746
4747 /* Make sure there are no duplicates. */
4748 for (unsigned i = 0; i < pCache->Write.cValidEntries; i++)
4749 {
4750 if (pCache->Write.aField[i] == idxField)
4751 {
4752 pCache->Write.aFieldVal[i] = u64Val;
4753 return VINF_SUCCESS;
4754 }
4755 }
4756
4757 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
4758 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
4759 pCache->Write.cValidEntries++;
4760 return VINF_SUCCESS;
4761}
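/*
 * Note: only queuing happens here; Write.cValidEntries is reset and the queued
 * fields are actually written elsewhere (presumably by the 64-bit switcher
 * code that owns the VMCS while in 64-bit mode).
 */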
4762
4763#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
4764
4765#ifdef VBOX_STRICT
4766static bool vmxR0IsValidReadField(uint32_t idxField)
4767{
4768 switch (idxField)
4769 {
4770 case VMX_VMCS64_GUEST_RIP:
4771 case VMX_VMCS64_GUEST_RSP:
4772 case VMX_VMCS_GUEST_RFLAGS:
4773 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
4774 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
4775 case VMX_VMCS64_GUEST_CR0:
4776 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
4777 case VMX_VMCS64_GUEST_CR4:
4778 case VMX_VMCS64_GUEST_DR7:
4779 case VMX_VMCS32_GUEST_SYSENTER_CS:
4780 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4781 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4782 case VMX_VMCS32_GUEST_GDTR_LIMIT:
4783 case VMX_VMCS64_GUEST_GDTR_BASE:
4784 case VMX_VMCS32_GUEST_IDTR_LIMIT:
4785 case VMX_VMCS64_GUEST_IDTR_BASE:
4786 case VMX_VMCS16_GUEST_FIELD_CS:
4787 case VMX_VMCS32_GUEST_CS_LIMIT:
4788 case VMX_VMCS64_GUEST_CS_BASE:
4789 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
4790 case VMX_VMCS16_GUEST_FIELD_DS:
4791 case VMX_VMCS32_GUEST_DS_LIMIT:
4792 case VMX_VMCS64_GUEST_DS_BASE:
4793 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
4794 case VMX_VMCS16_GUEST_FIELD_ES:
4795 case VMX_VMCS32_GUEST_ES_LIMIT:
4796 case VMX_VMCS64_GUEST_ES_BASE:
4797 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
4798 case VMX_VMCS16_GUEST_FIELD_FS:
4799 case VMX_VMCS32_GUEST_FS_LIMIT:
4800 case VMX_VMCS64_GUEST_FS_BASE:
4801 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
4802 case VMX_VMCS16_GUEST_FIELD_GS:
4803 case VMX_VMCS32_GUEST_GS_LIMIT:
4804 case VMX_VMCS64_GUEST_GS_BASE:
4805 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
4806 case VMX_VMCS16_GUEST_FIELD_SS:
4807 case VMX_VMCS32_GUEST_SS_LIMIT:
4808 case VMX_VMCS64_GUEST_SS_BASE:
4809 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
4810 case VMX_VMCS16_GUEST_FIELD_LDTR:
4811 case VMX_VMCS32_GUEST_LDTR_LIMIT:
4812 case VMX_VMCS64_GUEST_LDTR_BASE:
4813 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
4814 case VMX_VMCS16_GUEST_FIELD_TR:
4815 case VMX_VMCS32_GUEST_TR_LIMIT:
4816 case VMX_VMCS64_GUEST_TR_BASE:
4817 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
4818 case VMX_VMCS32_RO_EXIT_REASON:
4819 case VMX_VMCS32_RO_VM_INSTR_ERROR:
4820 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
4821 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
4822 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
4823 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
4824 case VMX_VMCS_RO_EXIT_QUALIFICATION:
4825 case VMX_VMCS32_RO_IDT_INFO:
4826 case VMX_VMCS32_RO_IDT_ERRCODE:
4827 case VMX_VMCS64_GUEST_CR3:
4828 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
4829 return true;
4830 }
4831 return false;
4832}
4833
4834static bool vmxR0IsValidWriteField(uint32_t idxField)
4835{
4836 switch (idxField)
4837 {
4838 case VMX_VMCS64_GUEST_LDTR_BASE:
4839 case VMX_VMCS64_GUEST_TR_BASE:
4840 case VMX_VMCS64_GUEST_GDTR_BASE:
4841 case VMX_VMCS64_GUEST_IDTR_BASE:
4842 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4843 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4844 case VMX_VMCS64_GUEST_CR0:
4845 case VMX_VMCS64_GUEST_CR4:
4846 case VMX_VMCS64_GUEST_CR3:
4847 case VMX_VMCS64_GUEST_DR7:
4848 case VMX_VMCS64_GUEST_RIP:
4849 case VMX_VMCS64_GUEST_RSP:
4850 case VMX_VMCS64_GUEST_CS_BASE:
4851 case VMX_VMCS64_GUEST_DS_BASE:
4852 case VMX_VMCS64_GUEST_ES_BASE:
4853 case VMX_VMCS64_GUEST_FS_BASE:
4854 case VMX_VMCS64_GUEST_GS_BASE:
4855 case VMX_VMCS64_GUEST_SS_BASE:
4856 return true;
4857 }
4858 return false;
4859}
4860
4861#endif /* VBOX_STRICT */