VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWVMXR0.cpp@ 19169

Last change on this file since 19169 was 19141, checked in by vboxsync on 2009-04-23

Action flags breakup.
Fixed PGM saved state loading of 2.2.2 images.
Reduced hacks in PATM state loading (fixups).

1/* $Id: HWVMXR0.cpp 19141 2009-04-23 13:52:18Z vboxsync $ */
2/** @file
3 * HWACCM VMX - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2007 Sun Microsystems, Inc.
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 *
17 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
18 * Clara, CA 95054 USA or visit http://www.sun.com if you need
19 * additional information or have any questions.
20 */
21
22
23/*******************************************************************************
24* Header Files *
25*******************************************************************************/
26#define LOG_GROUP LOG_GROUP_HWACCM
27#include <VBox/hwaccm.h>
28#include "HWACCMInternal.h"
29#include <VBox/vm.h>
30#include <VBox/x86.h>
31#include <VBox/pgm.h>
32#include <VBox/pdm.h>
33#include <VBox/err.h>
34#include <VBox/log.h>
35#include <VBox/selm.h>
36#include <VBox/iom.h>
37#include <VBox/rem.h>
38#include <iprt/param.h>
39#include <iprt/assert.h>
40#include <iprt/asm.h>
41#include <iprt/string.h>
42#include "HWVMXR0.h"
43
44/*******************************************************************************
45* Defined Constants And Macros *
46*******************************************************************************/
47#if defined(RT_ARCH_AMD64)
48# define VMX_IS_64BIT_HOST_MODE() (true)
49#elif defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
50# define VMX_IS_64BIT_HOST_MODE() (g_fVMXIs64bitHost != 0)
51#else
52# define VMX_IS_64BIT_HOST_MODE() (false)
53#endif
54
55/*******************************************************************************
56* Global Variables *
57*******************************************************************************/
58/* IO operation lookup arrays. */
59static uint32_t const g_aIOSize[4] = {1, 2, 0, 4};
60static uint32_t const g_aIOOpAnd[4] = {0xff, 0xffff, 0, 0xffffffff};
61
62#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
63/** See HWACCMR0A.asm. */
64extern "C" uint32_t g_fVMXIs64bitHost;
65#endif
66
67/*******************************************************************************
68* Local Functions *
69*******************************************************************************/
70static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx);
71static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu);
72static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu);
73static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu);
74static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys);
75static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr);
76static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
77#ifdef VBOX_STRICT
78static bool vmxR0IsValidReadField(uint32_t idxField);
79static bool vmxR0IsValidWriteField(uint32_t idxField);
80#endif
81
82static void VMXR0CheckError(PVM pVM, PVMCPU pVCpu, int rc)
83{
84 if (rc == VERR_VMX_GENERIC)
85 {
86 RTCCUINTREG instrError;
87
88 VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
89 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
90 }
91 pVM->hwaccm.s.lLastError = rc;
92}
93
94/**
95 * Sets up and activates VT-x on the current CPU
96 *
97 * @returns VBox status code.
98 * @param pCpu CPU info struct
99 * @param pVM The VM to operate on. (can be NULL after a resume!!)
100 * @param pvPageCpu Pointer to the global cpu page
101 * @param pPageCpuPhys Physical address of the global cpu page
102 */
103VMMR0DECL(int) VMXR0EnableCpu(PHWACCM_CPUINFO pCpu, PVM pVM, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
104{
105 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
106 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
107
108#if defined(LOG_ENABLED) && !defined(DEBUG_bird)
109 SUPR0Printf("VMXR0EnableCpu cpu %d page (%x) %x\n", pCpu->idCpu, pvPageCpu, (uint32_t)pPageCpuPhys);
110#endif
111 if (pVM)
112 {
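        /* Note: VMXON requires the region to start with the VMCS revision identifier
           reported by the IA32_VMX_BASIC MSR, hence the write below. */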
113 /* Set revision dword at the beginning of the VMXON structure. */
114 *(uint32_t *)pvPageCpu = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
115 }
116
117 /** @todo we should unmap the two pages from the virtual address space in order to prevent accidental corruption.
118 * (which can have very bad consequences!!!)
119 */
120
121 /* Make sure the VMX instructions don't cause #UD faults. */
122 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
123
124 /* Enter VMX Root Mode */
125 int rc = VMXEnable(pPageCpuPhys);
126 if (RT_FAILURE(rc))
127 {
128 if (pVM)
129 VMXR0CheckError(pVM, &pVM->aCpus[0], rc);
130 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
131 return VERR_VMX_VMXON_FAILED;
132 }
133 return VINF_SUCCESS;
134}
135
136/**
137 * Deactivates VT-x on the current CPU
138 *
139 * @returns VBox status code.
140 * @param pCpu CPU info struct
141 * @param pvPageCpu Pointer to the global cpu page
142 * @param pPageCpuPhys Physical address of the global cpu page
143 */
144VMMR0DECL(int) VMXR0DisableCpu(PHWACCM_CPUINFO pCpu, void *pvPageCpu, RTHCPHYS pPageCpuPhys)
145{
146 AssertReturn(pPageCpuPhys, VERR_INVALID_PARAMETER);
147 AssertReturn(pvPageCpu, VERR_INVALID_PARAMETER);
148
149 /* Leave VMX Root Mode. */
150 VMXDisable();
151
152 /* And clear the X86_CR4_VMXE bit */
153 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
154
155#if defined(LOG_ENABLED) && !defined(DEBUG_bird)
156 SUPR0Printf("VMXR0DisableCpu cpu %d\n", pCpu->idCpu);
157#endif
158 return VINF_SUCCESS;
159}
160
161/**
162 * Does Ring-0 per VM VT-x init.
163 *
164 * @returns VBox status code.
165 * @param pVM The VM to operate on.
166 */
167VMMR0DECL(int) VMXR0InitVM(PVM pVM)
168{
169 int rc;
170
171#ifdef LOG_ENABLED
172 SUPR0Printf("VMXR0InitVM %x\n", pVM);
173#endif
174
175 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
176
177 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
178 {
179 /* Allocate one page for the virtual APIC mmio cache. */
180 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjAPIC, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
181 AssertRC(rc);
182 if (RT_FAILURE(rc))
183 return rc;
184
185 pVM->hwaccm.s.vmx.pAPIC = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjAPIC);
186 pVM->hwaccm.s.vmx.pAPICPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjAPIC, 0);
187 ASMMemZero32(pVM->hwaccm.s.vmx.pAPIC, PAGE_SIZE);
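        /* When the TPR shadow is active, guest CR8/TPR accesses are serviced from this page;
           a VM-exit is only taken when a write drops the TPR below the TPR threshold. */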
188 }
189 else
190 {
191 pVM->hwaccm.s.vmx.pMemObjAPIC = 0;
192 pVM->hwaccm.s.vmx.pAPIC = 0;
193 pVM->hwaccm.s.vmx.pAPICPhys = 0;
194 }
195
196 /* Allocate the MSR bitmap if this feature is supported. */
197 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
198 {
199 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
200 AssertRC(rc);
201 if (RT_FAILURE(rc))
202 return rc;
203
204 pVM->hwaccm.s.vmx.pMSRBitmap = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjMSRBitmap);
205 pVM->hwaccm.s.vmx.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, 0);
206 memset(pVM->hwaccm.s.vmx.pMSRBitmap, 0xff, PAGE_SIZE);
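        /* All bits set means every RDMSR/WRMSR traps; bits can later be cleared selectively
           for MSRs the guest is allowed to access without causing a VM-exit. */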
207 }
208
209#ifdef VBOX_WITH_CRASHDUMP_MAGIC
210 {
211 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.vmx.pMemObjScratch, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
212 AssertRC(rc);
213 if (RT_FAILURE(rc))
214 return rc;
215
216 pVM->hwaccm.s.vmx.pScratch = (uint8_t *)RTR0MemObjAddress(pVM->hwaccm.s.vmx.pMemObjScratch);
217 pVM->hwaccm.s.vmx.pScratchPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.vmx.pMemObjScratch, 0);
218
219 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
220 strcpy((char *)pVM->hwaccm.s.vmx.pScratch, "SCRATCH Magic");
221 *(uint64_t *)(pVM->hwaccm.s.vmx.pScratch + 16) = UINT64_C(0xDEADBEEFDEADBEEF);
222 }
223#endif
224
225 /* Allocate VMCSs for all guest CPUs. */
226 for (unsigned i=0;i<pVM->cCPUs;i++)
227 {
228 PVMCPU pVCpu = &pVM->aCpus[i];
229
230 pVCpu->hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
231
232 /* Allocate one page for the VM control structure (VMCS). */
233 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.vmx.pMemObjVMCS, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
234 AssertRC(rc);
235 if (RT_FAILURE(rc))
236 return rc;
237
238 pVCpu->hwaccm.s.vmx.pVMCS = RTR0MemObjAddress(pVCpu->hwaccm.s.vmx.pMemObjVMCS);
239 pVCpu->hwaccm.s.vmx.pVMCSPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.vmx.pMemObjVMCS, 0);
240 ASMMemZero32(pVCpu->hwaccm.s.vmx.pVMCS, PAGE_SIZE);
241
242 pVCpu->hwaccm.s.vmx.cr0_mask = 0;
243 pVCpu->hwaccm.s.vmx.cr4_mask = 0;
244
245 /* Current guest paging mode. */
246 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = PGMMODE_REAL;
247
248#ifdef LOG_ENABLED
249 SUPR0Printf("VMXR0InitVM %x VMCS=%x (%x)\n", pVM, pVCpu->hwaccm.s.vmx.pVMCS, (uint32_t)pVCpu->hwaccm.s.vmx.pVMCSPhys);
250#endif
251 }
252
253 return VINF_SUCCESS;
254}
255
256/**
257 * Does Ring-0 per VM VT-x termination.
258 *
259 * @returns VBox status code.
260 * @param pVM The VM to operate on.
261 */
262VMMR0DECL(int) VMXR0TermVM(PVM pVM)
263{
264 for (unsigned i=0;i<pVM->cCPUs;i++)
265 {
266 if (pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS != NIL_RTR0MEMOBJ)
267 {
268 RTR0MemObjFree(pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS, false);
269 pVM->aCpus[i].hwaccm.s.vmx.pMemObjVMCS = NIL_RTR0MEMOBJ;
270 pVM->aCpus[i].hwaccm.s.vmx.pVMCS = 0;
271 pVM->aCpus[i].hwaccm.s.vmx.pVMCSPhys = 0;
272 }
273 }
274 if (pVM->hwaccm.s.vmx.pMemObjAPIC != NIL_RTR0MEMOBJ)
275 {
276 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjAPIC, false);
277 pVM->hwaccm.s.vmx.pMemObjAPIC = NIL_RTR0MEMOBJ;
278 pVM->hwaccm.s.vmx.pAPIC = 0;
279 pVM->hwaccm.s.vmx.pAPICPhys = 0;
280 }
281 if (pVM->hwaccm.s.vmx.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
282 {
283 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjMSRBitmap, false);
284 pVM->hwaccm.s.vmx.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
285 pVM->hwaccm.s.vmx.pMSRBitmap = 0;
286 pVM->hwaccm.s.vmx.pMSRBitmapPhys = 0;
287 }
288#ifdef VBOX_WITH_CRASHDUMP_MAGIC
289 if (pVM->hwaccm.s.vmx.pMemObjScratch != NIL_RTR0MEMOBJ)
290 {
291 ASMMemZero32(pVM->hwaccm.s.vmx.pScratch, PAGE_SIZE);
292 RTR0MemObjFree(pVM->hwaccm.s.vmx.pMemObjScratch, false);
293 pVM->hwaccm.s.vmx.pMemObjScratch = NIL_RTR0MEMOBJ;
294 pVM->hwaccm.s.vmx.pScratch = 0;
295 pVM->hwaccm.s.vmx.pScratchPhys = 0;
296 }
297#endif
298 return VINF_SUCCESS;
299}
300
301/**
302 * Sets up VT-x for the specified VM
303 *
304 * @returns VBox status code.
305 * @param pVM The VM to operate on.
306 */
307VMMR0DECL(int) VMXR0SetupVM(PVM pVM)
308{
309 int rc = VINF_SUCCESS;
310 uint32_t val;
311
312 AssertReturn(pVM, VERR_INVALID_PARAMETER);
313
314 for (unsigned i=0;i<pVM->cCPUs;i++)
315 {
316 PVMCPU pVCpu = &pVM->aCpus[i];
317
318 Assert(pVCpu->hwaccm.s.vmx.pVMCS);
319
320 /* Set revision dword at the beginning of the VMCS structure. */
321 *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS = MSR_IA32_VMX_BASIC_INFO_VMCS_ID(pVM->hwaccm.s.vmx.msr.vmx_basic_info);
322
323 /* Clear VM Control Structure. */
324 Log(("pVMCSPhys = %RHp\n", pVCpu->hwaccm.s.vmx.pVMCSPhys));
325 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
326 if (RT_FAILURE(rc))
327 goto vmx_end;
328
329 /* Activate the VM Control Structure. */
330 rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
331 if (RT_FAILURE(rc))
332 goto vmx_end;
333
334 /* VMX_VMCS_CTRL_PIN_EXEC_CONTROLS
335 * Set required bits to one and zero according to the MSR capabilities.
336 */
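        /* Pattern used for all the VMX control fields below: 'disallowed0' holds the bits whose
           zero-setting is disallowed (they must be 1), 'allowed1' the bits that may be set.
           Start from the mandatory bits, OR in the features we want, then mask with allowed1. */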
337 val = pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0;
338 /* External and non-maskable interrupts cause VM-exits. */
339 val = val | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_EXT_INT_EXIT | VMX_VMCS_CTRL_PIN_EXEC_CONTROLS_NMI_EXIT;
340 val &= pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1;
341
342 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, val);
343 AssertRC(rc);
344
345 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS
346 * Set required bits to one and zero according to the MSR capabilities.
347 */
348 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0;
350 /* Program which events cause VM-exits and which features we want to use. */
350 val = val | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_HLT_EXIT
351 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_TSC_OFFSET
352 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT
353 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_UNCOND_IO_EXIT
354 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDPMC_EXIT
355 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT; /* intercept mwait or else we'll idle inside the guest (the host thinks the CPU load is high) */
356
357 /* Without nested paging we should intercept invlpg and cr3 mov instructions. */
358 if (!pVM->hwaccm.s.fNestedPaging)
359 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
360 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
361 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
362
363 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MWAIT_EXIT might cause a vmlaunch failure with an invalid control fields error. (combined with some other exit reasons) */
364 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
365 {
366 /* CR8 reads from the APIC shadow page; writes cause an exit if they lower the TPR below the threshold */
367 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW;
368 Assert(pVM->hwaccm.s.vmx.pAPIC);
369 }
370 else
371 /* Exit on CR8 reads & writes in case the TPR shadow feature isn't present. */
372 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_STORE_EXIT | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR8_LOAD_EXIT;
373
374#ifdef VBOX_WITH_VTX_MSR_BITMAPS
375 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
376 {
377 Assert(pVM->hwaccm.s.vmx.pMSRBitmapPhys);
378 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS;
379 }
380#endif
381
382 /* We will use the secondary control if it's present. */
383 val |= VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL;
384
385 /* Mask away the bits that the CPU doesn't support */
386 /** @todo make sure they don't conflict with the above requirements. */
387 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1;
388 pVCpu->hwaccm.s.vmx.proc_ctls = val;
389
390 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, val);
391 AssertRC(rc);
392
393 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_USE_SECONDARY_EXEC_CTRL)
394 {
395 /* VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2
396 * Set required bits to one and zero according to the MSR capabilities.
397 */
398 val = pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.disallowed0;
399 val |= VMX_VMCS_CTRL_PROC_EXEC2_WBINVD_EXIT;
400
401#ifdef HWACCM_VTX_WITH_EPT
402 if (pVM->hwaccm.s.fNestedPaging)
403 val |= VMX_VMCS_CTRL_PROC_EXEC2_EPT;
404#endif /* HWACCM_VTX_WITH_EPT */
405#ifdef HWACCM_VTX_WITH_VPID
406 else
407 if (pVM->hwaccm.s.vmx.fVPID)
408 val |= VMX_VMCS_CTRL_PROC_EXEC2_VPID;
409#endif /* HWACCM_VTX_WITH_VPID */
410
411 /* Mask away the bits that the CPU doesn't support */
412 /** @todo make sure they don't conflict with the above requirements. */
413 val &= pVM->hwaccm.s.vmx.msr.vmx_proc_ctls2.n.allowed1;
414
415 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS2, val);
416 AssertRC(rc);
417 }
418
419 /* VMX_VMCS_CTRL_CR3_TARGET_COUNT
420 * Set required bits to one and zero according to the MSR capabilities.
421 */
422 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR3_TARGET_COUNT, 0);
423 AssertRC(rc);
424
425 /* Forward all exceptions except #NM & #PF to the guest.
426 * We always need to check page faults since our shadow page table can be out of sync.
427 * And we always lazily sync the FPU & XMM state.
428 */
429
430 /** @todo Possible optimization:
431 * Keep the FPU and XMM state current in the EM thread. That way there's no need to
432 * lazily sync anything, but the downside is that we can't use the FPU stack or XMM
433 * registers ourselves of course.
434 *
435 * Note: only possible if the current state is actually ours (X86_CR0_TS flag)
436 */
437
438 /* Don't filter page faults; all of them should cause a switch. */
439 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MASK, 0);
440 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_PAGEFAULT_ERROR_MATCH, 0);
441 AssertRC(rc);
442
443 /* Init TSC offset to zero. */
444 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, 0);
445 AssertRC(rc);
446
447 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_A_FULL, 0);
448 AssertRC(rc);
449
450 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_IO_BITMAP_B_FULL, 0);
451 AssertRC(rc);
452
453 /* Set the MSR bitmap address. */
454 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_MSR_BITMAPS)
455 {
456 /* Optional */
457 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_MSR_BITMAP_FULL, pVM->hwaccm.s.vmx.pMSRBitmapPhys);
458 AssertRC(rc);
459 }
460
461 /* Clear MSR controls. */
462 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL, 0);
463 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL, 0);
464 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL, 0);
465 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_STORE_COUNT, 0);
466 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_MSR_LOAD_COUNT, 0);
467 AssertRC(rc);
468
469 if (pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW)
470 {
471 Assert(pVM->hwaccm.s.vmx.pMemObjAPIC);
472 /* Optional */
473 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, 0);
474 rc |= VMXWriteVMCS64(VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL, pVM->hwaccm.s.vmx.pAPICPhys);
475 AssertRC(rc);
476 }
477
478 /* Set the VMCS link pointer to ~0 (all ones), as the architecture requires; it is not otherwise used. */
479 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_LINK_PTR_FULL, 0xFFFFFFFFFFFFFFFFULL);
480 AssertRC(rc);
481
482 /* Clear the VM control structure: mark it inactive, clear implementation-specific data and write the VMCS data back to memory. */
483 rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
484 AssertRC(rc);
485
486 /* Configure the VMCS read cache. */
487 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
488
489 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RIP);
490 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_RSP);
491 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_GUEST_RFLAGS);
492 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE);
493 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR0_READ_SHADOW);
494 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR0);
495 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_CTRL_CR4_READ_SHADOW);
496 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR4);
497 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_DR7);
498 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_SYSENTER_CS);
499 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_EIP);
500 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_SYSENTER_ESP);
501 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_GDTR_LIMIT);
502 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_GDTR_BASE);
503 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_GUEST_IDTR_LIMIT);
504 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_IDTR_BASE);
505
506 VMX_SETUP_SELREG(ES, pCache);
507 VMX_SETUP_SELREG(SS, pCache);
508 VMX_SETUP_SELREG(CS, pCache);
509 VMX_SETUP_SELREG(DS, pCache);
510 VMX_SETUP_SELREG(FS, pCache);
511 VMX_SETUP_SELREG(GS, pCache);
512 VMX_SETUP_SELREG(LDTR, pCache);
513 VMX_SETUP_SELREG(TR, pCache);
514
515 /* Status code VMCS reads. */
516 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_REASON);
517 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_VM_INSTR_ERROR);
518 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_LENGTH);
519 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE);
520 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO);
521 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_EXIT_INSTR_INFO);
522 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION);
523 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_INFO);
524 VMXSetupCachedReadVMCS(pCache, VMX_VMCS32_RO_IDT_ERRCODE);
525
526 if (pVM->hwaccm.s.fNestedPaging)
527 {
528 VMXSetupCachedReadVMCS(pCache, VMX_VMCS64_GUEST_CR3);
529 VMXSetupCachedReadVMCS(pCache, VMX_VMCS_EXIT_PHYS_ADDR_FULL);
530 pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX;
531 }
532 else
533 pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX;
534 } /* for each VMCPU */
535
536 /* Choose the right TLB setup function. */
537 if (pVM->hwaccm.s.fNestedPaging)
538 {
539 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBEPT;
540
541 /* Default values for flushing. */
542 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
543 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
544
545 /* If the capabilities specify we can do more, then make use of it. */
546 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_INDIV)
547 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
548 else
549 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
550 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
551
552 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVEPT_CAPS_CONTEXT)
553 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
554 }
555#ifdef HWACCM_VTX_WITH_VPID
556 else
557 if (pVM->hwaccm.s.vmx.fVPID)
558 {
559 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBVPID;
560
561 /* Default values for flushing. */
562 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_ALL_CONTEXTS;
563 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_ALL_CONTEXTS;
564
565 /* If the capabilities specify we can do more, then make use of it. */
566 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_INDIV)
567 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_PAGE;
568 else
569 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
570 pVM->hwaccm.s.vmx.enmFlushPage = VMX_FLUSH_SINGLE_CONTEXT;
571
572 if (pVM->hwaccm.s.vmx.msr.vmx_eptcaps & MSR_IA32_VMX_EPT_CAPS_INVVPID_CAPS_CONTEXT)
573 pVM->hwaccm.s.vmx.enmFlushContext = VMX_FLUSH_SINGLE_CONTEXT;
574 }
575#endif /* HWACCM_VTX_WITH_VPID */
576 else
577 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB = vmxR0SetupTLBDummy;
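    /* Summary: with EPT the TLB is flushed via INVEPT, with VPID via INVVPID; with neither
       feature the guest mappings are implicitly flushed on every VM entry/exit, so the
       dummy setup has nothing to manage. */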
578
579vmx_end:
580 VMXR0CheckError(pVM, &pVM->aCpus[0], rc);
581 return rc;
582}
583
584
585/**
586 * Injects an event (trap or external interrupt)
587 *
588 * @returns VBox status code.
589 * @param pVM The VM to operate on.
590 * @param pVCpu The VMCPU to operate on.
591 * @param pCtx CPU Context
592 * @param intInfo VMX interrupt info
593 * @param cbInstr Opcode length of faulting instruction
594 * @param errCode Error code (optional)
595 */
596static int VMXR0InjectEvent(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t intInfo, uint32_t cbInstr, uint32_t errCode)
597{
598 int rc;
599 uint32_t iGate = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
600
601#ifdef VBOX_STRICT
602 if (iGate == 0xE)
603 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x CR2=%RGv intInfo=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode, pCtx->cr2, intInfo));
604 else
605 if (iGate < 0x20)
606 LogFlow(("VMXR0InjectEvent: Injecting interrupt %d at %RGv error code=%08x\n", iGate, (RTGCPTR)pCtx->rip, errCode));
607 else
608 {
609 LogFlow(("INJ-EI: %x at %RGv\n", iGate, (RTGCPTR)pCtx->rip));
610 Assert(VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW || !VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
611 Assert(VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW || pCtx->eflags.u32 & X86_EFL_IF);
612 }
613#endif
614
615#ifdef HWACCM_VMX_EMULATE_REALMODE
616 if (CPUMIsGuestInRealModeEx(pCtx))
617 {
618 RTGCPHYS GCPhysHandler;
619 uint16_t offset, ip;
620 RTSEL sel;
621
622 /* Injecting events doesn't work right with real mode emulation.
623 * (#GP if we try to inject external hardware interrupts)
624 * Inject the interrupt or trap directly instead.
625 *
626 * ASSUMES no access handlers for the bits we read or write below (should be safe).
627 */
628 Log(("Manual interrupt/trap '%x' inject (real mode)\n", iGate));
629
630 /* Check if the interrupt handler is present. */
631 if (iGate * 4 + 3 > pCtx->idtr.cbIdt)
632 {
633 Log(("IDT cbIdt violation\n"));
634 if (iGate != X86_XCPT_DF)
635 {
636 RTGCUINTPTR intInfo;
637
638 intInfo = (iGate == X86_XCPT_GP) ? (uint32_t)X86_XCPT_DF : iGate;
639 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
640 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
641 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
642
643 return VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0 /* no error code according to the Intel docs */);
644 }
645 Log(("Triple fault -> reset the VM!\n"));
646 return VINF_EM_RESET;
647 }
648 if ( VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
649 || iGate == 3 /* Both #BP and #OF point to the instruction after. */
650 || iGate == 4)
651 {
652 ip = pCtx->ip + cbInstr;
653 }
654 else
655 ip = pCtx->ip;
656
657 /* Read the selector:offset pair of the interrupt handler. */
658 GCPhysHandler = (RTGCPHYS)pCtx->idtr.pIdt + iGate * 4;
659 rc = PGMPhysSimpleReadGCPhys(pVM, &offset, GCPhysHandler, sizeof(offset)); AssertRC(rc);
660 rc = PGMPhysSimpleReadGCPhys(pVM, &sel, GCPhysHandler + 2, sizeof(sel)); AssertRC(rc);
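        /* A real-mode IVT entry is four bytes: the 16-bit handler offset followed by the
           16-bit code segment selector, hence the two reads above. */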
661
662 LogFlow(("IDT handler %04X:%04X\n", sel, offset));
663
664 /* Construct the stack frame. */
665 /** @todo should check stack limit. */
666 pCtx->sp -= 2;
667 LogFlow(("ss:sp %04X:%04X eflags=%x\n", pCtx->ss, pCtx->sp, pCtx->eflags.u));
668 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->eflags, sizeof(uint16_t)); AssertRC(rc);
669 pCtx->sp -= 2;
670 LogFlow(("ss:sp %04X:%04X cs=%x\n", pCtx->ss, pCtx->sp, pCtx->cs));
671 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &pCtx->cs, sizeof(uint16_t)); AssertRC(rc);
672 pCtx->sp -= 2;
673 LogFlow(("ss:sp %04X:%04X ip=%x\n", pCtx->ss, pCtx->sp, ip));
674 rc = PGMPhysSimpleWriteGCPhys(pVM, pCtx->ssHid.u64Base + pCtx->sp, &ip, sizeof(ip)); AssertRC(rc);
675
676 /* Update the CPU state for executing the handler. */
677 pCtx->rip = offset;
678 pCtx->cs = sel;
679 pCtx->csHid.u64Base = sel << 4;
680 pCtx->eflags.u &= ~(X86_EFL_IF|X86_EFL_TF|X86_EFL_RF|X86_EFL_AC);
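        /* This mirrors what the CPU does for a real-mode INT: FLAGS, CS and IP are pushed,
           the flags are masked, and execution resumes at the handler. */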
681
682 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_SEGMENT_REGS;
683 return VINF_SUCCESS;
684 }
685#endif /* HWACCM_VMX_EMULATE_REALMODE */
686
687 /* Set event injection state. */
688 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_IRQ_INFO, intInfo | (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT));
689
690 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_INSTR_LENGTH, cbInstr);
691 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_EXCEPTION_ERRCODE, errCode);
692
693 AssertRC(rc);
694 return rc;
695}
696
697
698/**
699 * Checks for pending guest interrupts and injects them
700 *
701 * @returns VBox status code.
702 * @param pVM The VM to operate on.
703 * @param pVCpu The VMCPU to operate on.
704 * @param pCtx CPU Context
705 */
706static int VMXR0CheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, CPUMCTX *pCtx)
707{
708 int rc;
709
710 /* Dispatch any pending interrupts. (injected before, but a VM exit occurred prematurely) */
711 if (pVCpu->hwaccm.s.Event.fPending)
712 {
713 Log(("Reinjecting event %RX64 %08x at %RGv cr2=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip, pCtx->cr2));
714 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
715 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, pVCpu->hwaccm.s.Event.intInfo, 0, pVCpu->hwaccm.s.Event.errCode);
716 AssertRC(rc);
717
718 pVCpu->hwaccm.s.Event.fPending = false;
719 return VINF_SUCCESS;
720 }
721
722 if (pVM->hwaccm.s.fInjectNMI)
723 {
724 RTGCUINTPTR intInfo;
725
726 intInfo = X86_XCPT_NMI;
727 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
728 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
729
730 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, 0);
731 AssertRC(rc);
732
733 pVM->hwaccm.s.fInjectNMI = false;
734 return VINF_SUCCESS;
735 }
736
737 /* When external interrupts are pending, we should exit the VM when IF is set. */
738 if ( !TRPMHasTrap(pVCpu)
739 && VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
740 {
741 if (!(pCtx->eflags.u32 & X86_EFL_IF))
742 {
743 if (!(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT))
744 {
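                /* Interrupt-window exiting forces a VM-exit as soon as the guest can accept
                   interrupts again (IF set, no interrupt shadow), so we can inject it then. */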
745 LogFlow(("Enable irq window exit!\n"));
746 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
747 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
748 AssertRC(rc);
749 }
750 /* else nothing to do but wait */
751 }
752 else
753 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
754 {
755 uint8_t u8Interrupt;
756
757 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
758 Log(("Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc cs:rip=%04X:%RGv\n", u8Interrupt, u8Interrupt, rc, pCtx->cs, (RTGCPTR)pCtx->rip));
759 if (RT_SUCCESS(rc))
760 {
761 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
762 AssertRC(rc);
763 }
764 else
765 {
766 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
767 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
768 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
769 /* Just continue */
770 }
771 }
772 else
773 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS!!\n", (RTGCPTR)pCtx->rip));
774 }
775
776#ifdef VBOX_STRICT
777 if (TRPMHasTrap(pVCpu))
778 {
779 uint8_t u8Vector;
780 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
781 AssertRC(rc);
782 }
783#endif
784
785 if ( pCtx->eflags.u32 & X86_EFL_IF
786 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
787 && TRPMHasTrap(pVCpu)
788 )
789 {
790 uint8_t u8Vector;
791 int rc;
792 TRPMEVENT enmType;
793 RTGCUINTPTR intInfo;
794 RTGCUINT errCode;
795
796 /* If a new event is pending, then dispatch it now. */
797 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &errCode, 0);
798 AssertRC(rc);
799 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
800 Assert(enmType != TRPM_SOFTWARE_INT);
801
802 /* Clear the pending trap. */
803 rc = TRPMResetTrap(pVCpu);
804 AssertRC(rc);
805
806 intInfo = u8Vector;
807 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
808
809 if (enmType == TRPM_TRAP)
810 {
811 switch (u8Vector) {
812 case 8:
813 case 10:
814 case 11:
815 case 12:
816 case 13:
817 case 14:
818 case 17:
819 /* These vectors (#DF, #TS, #NP, #SS, #GP, #PF, #AC) deliver an error code. */
820 intInfo |= VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_VALID;
821 break;
822 default:
823 break;
824 }
825 if (u8Vector == X86_XCPT_BP || u8Vector == X86_XCPT_OF)
826 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
827 else
828 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
829 }
830 else
831 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
832
833 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
834 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, 0, errCode);
835 AssertRC(rc);
836 } /* if (interrupts can be dispatched) */
837
838 return VINF_SUCCESS;
839}
840
841/**
842 * Save the host state
843 *
844 * @returns VBox status code.
845 * @param pVM The VM to operate on.
846 * @param pVCpu The VMCPU to operate on.
847 */
848VMMR0DECL(int) VMXR0SaveHostState(PVM pVM, PVMCPU pVCpu)
849{
850 int rc = VINF_SUCCESS;
851
852 /*
853 * Host CPU Context
854 */
855 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_HOST_CONTEXT)
856 {
857 RTIDTR idtr;
858 RTGDTR gdtr;
859 RTSEL SelTR;
860 PX86DESCHC pDesc;
861 uintptr_t trBase;
862 RTSEL cs;
863 RTSEL ss;
864 uint64_t cr3;
865
866 /* Control registers */
867 rc = VMXWriteVMCS(VMX_VMCS_HOST_CR0, ASMGetCR0());
868#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
869 if (VMX_IS_64BIT_HOST_MODE())
870 {
871 cr3 = hwaccmR0Get64bitCR3();
872 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_CR3, cr3);
873 }
874 else
875#endif
876 {
877 cr3 = ASMGetCR3();
878 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR3, cr3);
879 }
880 rc |= VMXWriteVMCS(VMX_VMCS_HOST_CR4, ASMGetCR4());
881 AssertRC(rc);
882 Log2(("VMX_VMCS_HOST_CR0 %08x\n", ASMGetCR0()));
883 Log2(("VMX_VMCS_HOST_CR3 %08RX64\n", cr3));
884 Log2(("VMX_VMCS_HOST_CR4 %08x\n", ASMGetCR4()));
885
886 /* Selector registers. */
887#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
888 if (VMX_IS_64BIT_HOST_MODE())
889 {
890 cs = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelCS;
891 ss = (RTSEL)(uintptr_t)&SUPR0Abs64bitKernelSS;
892 }
893 else
894 {
895 /* sysenter loads LDT cs & ss, VMX doesn't like this. Load the GDT ones (safe). */
896 cs = (RTSEL)(uintptr_t)&SUPR0AbsKernelCS;
897 ss = (RTSEL)(uintptr_t)&SUPR0AbsKernelSS;
898 }
899#else
900 cs = ASMGetCS();
901 ss = ASMGetSS();
902#endif
903 Assert(!(cs & X86_SEL_LDT)); Assert((cs & X86_SEL_RPL) == 0);
904 Assert(!(ss & X86_SEL_LDT)); Assert((ss & X86_SEL_RPL) == 0);
905 rc = VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_CS, cs);
906 /* Note: VMX is (again) very picky about the RPL of the selectors here; we'll restore them manually. */
907 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_DS, 0);
908 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_ES, 0);
909#if HC_ARCH_BITS == 32
910 if (!VMX_IS_64BIT_HOST_MODE())
911 {
912 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_FS, 0);
913 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_GS, 0);
914 }
915#endif
916 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_SS, ss);
917 SelTR = ASMGetTR();
918 rc |= VMXWriteVMCS(VMX_VMCS16_HOST_FIELD_TR, SelTR);
919 AssertRC(rc);
920 Log2(("VMX_VMCS_HOST_FIELD_CS %08x (%08x)\n", cs, ASMGetSS()));
921 Log2(("VMX_VMCS_HOST_FIELD_DS 00000000 (%08x)\n", ASMGetDS()));
922 Log2(("VMX_VMCS_HOST_FIELD_ES 00000000 (%08x)\n", ASMGetES()));
923 Log2(("VMX_VMCS_HOST_FIELD_FS 00000000 (%08x)\n", ASMGetFS()));
924 Log2(("VMX_VMCS_HOST_FIELD_GS 00000000 (%08x)\n", ASMGetGS()));
925 Log2(("VMX_VMCS_HOST_FIELD_SS %08x (%08x)\n", ss, ASMGetSS()));
926 Log2(("VMX_VMCS_HOST_FIELD_TR %08x\n", ASMGetTR()));
927
928 /* GDTR & IDTR */
929#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
930 if (VMX_IS_64BIT_HOST_MODE())
931 {
932 X86XDTR64 gdtr64, idtr64;
933 hwaccmR0Get64bitGDTRandIDTR(&gdtr64, &idtr64);
934 rc = VMXWriteVMCS64(VMX_VMCS_HOST_GDTR_BASE, gdtr64.uAddr);
935 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_IDTR_BASE, idtr64.uAddr);
936 AssertRC(rc);
937 Log2(("VMX_VMCS_HOST_GDTR_BASE %RX64\n", gdtr64.uAddr));
938 Log2(("VMX_VMCS_HOST_IDTR_BASE %RX64\n", idtr64.uAddr));
939 gdtr.cbGdt = gdtr64.cb;
940 gdtr.pGdt = (uintptr_t)gdtr64.uAddr;
941 }
942 else
943#endif
944 {
945 ASMGetGDTR(&gdtr);
946 rc = VMXWriteVMCS(VMX_VMCS_HOST_GDTR_BASE, gdtr.pGdt);
947 ASMGetIDTR(&idtr);
948 rc |= VMXWriteVMCS(VMX_VMCS_HOST_IDTR_BASE, idtr.pIdt);
949 AssertRC(rc);
950 Log2(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", gdtr.pGdt));
951 Log2(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", idtr.pIdt));
952 }
953
954
955 /* Save the base address of the TR selector. */
956 if (SelTR > gdtr.cbGdt)
957 {
958 AssertMsgFailed(("Invalid TR selector %x. GDTR.cbGdt=%x\n", SelTR, gdtr.cbGdt));
959 return VERR_VMX_INVALID_HOST_STATE;
960 }
961
962#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
963 if (VMX_IS_64BIT_HOST_MODE())
964 {
965 pDesc = &((PX86DESCHC)gdtr.pGdt)[SelTR >> X86_SEL_SHIFT_HC]; /// ????
966 uint64_t trBase64 = X86DESC64_BASE(*(PX86DESC64)pDesc);
967 rc = VMXWriteVMCS64(VMX_VMCS_HOST_TR_BASE, trBase64);
968 Log2(("VMX_VMCS_HOST_TR_BASE %RX64\n", trBase64));
969 AssertRC(rc);
970 }
971 else
972#endif
973 {
974 pDesc = &((PX86DESCHC)gdtr.pGdt)[SelTR >> X86_SEL_SHIFT_HC];
975#if HC_ARCH_BITS == 64
976 trBase = X86DESC64_BASE(*pDesc);
977#else
978 trBase = X86DESC_BASE(*pDesc);
979#endif
980 rc = VMXWriteVMCS(VMX_VMCS_HOST_TR_BASE, trBase);
981 AssertRC(rc);
982 Log2(("VMX_VMCS_HOST_TR_BASE %RHv\n", trBase));
983 }
984
985 /* FS and GS base. */
986#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
987 if (VMX_IS_64BIT_HOST_MODE())
988 {
989 Log2(("MSR_K8_FS_BASE = %RX64\n", ASMRdMsr(MSR_K8_FS_BASE)));
990 Log2(("MSR_K8_GS_BASE = %RX64\n", ASMRdMsr(MSR_K8_GS_BASE)));
991 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FS_BASE, ASMRdMsr(MSR_K8_FS_BASE));
992 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_GS_BASE, ASMRdMsr(MSR_K8_GS_BASE));
993 }
994#endif
995 AssertRC(rc);
996
997 /* Sysenter MSRs. */
998 /** @todo expensive!! */
999 rc = VMXWriteVMCS(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS));
1000 Log2(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)));
1001#ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1002 if (VMX_IS_64BIT_HOST_MODE())
1003 {
1004 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1005 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1006 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1007 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1008 }
1009 else
1010 {
1011 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1012 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1013 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1014 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1015 }
1016#elif HC_ARCH_BITS == 32
1017 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP));
1018 rc |= VMXWriteVMCS(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP));
1019 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)));
1020 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX32\n", ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)));
1021#else
1022 Log2(("VMX_VMCS_HOST_SYSENTER_EIP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_EIP)));
1023 Log2(("VMX_VMCS_HOST_SYSENTER_ESP %RX64\n", ASMRdMsr(MSR_IA32_SYSENTER_ESP)));
1024 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP));
1025 rc |= VMXWriteVMCS64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP));
1026#endif
1027 AssertRC(rc);
1028
1029#if 0 /* @todo deal with 32/64 */
1030 /* Restore the host EFER - on CPUs that support it. */
1031 if (pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1 & VMX_VMCS_CTRL_EXIT_CONTROLS_LOAD_HOST_EFER_MSR)
1032 {
1033 uint64_t msrEFER = ASMRdMsr(MSR_IA32_EFER);
1034 rc = VMXWriteVMCS64(VMX_VMCS_HOST_FIELD_EFER_FULL, msrEFER);
1035 AssertRC(rc);
1036 }
1037#endif
1038 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_HOST_CONTEXT;
1039 }
1040 return rc;
1041}
1042
1043/**
1044 * Prefetch the 4 PDPT pointers (PAE and nested paging only)
1045 *
1046 * @param pVM The VM to operate on.
1047 * @param pVCpu The VMCPU to operate on.
1048 * @param pCtx Guest context
1049 */
1050static void vmxR0PrefetchPAEPdptrs(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1051{
1052 if (CPUMIsGuestInPAEModeEx(pCtx))
1053 {
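        /* With EPT active, VM-entry loads the guest PDPTEs from these VMCS fields rather than
           from guest memory, so they must be refreshed whenever the guest CR3 changes. */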
1054 X86PDPE Pdpe;
1055
1056 for (unsigned i=0;i<4;i++)
1057 {
1058 Pdpe = PGMGstGetPaePDPtr(pVCpu, i);
1059 int rc = VMXWriteVMCS64(VMX_VMCS_GUEST_PDPTR0_FULL + i*2, Pdpe.u);
1060 AssertRC(rc);
1061 }
1062 }
1063}
1064
1065/**
1066 * Update the exception bitmap according to the current CPU state
1067 *
1068 * @param pVM The VM to operate on.
1069 * @param pVCpu The VMCPU to operate on.
1070 * @param pCtx Guest context
1071 */
1072static void vmxR0UpdateExceptionBitmap(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1073{
1074 uint32_t u32TrapMask;
1075 Assert(pCtx);
1076
1077 u32TrapMask = HWACCM_VMX_TRAP_MASK;
1078#ifndef DEBUG
1079 if (pVM->hwaccm.s.fNestedPaging)
1080 u32TrapMask &= ~RT_BIT(X86_XCPT_PF); /* no longer need to intercept #PF. */
1081#endif
1082
1083 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
1084 if ( CPUMIsGuestFPUStateActive(pVCpu) == true
1085 && !(pCtx->cr0 & X86_CR0_NE)
1086 && !pVCpu->hwaccm.s.fFPUOldStyleOverride)
1087 {
1088 u32TrapMask |= RT_BIT(X86_XCPT_MF);
1089 pVCpu->hwaccm.s.fFPUOldStyleOverride = true;
1090 }
1091
1092#ifdef DEBUG /* only till after branching; enable it by default then. */
1093 /* Intercept X86_XCPT_DB if stepping is enabled */
1094 if (DBGFIsStepping(pVM))
1095 u32TrapMask |= RT_BIT(X86_XCPT_DB);
1096 /** @todo Don't trap it unless the debugger has armed breakpoints. */
1097 u32TrapMask |= RT_BIT(X86_XCPT_BP);
1098#endif
1099
1100#ifdef VBOX_STRICT
1101 Assert(u32TrapMask & RT_BIT(X86_XCPT_GP));
1102#endif
1103
1104# ifdef HWACCM_VMX_EMULATE_REALMODE
1105 /* Intercept all exceptions in real mode as none of them can be injected directly (#GP otherwise). */
1106 if (CPUMIsGuestInRealModeEx(pCtx) && pVM->hwaccm.s.vmx.pRealModeTSS)
1107 u32TrapMask |= HWACCM_VMX_TRAP_MASK_REALMODE;
1108# endif /* HWACCM_VMX_EMULATE_REALMODE */
1109
1110 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXCEPTION_BITMAP, u32TrapMask);
1111 AssertRC(rc);
1112}
1113
1114/**
1115 * Loads the guest state
1116 *
1117 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
1118 *
1119 * @returns VBox status code.
1120 * @param pVM The VM to operate on.
1121 * @param pVCpu The VMCPU to operate on.
1122 * @param pCtx Guest context
1123 */
1124VMMR0DECL(int) VMXR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1125{
1126 int rc = VINF_SUCCESS;
1127 RTGCUINTPTR val;
1128 X86EFLAGS eflags;
1129
1130 /* VMX_VMCS_CTRL_ENTRY_CONTROLS
1131 * Set required bits to one and zero according to the MSR capabilities.
1132 */
1133 val = pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0;
1134 /* Load guest debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1135 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_DEBUG;
1136#if 0 /* @todo deal with 32/64 */
1137 /* Required for the EFER write below, not supported on all CPUs. */
1138 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_GUEST_EFER_MSR;
1139#endif
1140 /* 64 bits guest mode? */
1141 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1142 val |= VMX_VMCS_CTRL_ENTRY_CONTROLS_IA64_MODE;
1143 /* else Must be zero when AMD64 is not available. */
1144
1145 /* Mask away the bits that the CPU doesn't support */
1146 val &= pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1;
1147 rc = VMXWriteVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, val);
1148 AssertRC(rc);
1149
1150 /* VMX_VMCS_CTRL_EXIT_CONTROLS
1151 * Set required bits to one and zero according to the MSR capabilities.
1152 */
1153 val = pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0;
1154
1155 /* Save debug controls (dr7 & IA32_DEBUGCTL_MSR) (forced to 1 on the 'first' VT-x capable CPUs; this actually includes the newest Nehalem CPUs) */
1156#if 0 /* @todo deal with 32/64 */
1157 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG | VMX_VMCS_CTRL_EXIT_CONTROLS_LOAD_HOST_EFER_MSR;
1158#else
1159 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_SAVE_DEBUG;
1160#endif
1161
1162#if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1163 if (VMX_IS_64BIT_HOST_MODE())
1164 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64;
1165 /* else: Must be zero when AMD64 is not available. */
1166#elif HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS)
1167 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1168 val |= VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64; /* our switcher goes to long mode */
1169 else
1170 Assert(!(val & VMX_VMCS_CTRL_EXIT_CONTROLS_HOST_AMD64));
1171#endif
1172 val &= pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1;
1173 /* Don't acknowledge external interrupts on VM-exit. */
1174 rc = VMXWriteVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, val);
1175 AssertRC(rc);
1176
1177 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1178 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
1179 {
1180#ifdef HWACCM_VMX_EMULATE_REALMODE
1181 if (pVM->hwaccm.s.vmx.pRealModeTSS)
1182 {
1183 PGMMODE enmGuestMode = PGMGetGuestMode(pVCpu);
1184 if (pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode != enmGuestMode)
1185 {
1186 /* Correct weird requirements for switching to protected mode. */
1187 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode == PGMMODE_REAL
1188 && enmGuestMode >= PGMMODE_PROTECTED)
1189 {
1190 /* Flush the recompiler code cache, as the guest is likely to rewrite
1191 * code that it will later execute in real mode (OpenBSD 4.0 is one
1192 * such example).
1193 */
1194 REMFlushTBs(pVM);
1195
1196 /* DPL of all hidden selector registers must match the current CPL (0). */
1197 pCtx->csHid.Attr.n.u2Dpl = 0;
1198 pCtx->csHid.Attr.n.u4Type = X86_SEL_TYPE_CODE | X86_SEL_TYPE_RW_ACC;
1199
1200 pCtx->dsHid.Attr.n.u2Dpl = 0;
1201 pCtx->esHid.Attr.n.u2Dpl = 0;
1202 pCtx->fsHid.Attr.n.u2Dpl = 0;
1203 pCtx->gsHid.Attr.n.u2Dpl = 0;
1204 pCtx->ssHid.Attr.n.u2Dpl = 0;
1205
1206 /* The limit must correspond to the granularity bit. */
1207 if (!pCtx->csHid.Attr.n.u1Granularity)
1208 pCtx->csHid.u32Limit &= 0xffff;
1209 if (!pCtx->dsHid.Attr.n.u1Granularity)
1210 pCtx->dsHid.u32Limit &= 0xffff;
1211 if (!pCtx->esHid.Attr.n.u1Granularity)
1212 pCtx->esHid.u32Limit &= 0xffff;
1213 if (!pCtx->fsHid.Attr.n.u1Granularity)
1214 pCtx->fsHid.u32Limit &= 0xffff;
1215 if (!pCtx->gsHid.Attr.n.u1Granularity)
1216 pCtx->gsHid.u32Limit &= 0xffff;
1217 if (!pCtx->ssHid.Attr.n.u1Granularity)
1218 pCtx->ssHid.u32Limit &= 0xffff;
1219 }
1220 else
1221 /* Switching from protected mode to real mode. */
1222 if ( pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode >= PGMMODE_PROTECTED
1223 && enmGuestMode == PGMMODE_REAL)
1224 {
1225 /* The limit must also be set to 0xffff. */
1226 pCtx->csHid.u32Limit = 0xffff;
1227 pCtx->dsHid.u32Limit = 0xffff;
1228 pCtx->esHid.u32Limit = 0xffff;
1229 pCtx->fsHid.u32Limit = 0xffff;
1230 pCtx->gsHid.u32Limit = 0xffff;
1231 pCtx->ssHid.u32Limit = 0xffff;
1232
1233 Assert(pCtx->csHid.u64Base <= 0xfffff);
1234 Assert(pCtx->dsHid.u64Base <= 0xfffff);
1235 Assert(pCtx->esHid.u64Base <= 0xfffff);
1236 Assert(pCtx->fsHid.u64Base <= 0xfffff);
1237 Assert(pCtx->gsHid.u64Base <= 0xfffff);
1238 }
1239 pVCpu->hwaccm.s.vmx.enmLastSeenGuestMode = enmGuestMode;
1240 }
1241 else
1242 /* VT-x will fail with a guest invalid state otherwise... (CPU state after a reset) */
1243 if ( CPUMIsGuestInRealModeEx(pCtx)
1244 && pCtx->csHid.u64Base == 0xffff0000)
1245 {
1246 pCtx->csHid.u64Base = 0xf0000;
1247 pCtx->cs = 0xf000;
1248 }
1249 }
1250#endif /* HWACCM_VMX_EMULATE_REALMODE */
1251
1252 VMX_WRITE_SELREG(ES, es);
1253 AssertRC(rc);
1254
1255 VMX_WRITE_SELREG(CS, cs);
1256 AssertRC(rc);
1257
1258 VMX_WRITE_SELREG(SS, ss);
1259 AssertRC(rc);
1260
1261 VMX_WRITE_SELREG(DS, ds);
1262 AssertRC(rc);
1263
1264 /* The base values in the hidden fs & gs registers are not in sync with the msrs; they are cut to 32 bits. */
1265 VMX_WRITE_SELREG(FS, fs);
1266 AssertRC(rc);
1267
1268 VMX_WRITE_SELREG(GS, gs);
1269 AssertRC(rc);
1270 }
1271
1272 /* Guest CPU context: LDTR. */
1273 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
1274 {
1275 if (pCtx->ldtr == 0)
1276 {
1277 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, 0);
1278 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, 0);
1279 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, 0);
1280 /* Note: vmlaunch will fail with 0 or just 0x02. No idea why. */
1281 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, 0x82 /* present, LDT */);
1282 }
1283 else
1284 {
1285 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_LDTR, pCtx->ldtr);
1286 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtrHid.u32Limit);
1287 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_LDTR_BASE, pCtx->ldtrHid.u64Base);
1288 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, pCtx->ldtrHid.Attr.u);
1289 }
1290 AssertRC(rc);
1291 }
1292 /* Guest CPU context: TR. */
1293 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
1294 {
1295#ifdef HWACCM_VMX_EMULATE_REALMODE
1296 /* Real mode emulation using v86 mode with CR4.VME (interrupt redirection using the int bitmap in the TSS) */
1297 if (CPUMIsGuestInRealModeEx(pCtx))
1298 {
1299 RTGCPHYS GCPhys;
1300
1301 /* We convert it here every time as pci regions could be reconfigured. */
1302 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pRealModeTSS, &GCPhys);
1303 AssertRC(rc);
1304
1305 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, 0);
1306 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, HWACCM_VTX_TSS_SIZE);
1307 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, GCPhys /* phys = virt in this mode */);
1308
1309 X86DESCATTR attr;
1310
1311 attr.u = 0;
1312 attr.n.u1Present = 1;
1313 attr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY;
1314 val = attr.u;
1315 }
1316 else
1317#endif /* HWACCM_VMX_EMULATE_REALMODE */
1318 {
1319 rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_TR, pCtx->tr);
1320 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_LIMIT, pCtx->trHid.u32Limit);
1321 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_TR_BASE, pCtx->trHid.u64Base);
1322
1323 val = pCtx->trHid.Attr.u;
1324
1325 /* The TSS selector must be busy. */
1326 if ((val & 0xF) == X86_SEL_TYPE_SYS_286_TSS_AVAIL)
1327 val = (val & ~0xF) | X86_SEL_TYPE_SYS_286_TSS_BUSY;
1328 else
1329 /* Default even if no TR selector has been set (otherwise vmlaunch will fail!) */
1330 val = (val & ~0xF) | X86_SEL_TYPE_SYS_386_TSS_BUSY;
1331
1332 }
1333 rc |= VMXWriteVMCS(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, val);
1334 AssertRC(rc);
1335 }
1336 /* Guest CPU context: GDTR. */
1337 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
1338 {
1339 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt);
1340 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_GDTR_BASE, pCtx->gdtr.pGdt);
1341 AssertRC(rc);
1342 }
1343 /* Guest CPU context: IDTR. */
1344 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
1345 {
1346 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt);
1347 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_IDTR_BASE, pCtx->idtr.pIdt);
1348 AssertRC(rc);
1349 }
1350
1351 /*
1352 * Sysenter MSRs (unconditional)
1353 */
1354 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs);
1355 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip);
1356 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp);
1357 AssertRC(rc);
1358
1359 /* Control registers */
1360 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
1361 {
1362 val = pCtx->cr0;
1363 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, val);
1364 Log2(("Guest CR0-shadow %08x\n", val));
1365 if (CPUMIsGuestFPUStateActive(pVCpu) == false)
1366 {
1367 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
1368 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
1369 }
1370 else
1371 {
1372 /** @todo check if we support the old style mess correctly. */
1373 if (!(val & X86_CR0_NE))
1374 Log(("Forcing X86_CR0_NE!!!\n"));
1375
1376 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
1377 }
1378 /* Note: protected mode & paging are always enabled; we use them for emulating real and protected mode without paging too. */
1379 val |= X86_CR0_PE | X86_CR0_PG;
1380 if (pVM->hwaccm.s.fNestedPaging)
1381 {
1382 if (CPUMIsGuestInPagedProtectedModeEx(pCtx))
1383 {
1384 /* Disable cr3 read/write monitoring as we don't need it for EPT. */
1385 pVCpu->hwaccm.s.vmx.proc_ctls &= ~( VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1386 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT);
1387 }
1388 else
1389 {
1390 /* Reenable cr3 read/write monitoring as our identity mapped page table is active. */
1391 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1392 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1393 }
1394 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1395 AssertRC(rc);
1396 }
1397 else
1398 {
1399 /* Note: We must also set this as we rely on protecting various pages for which supervisor writes must be caught. */
1400 val |= X86_CR0_WP;
1401 }
1402
1403 /* Always enable caching. */
1404 val &= ~(X86_CR0_CD|X86_CR0_NW);
1405
1406 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR0, val);
1407 Log2(("Guest CR0 %08x\n", val));
1408 /* CR0 flags owned by the host; if the guest attempts to change them, then
1409 * the VM will exit.
1410 */
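        /* For bits set in this mask the guest reads the CR0 read-shadow value set above;
           writes that would change a masked bit away from the shadow cause a VM-exit. */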
1411 val = X86_CR0_PE /* Must monitor this bit (assumptions are made for real mode emulation) */
1412 | X86_CR0_WP /* Must monitor this bit (it must always be enabled). */
1413 | X86_CR0_PG /* Must monitor this bit (assumptions are made for real mode & protected mode without paging emulation) */
1414 | X86_CR0_TS
1415 | X86_CR0_ET /* Bit not restored during VM-exit! */
1416 | X86_CR0_CD /* Bit not restored during VM-exit! */
1417 | X86_CR0_NW /* Bit not restored during VM-exit! */
1418 | X86_CR0_NE
1419 | X86_CR0_MP;
1420 pVCpu->hwaccm.s.vmx.cr0_mask = val;
1421
1422 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR0_MASK, val);
1423 Log2(("Guest CR0-mask %08x\n", val));
1424 AssertRC(rc);
1425 }
1426 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
1427 {
1428 /* CR4 */
1429 rc = VMXWriteVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, pCtx->cr4);
1430 Log2(("Guest CR4-shadow %08x\n", pCtx->cr4));
1431 /* Set the required bits in cr4 too (currently X86_CR4_VMXE). */
1432 val = pCtx->cr4 | (uint32_t)pVM->hwaccm.s.vmx.msr.vmx_cr4_fixed0;
1433
1434 if (!pVM->hwaccm.s.fNestedPaging)
1435 {
1436 switch(pVCpu->hwaccm.s.enmShadowMode)
1437 {
1438 case PGMMODE_REAL: /* Real mode -> emulated using v86 mode */
1439 case PGMMODE_PROTECTED: /* Protected mode, no paging -> emulated using identity mapping. */
1440 case PGMMODE_32_BIT: /* 32-bit paging. */
1441 val &= ~X86_CR4_PAE;
1442 break;
1443
1444 case PGMMODE_PAE: /* PAE paging. */
1445 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1446 /** @todo use normal 32 bits paging */
1447 val |= X86_CR4_PAE;
1448 break;
1449
1450 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1451 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1452#ifdef VBOX_ENABLE_64_BITS_GUESTS
1453 break;
1454#else
1455 AssertFailed();
1456 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1457#endif
1458 default: /* shut up gcc */
1459 AssertFailed();
1460 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1461 }
1462 }
1463 else
1464 if (!CPUMIsGuestInPagedProtectedModeEx(pCtx))
1465 {
1466 /* We use 4 MB pages in our identity mapping page table for real and protected mode without paging. */
1467 val |= X86_CR4_PSE;
1468 /* Our identity mapping is a 32 bits page directory. */
1469 val &= ~X86_CR4_PAE;
1470 }
1471
1472 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_CR4, val);
1473 Log2(("Guest CR4 %08x\n", val));
1474 /* CR4 flags owned by the host; if the guest attempts to change them, then
1475 * the VM will exit.
1476 */
1477 val = 0
1478 | X86_CR4_PAE
1479 | X86_CR4_PGE
1480 | X86_CR4_PSE
1481 | X86_CR4_VMXE;
1482 pVCpu->hwaccm.s.vmx.cr4_mask = val;
1483
1484 rc |= VMXWriteVMCS(VMX_VMCS_CTRL_CR4_MASK, val);
1485 Log2(("Guest CR4-mask %08x\n", val));
1486 AssertRC(rc);
1487 }
1488
1489 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
1490 {
1491 if (pVM->hwaccm.s.fNestedPaging)
1492 {
1493 Assert(PGMGetHyperCR3(pVCpu));
1494 pVCpu->hwaccm.s.vmx.GCPhysEPTP = PGMGetHyperCR3(pVCpu);
1495
1496 Assert(!(pVCpu->hwaccm.s.vmx.GCPhysEPTP & 0xfff));
1497 /** @todo Check the IA32_VMX_EPT_VPID_CAP MSR for other supported memory types. */
1498 pVCpu->hwaccm.s.vmx.GCPhysEPTP |= VMX_EPT_MEMTYPE_WB
1499 | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT);
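            /* EPTP layout: bits 2:0 = memory type (6 = write-back), bits 5:3 = EPT page-walk
               length minus one; the upper bits hold the physical address of the top-level table. */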
1500
1501 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_EPTP_FULL, pVCpu->hwaccm.s.vmx.GCPhysEPTP);
1502 AssertRC(rc);
1503
1504 if (!CPUMIsGuestInPagedProtectedModeEx(pCtx))
1505 {
1506 RTGCPHYS GCPhys;
1507
1508 /* We convert it here every time as pci regions could be reconfigured. */
1509 rc = PDMVMMDevHeapR3ToGCPhys(pVM, pVM->hwaccm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys);
1510 AssertRC(rc);
1511
1512 /* We use our identity mapping page table here as we need to map guest virtual to guest physical addresses; EPT will
1513 * take care of the translation to host physical addresses.
1514 */
1515 val = GCPhys;
1516 }
1517 else
1518 {
1519 /* Save the real guest CR3 in VMX_VMCS_GUEST_CR3 */
1520 val = pCtx->cr3;
1521 /* Prefetch the four PDPT entries in PAE mode. */
1522 vmxR0PrefetchPAEPdptrs(pVM, pVCpu, pCtx);
1523 }
1524 }
1525 else
1526 {
1527 val = PGMGetHyperCR3(pVCpu);
1528 Assert(val || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
1529 }
1530
1531 /* Save our shadow CR3 register. */
1532 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_CR3, val);
1533 AssertRC(rc);
1534 }
1535
1536 /* Debug registers. */
1537 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
1538 {
1539 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
1540 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
1541
1542 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
1543 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
1544 pCtx->dr[7] |= 0x400; /* must be one */
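 /* For reference, the DR7 bits touched above: bits 0-7 are the L0/G0..L3/G3 enables,
  * bits 8/9 LE/GE, bit 10 is reserved and must read as one (the 0x400), bits 11, 12,
  * 14 and 15 are reserved and must be zero, bit 13 is GD, and bits 16-31 hold the
  * per-breakpoint R/W and LEN fields.
  */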
1545
1546 /* Resync DR7 */
1547 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
1548 AssertRC(rc);
1549
1550 /* Sync the debug state now if any breakpoint is armed. */
1551 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
1552 && !CPUMIsGuestDebugStateActive(pVCpu)
1553 && !DBGFIsStepping(pVM))
1554 {
1555 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
1556
1557 /* Disable drx move intercepts. */
1558 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
1559 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1560 AssertRC(rc);
1561
1562 /* Save the host and load the guest debug state. */
1563 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1564 AssertRC(rc);
1565 }
1566
1567 /* IA32_DEBUGCTL MSR. */
1568 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_DEBUGCTL_FULL, 0);
1569 AssertRC(rc);
1570
1571 /** @todo do we really ever need this? */
1572 rc |= VMXWriteVMCS(VMX_VMCS_GUEST_DEBUG_EXCEPTIONS, 0);
1573 AssertRC(rc);
1574 }
1575
1576 /* EIP, ESP and EFLAGS */
1577 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_RIP, pCtx->rip);
1578 rc |= VMXWriteVMCS64(VMX_VMCS64_GUEST_RSP, pCtx->rsp);
1579 AssertRC(rc);
1580
1581 /* Bits 22-31, 15, 5 & 3 must be zero. Bit 1 must be 1. */
1582 eflags = pCtx->eflags;
1583 eflags.u32 &= VMX_EFLAGS_RESERVED_0;
1584 eflags.u32 |= VMX_EFLAGS_RESERVED_1;
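 /* Illustrative only: the comment above implies a keep-mask of
  * ~(RT_BIT(3) | RT_BIT(5) | RT_BIT(15) | UINT32_C(0xffc00000)) = 0x003f7fd7 and a
  * force-to-one mask of RT_BIT(1) = 0x2; the authoritative values are whatever the
  * VMX_EFLAGS_RESERVED_0/1 macros define.
  */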
1585
1586#ifdef HWACCM_VMX_EMULATE_REALMODE
1587 /* Real mode emulation using v86 mode. */
1588 if (CPUMIsGuestInRealModeEx(pCtx))
1589 {
1590 pVCpu->hwaccm.s.vmx.RealMode.eflags = eflags;
1591
1592 eflags.Bits.u1VM = 1;
1593 eflags.Bits.u2IOPL = 0; /* must always be 0 or else certain instructions won't cause faults. */
1594 }
1595#endif /* HWACCM_VMX_EMULATE_REALMODE */
1596 rc = VMXWriteVMCS(VMX_VMCS_GUEST_RFLAGS, eflags.u32);
1597 AssertRC(rc);
1598
1599 /* TSC offset. */
1600 uint64_t u64TSCOffset;
1601
1602 if (TMCpuTickCanUseRealTSC(pVCpu, &u64TSCOffset))
1603 {
1604 /* Note: VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT takes precedence over TSC_OFFSET */
1605 rc = VMXWriteVMCS64(VMX_VMCS_CTRL_TSC_OFFSET_FULL, u64TSCOffset);
1606 AssertRC(rc);
1607
1608 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1609 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1610 AssertRC(rc);
1611 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
1612 }
1613 else
1614 {
1615 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_RDTSC_EXIT;
1616 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
1617 AssertRC(rc);
1618 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
1619 }
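 /* Sketch (not compiled) of what the two branches above mean for the guest: with
  * offsetting active the CPU itself returns host TSC + offset on RDTSC, whereas with
  * RDTSC_EXIT set every RDTSC traps and is emulated in the VMX_EXIT_RDTSC handler
  * further down.
  */
#if 0
 {
     uint64_t const u64HostTSC = ASMReadTSC(); /* what the hardware counts */
     uint64_t const u64GuestTSC = u64HostTSC + u64TSCOffset; /* what the guest observes when offsetting is active */
     NOREF(u64GuestTSC);
 }
#endif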
1620
1621 /* 64-bit guest mode? */
1622 if (pCtx->msrEFER & MSR_K6_EFER_LMA)
1623 {
1624#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
1625 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1626#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1627 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64;
1628#else
1629# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1630 if (!pVM->hwaccm.s.fAllow64BitGuests)
1631 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1632# endif
1633 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM64;
1634#endif
1635 /* Unconditionally update these as wrmsr might have changed them. */
1636 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_FS_BASE, pCtx->fsHid.u64Base);
1637 AssertRC(rc);
1638 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_GS_BASE, pCtx->gsHid.u64Base);
1639 AssertRC(rc);
1640 }
1641 else
1642 {
1643 pVCpu->hwaccm.s.vmx.pfnStartVM = VMXR0StartVM32;
1644 }
1645
1646#if 0 /* @todo deal with 32/64 */
1647 /* Unconditionally update the guest EFER - on CPUs that support it. */
1648 if (pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1 & VMX_VMCS_CTRL_ENTRY_CONTROLS_LOAD_GUEST_EFER_MSR)
1649 {
1650 rc = VMXWriteVMCS64(VMX_VMCS_GUEST_EFER_FULL, pCtx->msrEFER);
1651 AssertRC(rc);
1652 }
1653#endif
1654
1655 vmxR0UpdateExceptionBitmap(pVM, pVCpu, pCtx);
1656
1657 /* Done. */
1658 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
1659
1660 return rc;
1661}
1662
1663/**
1664 * Syncs back the guest state
1665 *
1666 * @returns VBox status code.
1667 * @param pVM The VM to operate on.
1668 * @param pVCpu The VMCPU to operate on.
1669 * @param pCtx Guest context
1670 */
1671DECLINLINE(int) VMXR0SaveGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1672{
1673 RTGCUINTREG val, valShadow;
1674 RTGCUINTPTR uInterruptState;
1675 int rc;
1676
1677 /* Let's first sync back eip, esp, and eflags. */
1678 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RIP, &val);
1679 AssertRC(rc);
1680 pCtx->rip = val;
1681 rc = VMXReadCachedVMCS(VMX_VMCS64_GUEST_RSP, &val);
1682 AssertRC(rc);
1683 pCtx->rsp = val;
1684 rc = VMXReadCachedVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
1685 AssertRC(rc);
1686 pCtx->eflags.u32 = val;
1687
1688 /* Take care of instruction fusing (sti, mov ss) */
1689 rc |= VMXReadCachedVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, &val);
1690 uInterruptState = val;
1691 if (uInterruptState != 0)
1692 {
1693 Assert(uInterruptState <= 2); /* only sti & mov ss */
1694 Log(("uInterruptState %x eip=%RGv\n", uInterruptState, pCtx->rip));
1695 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
1696 }
1697 else
1698 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
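 /* Note on the interruptibility-state encoding read above (per the Intel SDM): bit 0
  * means blocking by STI and bit 1 blocking by MOV SS/POP SS, which is why only the
  * values 1 and 2 are expected by the assertion; both map onto the single
  * "inhibit interrupts for one instruction" force-flag here.
  */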
1699
1700 /* Control registers. */
1701 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR0_READ_SHADOW, &valShadow);
1702 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR0, &val);
1703 val = (valShadow & pVCpu->hwaccm.s.vmx.cr0_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr0_mask);
1704 CPUMSetGuestCR0(pVCpu, val);
1705
1706 VMXReadCachedVMCS(VMX_VMCS_CTRL_CR4_READ_SHADOW, &valShadow);
1707 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR4, &val);
1708 val = (valShadow & pVCpu->hwaccm.s.vmx.cr4_mask) | (val & ~pVCpu->hwaccm.s.vmx.cr4_mask);
1709 CPUMSetGuestCR4(pVCpu, val);
1710
1711 /* Note: no reason to sync back the CRx registers. They can't be changed by the guest. */
1712 /* Note: only in the nested paging case can CR3 & CR4 be changed by the guest. */
1713 if ( pVM->hwaccm.s.fNestedPaging
1714 && CPUMIsGuestInPagedProtectedModeEx(pCtx))
1715 {
1716 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
1717
1718 /* Can be updated behind our back in the nested paging case. */
1719 CPUMSetGuestCR2(pVCpu, pCache->cr2);
1720
1721 VMXReadCachedVMCS(VMX_VMCS64_GUEST_CR3, &val);
1722
1723 if (val != pCtx->cr3)
1724 {
1725 CPUMSetGuestCR3(pVCpu, val);
1726 PGMUpdateCR3(pVCpu, val);
1727 }
1728 /* Prefetch the four PDPT entries in PAE mode. */
1729 vmxR0PrefetchPAEPdptrs(pVM, pVCpu, pCtx);
1730 }
1731
1732 /* Sync back DR7 here. */
1733 VMXReadCachedVMCS(VMX_VMCS64_GUEST_DR7, &val);
1734 pCtx->dr[7] = val;
1735
1736 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1737 VMX_READ_SELREG(ES, es);
1738 VMX_READ_SELREG(SS, ss);
1739 VMX_READ_SELREG(CS, cs);
1740 VMX_READ_SELREG(DS, ds);
1741 VMX_READ_SELREG(FS, fs);
1742 VMX_READ_SELREG(GS, gs);
1743
1744 /*
1745 * System MSRs
1746 */
1747 VMXReadCachedVMCS(VMX_VMCS32_GUEST_SYSENTER_CS, &val);
1748 pCtx->SysEnter.cs = val;
1749 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_EIP, &val);
1750 pCtx->SysEnter.eip = val;
1751 VMXReadCachedVMCS(VMX_VMCS64_GUEST_SYSENTER_ESP, &val);
1752 pCtx->SysEnter.esp = val;
1753
1754 /* Misc. registers; must sync everything otherwise we can get out of sync when jumping to ring 3. */
1755 VMX_READ_SELREG(LDTR, ldtr);
1756
1757 VMXReadCachedVMCS(VMX_VMCS32_GUEST_GDTR_LIMIT, &val);
1758 pCtx->gdtr.cbGdt = val;
1759 VMXReadCachedVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
1760 pCtx->gdtr.pGdt = val;
1761
1762 VMXReadCachedVMCS(VMX_VMCS32_GUEST_IDTR_LIMIT, &val);
1763 pCtx->idtr.cbIdt = val;
1764 VMXReadCachedVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
1765 pCtx->idtr.pIdt = val;
1766
1767#ifdef HWACCM_VMX_EMULATE_REALMODE
1768 /* Real mode emulation using v86 mode. */
1769 if (CPUMIsGuestInRealModeEx(pCtx))
1770 {
1771 /* Hide our emulation flags */
1772 pCtx->eflags.Bits.u1VM = 0;
1773
1774 /* Restore original IOPL setting as we always use 0. */
1775 pCtx->eflags.Bits.u2IOPL = pVCpu->hwaccm.s.vmx.RealMode.eflags.Bits.u2IOPL;
1776
1777 /* Force a TR resync every time in case we switch modes. */
1778 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_TR;
1779 }
1780 else
1781#endif /* HWACCM_VMX_EMULATE_REALMODE */
1782 {
1783 /* In real mode we use a fake TSS, so only sync TR back when it's supposed to be valid (i.e. not in real mode). */
1784 VMX_READ_SELREG(TR, tr);
1785 }
1786 return VINF_SUCCESS;
1787}
1788
1789/**
1790 * Dummy placeholder
1791 *
1792 * @param pVM The VM to operate on.
1793 * @param pVCpu The VMCPU to operate on.
1794 */
1795static void vmxR0SetupTLBDummy(PVM pVM, PVMCPU pVCpu)
1796{
1797 NOREF(pVM);
1798 NOREF(pVCpu);
1799 return;
1800}
1801
1802/**
1803 * Setup the tagged TLB for EPT.
1804 *
1805 * @remarks Called via pfnSetupTaggedTLB right before each VM entry.
1806 * @param pVM The VM to operate on.
1807 * @param pVCpu The VMCPU to operate on.
1808 */
1809static void vmxR0SetupTLBEPT(PVM pVM, PVMCPU pVCpu)
1810{
1811 PHWACCM_CPUINFO pCpu;
1812
1813 Assert(pVM->hwaccm.s.fNestedPaging);
1814 Assert(!pVM->hwaccm.s.vmx.fVPID);
1815
1816 /* Deal with tagged TLBs if VPID or EPT is supported. */
1817 pCpu = HWACCMR0GetCurrentCpu();
1818 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
1819 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
1820 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
1821 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
1822 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1823 {
1824 /* Force a TLB flush on VM entry. */
1825 pVCpu->hwaccm.s.fForceTLBFlush = true;
1826 }
1827 else
1828 Assert(!pCpu->fFlushTLB);
1829
1830 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
1831 pCpu->fFlushTLB = false;
1832
1833 if (pVCpu->hwaccm.s.fForceTLBFlush)
1834 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
1835
1836#ifdef VBOX_WITH_STATISTICS
1837 if (pVCpu->hwaccm.s.fForceTLBFlush)
1838 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
1839 else
1840 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
1841#endif
1842}
1843
1844#ifdef HWACCM_VTX_WITH_VPID
1845/**
1846 * Setup the tagged TLB for VPID.
1847 *
1848 * @remarks Called via pfnSetupTaggedTLB right before each VM entry.
1849 * @param pVM The VM to operate on.
1850 * @param pVCpu The VMCPU to operate on.
1851 */
1852static void vmxR0SetupTLBVPID(PVM pVM, PVMCPU pVCpu)
1853{
1854 PHWACCM_CPUINFO pCpu;
1855
1856 Assert(pVM->hwaccm.s.vmx.fVPID);
1857 Assert(!pVM->hwaccm.s.fNestedPaging);
1858
1859 /* Deal with tagged TLBs if VPID or EPT is supported. */
1860 pCpu = HWACCMR0GetCurrentCpu();
1861 /* Force a TLB flush for the first world switch if the current cpu differs from the one we ran on last. */
1862 /* Note that this can happen both for start and resume due to long jumps back to ring 3. */
1863 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
1864 /* if the tlb flush count has changed, another VM has flushed the TLB of this cpu, so we can't use our current ASID anymore. */
1865 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1866 {
1867 /* Force a TLB flush on VM entry. */
1868 pVCpu->hwaccm.s.fForceTLBFlush = true;
1869 }
1870 else
1871 Assert(!pCpu->fFlushTLB);
1872
1873 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
1874
1875 /* Make sure we flush the TLB when required. Switch ASID to achieve the same thing, but without actually flushing the whole TLB (which is expensive). */
1876 if (pVCpu->hwaccm.s.fForceTLBFlush)
1877 {
1878 if ( ++pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID
1879 || pCpu->fFlushTLB)
1880 {
1881 pCpu->fFlushTLB = false;
1882 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
1883 pCpu->cTLBFlushes++;
1884 }
1885 else
1886 {
1887 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
1888 pVCpu->hwaccm.s.fForceTLBFlush = false;
1889 }
1890
1891 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
1892 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
1893 }
1894 else
1895 {
1896 Assert(!pCpu->fFlushTLB);
1897
1898 if (!pCpu->uCurrentASID || !pVCpu->hwaccm.s.uCurrentASID)
1899 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID = 1;
1900 }
1901 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
1902 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
1903 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
1904
1905 int rc = VMXWriteVMCS(VMX_VMCS16_GUEST_FIELD_VPID, pVCpu->hwaccm.s.uCurrentASID);
1906 AssertRC(rc);
1907
1908 if (pVCpu->hwaccm.s.fForceTLBFlush)
1909 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushContext, 0);
1910
1911#ifdef VBOX_WITH_STATISTICS
1912 if (pVCpu->hwaccm.s.fForceTLBFlush)
1913 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
1914 else
1915 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
1916#endif
1917}
1918#endif /* HWACCM_VTX_WITH_VPID */
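/* A minimal sketch (not compiled) of the per-CPU ASID handling implemented by
 * vmxR0SetupTLBVPID above. The structure and names below are illustrative
 * stand-ins, not the real HWACCM types.
 */
#if 0
typedef struct SKETCHCPUSTATE
{
    uint32_t uCurrentASID; /* last ASID handed out on this host CPU */
    uint32_t cTLBFlushes;  /* bumped whenever the ASID space is recycled */
} SKETCHCPUSTATE;

static uint32_t sketchNewASID(SKETCHCPUSTATE *pCpuState, uint32_t *pcVCpuFlushes, uint32_t uMaxASID)
{
    if (++pCpuState->uCurrentASID >= uMaxASID)
    {
        /* Out of ASIDs: recycle the whole space, which implies flushing the TLB. */
        pCpuState->uCurrentASID = 1; /* 0 is reserved for the host */
        pCpuState->cTLBFlushes++;
    }
    *pcVCpuFlushes = pCpuState->cTLBFlushes; /* remember which generation this VCPU belongs to */
    return pCpuState->uCurrentASID;
}
#endif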
1919
1920/**
1921 * Runs guest code in a VT-x VM.
1922 *
1923 * @returns VBox status code.
1924 * @param pVM The VM to operate on.
1925 * @param pVCpu The VMCPU to operate on.
1926 * @param pCtx Guest context
1927 */
1928VMMR0DECL(int) VMXR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1929{
1930 int rc = VINF_SUCCESS;
1931 RTGCUINTREG val;
1932 RTGCUINTREG exitReason = VMX_EXIT_INVALID;
1933 RTGCUINTREG instrError, cbInstr;
1934 RTGCUINTPTR exitQualification;
1935 RTGCUINTPTR intInfo = 0; /* shut up buggy gcc 4 */
1936 RTGCUINTPTR errCode, instrInfo;
1937 bool fSyncTPR = false;
1938 PHWACCM_CPUINFO pCpu = 0;
1939 unsigned cResume = 0;
1940#ifdef VBOX_STRICT
1941 RTCPUID idCpuCheck;
1942#endif
1943#ifdef VBOX_WITH_STATISTICS
1944 bool fStatEntryStarted = true;
1945 bool fStatExit2Started = false;
1946#endif
1947
1948 Log2(("\nE"));
1949
1950 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
1951
1952#ifdef VBOX_STRICT
1953 {
1954 RTCCUINTREG val;
1955
1956 rc = VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
1957 AssertRC(rc);
1958 Log2(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS = %08x\n", val));
1959
1960 /* allowed zero */
1961 if ((val & pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.disallowed0)
1962 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: zero\n"));
1963
1964 /* allowed one */
1965 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_pin_ctls.n.allowed1) != 0)
1966 Log(("Invalid VMX_VMCS_CTRL_PIN_EXEC_CONTROLS: one\n"));
1967
1968 rc = VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
1969 AssertRC(rc);
1970 Log2(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS = %08x\n", val));
1971
1972 /* Must be set according to the MSR, but can be cleared in case of EPT. */
1973 if (pVM->hwaccm.s.fNestedPaging)
1974 val |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_INVLPG_EXIT
1975 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_LOAD_EXIT
1976 | VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_CR3_STORE_EXIT;
1977
1978 /* allowed zero */
1979 if ((val & pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.disallowed0)
1980 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: zero\n"));
1981
1982 /* allowed one */
1983 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1) != 0)
1984 Log(("Invalid VMX_VMCS_CTRL_PROC_EXEC_CONTROLS: one\n"));
1985
1986 rc = VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
1987 AssertRC(rc);
1988 Log2(("VMX_VMCS_CTRL_ENTRY_CONTROLS = %08x\n", val));
1989
1990 /* allowed zero */
1991 if ((val & pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_entry.n.disallowed0)
1992 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: zero\n"));
1993
1994 /* allowed one */
1995 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_entry.n.allowed1) != 0)
1996 Log(("Invalid VMX_VMCS_CTRL_ENTRY_CONTROLS: one\n"));
1997
1998 rc = VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
1999 AssertRC(rc);
2000 Log2(("VMX_VMCS_CTRL_EXIT_CONTROLS = %08x\n", val));
2001
2002 /* allowed zero */
2003 if ((val & pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0) != pVM->hwaccm.s.vmx.msr.vmx_exit.n.disallowed0)
2004 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: zero\n"));
2005
2006 /* allowed one */
2007 if ((val & ~pVM->hwaccm.s.vmx.msr.vmx_exit.n.allowed1) != 0)
2008 Log(("Invalid VMX_VMCS_CTRL_EXIT_CONTROLS: one\n"));
2009 }
2010#endif
2011
2012#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2013 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeEntry = RTTimeNanoTS();
2014#endif
2015
2016 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
2017 */
2018ResumeExecution:
2019 STAM_STATS({
2020 if (fStatExit2Started) { STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = false; }
2021 if (!fStatEntryStarted) { STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = true; }
2022 });
2023 AssertMsg(pVCpu->hwaccm.s.idEnteredCpu == RTMpCpuId(),
2024 ("Expected %d, I'm %d; cResume=%d exitReason=%RGv exitQualification=%RGv\n",
2025 (int)pVCpu->hwaccm.s.idEnteredCpu, (int)RTMpCpuId(), cResume, exitReason, exitQualification));
2026 Assert(!HWACCMR0SuspendPending());
2027
2028 /* Safety precaution; looping for too long here can have a very bad effect on the host */
2029 if (++cResume > HWACCM_MAX_RESUME_LOOPS)
2030 {
2031 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
2032 rc = VINF_EM_RAW_INTERRUPT;
2033 goto end;
2034 }
2035
2036 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
2037 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
2038 {
2039 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
2040 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
2041 {
2042 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
2043 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
2044 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
2045 * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
2046 */
2047 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
2048 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2049 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2050 AssertRC(rc);
2051 }
2052 }
2053 else
2054 {
2055 /* Irq inhibition is no longer active; clear the corresponding VMX state. */
2056 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE, 0);
2057 AssertRC(rc);
2058 }
2059
2060 /* Check for pending actions that force us to go back to ring 3. */
2061 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
2062 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
2063 {
2064 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
2065 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
2066 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2067 goto end;
2068 }
2069 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
2070 if (VM_FF_ISPENDING(pVM, VM_FF_REQUEST))
2071 {
2072 rc = VINF_EM_PENDING_REQUEST;
2073 goto end;
2074 }
2075
2076 /* When external interrupts are pending, we should exit the VM when IF is set. */
2077 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
2078 rc = VMXR0CheckPendingInterrupt(pVM, pVCpu, pCtx);
2079 if (RT_FAILURE(rc))
2080 goto end;
2081
2082 /** @todo check timers?? */
2083
2084 /* TPR caching using CR8 is only available in 64-bit mode. */
2085 /* Note the 32-bit exception for AMD (X86_CPUID_AMD_FEATURE_ECX_CR8L); that capability appears to be missing in Intel CPUs. */
2086 /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (it takes a lock)! */
2087 /**
2088 * @todo reduce overhead
2089 */
2090 if ( (pCtx->msrEFER & MSR_K6_EFER_LMA)
2091 && pVM->hwaccm.s.vmx.pAPIC)
2092 {
2093 /* TPR caching in CR8 */
2094 uint8_t u8TPR;
2095 bool fPending;
2096
2097 int rc = PDMApicGetTPR(pVM, &u8TPR, &fPending);
2098 AssertRC(rc);
2099 /* The TPR can be found at offset 0x80 in the APIC mmio page. */
2100 pVM->hwaccm.s.vmx.pAPIC[0x80] = u8TPR << 4; /* bits 7-4 contain the task priority */
2101
2102 /* Two options here:
2103 * - external interrupt pending, but masked by the TPR value.
2104 * -> a CR8 update that lowers the current TPR value should cause an exit
2105 * - no pending interrupts
2106 * -> We don't need to be explicitly notified. There are enough world switches for detecting pending interrupts.
2107 */
2108 rc = VMXWriteVMCS(VMX_VMCS_CTRL_TPR_THRESHOLD, (fPending) ? u8TPR : 0);
2109 AssertRC(rc);
2110
2111 /* Always sync back the TPR; we should optimize this though */ /** @todo optimize TPR sync. */
2112 fSyncTPR = true;
2113 }
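 /* In other words: the TPR threshold written above only matters when an interrupt is
  * pending but masked by the guest's current TPR; any CR8 write that drops the TPR
  * below that threshold then causes a VM-exit so the interrupt can be delivered.
  * With nothing pending the threshold is simply left at 0.
  */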
2114
2115#if defined(HWACCM_VTX_WITH_EPT) && defined(LOG_ENABLED)
2116 if ( pVM->hwaccm.s.fNestedPaging
2117# ifdef HWACCM_VTX_WITH_VPID
2118 || pVM->hwaccm.s.vmx.fVPID
2119# endif /* HWACCM_VTX_WITH_VPID */
2120 )
2121 {
2122 pCpu = HWACCMR0GetCurrentCpu();
2123 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
2124 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
2125 {
2126 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
2127 Log(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
2128 else
2129 Log(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
2130 }
2131 if (pCpu->fFlushTLB)
2132 Log(("Force TLB flush: first time cpu %d is used -> flush\n", pCpu->idCpu));
2133 else
2134 if (pVCpu->hwaccm.s.fForceTLBFlush)
2135 LogFlow(("Manual TLB flush\n"));
2136 }
2137#endif
2138#ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0
2139 PGMDynMapFlushAutoSet(pVCpu);
2140#endif
2141
2142 /*
2143 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
2144 * (until the actual world switch)
2145 */
2146#ifdef VBOX_STRICT
2147 idCpuCheck = RTMpCpuId();
2148#endif
2149#ifdef LOG_LOGGING
2150 VMMR0LogFlushDisable(pVCpu);
2151#endif
2152 /* Save the host state first. */
2153 rc = VMXR0SaveHostState(pVM, pVCpu);
2154 if (rc != VINF_SUCCESS)
2155 goto end;
2156 /* Load the guest state */
2157 rc = VMXR0LoadGuestState(pVM, pVCpu, pCtx);
2158 if (rc != VINF_SUCCESS)
2159 goto end;
2160
2161 /* Deal with tagged TLB setup and invalidation. */
2162 pVM->hwaccm.s.vmx.pfnSetupTaggedTLB(pVM, pVCpu);
2163
2164 /* Non-register state Guest Context */
2165 /** @todo change me according to cpu state */
2166 rc = VMXWriteVMCS(VMX_VMCS32_GUEST_ACTIVITY_STATE, VMX_CMS_GUEST_ACTIVITY_ACTIVE);
2167 AssertRC(rc);
2168
2169 STAM_STATS({ STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x); fStatEntryStarted = false; });
2170
2171 /* Manual save and restore:
2172 * - General purpose registers except RIP, RSP
2173 *
2174 * Trashed:
2175 * - CR2 (we don't care)
2176 * - LDTR (reset to 0)
2177 * - DRx (presumably not changed at all)
2178 * - DR7 (reset to 0x400)
2179 * - EFLAGS (reset to RT_BIT(1); not relevant)
2180 *
2181 */
2182
2183
2184 /* All done! Let's start VM execution. */
2185 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatInGC, z);
2186#ifdef VBOX_STRICT
2187 Assert(idCpuCheck == RTMpCpuId());
2188#endif
2189
2190#ifdef VBOX_WITH_CRASHDUMP_MAGIC
2191 pVCpu->hwaccm.s.vmx.VMCSCache.cResume = cResume;
2192 pVCpu->hwaccm.s.vmx.VMCSCache.u64TimeSwitch = RTTimeNanoTS();
2193#endif
2194
2195 TMNotifyStartOfExecution(pVCpu);
2196 rc = pVCpu->hwaccm.s.vmx.pfnStartVM(pVCpu->hwaccm.s.fResumeVM, pCtx, &pVCpu->hwaccm.s.vmx.VMCSCache, pVM, pVCpu);
2197 TMNotifyEndOfExecution(pVCpu);
2198
2199 AssertMsg(!pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries, ("pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries=%d\n", pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries));
2200
2201 /* In case we execute a goto ResumeExecution later on. */
2202 pVCpu->hwaccm.s.fResumeVM = true;
2203 pVCpu->hwaccm.s.fForceTLBFlush = false;
2204
2205 /*
2206 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2207 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
2208 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
2209 */
2210 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatInGC, z);
2211 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit1, v);
2212
2213 if (rc != VINF_SUCCESS)
2214 {
2215 VMXR0ReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
2216 goto end;
2217 }
2218
2219 /* Success. Query the guest state and figure out what has happened. */
2220
2221 /* Investigate why there was a VM-exit. */
2222 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
2223 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitReason & MASK_EXITREASON_STAT]);
2224
2225 exitReason &= 0xffff; /* bits 0-15 contain the exit code. */
2226 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
2227 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &cbInstr);
2228 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &intInfo);
2229 /* might not be valid; depends on VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID. */
2230 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE, &errCode);
2231 rc |= VMXReadCachedVMCS(VMX_VMCS32_RO_EXIT_INSTR_INFO, &instrInfo);
2232 rc |= VMXReadCachedVMCS(VMX_VMCS_RO_EXIT_QUALIFICATION, &exitQualification);
2233 AssertRC(rc);
2234
2235 /* Sync back the guest state */
2236 rc = VMXR0SaveGuestState(pVM, pVCpu, pCtx);
2237 AssertRC(rc);
2238
2239 /* Note! NOW IT'S SAFE FOR LOGGING! */
2240#ifdef LOG_LOGGING
2241 VMMR0LogFlushEnable(pVCpu);
2242#endif
2243 Log2(("Raw exit reason %08x\n", exitReason));
2244
2245 /* Check if an injected event was interrupted prematurely. */
2246 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_INFO, &val);
2247 AssertRC(rc);
2248 pVCpu->hwaccm.s.Event.intInfo = VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(val);
2249 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2250 /* Ignore 'int xx' as they'll be restarted anyway. */
2251 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SW
2252 /* Ignore software exceptions (such as int3) as they'll recur when we restart the instruction anyway. */
2253 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) != VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2254 {
2255 pVCpu->hwaccm.s.Event.fPending = true;
2256 /* Error code present? */
2257 if (VMX_EXIT_INTERRUPTION_INFO_ERROR_CODE_IS_VALID(pVCpu->hwaccm.s.Event.intInfo))
2258 {
2259 rc = VMXReadCachedVMCS(VMX_VMCS32_RO_IDT_ERRCODE, &val);
2260 AssertRC(rc);
2261 pVCpu->hwaccm.s.Event.errCode = val;
2262 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv pending error=%RX64\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification, val));
2263 }
2264 else
2265 {
2266 Log(("Pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2267 pVCpu->hwaccm.s.Event.errCode = 0;
2268 }
2269 }
2270#ifdef VBOX_STRICT
2271 else
2272 if ( VMX_EXIT_INTERRUPTION_INFO_VALID(pVCpu->hwaccm.s.Event.intInfo)
2273 /* Ignore software exceptions (such as int3) as they'll recur when we restart the instruction anyway. */
2274 && VMX_EXIT_INTERRUPTION_INFO_TYPE(pVCpu->hwaccm.s.Event.intInfo) == VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT)
2275 {
2276 Log(("Ignore pending inject %RX64 at %RGv exit=%08x intInfo=%08x exitQualification=%RGv\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitReason, intInfo, exitQualification));
2277 }
2278
2279 if (exitReason == VMX_EXIT_ERR_INVALID_GUEST_STATE)
2280 HWACCMDumpRegs(pVM, pVCpu, pCtx);
2281#endif
2282
2283 Log2(("E%d: New EIP=%RGv\n", exitReason, (RTGCPTR)pCtx->rip));
2284 Log2(("Exit reason %d, exitQualification %RGv\n", (uint32_t)exitReason, exitQualification));
2285 Log2(("instrInfo=%d instrError=%d instr length=%d\n", (uint32_t)instrInfo, (uint32_t)instrError, (uint32_t)cbInstr));
2286 Log2(("Interruption error code %d\n", (uint32_t)errCode));
2287 Log2(("IntInfo = %08x\n", (uint32_t)intInfo));
2288
2289 if (fSyncTPR)
2290 {
2291 rc = PDMApicSetTPR(pVM, pVM->hwaccm.s.vmx.pAPIC[0x80] >> 4);
2292 AssertRC(rc);
2293 }
2294
2295 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, v);
2296 STAM_STATS({ STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2, y); fStatExit2Started = true; });
2297
2298 /* Some cases don't need a complete resync of the guest CPU state; handle them here. */
2299 switch (exitReason)
2300 {
2301 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
2302 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
2303 {
2304 uint32_t vector = VMX_EXIT_INTERRUPTION_INFO_VECTOR(intInfo);
2305
2306 if (!VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
2307 {
2308 Assert(exitReason == VMX_EXIT_EXTERNAL_IRQ);
2309 /* External interrupt; leave to allow it to be dispatched again. */
2310 rc = VINF_EM_RAW_INTERRUPT;
2311 break;
2312 }
2313 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2314 switch (VMX_EXIT_INTERRUPTION_INFO_TYPE(intInfo))
2315 {
2316 case VMX_EXIT_INTERRUPTION_INFO_TYPE_NMI: /* Non-maskable interrupt. */
2317 /* External interrupt; leave to allow it to be dispatched again. */
2318 rc = VINF_EM_RAW_INTERRUPT;
2319 break;
2320
2321 case VMX_EXIT_INTERRUPTION_INFO_TYPE_EXT: /* External hardware interrupt. */
2322 AssertFailed(); /* can't come here; fails the first check. */
2323 break;
2324
2325 case VMX_EXIT_INTERRUPTION_INFO_TYPE_DBEXCPT: /* Unknown why we get this type for #DB */
2326 case VMX_EXIT_INTERRUPTION_INFO_TYPE_SWEXCPT: /* Software exception. (#BP or #OF) */
2327 Assert(vector == 1 || vector == 3 || vector == 4);
2328 /* no break */
2329 case VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT: /* Hardware exception. */
2330 Log2(("Hardware/software interrupt %d\n", vector));
2331 switch (vector)
2332 {
2333 case X86_XCPT_NM:
2334 {
2335 Log(("#NM fault at %RGv error code %x\n", (RTGCPTR)pCtx->rip, errCode));
2336
2337 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
2338 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
2339 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
2340 if (rc == VINF_SUCCESS)
2341 {
2342 Assert(CPUMIsGuestFPUStateActive(pVCpu));
2343
2344 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
2345
2346 /* Continue execution. */
2347 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2348
2349 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2350 goto ResumeExecution;
2351 }
2352
2353 Log(("Forward #NM fault to the guest\n"));
2354 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
2355 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, 0);
2356 AssertRC(rc);
2357 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2358 goto ResumeExecution;
2359 }
2360
2361 case X86_XCPT_PF: /* Page fault */
2362 {
2363#ifdef DEBUG
2364 if (pVM->hwaccm.s.fNestedPaging)
2365 { /* A genuine pagefault.
2366 * Forward the trap to the guest by injecting the exception and resuming execution.
2367 */
2368 Log(("Guest page fault at %RGv cr2=%RGv error code %x rsp=%RGv\n", (RTGCPTR)pCtx->rip, exitQualification, errCode, (RTGCPTR)pCtx->rsp));
2369
2370 Assert(CPUMIsGuestInPagedProtectedModeEx(pCtx));
2371
2372 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2373
2374 /* Now we must update CR2. */
2375 pCtx->cr2 = exitQualification;
2376 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2377 AssertRC(rc);
2378
2379 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2380 goto ResumeExecution;
2381 }
2382#endif
2383 Assert(!pVM->hwaccm.s.fNestedPaging);
2384
2385 Log2(("Page fault at %RGv error code %x\n", exitQualification, errCode));
2386 /* Exit qualification contains the linear address of the page fault. */
2387 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
2388 TRPMSetErrorCode(pVCpu, errCode);
2389 TRPMSetFaultAddress(pVCpu, exitQualification);
2390
2391 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
2392 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)exitQualification);
2393 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
2394 if (rc == VINF_SUCCESS)
2395 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2396 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, exitQualification ,errCode));
2397 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
2398
2399 TRPMResetTrap(pVCpu);
2400
2401 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2402 goto ResumeExecution;
2403 }
2404 else
2405 if (rc == VINF_EM_RAW_GUEST_TRAP)
2406 { /* A genuine pagefault.
2407 * Forward the trap to the guest by injecting the exception and resuming execution.
2408 */
2409 Log2(("Forward page fault to the guest\n"));
2410
2411 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
2412 /* The error code might have been changed. */
2413 errCode = TRPMGetErrorCode(pVCpu);
2414
2415 TRPMResetTrap(pVCpu);
2416
2417 /* Now we must update CR2. */
2418 pCtx->cr2 = exitQualification;
2419 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2420 AssertRC(rc);
2421
2422 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2423 goto ResumeExecution;
2424 }
2425#ifdef VBOX_STRICT
2426 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
2427 Log2(("PGMTrap0eHandler failed with %d\n", rc));
2428#endif
2429 /* Need to go back to the recompiler to emulate the instruction. */
2430 TRPMResetTrap(pVCpu);
2431 break;
2432 }
2433
2434 case X86_XCPT_MF: /* Floating point exception. */
2435 {
2436 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
2437 if (!(pCtx->cr0 & X86_CR0_NE))
2438 {
2439 /* old style FPU error reporting needs some extra work. */
2440 /** @todo don't fall back to the recompiler, but do it manually. */
2441 rc = VINF_EM_RAW_EMULATE_INSTR;
2442 break;
2443 }
2444 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
2445 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2446 AssertRC(rc);
2447
2448 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2449 goto ResumeExecution;
2450 }
2451
2452 case X86_XCPT_DB: /* Debug exception. */
2453 {
2454 uint64_t uDR6;
2455
2456 /* DR6, DR7.GD and IA32_DEBUGCTL.LBR are not updated yet.
2457 *
2458 * Exit qualification bits:
2459 * 3:0 B0-B3 which breakpoint condition was met
2460 * 12:4 Reserved (0)
2461 * 13 BD - debug register access detected
2462 * 14 BS - single step execution or branch taken
2463 * 63:15 Reserved (0)
2464 */
2465 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
2466
2467 /* Note that we don't support guest and host-initiated debugging at the same time. */
2468 Assert(DBGFIsStepping(pVM) || CPUMIsGuestInRealModeEx(pCtx));
2469
2470 uDR6 = X86_DR6_INIT_VAL;
2471 uDR6 |= (exitQualification & (X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3|X86_DR6_BD|X86_DR6_BS));
2472 rc = DBGFR0Trap01Handler(pVM, CPUMCTX2CORE(pCtx), uDR6);
2473 if (rc == VINF_EM_RAW_GUEST_TRAP)
2474 {
2475 /** @todo this isn't working, but we'll never get here normally. */
2476
2477 /* Update DR6 here. */
2478 pCtx->dr[6] = uDR6;
2479
2480 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2481 pCtx->dr[7] &= ~X86_DR7_GD;
2482
2483 /* Paranoia. */
2484 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2485 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2486 pCtx->dr[7] |= 0x400; /* must be one */
2487
2488 /* Resync DR7 */
2489 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
2490 AssertRC(rc);
2491
2492 Log(("Trap %x (debug) at %RGv exit qualification %RX64\n", vector, (RTGCPTR)pCtx->rip, exitQualification));
2493 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2494 AssertRC(rc);
2495
2496 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2497 goto ResumeExecution;
2498 }
2499 /* Return to ring 3 to deal with the debug exit code. */
2500 break;
2501 }
2502
2503#ifdef DEBUG /* till after branching, enable by default after that. */
2504 case X86_XCPT_BP: /* Breakpoint. */
2505 {
2506 rc = DBGFR0Trap03Handler(pVM, CPUMCTX2CORE(pCtx));
2507 if (rc == VINF_EM_RAW_GUEST_TRAP)
2508 {
2509 Log(("Guest #BP at %04x:%RGv\n", pCtx->cs, pCtx->rip));
2510 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2511 AssertRC(rc);
2512 goto ResumeExecution;
2513 }
2514 if (rc == VINF_SUCCESS)
2515 goto ResumeExecution;
2516 Log(("Debugger BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, rc));
2517 break;
2518 }
2519#endif
2520
2521 case X86_XCPT_GP: /* General protection fault exception. */
2522 {
2523 uint32_t cbOp;
2524 uint32_t cbSize;
2525 DISCPUSTATE Cpu;
2526
2527 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
2528#ifdef VBOX_STRICT
2529 if (!CPUMIsGuestInRealModeEx(pCtx))
2530 {
2531 Log(("Trap %x at %04X:%RGv errorCode=%x\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, errCode));
2532 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2533 AssertRC(rc);
2534 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2535 goto ResumeExecution;
2536 }
2537#endif
2538 Assert(CPUMIsGuestInRealModeEx(pCtx));
2539
2540 LogFlow(("Real mode X86_XCPT_GP instruction emulation at %RGv\n", (RTGCPTR)pCtx->rip));
2541
2542 rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), &Cpu, &cbOp);
2543 if (RT_SUCCESS(rc))
2544 {
2545 bool fUpdateRIP = true;
2546
2547 Assert(cbOp == Cpu.opsize);
2548 switch (Cpu.pCurInstr->opcode)
2549 {
2550 case OP_CLI:
2551 pCtx->eflags.Bits.u1IF = 0;
2552 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCli);
2553 break;
2554
2555 case OP_STI:
2556 pCtx->eflags.Bits.u1IF = 1;
2557 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitSti);
2558 break;
2559
2560 case OP_HLT:
2561 fUpdateRIP = false;
2562 rc = VINF_EM_HALT;
2563 pCtx->rip += Cpu.opsize;
2564 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
2565 break;
2566
2567 case OP_POPF:
2568 {
2569 RTGCPTR GCPtrStack;
2570 uint32_t cbParm;
2571 uint32_t uMask;
2572 X86EFLAGS eflags;
2573
2574 if (Cpu.prefix & PREFIX_OPSIZE)
2575 {
2576 cbParm = 4;
2577 uMask = 0xffffffff;
2578 }
2579 else
2580 {
2581 cbParm = 2;
2582 uMask = 0xffff;
2583 }
2584
2585 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
2586 if (RT_FAILURE(rc))
2587 {
2588 rc = VERR_EM_INTERPRETER;
2589 break;
2590 }
2591 eflags.u = 0;
2592 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
2593 if (RT_FAILURE(rc))
2594 {
2595 rc = VERR_EM_INTERPRETER;
2596 break;
2597 }
2598 LogFlow(("POPF %x -> %RGv mask=%x\n", eflags.u, pCtx->rsp, uMask));
2599 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (eflags.u & X86_EFL_POPF_BITS & uMask);
2600 /* RF cleared when popped in real mode; see pushf description in AMD manual. */
2601 pCtx->eflags.Bits.u1RF = 0;
2602 pCtx->esp += cbParm;
2603 pCtx->esp &= uMask;
2604
2605 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPopf);
2606 break;
2607 }
2608
2609 case OP_PUSHF:
2610 {
2611 RTGCPTR GCPtrStack;
2612 uint32_t cbParm;
2613 uint32_t uMask;
2614 X86EFLAGS eflags;
2615
2616 if (Cpu.prefix & PREFIX_OPSIZE)
2617 {
2618 cbParm = 4;
2619 uMask = 0xffffffff;
2620 }
2621 else
2622 {
2623 cbParm = 2;
2624 uMask = 0xffff;
2625 }
2626
2627 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), (pCtx->esp - cbParm) & uMask, 0, &GCPtrStack);
2628 if (RT_FAILURE(rc))
2629 {
2630 rc = VERR_EM_INTERPRETER;
2631 break;
2632 }
2633 eflags = pCtx->eflags;
2634 /* RF & VM cleared when pushed in real mode; see pushf description in AMD manual. */
2635 eflags.Bits.u1RF = 0;
2636 eflags.Bits.u1VM = 0;
2637
2638 rc = PGMPhysWrite(pVM, (RTGCPHYS)GCPtrStack, &eflags.u, cbParm);
2639 if (RT_FAILURE(rc))
2640 {
2641 rc = VERR_EM_INTERPRETER;
2642 break;
2643 }
2644 LogFlow(("PUSHF %x -> %RGv\n", eflags.u, GCPtrStack));
2645 pCtx->esp -= cbParm;
2646 pCtx->esp &= uMask;
2647 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPushf);
2648 break;
2649 }
2650
2651 case OP_IRET:
2652 {
2653 RTGCPTR GCPtrStack;
2654 uint32_t uMask = 0xffff;
2655 uint16_t aIretFrame[3];
2656
2657 if (Cpu.prefix & (PREFIX_OPSIZE | PREFIX_ADDRSIZE))
2658 {
2659 rc = VERR_EM_INTERPRETER;
2660 break;
2661 }
2662
2663 rc = SELMToFlatEx(pVM, DIS_SELREG_SS, CPUMCTX2CORE(pCtx), pCtx->esp & uMask, 0, &GCPtrStack);
2664 if (RT_FAILURE(rc))
2665 {
2666 rc = VERR_EM_INTERPRETER;
2667 break;
2668 }
2669 rc = PGMPhysRead(pVM, (RTGCPHYS)GCPtrStack, &aIretFrame[0], sizeof(aIretFrame));
2670 if (RT_FAILURE(rc))
2671 {
2672 rc = VERR_EM_INTERPRETER;
2673 break;
2674 }
2675 pCtx->ip = aIretFrame[0];
2676 pCtx->cs = aIretFrame[1];
2677 pCtx->csHid.u64Base = pCtx->cs << 4;
2678 pCtx->eflags.u = (pCtx->eflags.u & ~(X86_EFL_POPF_BITS & uMask)) | (aIretFrame[2] & X86_EFL_POPF_BITS & uMask);
2679 pCtx->sp += sizeof(aIretFrame);
2680
2681 LogFlow(("iret to %04x:%x\n", pCtx->cs, pCtx->ip));
2682 fUpdateRIP = false;
2683 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIret);
2684 break;
2685 }
2686
2687 case OP_INT:
2688 {
2689 RTGCUINTPTR intInfo;
2690
2691 LogFlow(("Realmode: INT %x\n", Cpu.param1.parval & 0xff));
2692 intInfo = Cpu.param1.parval & 0xff;
2693 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
2694 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
2695
2696 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, cbOp, 0);
2697 AssertRC(rc);
2698 fUpdateRIP = false;
2699 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
2700 break;
2701 }
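 /* For reference, the interruption-information word assembled here and in the
  * INTO/INT3 cases below (per the Intel SDM): bits 7:0 hold the vector, bits 10:8
  * the type (4 = software interrupt), bit 11 indicates a pushed error code and
  * bit 31 marks the field as valid; the VMX_EXIT_INTERRUPTION_INFO_* shifts used
  * above encode exactly that layout.
  */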
2702
2703 case OP_INTO:
2704 {
2705 if (pCtx->eflags.Bits.u1OF)
2706 {
2707 RTGCUINTPTR intInfo;
2708
2709 LogFlow(("Realmode: INTO\n"));
2710 intInfo = X86_XCPT_OF;
2711 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
2712 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
2713
2714 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, cbOp, 0);
2715 AssertRC(rc);
2716 fUpdateRIP = false;
2717 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
2718 }
2719 break;
2720 }
2721
2722 case OP_INT3:
2723 {
2724 RTGCUINTPTR intInfo;
2725
2726 LogFlow(("Realmode: INT 3\n"));
2727 intInfo = 3;
2728 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
2729 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_SW << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
2730
2731 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, intInfo, cbOp, 0);
2732 AssertRC(rc);
2733 fUpdateRIP = false;
2734 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInt);
2735 break;
2736 }
2737
2738 default:
2739 rc = EMInterpretInstructionCPU(pVM, pVCpu, &Cpu, CPUMCTX2CORE(pCtx), 0, &cbSize);
2740 break;
2741 }
2742
2743 if (rc == VINF_SUCCESS)
2744 {
2745 if (fUpdateRIP)
2746 pCtx->rip += cbOp; /* Move on to the next instruction. */
2747
2748 /* lidt and lgdt can end up here. In the future CRx changes may end up here as well. Just reload the whole context to be done with it. */
2749 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
2750
2751 /* Only resume if successful. */
2752 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2753 goto ResumeExecution;
2754 }
2755 }
2756 else
2757 rc = VERR_EM_INTERPRETER;
2758
2759 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_EM_HALT, ("Unexpected rc=%Rrc\n", rc));
2760 break;
2761 }
2762
2763#ifdef VBOX_STRICT
2764 case X86_XCPT_DE: /* Divide error. */
2765 case X86_XCPT_UD: /* Invalid opcode exception. */
2766 case X86_XCPT_SS: /* Stack segment exception. */
2767 case X86_XCPT_NP: /* Segment not present exception. */
2768 {
2769 switch(vector)
2770 {
2771 case X86_XCPT_DE:
2772 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
2773 break;
2774 case X86_XCPT_UD:
2775 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
2776 break;
2777 case X86_XCPT_SS:
2778 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
2779 break;
2780 case X86_XCPT_NP:
2781 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
2782 break;
2783 }
2784
2785 Log(("Trap %x at %04X:%RGv\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip));
2786 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2787 AssertRC(rc);
2788
2789 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2790 goto ResumeExecution;
2791 }
2792#endif
2793 default:
2794#ifdef HWACCM_VMX_EMULATE_REALMODE
2795 if (CPUMIsGuestInRealModeEx(pCtx))
2796 {
2797 Log(("Real Mode Trap %x at %04x:%04X error code %x\n", vector, pCtx->cs, pCtx->eip, errCode));
2798 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), cbInstr, errCode);
2799 AssertRC(rc);
2800
2801 /* Go back to ring 3 in case of a triple fault. */
2802 if ( vector == X86_XCPT_DF
2803 && rc == VINF_EM_RESET)
2804 break;
2805
2806 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2807 goto ResumeExecution;
2808 }
2809#endif
2810 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
2811 rc = VERR_VMX_UNEXPECTED_EXCEPTION;
2812 break;
2813 } /* switch (vector) */
2814
2815 break;
2816
2817 default:
2818 rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_CODE;
2819 AssertMsgFailed(("Unexpected interruption code %x\n", intInfo));
2820 break;
2821 }
2822
2823 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub3, y3);
2824 break;
2825 }
2826
2827 case VMX_EXIT_EPT_VIOLATION: /* 48 EPT violation. An attempt to access memory with a guest-physical address was disallowed by the configuration of the EPT paging structures. */
2828 {
2829 RTGCPHYS GCPhys;
2830
2831 Assert(pVM->hwaccm.s.fNestedPaging);
2832
2833 rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
2834 AssertRC(rc);
2835 Assert(((exitQualification >> 7) & 3) != 2);
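 /* About the assertion above: it looks at bits 7:8 of the exit qualification. Per the
  * Intel SDM bit 7 indicates that the guest linear-address field is valid and bit 8 is
  * only defined when bit 7 is set, so the combination 10b (bit 8 set without bit 7)
  * should never occur.
  */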
2836
2837 /* Determine the kind of violation. */
2838 errCode = 0;
2839 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_INSTR_FETCH)
2840 errCode |= X86_TRAP_PF_ID;
2841
2842 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_DATA_WRITE)
2843 errCode |= X86_TRAP_PF_RW;
2844
2845 /* If the page is present, then it's a page level protection fault. */
2846 if (exitQualification & VMX_EXIT_QUALIFICATION_EPT_ENTRY_PRESENT)
2847 errCode |= X86_TRAP_PF_P;
2848
2849 Log(("EPT Page fault %x at %RGp error code %x\n", (uint32_t)exitQualification, GCPhys, errCode));
2850
2851 /* GCPhys contains the guest physical address of the page fault. */
2852 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
2853 TRPMSetErrorCode(pVCpu, errCode);
2854 TRPMSetFaultAddress(pVCpu, GCPhys);
2855
2856 /* Handle the pagefault trap for the nested shadow table. */
2857 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, errCode, CPUMCTX2CORE(pCtx), GCPhys);
2858 Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, rc));
2859 if (rc == VINF_SUCCESS)
2860 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2861 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, exitQualification , errCode));
2862 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
2863
2864 TRPMResetTrap(pVCpu);
2865
2866 goto ResumeExecution;
2867 }
2868
2869#ifdef VBOX_STRICT
2870 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2871 LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", rc));
2872#endif
2873 /* Need to go back to the recompiler to emulate the instruction. */
2874 TRPMResetTrap(pVCpu);
2875 break;
2876 }
2877
2878 case VMX_EXIT_EPT_MISCONFIG:
2879 {
2880 RTGCPHYS GCPhys;
2881
2882 Assert(pVM->hwaccm.s.fNestedPaging);
2883
2884 rc = VMXReadVMCS64(VMX_VMCS_EXIT_PHYS_ADDR_FULL, &GCPhys);
2885 AssertRC(rc);
2886
2887 Log(("VMX_EXIT_EPT_MISCONFIG for %RGp\n", GCPhys));
2888 break;
2889 }
2890
2891 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
2892 /* Clear VM-exit on IF=1 change. */
2893 LogFlow(("VMX_EXIT_IRQ_WINDOW %RGv pending=%d IF=%d\n", (RTGCPTR)pCtx->rip, VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)), pCtx->eflags.Bits.u1IF));
2894 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_IRQ_WINDOW_EXIT;
2895 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
2896 AssertRC(rc);
2897 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIrqWindow);
2898 goto ResumeExecution; /* we check for pending guest interrupts there */
2899
2900 case VMX_EXIT_WBINVD: /* 54 Guest software attempted to execute WBINVD. (conditional) */
2901 case VMX_EXIT_INVD: /* 13 Guest software attempted to execute INVD. (unconditional) */
2902 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
2903 /* Skip instruction and continue directly. */
2904 pCtx->rip += cbInstr;
2905 /* Continue execution.*/
2906 goto ResumeExecution;
2907
2908 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
2909 {
2910 Log2(("VMX: Cpuid %x\n", pCtx->eax));
2911 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
2912 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2913 if (rc == VINF_SUCCESS)
2914 {
2915 /* Update EIP and continue execution. */
2916 Assert(cbInstr == 2);
2917 pCtx->rip += cbInstr;
2918 goto ResumeExecution;
2919 }
2920 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", rc));
2921 rc = VINF_EM_RAW_EMULATE_INSTR;
2922 break;
2923 }
2924
2925 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
2926 {
2927 Log2(("VMX: Rdpmc %x\n", pCtx->ecx));
2928 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
2929 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2930 if (rc == VINF_SUCCESS)
2931 {
2932 /* Update EIP and continue execution. */
2933 Assert(cbInstr == 2);
2934 pCtx->rip += cbInstr;
2935 goto ResumeExecution;
2936 }
2937 rc = VINF_EM_RAW_EMULATE_INSTR;
2938 break;
2939 }
2940
2941 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
2942 {
2943 Log2(("VMX: Rdtsc\n"));
2944 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
2945 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2946 if (rc == VINF_SUCCESS)
2947 {
2948 /* Update EIP and continue execution. */
2949 Assert(cbInstr == 2);
2950 pCtx->rip += cbInstr;
2951 goto ResumeExecution;
2952 }
2953 rc = VINF_EM_RAW_EMULATE_INSTR;
2954 break;
2955 }
2956
2957 case VMX_EXIT_INVPG: /* 14 Guest software attempted to execute INVLPG. */
2958 {
2959 Log2(("VMX: invlpg\n"));
2960 Assert(!pVM->hwaccm.s.fNestedPaging);
2961
2962 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
2963 rc = EMInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), exitQualification);
2964 if (rc == VINF_SUCCESS)
2965 {
2966 /* Update EIP and continue execution. */
2967 pCtx->rip += cbInstr;
2968 goto ResumeExecution;
2969 }
2970 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: invlpg %RGv failed with %Rrc\n", exitQualification, rc));
2971 break;
2972 }
2973
2974 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
2975 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
2976 {
2977 uint32_t cbSize;
2978
2979 /* Note: the Intel manual claims there's a REX version of RDMSR that's slightly different, so we play it safe by completely disassembling the instruction. */
2980 Log2(("VMX: %s\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr"));
2981 rc = EMInterpretInstruction(pVM, pVCpu, CPUMCTX2CORE(pCtx), 0, &cbSize);
2982 if (rc == VINF_SUCCESS)
2983 {
2984 /* EIP has been updated already. */
2985
2986 /* Only resume if successful. */
2987 goto ResumeExecution;
2988 }
2989 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (exitReason == VMX_EXIT_RDMSR) ? "rdmsr" : "wrmsr", rc));
2990 break;
2991 }
2992
2993 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
2994 {
2995 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
2996
2997 switch (VMX_EXIT_QUALIFICATION_CRX_ACCESS(exitQualification))
2998 {
2999 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_WRITE:
3000 Log2(("VMX: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)));
3001 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3002 rc = EMInterpretCRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3003 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification),
3004 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification));
3005
3006 switch (VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification))
3007 {
3008 case 0:
3009 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0 | HWACCM_CHANGED_GUEST_CR3;
3010 break;
3011 case 2:
3012 break;
3013 case 3:
3014 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx));
3015 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
3016 break;
3017 case 4:
3018 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
3019 break;
3020 case 8:
3021 /* CR8 contains the APIC TPR */
3022 Assert(!(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3023 break;
3024
3025 default:
3026 AssertFailed();
3027 break;
3028 }
3029 /* Check if a sync operation is pending. */
3030 if ( rc == VINF_SUCCESS /* don't bother if we are going to ring 3 anyway */
3031 && VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
3032 {
3033 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
3034 AssertRC(rc);
3035 }
3036 break;
3037
3038 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_READ:
3039 Log2(("VMX: mov x, crx\n"));
3040 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification)]);
3041
3042 Assert(!pVM->hwaccm.s.fNestedPaging || !CPUMIsGuestInPagedProtectedModeEx(pCtx) || VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != USE_REG_CR3);
3043
3044 /* CR8 reads only cause an exit when the TPR shadow feature isn't present. */
3045 Assert(VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification) != 8 || !(pVM->hwaccm.s.vmx.msr.vmx_proc_ctls.n.allowed1 & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_USE_TPR_SHADOW));
3046
3047 rc = EMInterpretCRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3048 VMX_EXIT_QUALIFICATION_CRX_GENREG(exitQualification),
3049 VMX_EXIT_QUALIFICATION_CRX_REGISTER(exitQualification));
3050 break;
3051
3052 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_CLTS:
3053 Log2(("VMX: clts\n"));
3054 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCLTS);
3055 rc = EMInterpretCLTS(pVM, pVCpu);
3056 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3057 break;
3058
3059 case VMX_EXIT_QUALIFICATION_CRX_ACCESS_LMSW:
3060 Log2(("VMX: lmsw %x\n", VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification)));
3061 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitLMSW);
3062 rc = EMInterpretLMSW(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUALIFICATION_CRX_LMSW_DATA(exitQualification));
3063 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
3064 break;
3065 }
3066
3067 /* Update EIP if no error occurred. */
3068 if (RT_SUCCESS(rc))
3069 pCtx->rip += cbInstr;
3070
3071 if (rc == VINF_SUCCESS)
3072 {
3073 /* Only resume if successful. */
3074 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3075 goto ResumeExecution;
3076 }
3077 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
3078 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub2, y2);
3079 break;
3080 }
3081
3082 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
3083 {
3084 if (!DBGFIsStepping(pVM))
3085 {
3086 /* Disable drx move intercepts. */
3087 pVCpu->hwaccm.s.vmx.proc_ctls &= ~VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3088 rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3089 AssertRC(rc);
3090
3091 /* Save the host and load the guest debug state. */
3092 rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
3093 AssertRC(rc);
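                /* Note: from here on the guest owns the debug registers; with the MOV DR intercept
                 * cleared above it can access them without causing exits. VMXR0Leave() saves the
                 * guest debug state and re-enables the intercept when the session is left.
                 */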
3094
3095#ifdef VBOX_WITH_STATISTICS
3096 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
3097 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3098 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3099 else
3100 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3101#endif
3102
3103 goto ResumeExecution;
3104 }
3105
3106 /** @todo clear VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT after the first time and restore drx registers afterwards */
3107 if (VMX_EXIT_QUALIFICATION_DRX_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_DRX_DIRECTION_WRITE)
3108 {
3109 Log2(("VMX: mov drx%d, genreg%d\n", VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification), VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification)));
3110 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
3111 rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3112 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification),
3113 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification));
3114 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
3115 Log2(("DR7=%08x\n", pCtx->dr[7]));
3116 }
3117 else
3118 {
3119 Log2(("VMX: mov x, drx\n"));
3120 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
3121 rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx),
3122 VMX_EXIT_QUALIFICATION_DRX_GENREG(exitQualification),
3123 VMX_EXIT_QUALIFICATION_DRX_REGISTER(exitQualification));
3124 }
3125 /* Update EIP if no error occurred. */
3126 if (RT_SUCCESS(rc))
3127 pCtx->rip += cbInstr;
3128
3129 if (rc == VINF_SUCCESS)
3130 {
3131 /* Only resume if successful. */
3132 goto ResumeExecution;
3133 }
3134 Assert(rc == VERR_EM_INTERPRETER);
3135 break;
3136 }
3137
3138 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
3139 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
3140 {
3141 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3142 uint32_t uIOWidth = VMX_EXIT_QUALIFICATION_IO_WIDTH(exitQualification);
3143 uint32_t uPort;
3144 bool fIOWrite = (VMX_EXIT_QUALIFICATION_IO_DIRECTION(exitQualification) == VMX_EXIT_QUALIFICATION_IO_DIRECTION_OUT);
3145
3146 /** @todo necessary to make the distinction? */
3147 if (VMX_EXIT_QUALIFICATION_IO_ENCODING(exitQualification) == VMX_EXIT_QUALIFICATION_IO_ENCODING_DX)
3148 {
3149 uPort = pCtx->edx & 0xffff;
3150 }
3151 else
3152 uPort = VMX_EXIT_QUALIFICATION_IO_PORT(exitQualification); /* Immediate encoding. */
3153
3154 /* paranoia */
3155 if (RT_UNLIKELY(uIOWidth == 2 || uIOWidth >= 4))
3156 {
3157 rc = fIOWrite ? VINF_IOM_HC_IOPORT_WRITE : VINF_IOM_HC_IOPORT_READ;
3158 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3159 break;
3160 }
3161
3162 uint32_t cbSize = g_aIOSize[uIOWidth];
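            /* The exit qualification encodes the access size as 0 (1 byte), 1 (2 bytes) or 3 (4 bytes);
             * g_aIOSize translates that into a byte count. Encoding 2 is invalid and was rejected by
             * the paranoia check above.
             */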
3163
3164 if (VMX_EXIT_QUALIFICATION_IO_STRING(exitQualification))
3165 {
3166 /* ins/outs */
3167 DISCPUSTATE Cpu;
3168
3169 /* Disassemble manually to deal with segment prefixes. */
3170 /** @todo VMX_VMCS_EXIT_GUEST_LINEAR_ADDR contains the flat pointer operand of the instruction. */
3171 /** @todo VMX_VMCS32_RO_EXIT_INSTR_INFO also contains segment prefix info. */
3172 rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), &Cpu, NULL);
3173 if (rc == VINF_SUCCESS)
3174 {
3175 if (fIOWrite)
3176 {
3177 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
3178 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
3179 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), uPort, Cpu.prefix, cbSize);
3180 }
3181 else
3182 {
3183 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, uPort, cbSize));
3184 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
3185 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), uPort, Cpu.prefix, cbSize);
3186 }
3187 }
3188 else
3189 rc = VINF_EM_RAW_EMULATE_INSTR;
3190 }
3191 else
3192 {
3193 /* normal in/out */
3194 uint32_t uAndVal = g_aIOOpAnd[uIOWidth];
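                /* g_aIOOpAnd supplies the operand mask for the access size (0xff, 0xffff or 0xffffffff):
                 * OUT uses only the low part of eax, and IN results are merged back without disturbing
                 * the upper bits of eax.
                 */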
3195
3196 Assert(!VMX_EXIT_QUALIFICATION_IO_REP(exitQualification));
3197
3198 if (fIOWrite)
3199 {
3200 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
3201 rc = IOMIOPortWrite(pVM, uPort, pCtx->eax & uAndVal, cbSize);
3202 }
3203 else
3204 {
3205 uint32_t u32Val = 0;
3206
3207 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
3208 rc = IOMIOPortRead(pVM, uPort, &u32Val, cbSize);
3209 if (IOM_SUCCESS(rc))
3210 {
3211 /* Write back to the EAX register. */
3212 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
3213 }
3214 }
3215 }
3216 /*
3217                 * Handle the I/O return codes.
3218 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
3219 */
3220 if (IOM_SUCCESS(rc))
3221 {
3222 /* Update EIP and continue execution. */
3223 pCtx->rip += cbInstr;
3224 if (RT_LIKELY(rc == VINF_SUCCESS))
3225 {
3226 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
3227 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
3228 {
3229 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
3230 for (unsigned i=0;i<4;i++)
3231 {
3232 unsigned uBPLen = g_aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
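                        /* DR7's LEN field uses the same encoding as the I/O size table (0=1, 1=2, 3=4 bytes).
                         * The breakpoint matches if the accessed port falls inside its range, it is enabled
                         * (L or G bit set) and its R/W bits select an I/O breakpoint.
                         */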
3233
3234 if ( (uPort >= pCtx->dr[i] && uPort < pCtx->dr[i] + uBPLen)
3235 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
3236 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
3237 {
3238 uint64_t uDR6;
3239
3240 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3241
3242 uDR6 = ASMGetDR6();
3243
3244 /* Clear all breakpoint status flags and set the one we just hit. */
3245 uDR6 &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
3246 uDR6 |= (uint64_t)RT_BIT(i);
3247
3248 /* Note: AMD64 Architecture Programmer's Manual 13.1:
3249                             * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared by software after
3250 * the contents have been read.
3251 */
3252 ASMSetDR6(uDR6);
3253
3254 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
3255 pCtx->dr[7] &= ~X86_DR7_GD;
3256
3257 /* Paranoia. */
3258 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
3259 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
3260 pCtx->dr[7] |= 0x400; /* must be one */
3261
3262 /* Resync DR7 */
3263 rc = VMXWriteVMCS64(VMX_VMCS64_GUEST_DR7, pCtx->dr[7]);
3264 AssertRC(rc);
3265
3266 /* Construct inject info. */
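                            /* The VM-entry interruption-information format: vector in bits 7:0, event type
                             * in bits 10:8 (hardware exception here) and the valid flag in bit 31.
                             */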
3267 intInfo = X86_XCPT_DB;
3268 intInfo |= (1 << VMX_EXIT_INTERRUPTION_INFO_VALID_SHIFT);
3269 intInfo |= (VMX_EXIT_INTERRUPTION_INFO_TYPE_HWEXCPT << VMX_EXIT_INTERRUPTION_INFO_TYPE_SHIFT);
3270
3271 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
3272 rc = VMXR0InjectEvent(pVM, pVCpu, pCtx, VMX_VMCS_CTRL_ENTRY_IRQ_INFO_FROM_EXIT_INT_INFO(intInfo), 0, 0);
3273 AssertRC(rc);
3274
3275 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3276 goto ResumeExecution;
3277 }
3278 }
3279 }
3280
3281 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3282 goto ResumeExecution;
3283 }
3284 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3285 break;
3286 }
3287
3288#ifdef VBOX_STRICT
3289 if (rc == VINF_IOM_HC_IOPORT_READ)
3290 Assert(!fIOWrite);
3291 else if (rc == VINF_IOM_HC_IOPORT_WRITE)
3292 Assert(fIOWrite);
3293 else
3294 AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc));
3295#endif
3296 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2Sub1, y1);
3297 break;
3298 }
3299
3300 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
3301 LogFlow(("VMX_EXIT_TPR\n"));
3302 /* RIP is already set to the next instruction and the TPR has been synced back. Just resume. */
3303 goto ResumeExecution;
3304
3305 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
3306 goto ResumeExecution;
3307
3308 default:
3309 /* The rest is handled after syncing the entire CPU state. */
3310 break;
3311 }
3312
3313 /* Note: the guest state isn't entirely synced back at this stage. */
3314
3315 /* Investigate why there was a VM-exit. (part 2) */
3316 switch (exitReason)
3317 {
3318 case VMX_EXIT_EXCEPTION: /* 0 Exception or non-maskable interrupt (NMI). */
3319 case VMX_EXIT_EXTERNAL_IRQ: /* 1 External interrupt. */
3320 case VMX_EXIT_EPT_VIOLATION:
3321 case VMX_EXIT_PREEMPTION_TIMER: /* 52 VMX-preemption timer expired. The preemption timer counted down to zero. */
3322 /* Already handled above. */
3323 break;
3324
3325 case VMX_EXIT_TRIPLE_FAULT: /* 2 Triple fault. */
3326 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
3327 break;
3328
3329 case VMX_EXIT_INIT_SIGNAL: /* 3 INIT signal. */
3330 case VMX_EXIT_SIPI: /* 4 Start-up IPI (SIPI). */
3331 rc = VINF_EM_RAW_INTERRUPT;
3332 AssertFailed(); /* Can't happen. Yet. */
3333 break;
3334
3335 case VMX_EXIT_IO_SMI_IRQ: /* 5 I/O system-management interrupt (SMI). */
3336 case VMX_EXIT_SMI_IRQ: /* 6 Other SMI. */
3337 rc = VINF_EM_RAW_INTERRUPT;
3338 AssertFailed(); /* Can't happen afaik. */
3339 break;
3340
3341 case VMX_EXIT_TASK_SWITCH: /* 9 Task switch. */
3342 rc = VERR_EM_INTERPRETER;
3343 break;
3344
3345 case VMX_EXIT_HLT: /* 12 Guest software attempted to execute HLT. */
3346 /** Check if external interrupts are pending; if so, don't switch back. */
3347 pCtx->rip++; /* skip hlt */
3348 if ( pCtx->eflags.Bits.u1IF
3349 && VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
3350 goto ResumeExecution;
3351
3352 rc = VINF_EM_HALT;
3353 break;
3354
3355 case VMX_EXIT_RSM: /* 17 Guest software attempted to execute RSM in SMM. */
3356 AssertFailed(); /* can't happen. */
3357 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
3358 break;
3359
3360 case VMX_EXIT_VMCALL: /* 18 Guest software executed VMCALL. */
3361 case VMX_EXIT_VMCLEAR: /* 19 Guest software executed VMCLEAR. */
3362 case VMX_EXIT_VMLAUNCH: /* 20 Guest software executed VMLAUNCH. */
3363 case VMX_EXIT_VMPTRLD: /* 21 Guest software executed VMPTRLD. */
3364 case VMX_EXIT_VMPTRST: /* 22 Guest software executed VMPTRST. */
3365 case VMX_EXIT_VMREAD: /* 23 Guest software executed VMREAD. */
3366 case VMX_EXIT_VMRESUME: /* 24 Guest software executed VMRESUME. */
3367 case VMX_EXIT_VMWRITE: /* 25 Guest software executed VMWRITE. */
3368 case VMX_EXIT_VMXOFF: /* 26 Guest software executed VMXOFF. */
3369 case VMX_EXIT_VMXON: /* 27 Guest software executed VMXON. */
3370 /** @todo inject #UD immediately */
3371 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
3372 break;
3373
3374 case VMX_EXIT_CPUID: /* 10 Guest software attempted to execute CPUID. */
3375 case VMX_EXIT_RDTSC: /* 16 Guest software attempted to execute RDTSC. */
3376        case VMX_EXIT_INVPG:                /* 14 Guest software attempted to execute INVLPG. */
3377 case VMX_EXIT_CRX_MOVE: /* 28 Control-register accesses. */
3378 case VMX_EXIT_DRX_MOVE: /* 29 Debug-register accesses. */
3379 case VMX_EXIT_PORT_IO: /* 30 I/O instruction. */
3380 case VMX_EXIT_RDPMC: /* 15 Guest software attempted to execute RDPMC. */
3381 /* already handled above */
3382 AssertMsg( rc == VINF_PGM_CHANGE_MODE
3383 || rc == VINF_EM_RAW_INTERRUPT
3384 || rc == VERR_EM_INTERPRETER
3385 || rc == VINF_EM_RAW_EMULATE_INSTR
3386 || rc == VINF_PGM_SYNC_CR3
3387 || rc == VINF_IOM_HC_IOPORT_READ
3388 || rc == VINF_IOM_HC_IOPORT_WRITE
3389 || rc == VINF_EM_RAW_GUEST_TRAP
3390 || rc == VINF_TRPM_XCPT_DISPATCHED
3391 || rc == VINF_EM_RESCHEDULE_REM,
3392 ("rc = %d\n", rc));
3393 break;
3394
3395 case VMX_EXIT_TPR: /* 43 TPR below threshold. Guest software executed MOV to CR8. */
3396 case VMX_EXIT_RDMSR: /* 31 RDMSR. Guest software attempted to execute RDMSR. */
3397 case VMX_EXIT_WRMSR: /* 32 WRMSR. Guest software attempted to execute WRMSR. */
3398 /* Note: If we decide to emulate them here, then we must sync the MSRs that could have been changed (sysenter, fs/gs base)!!! */
3399 rc = VERR_EM_INTERPRETER;
3400 break;
3401
3402 case VMX_EXIT_MWAIT: /* 36 Guest software executed MWAIT. */
3403 case VMX_EXIT_MONITOR: /* 39 Guest software attempted to execute MONITOR. */
3404 case VMX_EXIT_PAUSE: /* 40 Guest software attempted to execute PAUSE. */
3405 rc = VINF_EM_RAW_EXCEPTION_PRIVILEGED;
3406 break;
3407
3408 case VMX_EXIT_IRQ_WINDOW: /* 7 Interrupt window. */
3409 Assert(rc == VINF_EM_RAW_INTERRUPT);
3410 break;
3411
3412 case VMX_EXIT_ERR_INVALID_GUEST_STATE: /* 33 VM-entry failure due to invalid guest state. */
3413 {
3414#ifdef VBOX_STRICT
3415 RTCCUINTREG val = 0;
3416
3417 Log(("VMX_EXIT_ERR_INVALID_GUEST_STATE\n"));
3418
3419 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
3420 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
3421
3422 VMXReadVMCS(VMX_VMCS64_GUEST_CR0, &val);
3423 Log(("VMX_VMCS_GUEST_CR0 %RX64\n", (uint64_t)val));
3424
3425 VMXReadVMCS(VMX_VMCS64_GUEST_CR3, &val);
3426 Log(("VMX_VMCS_GUEST_CR3 %RX64\n", (uint64_t)val));
3427
3428 VMXReadVMCS(VMX_VMCS64_GUEST_CR4, &val);
3429 Log(("VMX_VMCS_GUEST_CR4 %RX64\n", (uint64_t)val));
3430
3431 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
3432 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
3433
3434 VMX_LOG_SELREG(CS, "CS");
3435 VMX_LOG_SELREG(DS, "DS");
3436 VMX_LOG_SELREG(ES, "ES");
3437 VMX_LOG_SELREG(FS, "FS");
3438 VMX_LOG_SELREG(GS, "GS");
3439 VMX_LOG_SELREG(SS, "SS");
3440 VMX_LOG_SELREG(TR, "TR");
3441 VMX_LOG_SELREG(LDTR, "LDTR");
3442
3443 VMXReadVMCS(VMX_VMCS64_GUEST_GDTR_BASE, &val);
3444 Log(("VMX_VMCS_GUEST_GDTR_BASE %RX64\n", (uint64_t)val));
3445 VMXReadVMCS(VMX_VMCS64_GUEST_IDTR_BASE, &val);
3446 Log(("VMX_VMCS_GUEST_IDTR_BASE %RX64\n", (uint64_t)val));
3447#endif /* VBOX_STRICT */
3448 rc = VERR_VMX_INVALID_GUEST_STATE;
3449 break;
3450 }
3451
3452 case VMX_EXIT_ERR_MSR_LOAD: /* 34 VM-entry failure due to MSR loading. */
3453 case VMX_EXIT_ERR_MACHINE_CHECK: /* 41 VM-entry failure due to machine-check. */
3454 default:
3455 rc = VERR_VMX_UNEXPECTED_EXIT_CODE;
3456 AssertMsgFailed(("Unexpected exit code %d\n", exitReason)); /* Can't happen. */
3457 break;
3458
3459 }
3460end:
3461
3462 /* Signal changes for the recompiler. */
3463 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
3464
3465 /* If we executed vmlaunch/vmresume and an external irq was pending, then we don't have to do a full sync the next time. */
3466 if ( exitReason == VMX_EXIT_EXTERNAL_IRQ
3467 && !VMX_EXIT_INTERRUPTION_INFO_VALID(intInfo))
3468 {
3469 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
3470 /* On the next entry we'll only sync the host context. */
3471 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
3472 }
3473 else
3474 {
3475 /* On the next entry we'll sync everything. */
3476 /** @todo we can do better than this */
3477 /* Not in the VINF_PGM_CHANGE_MODE though! */
3478 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
3479 }
3480
3481 /* translate into a less severe return code */
3482 if (rc == VERR_EM_INTERPRETER)
3483 rc = VINF_EM_RAW_EMULATE_INSTR;
3484 else
3485 /* Try to extract more information about what might have gone wrong here. */
3486 if (rc == VERR_VMX_INVALID_VMCS_PTR)
3487 {
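        /* The first 32 bits of a VMCS region hold the VMCS revision identifier, which is what gets
         * recorded below; the entered/current CPU ids make it easy to spot a VCPU that was migrated
         * to another host CPU.
         */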
3488 VMXGetActivateVMCS(&pVCpu->hwaccm.s.vmx.lasterror.u64VMCSPhys);
3489 pVCpu->hwaccm.s.vmx.lasterror.ulVMCSRevision = *(uint32_t *)pVCpu->hwaccm.s.vmx.pVMCS;
3490 pVCpu->hwaccm.s.vmx.lasterror.idEnteredCpu = pVCpu->hwaccm.s.idEnteredCpu;
3491 pVCpu->hwaccm.s.vmx.lasterror.idCurrentCpu = RTMpCpuId();
3492 }
3493
3494 STAM_STATS({
3495 if (fStatExit2Started) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, y);
3496 else if (fStatEntryStarted) STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
3497 });
3498 Log2(("X"));
3499 return rc;
3500}
3501
3502
3503/**
3504 * Enters the VT-x session
3505 *
3506 * @returns VBox status code.
3507 * @param pVM The VM to operate on.
3508 * @param pVCpu The VMCPU to operate on.
3509 * @param pCpu CPU info struct
3510 */
3511VMMR0DECL(int) VMXR0Enter(PVM pVM, PVMCPU pVCpu, PHWACCM_CPUINFO pCpu)
3512{
3513 Assert(pVM->hwaccm.s.vmx.fSupported);
3514
3515 unsigned cr4 = ASMGetCR4();
3516 if (!(cr4 & X86_CR4_VMXE))
3517 {
3518 AssertMsgFailed(("X86_CR4_VMXE should be set!\n"));
3519 return VERR_VMX_X86_CR4_VMXE_CLEARED;
3520 }
3521
3522 /* Activate the VM Control Structure. */
3523 int rc = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3524 if (RT_FAILURE(rc))
3525 return rc;
3526
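    /* Setting fResumeVM to false presumably makes the next world switch use VMLAUNCH instead of
     * VMRESUME; a VMCS whose launch state was cleared (VMXR0Leave does a VMCLEAR) must be
     * launched before it can be resumed.
     */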
3527 pVCpu->hwaccm.s.fResumeVM = false;
3528 return VINF_SUCCESS;
3529}
3530
3531
3532/**
3533 * Leaves the VT-x session
3534 *
3535 * @returns VBox status code.
3536 * @param pVM The VM to operate on.
3537 * @param pVCpu The VMCPU to operate on.
3538 * @param pCtx CPU context
3539 */
3540VMMR0DECL(int) VMXR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
3541{
3542 Assert(pVM->hwaccm.s.vmx.fSupported);
3543
3544 /* Save the guest debug state if necessary. */
3545 if (CPUMIsGuestDebugStateActive(pVCpu))
3546 {
3547 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
3548
3549 /* Enable drx move intercepts again. */
3550 pVCpu->hwaccm.s.vmx.proc_ctls |= VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT;
3551 int rc = VMXWriteVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, pVCpu->hwaccm.s.vmx.proc_ctls);
3552 AssertRC(rc);
3553
3554 /* Resync the debug registers the next time. */
3555 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
3556 }
3557 else
3558 Assert(pVCpu->hwaccm.s.vmx.proc_ctls & VMX_VMCS_CTRL_PROC_EXEC_CONTROLS_MOV_DR_EXIT);
3559
3560 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
3561 int rc = VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3562 AssertRC(rc);
3563
3564 return VINF_SUCCESS;
3565}
3566
3567/**
3568 * Flush the TLB (EPT)
3569 *
3571 * @param pVM The VM to operate on.
3572 * @param pVCpu The VM CPU to operate on.
3573 * @param enmFlush Type of flush
3574 * @param GCPhys Physical address of the page to flush
3575 */
3576static void vmxR0FlushEPT(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPHYS GCPhys)
3577{
3578 uint64_t descriptor[2];
3579
3580 LogFlow(("vmxR0FlushEPT %d %RGv\n", enmFlush, GCPhys));
3581 Assert(pVM->hwaccm.s.fNestedPaging);
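    /* INVEPT takes a 128-bit descriptor: the first quadword holds the EPT pointer identifying
     * which EPT context to flush; the guest-physical address is passed along in the second
     * quadword.
     */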
3582 descriptor[0] = pVCpu->hwaccm.s.vmx.GCPhysEPTP;
3583 descriptor[1] = GCPhys;
3584 int rc = VMXR0InvEPT(enmFlush, &descriptor[0]);
3585 AssertRC(rc);
3586}
3587
3588#ifdef HWACCM_VTX_WITH_VPID
3589/**
3590 * Flush the TLB (VPID)
3591 *
3593 * @param pVM The VM to operate on.
3594 * @param pVCpu The VM CPU to operate on.
3595 * @param enmFlush Type of flush
3596 * @param GCPtr Virtual address of the page to flush
3597 */
3598static void vmxR0FlushVPID(PVM pVM, PVMCPU pVCpu, VMX_FLUSH enmFlush, RTGCPTR GCPtr)
3599{
3600#if HC_ARCH_BITS == 32
3601    /* If we get a flush in 64-bit guest mode, then force a full TLB flush. INVVPID probably takes only 32-bit addresses. (@todo) */
3602 if ( CPUMIsGuestInLongMode(pVCpu)
3603 && !VMX_IS_64BIT_HOST_MODE())
3604 {
3605 pVCpu->hwaccm.s.fForceTLBFlush = true;
3606 }
3607 else
3608#endif
3609 {
3610 uint64_t descriptor[2];
3611
3612 Assert(pVM->hwaccm.s.vmx.fVPID);
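        /* INVVPID descriptor layout: the VPID in bits 15:0 of the first quadword (remaining bits
         * must be zero) and the linear address in the second quadword, used by the
         * individual-address flush type.
         */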
3613 descriptor[0] = pVCpu->hwaccm.s.uCurrentASID;
3614 descriptor[1] = GCPtr;
3615 int rc = VMXR0InvVPID(enmFlush, &descriptor[0]);
3616 AssertRC(rc);
3617 }
3618}
3619#endif /* HWACCM_VTX_WITH_VPID */
3620
3621/**
3622 * Invalidates a guest page
3623 *
3624 * @returns VBox status code.
3625 * @param pVM The VM to operate on.
3626 * @param pVCpu The VM CPU to operate on.
3627 * @param GCVirt Page to invalidate
3628 */
3629VMMR0DECL(int) VMXR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
3630{
3631 bool fFlushPending = pVCpu->hwaccm.s.fForceTLBFlush;
3632
3633 Log2(("VMXR0InvalidatePage %RGv\n", GCVirt));
3634
3635 /* Only relevant if we want to use VPID.
3636 * In the nested paging case we still see such calls, but
3637 * can safely ignore them. (e.g. after cr3 updates)
3638 */
3639#ifdef HWACCM_VTX_WITH_VPID
3640 /* Skip it if a TLB flush is already pending. */
3641 if ( !fFlushPending
3642 && pVM->hwaccm.s.vmx.fVPID)
3643 vmxR0FlushVPID(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCVirt);
3644#endif /* HWACCM_VTX_WITH_VPID */
3645
3646 return VINF_SUCCESS;
3647}
3648
3649/**
3650 * Invalidates a guest page by physical address
3651 *
3652 * NOTE: Assumes the current instruction references this physical page through a virtual address!!
3653 *
3654 * @returns VBox status code.
3655 * @param pVM The VM to operate on.
3656 * @param pVCpu The VM CPU to operate on.
3657 * @param GCPhys Page to invalidate
3658 */
3659VMMR0DECL(int) VMXR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
3660{
3661 bool fFlushPending = pVCpu->hwaccm.s.fForceTLBFlush;
3662
3663 Assert(pVM->hwaccm.s.fNestedPaging);
3664
3665 LogFlow(("VMXR0InvalidatePhysPage %RGp\n", GCPhys));
3666
3667 /* Skip it if a TLB flush is already pending. */
3668 if (!fFlushPending)
3669 vmxR0FlushEPT(pVM, pVCpu, pVM->hwaccm.s.vmx.enmFlushPage, GCPhys);
3670
3671 return VINF_SUCCESS;
3672}
3673
3674/**
3675 * Report world switch error and dump some useful debug info
3676 *
3677 * @param pVM The VM to operate on.
3678 * @param pVCpu The VMCPU to operate on.
3679 * @param rc Return code
3680 * @param pCtx Current CPU context (not updated)
3681 */
3682static void VMXR0ReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rc, PCPUMCTX pCtx)
3683{
3684 switch (rc)
3685 {
3686 case VERR_VMX_INVALID_VMXON_PTR:
3687 AssertFailed();
3688 break;
3689
3690 case VERR_VMX_UNABLE_TO_START_VM:
3691 case VERR_VMX_UNABLE_TO_RESUME_VM:
3692 {
3693 int rc;
3694 RTCCUINTREG exitReason, instrError;
3695
3696 rc = VMXReadVMCS(VMX_VMCS32_RO_EXIT_REASON, &exitReason);
3697 rc |= VMXReadVMCS(VMX_VMCS32_RO_VM_INSTR_ERROR, &instrError);
3698 AssertRC(rc);
3699 if (rc == VINF_SUCCESS)
3700 {
3701 Log(("Unable to start/resume VM for reason: %x. Instruction error %x\n", (uint32_t)exitReason, (uint32_t)instrError));
3702 Log(("Current stack %08x\n", &rc));
3703
3704 pVCpu->hwaccm.s.vmx.lasterror.ulInstrError = instrError;
3705 pVCpu->hwaccm.s.vmx.lasterror.ulExitReason = exitReason;
3706
3707#ifdef VBOX_STRICT
3708 RTGDTR gdtr;
3709 PX86DESCHC pDesc;
3710 RTCCUINTREG val;
3711
3712 ASMGetGDTR(&gdtr);
3713
3714 VMXReadVMCS(VMX_VMCS64_GUEST_RIP, &val);
3715 Log(("Old eip %RGv new %RGv\n", (RTGCPTR)pCtx->rip, (RTGCPTR)val));
3716 VMXReadVMCS(VMX_VMCS_CTRL_PIN_EXEC_CONTROLS, &val);
3717 Log(("VMX_VMCS_CTRL_PIN_EXEC_CONTROLS %08x\n", val));
3718 VMXReadVMCS(VMX_VMCS_CTRL_PROC_EXEC_CONTROLS, &val);
3719 Log(("VMX_VMCS_CTRL_PROC_EXEC_CONTROLS %08x\n", val));
3720 VMXReadVMCS(VMX_VMCS_CTRL_ENTRY_CONTROLS, &val);
3721 Log(("VMX_VMCS_CTRL_ENTRY_CONTROLS %08x\n", val));
3722 VMXReadVMCS(VMX_VMCS_CTRL_EXIT_CONTROLS, &val);
3723 Log(("VMX_VMCS_CTRL_EXIT_CONTROLS %08x\n", val));
3724
3725 VMXReadVMCS(VMX_VMCS_HOST_CR0, &val);
3726 Log(("VMX_VMCS_HOST_CR0 %08x\n", val));
3727
3728 VMXReadVMCS(VMX_VMCS_HOST_CR3, &val);
3729 Log(("VMX_VMCS_HOST_CR3 %08x\n", val));
3730
3731 VMXReadVMCS(VMX_VMCS_HOST_CR4, &val);
3732 Log(("VMX_VMCS_HOST_CR4 %08x\n", val));
3733
3734 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_CS, &val);
3735 Log(("VMX_VMCS_HOST_FIELD_CS %08x\n", val));
3736
3737 VMXReadVMCS(VMX_VMCS_GUEST_RFLAGS, &val);
3738 Log(("VMX_VMCS_GUEST_RFLAGS %08x\n", val));
3739
3740 if (val < gdtr.cbGdt)
3741 {
3742 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3743 HWACCMR0DumpDescriptor(pDesc, val, "CS: ");
3744 }
3745
3746 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_DS, &val);
3747 Log(("VMX_VMCS_HOST_FIELD_DS %08x\n", val));
3748 if (val < gdtr.cbGdt)
3749 {
3750 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3751 HWACCMR0DumpDescriptor(pDesc, val, "DS: ");
3752 }
3753
3754 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_ES, &val);
3755 Log(("VMX_VMCS_HOST_FIELD_ES %08x\n", val));
3756 if (val < gdtr.cbGdt)
3757 {
3758 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3759 HWACCMR0DumpDescriptor(pDesc, val, "ES: ");
3760 }
3761
3762 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_FS, &val);
3763 Log(("VMX_VMCS16_HOST_FIELD_FS %08x\n", val));
3764 if (val < gdtr.cbGdt)
3765 {
3766 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3767 HWACCMR0DumpDescriptor(pDesc, val, "FS: ");
3768 }
3769
3770 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_GS, &val);
3771 Log(("VMX_VMCS16_HOST_FIELD_GS %08x\n", val));
3772 if (val < gdtr.cbGdt)
3773 {
3774 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3775 HWACCMR0DumpDescriptor(pDesc, val, "GS: ");
3776 }
3777
3778 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_SS, &val);
3779 Log(("VMX_VMCS16_HOST_FIELD_SS %08x\n", val));
3780 if (val < gdtr.cbGdt)
3781 {
3782 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3783 HWACCMR0DumpDescriptor(pDesc, val, "SS: ");
3784 }
3785
3786 VMXReadVMCS(VMX_VMCS16_HOST_FIELD_TR, &val);
3787 Log(("VMX_VMCS16_HOST_FIELD_TR %08x\n", val));
3788 if (val < gdtr.cbGdt)
3789 {
3790 pDesc = &((PX86DESCHC)gdtr.pGdt)[val >> X86_SEL_SHIFT_HC];
3791 HWACCMR0DumpDescriptor(pDesc, val, "TR: ");
3792 }
3793
3794 VMXReadVMCS(VMX_VMCS_HOST_TR_BASE, &val);
3795 Log(("VMX_VMCS_HOST_TR_BASE %RHv\n", val));
3796
3797 VMXReadVMCS(VMX_VMCS_HOST_GDTR_BASE, &val);
3798 Log(("VMX_VMCS_HOST_GDTR_BASE %RHv\n", val));
3799 VMXReadVMCS(VMX_VMCS_HOST_IDTR_BASE, &val);
3800 Log(("VMX_VMCS_HOST_IDTR_BASE %RHv\n", val));
3801
3802 VMXReadVMCS(VMX_VMCS32_HOST_SYSENTER_CS, &val);
3803 Log(("VMX_VMCS_HOST_SYSENTER_CS %08x\n", val));
3804
3805 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_EIP, &val);
3806 Log(("VMX_VMCS_HOST_SYSENTER_EIP %RHv\n", val));
3807
3808 VMXReadVMCS(VMX_VMCS_HOST_SYSENTER_ESP, &val);
3809 Log(("VMX_VMCS_HOST_SYSENTER_ESP %RHv\n", val));
3810
3811 VMXReadVMCS(VMX_VMCS_HOST_RSP, &val);
3812 Log(("VMX_VMCS_HOST_RSP %RHv\n", val));
3813 VMXReadVMCS(VMX_VMCS_HOST_RIP, &val);
3814 Log(("VMX_VMCS_HOST_RIP %RHv\n", val));
3815
3816# if HC_ARCH_BITS == 64 || defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3817 if (VMX_IS_64BIT_HOST_MODE())
3818 {
3819 Log(("MSR_K6_EFER = %RX64\n", ASMRdMsr(MSR_K6_EFER)));
3820 Log(("MSR_K6_STAR = %RX64\n", ASMRdMsr(MSR_K6_STAR)));
3821 Log(("MSR_K8_LSTAR = %RX64\n", ASMRdMsr(MSR_K8_LSTAR)));
3822 Log(("MSR_K8_CSTAR = %RX64\n", ASMRdMsr(MSR_K8_CSTAR)));
3823 Log(("MSR_K8_SF_MASK = %RX64\n", ASMRdMsr(MSR_K8_SF_MASK)));
3824 }
3825# endif
3826#endif /* VBOX_STRICT */
3827 }
3828 break;
3829 }
3830
3831 default:
3832 /* impossible */
3833 AssertMsgFailed(("%Rrc (%#x)\n", rc, rc));
3834 break;
3835 }
3836}
3837
3838#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3839/**
3840 * Prepares for and executes VMLAUNCH (64-bit guest mode)
3841 *
3842 * @returns VBox status code
3843 * @param fResume vmlaunch/vmresume
3844 * @param pCtx Guest context
3845 * @param pCache VMCS cache
3846 * @param pVM The VM to operate on.
3847 * @param pVCpu The VMCPU to operate on.
3848 */
3849DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu)
3850{
3851 uint32_t aParam[6];
3852 PHWACCM_CPUINFO pCpu;
3853 RTHCPHYS pPageCpuPhys;
3854 int rc;
3855
3856 pCpu = HWACCMR0GetCurrentCpu();
3857 pPageCpuPhys = RTR0MemObjGetPagePhysAddr(pCpu->pMemObj, 0);
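    /* pPageCpuPhys is the physical address of the per-cpu VMXON region (passed as parameter 1
     * below), which the 64-bit handler needs to enter VMX operation after the mode switch.
     */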
3858
3859#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3860 pCache->uPos = 1;
3861 pCache->interPD = PGMGetInterPaeCR3(pVM);
3862 pCache->pSwitcher = (uint64_t)pVM->hwaccm.s.pfnHost32ToGuest64R0;
3863#endif
3864
3865#ifdef DEBUG
3866 pCache->TestIn.pPageCpuPhys = 0;
3867 pCache->TestIn.pVMCSPhys = 0;
3868 pCache->TestIn.pCache = 0;
3869 pCache->TestOut.pVMCSPhys = 0;
3870 pCache->TestOut.pCache = 0;
3871 pCache->TestOut.pCtx = 0;
3872 pCache->TestOut.eflags = 0;
3873#endif
3874
3875 aParam[0] = (uint32_t)(pPageCpuPhys); /* Param 1: VMXON physical address - Lo. */
3876 aParam[1] = (uint32_t)(pPageCpuPhys >> 32); /* Param 1: VMXON physical address - Hi. */
3877 aParam[2] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys); /* Param 2: VMCS physical address - Lo. */
3878 aParam[3] = (uint32_t)(pVCpu->hwaccm.s.vmx.pVMCSPhys >> 32); /* Param 2: VMCS physical address - Hi. */
3879 aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache);
3880 aParam[5] = 0;
3881
3882#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3883 pCtx->dr[4] = pVM->hwaccm.s.vmx.pScratchPhys + 16 + 8;
3884 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 1;
3885#endif
3886 rc = VMXR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnVMXGCStartVM64, 6, &aParam[0]);
3887
3888#ifdef VBOX_WITH_CRASHDUMP_MAGIC
3889 Assert(*(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) == 5);
3890 Assert(pCtx->dr[4] == 10);
3891 *(uint32_t *)(pVM->hwaccm.s.vmx.pScratch + 16 + 8) = 0xff;
3892#endif
3893
3894#ifdef DEBUG
3895 AssertMsg(pCache->TestIn.pPageCpuPhys == pPageCpuPhys, ("%RHp vs %RHp\n", pCache->TestIn.pPageCpuPhys, pPageCpuPhys));
3896 AssertMsg(pCache->TestIn.pVMCSPhys == pVCpu->hwaccm.s.vmx.pVMCSPhys, ("%RHp vs %RHp\n", pCache->TestIn.pVMCSPhys, pVCpu->hwaccm.s.vmx.pVMCSPhys));
3897 AssertMsg(pCache->TestIn.pVMCSPhys == pCache->TestOut.pVMCSPhys, ("%RHp vs %RHp\n", pCache->TestIn.pVMCSPhys, pCache->TestOut.pVMCSPhys));
3898 AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, pCache->TestOut.pCache));
3899 AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache), ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hwaccm.s.vmx.VMCSCache)));
3900 AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, pCache->TestOut.pCtx));
3901 Assert(!(pCache->TestOut.eflags & X86_EFL_IF));
3902#endif
3903 return rc;
3904}
3905
3906/**
3907 * Executes the specified handler in 64-bit mode
3908 *
3909 * @returns VBox status code.
3910 * @param pVM The VM to operate on.
3911 * @param pVCpu The VMCPU to operate on.
3912 * @param pCtx Guest context
3913 * @param pfnHandler RC handler
3914 * @param cbParam Number of parameters
3915 * @param paParam Array of 32-bit parameters
3916 */
3917VMMR0DECL(int) VMXR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
3918{
3919 int rc, rc2;
3920 PHWACCM_CPUINFO pCpu;
3921 RTHCPHYS pPageCpuPhys;
3922
3923 /* @todo This code is not guest SMP safe (hyper stack) */
3924 AssertReturn(pVM->cCPUs == 1, VERR_ACCESS_DENIED);
3925 AssertReturn(pVM->hwaccm.s.pfnHost32ToGuest64R0, VERR_INTERNAL_ERROR);
3926 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField));
3927 Assert(pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField));
3928
3929#ifdef VBOX_STRICT
3930 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Write.cValidEntries;i++)
3931 Assert(vmxR0IsValidWriteField(pVCpu->hwaccm.s.vmx.VMCSCache.Write.aField[i]));
3932
3933 for (unsigned i=0;i<pVCpu->hwaccm.s.vmx.VMCSCache.Read.cValidEntries;i++)
3934 Assert(vmxR0IsValidReadField(pVCpu->hwaccm.s.vmx.VMCSCache.Read.aField[i]));
3935#endif
3936
3937 pCpu = HWACCMR0GetCurrentCpu();
3938 pPageCpuPhys = RTR0MemObjGetPagePhysAddr(pCpu->pMemObj, 0);
3939
3940 /* Clear VM Control Structure. Marking it inactive, clearing implementation specific data and writing back VMCS data to memory. */
3941 VMXClearVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3942
3943 /* Leave VMX Root Mode. */
3944 VMXDisable();
3945
3946 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
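    /* VMX operation pins several CR0/CR4 bits, so it is torn down completely before calling the
     * 32->64 switcher and re-established below (CR4.VMXE, VMXON, VMPTRLD) once the handler
     * returns.
     */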
3947
3948 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVM));
3949 CPUMSetHyperEIP(pVCpu, pfnHandler);
3950 for (int i=(int)cbParam-1;i>=0;i--)
3951 CPUMPushHyper(pVCpu, paParam[i]);
3952
3953 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
3954 /* Call switcher. */
3955 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM);
3956 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
3957
3958 /* Make sure the VMX instructions don't cause #UD faults. */
3959 ASMSetCR4(ASMGetCR4() | X86_CR4_VMXE);
3960
3961 /* Enter VMX Root Mode */
3962 rc2 = VMXEnable(pPageCpuPhys);
3963 if (RT_FAILURE(rc2))
3964 {
3965 if (pVM)
3966 VMXR0CheckError(pVM, pVCpu, rc2);
3967 ASMSetCR4(ASMGetCR4() & ~X86_CR4_VMXE);
3968 return VERR_VMX_VMXON_FAILED;
3969 }
3970
3971 rc2 = VMXActivateVMCS(pVCpu->hwaccm.s.vmx.pVMCSPhys);
3972 AssertRCReturn(rc2, rc2);
3973#ifdef RT_OS_WINDOWS
3974 Assert(ASMGetFlags() & X86_EFL_IF);
3975#else
3976 Assert(!(ASMGetFlags() & X86_EFL_IF));
3977#endif
3978 return rc;
3979}
3980
3981#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) */
3982
3983
3984#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0)
3985/**
3986 * Executes VMWRITE
3987 *
3988 * @returns VBox status code
3989 * @param pVCpu The VMCPU to operate on.
3990 * @param idxField VMCS index
3991 * @param u64Val 16, 32 or 64 bits value
3992 */
3993VMMR0DECL(int) VMXWriteVMCS64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
3994{
3995 int rc;
3996
3997 switch (idxField)
3998 {
3999 case VMX_VMCS_CTRL_TSC_OFFSET_FULL:
4000 case VMX_VMCS_CTRL_IO_BITMAP_A_FULL:
4001 case VMX_VMCS_CTRL_IO_BITMAP_B_FULL:
4002 case VMX_VMCS_CTRL_MSR_BITMAP_FULL:
4003 case VMX_VMCS_CTRL_VMEXIT_MSR_STORE_FULL:
4004 case VMX_VMCS_CTRL_VMEXIT_MSR_LOAD_FULL:
4005 case VMX_VMCS_CTRL_VMENTRY_MSR_LOAD_FULL:
4006 case VMX_VMCS_CTRL_VAPIC_PAGEADDR_FULL:
4007 case VMX_VMCS_GUEST_LINK_PTR_FULL:
4008 case VMX_VMCS_GUEST_PDPTR0_FULL:
4009 case VMX_VMCS_GUEST_PDPTR1_FULL:
4010 case VMX_VMCS_GUEST_PDPTR2_FULL:
4011 case VMX_VMCS_GUEST_PDPTR3_FULL:
4012 case VMX_VMCS_GUEST_DEBUGCTL_FULL:
4013 case VMX_VMCS_GUEST_EFER_FULL:
4014 case VMX_VMCS_CTRL_EPTP_FULL:
4015 /* These fields consist of two parts, which are both writable in 32 bits mode. */
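            /* The encoding at idxField + 1 selects the high 32 bits of the same 64-bit field, so
             * two 32-bit VMWRITEs cover the full value.
             */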
4016 rc = VMXWriteVMCS32(idxField, u64Val);
4017 rc |= VMXWriteVMCS32(idxField + 1, (uint32_t)(u64Val >> 32ULL));
4018 AssertRC(rc);
4019 return rc;
4020
4021 case VMX_VMCS64_GUEST_LDTR_BASE:
4022 case VMX_VMCS64_GUEST_TR_BASE:
4023 case VMX_VMCS64_GUEST_GDTR_BASE:
4024 case VMX_VMCS64_GUEST_IDTR_BASE:
4025 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4026 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4027 case VMX_VMCS64_GUEST_CR0:
4028 case VMX_VMCS64_GUEST_CR4:
4029 case VMX_VMCS64_GUEST_CR3:
4030 case VMX_VMCS64_GUEST_DR7:
4031 case VMX_VMCS64_GUEST_RIP:
4032 case VMX_VMCS64_GUEST_RSP:
4033 case VMX_VMCS64_GUEST_CS_BASE:
4034 case VMX_VMCS64_GUEST_DS_BASE:
4035 case VMX_VMCS64_GUEST_ES_BASE:
4036 case VMX_VMCS64_GUEST_FS_BASE:
4037 case VMX_VMCS64_GUEST_GS_BASE:
4038 case VMX_VMCS64_GUEST_SS_BASE:
4039 /* Queue a 64 bits value as we can't set it in 32 bits host mode. */
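            /* These are natural-width fields; a 32-bit host can only VMWRITE the low 32 bits, so
             * values with the upper half set are parked in the VMCS write cache and applied from
             * the 64-bit switcher path (the cache is handed to VMXR0SwitcherStartVM64).
             */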
4040 if (u64Val >> 32ULL)
4041 rc = VMXWriteCachedVMCSEx(pVCpu, idxField, u64Val);
4042 else
4043 rc = VMXWriteVMCS32(idxField, (uint32_t)u64Val);
4044
4045 return rc;
4046
4047 default:
4048 AssertMsgFailed(("Unexpected field %x\n", idxField));
4049 return VERR_INVALID_PARAMETER;
4050 }
4051}
4052
4053/**
4054 * Cache VMCS writes for performance reasons (Darwin) and for running 64 bits guests on 32 bits hosts.
4055 *
4056 * @param pVCpu The VMCPU to operate on.
4057 * @param idxField VMCS field
4058 * @param u64Val Value
4059 */
4060VMMR0DECL(int) VMXWriteCachedVMCSEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val)
4061{
4062 PVMCSCACHE pCache = &pVCpu->hwaccm.s.vmx.VMCSCache;
4063
4064 AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, ("entries=%x\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED);
4065
4066 /* Make sure there are no duplicates. */
4067 for (unsigned i=0;i<pCache->Write.cValidEntries;i++)
4068 {
4069 if (pCache->Write.aField[i] == idxField)
4070 {
4071 pCache->Write.aFieldVal[i] = u64Val;
4072 return VINF_SUCCESS;
4073 }
4074 }
4075
4076 pCache->Write.aField[pCache->Write.cValidEntries] = idxField;
4077 pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val;
4078 pCache->Write.cValidEntries++;
4079 return VINF_SUCCESS;
4080}
4081
4082#endif /* HC_ARCH_BITS == 32 && !VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 */
4083
4084#ifdef VBOX_STRICT
4085static bool vmxR0IsValidReadField(uint32_t idxField)
4086{
4087 switch(idxField)
4088 {
4089 case VMX_VMCS64_GUEST_RIP:
4090 case VMX_VMCS64_GUEST_RSP:
4091 case VMX_VMCS_GUEST_RFLAGS:
4092 case VMX_VMCS32_GUEST_INTERRUPTIBILITY_STATE:
4093 case VMX_VMCS_CTRL_CR0_READ_SHADOW:
4094 case VMX_VMCS64_GUEST_CR0:
4095 case VMX_VMCS_CTRL_CR4_READ_SHADOW:
4096 case VMX_VMCS64_GUEST_CR4:
4097 case VMX_VMCS64_GUEST_DR7:
4098 case VMX_VMCS32_GUEST_SYSENTER_CS:
4099 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4100 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4101 case VMX_VMCS32_GUEST_GDTR_LIMIT:
4102 case VMX_VMCS64_GUEST_GDTR_BASE:
4103 case VMX_VMCS32_GUEST_IDTR_LIMIT:
4104 case VMX_VMCS64_GUEST_IDTR_BASE:
4105 case VMX_VMCS16_GUEST_FIELD_CS:
4106 case VMX_VMCS32_GUEST_CS_LIMIT:
4107 case VMX_VMCS64_GUEST_CS_BASE:
4108 case VMX_VMCS32_GUEST_CS_ACCESS_RIGHTS:
4109 case VMX_VMCS16_GUEST_FIELD_DS:
4110 case VMX_VMCS32_GUEST_DS_LIMIT:
4111 case VMX_VMCS64_GUEST_DS_BASE:
4112 case VMX_VMCS32_GUEST_DS_ACCESS_RIGHTS:
4113 case VMX_VMCS16_GUEST_FIELD_ES:
4114 case VMX_VMCS32_GUEST_ES_LIMIT:
4115 case VMX_VMCS64_GUEST_ES_BASE:
4116 case VMX_VMCS32_GUEST_ES_ACCESS_RIGHTS:
4117 case VMX_VMCS16_GUEST_FIELD_FS:
4118 case VMX_VMCS32_GUEST_FS_LIMIT:
4119 case VMX_VMCS64_GUEST_FS_BASE:
4120 case VMX_VMCS32_GUEST_FS_ACCESS_RIGHTS:
4121 case VMX_VMCS16_GUEST_FIELD_GS:
4122 case VMX_VMCS32_GUEST_GS_LIMIT:
4123 case VMX_VMCS64_GUEST_GS_BASE:
4124 case VMX_VMCS32_GUEST_GS_ACCESS_RIGHTS:
4125 case VMX_VMCS16_GUEST_FIELD_SS:
4126 case VMX_VMCS32_GUEST_SS_LIMIT:
4127 case VMX_VMCS64_GUEST_SS_BASE:
4128 case VMX_VMCS32_GUEST_SS_ACCESS_RIGHTS:
4129 case VMX_VMCS16_GUEST_FIELD_LDTR:
4130 case VMX_VMCS32_GUEST_LDTR_LIMIT:
4131 case VMX_VMCS64_GUEST_LDTR_BASE:
4132 case VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS:
4133 case VMX_VMCS16_GUEST_FIELD_TR:
4134 case VMX_VMCS32_GUEST_TR_LIMIT:
4135 case VMX_VMCS64_GUEST_TR_BASE:
4136 case VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS:
4137 case VMX_VMCS32_RO_EXIT_REASON:
4138 case VMX_VMCS32_RO_VM_INSTR_ERROR:
4139 case VMX_VMCS32_RO_EXIT_INSTR_LENGTH:
4140 case VMX_VMCS32_RO_EXIT_INTERRUPTION_ERRCODE:
4141 case VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO:
4142 case VMX_VMCS32_RO_EXIT_INSTR_INFO:
4143 case VMX_VMCS_RO_EXIT_QUALIFICATION:
4144 case VMX_VMCS32_RO_IDT_INFO:
4145 case VMX_VMCS32_RO_IDT_ERRCODE:
4146 case VMX_VMCS64_GUEST_CR3:
4147 case VMX_VMCS_EXIT_PHYS_ADDR_FULL:
4148 return true;
4149 }
4150 return false;
4151}
4152
4153static bool vmxR0IsValidWriteField(uint32_t idxField)
4154{
4155 switch(idxField)
4156 {
4157 case VMX_VMCS64_GUEST_LDTR_BASE:
4158 case VMX_VMCS64_GUEST_TR_BASE:
4159 case VMX_VMCS64_GUEST_GDTR_BASE:
4160 case VMX_VMCS64_GUEST_IDTR_BASE:
4161 case VMX_VMCS64_GUEST_SYSENTER_EIP:
4162 case VMX_VMCS64_GUEST_SYSENTER_ESP:
4163 case VMX_VMCS64_GUEST_CR0:
4164 case VMX_VMCS64_GUEST_CR4:
4165 case VMX_VMCS64_GUEST_CR3:
4166 case VMX_VMCS64_GUEST_DR7:
4167 case VMX_VMCS64_GUEST_RIP:
4168 case VMX_VMCS64_GUEST_RSP:
4169 case VMX_VMCS64_GUEST_CS_BASE:
4170 case VMX_VMCS64_GUEST_DS_BASE:
4171 case VMX_VMCS64_GUEST_ES_BASE:
4172 case VMX_VMCS64_GUEST_FS_BASE:
4173 case VMX_VMCS64_GUEST_GS_BASE:
4174 case VMX_VMCS64_GUEST_SS_BASE:
4175 return true;
4176 }
4177 return false;
4178}
4179
4180#endif