VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HMSVMR0.cpp@ 46655

Last change on this file since 46655 was 46603, checked in by vboxsync, 12 years ago

VMM/HMSVMR0: AMD-V bits.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 146.1 KB
1/* $Id: HMSVMR0.cpp 46603 2013-06-17 16:31:02Z vboxsync $ */
2/** @file
3 * HM SVM (AMD-V) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2013 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21
22#ifdef DEBUG_ramshankar
23# define HMSVM_ALWAYS_TRAP_ALL_XCPTS
24# define HMSVM_ALWAYS_TRAP_PF
25#endif
26
27
28/*******************************************************************************
29* Defined Constants And Macros *
30*******************************************************************************/
31#ifdef VBOX_WITH_STATISTICS
32# define HMSVM_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { \
33 if ((u64ExitCode) == SVM_EXIT_NPF) \
34 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf); \
35 else \
36 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[(u64ExitCode) & MASK_EXITREASON_STAT]); \
37 } while (0)
38#else
39# define HMSVM_EXITCODE_STAM_COUNTER_INC(u64ExitCode) do { } while (0)
40#endif
41
42/** If we decide to use a function table approach this can be useful to
43 * switch to a "static DECLCALLBACK(int)". */
44#define HMSVM_EXIT_DECL static int
45
46/** @name Segment attribute conversion between CPU and AMD-V VMCB format.
47 *
48 * The CPU format of the segment attribute is described in X86DESCATTRBITS
49 * which is 16-bits (i.e. includes 4 bits of the segment limit).
50 *
51 * In the AMD-V VMCB format, the segment attribute is a compact 12 bits (strictly
52 * only the attribute bits and nothing else). The upper 4 bits are unused.
53 *
54 * @{ */
55#define HMSVM_CPU_2_VMCB_SEG_ATTR(a) (a & 0xff) | ((a & 0xf000) >> 4)
56#define HMSVM_VMCB_2_CPU_SEG_ATTR(a) (a & 0xff) | ((a & 0x0f00) << 4)
57/** @} */
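/*
 * Worked example (illustrative): a typical 64-bit code-segment attribute in CPU
 * format is 0xa09b (type=0xb, S=1, DPL=0, P=1, limit 19:16=0, AVL=0, L=1, D=0, G=1).
 * HMSVM_CPU_2_VMCB_SEG_ATTR(0xa09b) = 0x9b | (0xa000 >> 4) = 0xa9b, and
 * HMSVM_VMCB_2_CPU_SEG_ATTR(0xa9b)  = 0x9b | (0xa00 << 4)  = 0xa09b again.
 */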
58
59/** @name Macros for loading, storing segment registers to/from the VMCB.
60 * @{ */
61#define HMSVM_LOAD_SEG_REG(REG, reg) \
62 do \
63 { \
64 Assert(pCtx->reg.fFlags & CPUMSELREG_FLAGS_VALID); \
65 Assert(pCtx->reg.ValidSel == pCtx->reg.Sel); \
66 pVmcb->guest.REG.u16Sel = pCtx->reg.Sel; \
67 pVmcb->guest.REG.u32Limit = pCtx->reg.u32Limit; \
68 pVmcb->guest.REG.u64Base = pCtx->reg.u64Base; \
69 pVmcb->guest.REG.u16Attr = HMSVM_CPU_2_VMCB_SEG_ATTR(pCtx->reg.Attr.u); \
70 } while (0)
71
72#define HMSVM_SAVE_SEG_REG(REG, reg) \
73 do \
74 { \
75 pCtx->reg.Sel = pVmcb->guest.REG.u16Sel; \
76 pCtx->reg.ValidSel = pVmcb->guest.REG.u16Sel; \
77 pCtx->reg.fFlags = CPUMSELREG_FLAGS_VALID; \
78 pCtx->reg.u32Limit = pVmcb->guest.REG.u32Limit; \
79 pCtx->reg.u64Base = pVmcb->guest.REG.u64Base; \
80 pCtx->reg.Attr.u = HMSVM_VMCB_2_CPU_SEG_ATTR(pVmcb->guest.REG.u16Attr); \
81 } while (0)
82/** @} */
83
84/** @name VMCB Clean Bits.
85 *
86 * These flags are used for VMCB-state caching. A set VMCB Clean Bit indicates
87 * AMD-V doesn't need to reload the corresponding value(s) from the VMCB in
88 * memory.
89 *
90 * @{ */
91/** All intercept vectors, TSC offset, PAUSE filter counter. */
92#define HMSVM_VMCB_CLEAN_INTERCEPTS RT_BIT(0)
93/** I/O permission bitmap, MSR permission bitmap. */
94#define HMSVM_VMCB_CLEAN_IOPM_MSRPM RT_BIT(1)
95/** ASID. */
96#define HMSVM_VMCB_CLEAN_ASID RT_BIT(2)
97/** TPR: V_TPR, V_IRQ, V_INTR_PRIO, V_IGN_TPR, V_INTR_MASKING,
98 * V_INTR_VECTOR. */
99#define HMSVM_VMCB_CLEAN_TPR RT_BIT(3)
100/** Nested Paging: Nested CR3 (nCR3), PAT. */
101#define HMSVM_VMCB_CLEAN_NP RT_BIT(4)
102/** Control registers (CR0, CR3, CR4, EFER). */
103#define HMSVM_VMCB_CLEAN_CRX_EFER RT_BIT(5)
104/** Debug registers (DR6, DR7). */
105#define HMSVM_VMCB_CLEAN_DRX RT_BIT(6)
106/** GDT, IDT limit and base. */
107#define HMSVM_VMCB_CLEAN_DT RT_BIT(7)
108/** Segment register: CS, SS, DS, ES limit and base. */
109#define HMSVM_VMCB_CLEAN_SEG RT_BIT(8)
110/** CR2.*/
111#define HMSVM_VMCB_CLEAN_CR2 RT_BIT(9)
112/** Last-branch record (DbgCtlMsr, br_from, br_to, lastint_from, lastint_to) */
113#define HMSVM_VMCB_CLEAN_LBR RT_BIT(10)
114/** AVIC (AVIC APIC_BAR; AVIC APIC_BACKING_PAGE, AVIC
115 * PHYSICAL_TABLE and AVIC LOGICAL_TABLE Pointers). */
116#define HMSVM_VMCB_CLEAN_AVIC RT_BIT(11)
117/** Mask of all valid VMCB Clean bits. */
118#define HMSVM_VMCB_CLEAN_ALL ( HMSVM_VMCB_CLEAN_INTERCEPTS \
119 | HMSVM_VMCB_CLEAN_IOPM_MSRPM \
120 | HMSVM_VMCB_CLEAN_ASID \
121 | HMSVM_VMCB_CLEAN_TPR \
122 | HMSVM_VMCB_CLEAN_NP \
123 | HMSVM_VMCB_CLEAN_CRX_EFER \
124 | HMSVM_VMCB_CLEAN_DRX \
125 | HMSVM_VMCB_CLEAN_DT \
126 | HMSVM_VMCB_CLEAN_SEG \
127 | HMSVM_VMCB_CLEAN_CR2 \
128 | HMSVM_VMCB_CLEAN_LBR \
129 | HMSVM_VMCB_CLEAN_AVIC)
130/** @} */
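/*
 * Illustrative usage pattern (sketch): whenever a VMCB field belonging to one of
 * the areas above is modified, the corresponding clean bit must be cleared so
 * that the CPU reloads that area from memory on the next VMRUN, e.g.:
 *
 *     pVmcb->guest.u64CR4           = u64GuestCR4;
 *     pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
 *
 * Bits that remain set tell AMD-V it may keep using its cached copy of the
 * corresponding VMCB area.
 */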
131
132/** @name SVM transient.
133 *
134 * A state structure for holding miscellaneous information across AMD-V
135 * VMRUN/#VMEXIT operation, restored after the transition.
136 *
137 * @{ */
138typedef struct SVMTRANSIENT
139{
140 /** The host's rflags/eflags. */
141 RTCCUINTREG uEFlags;
142#if HC_ARCH_BITS == 32
143 uint32_t u32Alignment0;
144#endif
145
146 /** The #VMEXIT exit code (the EXITCODE field in the VMCB). */
147 uint64_t u64ExitCode;
148 /** The guest's TPR value used for TPR shadowing. */
149 uint8_t u8GuestTpr;
150} SVMTRANSIENT, *PSVMTRANSIENT;
151/** @} */
152
153
154/**
155 * MSRPM (MSR permission bitmap) read permissions (for guest RDMSR).
156 */
157typedef enum SVMMSREXITREAD
158{
159 /** Reading this MSR causes a VM-exit. */
160 SVMMSREXIT_INTERCEPT_READ = 0xb,
161 /** Reading this MSR does not cause a VM-exit. */
162 SVMMSREXIT_PASSTHRU_READ
163} SVMMSREXITREAD;
164
165/**
166 * MSRPM (MSR permission bitmap) write permissions (for guest WRMSR).
167 */
168typedef enum SVMMSREXITWRITE
169{
170 /** Writing to this MSR causes a VM-exit. */
171 SVMMSREXIT_INTERCEPT_WRITE = 0xd,
172 /** Writing to this MSR does not cause a VM-exit. */
173 SVMMSREXIT_PASSTHRU_WRITE
174} SVMMSREXITWRITE;
175
176
177/*******************************************************************************
178* Internal Functions *
179*******************************************************************************/
180static void hmR0SvmSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, SVMMSREXITREAD enmRead, SVMMSREXITWRITE enmWrite);
181
182DECLINLINE(int) hmR0SvmHandleExit(PVMCPU pVCpu, PCPUMCTX pMixedCtx, PSVMTRANSIENT pSvmTransient);
183
184
185/*******************************************************************************
186* Global Variables *
187*******************************************************************************/
188/** Ring-0 memory object for the IO bitmap. */
189RTR0MEMOBJ g_hMemObjIOBitmap = NIL_RTR0MEMOBJ;
190/** Physical address of the IO bitmap. */
191RTHCPHYS g_HCPhysIOBitmap = 0;
192/** Virtual address of the IO bitmap. */
193R0PTRTYPE(void *) g_pvIOBitmap = NULL;
194
195
196/**
197 * Sets up and activates AMD-V on the current CPU.
198 *
199 * @returns VBox status code.
200 * @param pCpu Pointer to the CPU info struct.
201 * @param pVM Pointer to the VM (can be NULL after a resume!).
202 * @param pvCpuPage Pointer to the global CPU page.
203 * @param HCPhysCpuPage Physical address of the global CPU page.
 * @param fEnabledByHost Whether the host OS has already enabled AMD-V (must
 * be false; asserted below).
204 */
205VMMR0DECL(int) SVMR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost)
206{
207 AssertReturn(!fEnabledByHost, VERR_INVALID_PARAMETER);
208 AssertReturn( HCPhysCpuPage
209 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
210 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
211
212 /*
213 * We must turn on AMD-V and setup the host state physical address, as those MSRs are per CPU.
214 */
215 uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER);
216 if (u64HostEfer & MSR_K6_EFER_SVME)
217 {
218 /* If the VBOX_HWVIRTEX_IGNORE_SVM_IN_USE setting is active, then we blindly use AMD-V. */
219 if ( pVM
220 && pVM->hm.s.svm.fIgnoreInUseError)
221 {
222 pCpu->fIgnoreAMDVInUseError = true;
223 }
224
225 if (!pCpu->fIgnoreAMDVInUseError)
226 return VERR_SVM_IN_USE;
227 }
228
229 /* Turn on AMD-V in the EFER MSR. */
230 ASMWrMsr(MSR_K6_EFER, u64HostEfer | MSR_K6_EFER_SVME);
231
232 /* Write the physical page address where the CPU will store the host state while executing the VM. */
233 ASMWrMsr(MSR_K8_VM_HSAVE_PA, HCPhysCpuPage);
234
235 /*
236 * Theoretically, other hypervisors may have used ASIDs, ideally we should flush all non-zero ASIDs
237 * when enabling SVM. AMD doesn't have an SVM instruction to flush all ASIDs (flushing is done
238 * upon VMRUN). Therefore, just set the fFlushAsidBeforeUse flag which instructs hmR0SvmSetupTLB()
239 * to flush the TLB before using a new ASID.
240 */
241 pCpu->fFlushAsidBeforeUse = true;
242
243 /*
244 * Ensure each VCPU scheduled on this CPU gets a new ASID on resume. See @bugref{6255}.
245 */
246 ++pCpu->cTlbFlushes;
247
248 return VINF_SUCCESS;
249}
250
251
252/**
253 * Deactivates AMD-V on the current CPU.
254 *
255 * @returns VBox status code.
256 * @param pCpu Pointer to the CPU info struct.
257 * @param pvCpuPage Pointer to the global CPU page.
258 * @param HCPhysCpuPage Physical address of the global CPU page.
259 */
260VMMR0DECL(int) SVMR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
261{
262 AssertReturn( HCPhysCpuPage
263 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
264 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
265 NOREF(pCpu);
266
267 /* Turn off AMD-V in the EFER MSR if AMD-V is active. */
268 uint64_t u64HostEfer = ASMRdMsr(MSR_K6_EFER);
269 if (u64HostEfer & MSR_K6_EFER_SVME)
270 {
271 ASMWrMsr(MSR_K6_EFER, u64HostEfer & ~MSR_K6_EFER_SVME);
272
273 /* Invalidate host state physical address. */
274 ASMWrMsr(MSR_K8_VM_HSAVE_PA, 0);
275 }
276
277 return VINF_SUCCESS;
278}
279
280
281/**
282 * Does global AMD-V initialization (called during module initialization).
283 *
284 * @returns VBox status code.
285 */
286VMMR0DECL(int) SVMR0GlobalInit(void)
287{
288 /*
289 * Allocate 12 KB for the IO bitmap. Since this is non-optional and we always intercept all IO accesses, it's done
290 * once globally here instead of per-VM.
291 */
292 int rc = RTR0MemObjAllocCont(&g_hMemObjIOBitmap, 3 << PAGE_SHIFT, false /* fExecutable */);
293 if (RT_FAILURE(rc))
294 return rc;
295
296 g_pvIOBitmap = RTR0MemObjAddress(g_hMemObjIOBitmap);
297 g_HCPhysIOBitmap = RTR0MemObjGetPagePhysAddr(g_hMemObjIOBitmap, 0 /* iPage */);
298
299 /* Set all bits to intercept all IO accesses. */
300 ASMMemFill32(g_pvIOBitmap, 3 << PAGE_SHIFT, UINT32_C(0xffffffff));
 return VINF_SUCCESS;
301}
302
303
304/**
305 * Does global AMD-V termination (called during module termination).
306 */
307VMMR0DECL(void) SVMR0GlobalTerm(void)
308{
309 if (g_hMemObjIOBitmap != NIL_RTR0MEMOBJ)
310 {
311 RTR0MemObjFree(g_hMemObjIOBitmap, false /* fFreeMappings */);
312 g_pvIOBitmap = NULL;
313 g_HCPhysIOBitmap = 0;
314 g_hMemObjIOBitmap = NIL_RTR0MEMOBJ;
315 }
316}
317
318
319/**
320 * Frees any allocated per-VCPU structures for a VM.
321 *
322 * @param pVM Pointer to the VM.
323 */
324DECLINLINE(void) hmR0SvmFreeStructs(PVM pVM)
325{
326 for (uint32_t i = 0; i < pVM->cCpus; i++)
327 {
328 PVMCPU pVCpu = &pVM->aCpus[i];
329 AssertPtr(pVCpu);
330
331 if (pVCpu->hm.s.svm.hMemObjVmcbHost != NIL_RTR0MEMOBJ)
332 {
333 RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcbHost, false);
334 pVCpu->hm.s.svm.pvVmcbHost = 0;
335 pVCpu->hm.s.svm.HCPhysVmcbHost = 0;
336 pVCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ;
337 }
338
339 if (pVCpu->hm.s.svm.hMemObjVmcb != NIL_RTR0MEMOBJ)
340 {
341 RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVmcb, false);
342 pVCpu->hm.s.svm.pvVmcb = 0;
343 pVCpu->hm.s.svm.HCPhysVmcb = 0;
344 pVCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ;
345 }
346
347 if (pVCpu->hm.s.svm.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
348 {
349 RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjMsrBitmap, false);
350 pVCpu->hm.s.svm.pvMsrBitmap = 0;
351 pVCpu->hm.s.svm.HCPhysMsrBitmap = 0;
352 pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
353 }
354 }
355}
356
357
358/**
359 * Does per-VM AMD-V initialization.
360 *
361 * @returns VBox status code.
362 * @param pVM Pointer to the VM.
363 */
364VMMR0DECL(int) SVMR0InitVM(PVM pVM)
365{
366 int rc = VERR_INTERNAL_ERROR_5;
367
368 /*
369 * Check for an AMD CPU erratum which requires us to flush the TLB before every world-switch.
370 */
371 uint32_t u32Family;
372 uint32_t u32Model;
373 uint32_t u32Stepping;
374 if (HMAmdIsSubjectToErratum170(&u32Family, &u32Model, &u32Stepping))
375 {
376 Log4(("SVMR0InitVM: AMD cpu with erratum 170 family %#x model %#x stepping %#x\n", u32Family, u32Model, u32Stepping));
377 pVM->hm.s.svm.fAlwaysFlushTLB = true;
378 }
379
380 /*
381 * Initialize the R0 memory objects up-front so we can properly cleanup on allocation failures.
382 */
383 for (VMCPUID i = 0; i < pVM->cCpus; i++)
384 {
385 PVMCPU pVCpu = &pVM->aCpus[i];
386 pVCpu->hm.s.svm.hMemObjVmcbHost = NIL_RTR0MEMOBJ;
387 pVCpu->hm.s.svm.hMemObjVmcb = NIL_RTR0MEMOBJ;
388 pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
389 }
390
391 for (VMCPUID i = 0; i < pVM->cCpus; i++)
392 {
 PVMCPU pVCpu = &pVM->aCpus[i];
393 /*
394 * Allocate one page for the host-context VM control block (VMCB). This is used for additional host-state (such as
395 * FS, GS, Kernel GS Base, etc.) apart from the host-state save area specified in MSR_K8_VM_HSAVE_PA.
396 */
397 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcbHost, 1 << PAGE_SHIFT, false /* fExecutable */);
398 if (RT_FAILURE(rc))
399 goto failure_cleanup;
400
401 pVCpu->hm.s.svm.pvVmcbHost = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcbHost);
402 pVCpu->hm.s.svm.HCPhysVmcbHost = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcbHost, 0 /* iPage */);
403 Assert(pVCpu->hm.s.svm.HCPhysVmcbHost < _4G);
404 ASMMemZeroPage(pVCpu->hm.s.svm.pvVmcbHost);
405
406 /*
407 * Allocate one page for the guest-state VMCB.
408 */
409 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVmcb, 1 << PAGE_SHIFT, false /* fExecutable */);
410 if (RT_FAILURE(rc))
411 goto failure_cleanup;
412
413 pVCpu->hm.s.svm.pvVmcb = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVmcb);
414 pVCpu->hm.s.svm.HCPhysVmcb = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVmcb, 0 /* iPage */);
415 Assert(pVCpu->hm.s.svm.HCPhysVmcb < _4G);
416 ASMMemZeroPage(pVCpu->hm.s.svm.pvVmcb);
417
418 /*
419 * Allocate two pages (8 KB) for the MSR permission bitmap. There doesn't seem to be a way to convince
420 * SVM to not require one.
421 */
422 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjMsrBitmap, 2 << PAGE_SHIFT, false /* fExecutable */);
423 if (RT_FAILURE(rc))
424 goto failure_cleanup;
425
426 pVCpu->hm.s.svm.pvMsrBitmap = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjMsrBitmap);
427 pVCpu->hm.s.svm.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjMsrBitmap, 0 /* iPage */);
428 /* Set all bits to intercept all MSR accesses (changed later on). */
429 ASMMemFill32(pVCpu->hm.s.svm.pvMsrBitmap, 2 << PAGE_SHIFT, 0xffffffff);
430 }
431
432 return VINF_SUCCESS;
433
434failure_cleanup:
435 hmR0SvmFreeStructs(pVM);
436 return rc;
437}
438
439
440/**
441 * Does per-VM AMD-V termination.
442 *
443 * @returns VBox status code.
444 * @param pVM Pointer to the VM.
445 */
446VMMR0DECL(int) SVMR0TermVM(PVM pVM)
447{
448 hmR0SvmFreeStructs(pVM);
449 return VINF_SUCCESS;
450}
451
452
453/**
454 * Sets the permission bits for the specified MSR in the MSRPM.
455 *
456 * @param pVCpu Pointer to the VMCPU.
457 * @param uMsr The MSR for which the access permissions are being set.
458 * @param enmRead MSR read permissions.
459 * @param enmWrite MSR write permissions.
460 */
461static void hmR0SvmSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, SVMMSREXITREAD enmRead, SVMMSREXITWRITE enmWrite)
462{
463 unsigned ulBit;
464 uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
465
466 /*
467 * Layout:
468 * Byte offset MSR range
469 * 0x000 - 0x7ff 0x00000000 - 0x00001fff
470 * 0x800 - 0xfff 0xc0000000 - 0xc0001fff
471 * 0x1000 - 0x17ff 0xc0010000 - 0xc0011fff
472 * 0x1800 - 0x1fff Reserved
473 */
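 /*
 * Worked example (illustrative): for MSR 0xC0000082 (LSTAR) the second range
 * applies, so ulBit = (0xC0000082 - 0xC0000000) * 2 = 0x104 and pbMsrBitmap is
 * advanced by 0x800; the read-intercept bit is therefore bit 4 of byte 0x820 of
 * the MSRPM and the write-intercept bit is bit 5 of the same byte.
 */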
474 if (uMsr <= 0x00001FFF)
475 {
476 /* Pentium-compatible MSRs. */
477 ulBit = uMsr * 2;
478 }
479 else if ( uMsr >= 0xC0000000
480 && uMsr <= 0xC0001FFF)
481 {
482 /* AMD Sixth Generation x86 Processor MSRs. */
483 ulBit = (uMsr - 0xC0000000) * 2;
484 pbMsrBitmap += 0x800;
485 }
486 else if ( uMsr >= 0xC0010000
487 && uMsr <= 0xC0011FFF)
488 {
489 /* AMD Seventh and Eighth Generation Processor MSRs. */
490 ulBit = (uMsr - 0xC0010000) * 2;
491 pbMsrBitmap += 0x1000;
492 }
493 else
494 {
495 AssertFailed();
496 return;
497 }
498
499 Assert(ulBit < 0x3fff /* 16 * 1024 - 1 */);
500 if (enmRead == SVMMSREXIT_INTERCEPT_READ)
501 ASMBitSet(pbMsrBitmap, ulBit);
502 else
503 ASMBitClear(pbMsrBitmap, ulBit);
504
505 if (enmWrite == SVMMSREXIT_INTERCEPT_WRITE)
506 ASMBitSet(pbMsrBitmap, ulBit + 1);
507 else
508 ASMBitClear(pbMsrBitmap, ulBit + 1);
509
510 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM;
511}
512
513
514/**
515 * Sets up AMD-V for the specified VM.
516 * This function is only called once per-VM during initialization.
517 *
518 * @returns VBox status code.
519 * @param pVM Pointer to the VM.
520 */
521VMMR0DECL(int) SVMR0SetupVM(PVM pVM)
522{
523 int rc = VINF_SUCCESS;
524
525 AssertReturn(pVM, VERR_INVALID_PARAMETER);
526 Assert(pVM->hm.s.svm.fSupported);
527
528 for (VMCPUID i = 0; i < pVM->cCpus; i++)
529 {
530 PVMCPU pVCpu = &pVM->aCpus[i];
531 PSVMVMCB pVmcb = (PSVMVMCB)pVM->aCpus[i].hm.s.svm.pvVmcb;
532
533 AssertMsgReturn(pVmcb, ("Invalid pVmcb\n"), VERR_SVM_INVALID_PVMCB);
534
535 /* Trap exceptions unconditionally (debug purposes). */
536#ifdef HMSVM_ALWAYS_TRAP_PF
537 pVmcb->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
538#endif
539#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS
540 /* If you add any exceptions here, make sure to update hmR0SvmHandleExit(). */
541 pVmcb->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_BP)
542 | RT_BIT(X86_XCPT_DB)
543 | RT_BIT(X86_XCPT_DE)
544 | RT_BIT(X86_XCPT_NM)
545 | RT_BIT(X86_XCPT_UD)
546 | RT_BIT(X86_XCPT_NP)
547 | RT_BIT(X86_XCPT_SS)
548 | RT_BIT(X86_XCPT_GP)
549 | RT_BIT(X86_XCPT_PF)
550 | RT_BIT(X86_XCPT_MF);
551#endif
552
553 /* Set up unconditional intercepts and conditions. */
554 pVmcb->ctrl.u32InterceptCtrl1 = SVM_CTRL1_INTERCEPT_INTR /* External interrupt causes a VM-exit. */
555 | SVM_CTRL1_INTERCEPT_VINTR /* When guest enables interrupts cause a VM-exit. */
556 | SVM_CTRL1_INTERCEPT_NMI /* Non-Maskable Interrupts causes a VM-exit. */
557 | SVM_CTRL1_INTERCEPT_SMI /* System Management Interrupt cause a VM-exit. */
558 | SVM_CTRL1_INTERCEPT_INIT /* INIT signal causes a VM-exit. */
559 | SVM_CTRL1_INTERCEPT_RDPMC /* RDPMC causes a VM-exit. */
560 | SVM_CTRL1_INTERCEPT_CPUID /* CPUID causes a VM-exit. */
561 | SVM_CTRL1_INTERCEPT_RSM /* RSM causes a VM-exit. */
562 | SVM_CTRL1_INTERCEPT_HLT /* HLT causes a VM-exit. */
563 | SVM_CTRL1_INTERCEPT_INOUT_BITMAP /* Use the IOPM to cause IOIO VM-exits. */
564 | SVM_CTRL1_INTERCEPT_MSR_SHADOW /* MSR access not covered by MSRPM causes a VM-exit.*/
565 | SVM_CTRL1_INTERCEPT_INVLPGA /* INVLPGA causes a VM-exit. */
566 | SVM_CTRL1_INTERCEPT_SHUTDOWN /* Shutdown events causes a VM-exit. */
567 | SVM_CTRL1_INTERCEPT_FERR_FREEZE; /* Intercept "freezing" during legacy FPU handling. */
568
569 pVmcb->ctrl.u32InterceptCtrl2 = SVM_CTRL2_INTERCEPT_VMRUN /* VMRUN causes a VM-exit. */
570 | SVM_CTRL2_INTERCEPT_VMMCALL /* VMMCALL causes a VM-exit. */
571 | SVM_CTRL2_INTERCEPT_VMLOAD /* VMLOAD causes a VM-exit. */
572 | SVM_CTRL2_INTERCEPT_VMSAVE /* VMSAVE causes a VM-exit. */
573 | SVM_CTRL2_INTERCEPT_STGI /* STGI causes a VM-exit. */
574 | SVM_CTRL2_INTERCEPT_CLGI /* CLGI causes a VM-exit. */
575 | SVM_CTRL2_INTERCEPT_SKINIT /* SKINIT causes a VM-exit. */
576 | SVM_CTRL2_INTERCEPT_WBINVD /* WBINVD causes a VM-exit. */
577 | SVM_CTRL2_INTERCEPT_MONITOR /* MONITOR causes a VM-exit. */
578 | SVM_CTRL2_INTERCEPT_MWAIT; /* MWAIT causes a VM-exit. */
579
580 /* CR0, CR4 reads must be intercepted, our shadow values are not necessarily the same as the guest's. */
581 pVmcb->ctrl.u16InterceptRdCRx = RT_BIT(0) | RT_BIT(4);
582
583 /* CR0, CR4 writes must be intercepted for the same reasons as above. */
584 pVmcb->ctrl.u16InterceptWrCRx = RT_BIT(0) | RT_BIT(4);
585
586 /* Intercept all DRx reads and writes by default. Changed later on. */
587 pVmcb->ctrl.u16InterceptRdDRx = 0xffff;
588 pVmcb->ctrl.u16InterceptWrDRx = 0xffff;
589
590 /* Virtualize masking of INTR interrupts. (reads/writes from/to CR8 go to the V_TPR register) */
591 pVmcb->ctrl.IntCtrl.n.u1VIrqMasking = 1;
592
593 /* Ignore the priority in the TPR; we take into account the guest TPR anyway while delivering interrupts. */
594 pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR = 1;
595
596 /* Set IO and MSR bitmap permission bitmap physical addresses. */
597 pVmcb->ctrl.u64IOPMPhysAddr = g_HCPhysIOBitmap;
598 pVmcb->ctrl.u64MSRPMPhysAddr = pVCpu->hm.s.svm.HCPhysMsrBitmap;
599
600 /* No LBR virtualization. */
601 pVmcb->ctrl.u64LBRVirt = 0;
602
603 /* Initially set all VMCB clean bits to 0 indicating that everything should be loaded from memory. */
604 pVmcb->ctrl.u64VmcbCleanBits = 0;
605
606 /* The guest ASID MBNZ, set it to 1. The host uses 0. */
607 pVmcb->ctrl.TLBCtrl.n.u32ASID = 1;
608
609 /*
610 * Setup the PAT MSR (applicable for Nested Paging only).
611 * The default value should be 0x0007040600070406ULL, but we want to treat all guest memory as WB,
612 * so choose type 6 for all PAT slots.
613 */
614 pVmcb->guest.u64GPAT = UINT64_C(0x0006060606060606);
615
616 /* Without Nested Paging, we need additional intercepts. */
617 if (!pVM->hm.s.fNestedPaging)
618 {
619 /* CR3 reads/writes must be intercepted; our shadow values differ from the guest values. */
620 pVmcb->ctrl.u16InterceptRdCRx |= RT_BIT(3);
621 pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(3);
622
623 /* Intercept INVLPG and task switches (may change CR3, EFLAGS, LDT). */
624 pVmcb->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_INVLPG
625 | SVM_CTRL1_INTERCEPT_TASK_SWITCH;
626
627 /* Page faults must be intercepted to implement shadow paging. */
628 pVmcb->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
629 }
630
631 /*
632 * The following MSRs are saved/restored automatically during the world-switch.
633 * Don't intercept guest read/write accesses to these MSRs.
634 */
635 hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
636 hmR0SvmSetMsrPermission(pVCpu, MSR_K8_CSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
637 hmR0SvmSetMsrPermission(pVCpu, MSR_K6_STAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
638 hmR0SvmSetMsrPermission(pVCpu, MSR_K8_SF_MASK, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
639 hmR0SvmSetMsrPermission(pVCpu, MSR_K8_FS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
640 hmR0SvmSetMsrPermission(pVCpu, MSR_K8_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
641 hmR0SvmSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
642 hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
643 hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
644 hmR0SvmSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
645 }
646
647 return rc;
648}
649
650
651/**
652 * Flushes the appropriate tagged-TLB entries.
653 *
654 * @param pVM Pointer to the VM.
655 * @param pVCpu Pointer to the VMCPU.
656 */
657static void hmR0SvmFlushTaggedTlb(PVMCPU pVCpu)
658{
659 PVM pVM = pVCpu->CTX_SUFF(pVM);
660 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
661 PHMGLOBLCPUINFO pCpu = HMR0GetCurrentCpu();
662
663 /*
664 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
665 * This can happen both for start & resume due to long jumps back to ring-3.
666 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
667 * so we cannot reuse the ASIDs without flushing.
668 */
669 bool fNewAsid = false;
670 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
671 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
672 {
673 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
674 pVCpu->hm.s.fForceTLBFlush = true;
675 fNewAsid = true;
676 }
677
678 /* Set TLB flush state as checked until we return from the world switch. */
679 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);
680
681 /* Check for explicit TLB shootdowns. */
682 if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
683 {
684 pVCpu->hm.s.fForceTLBFlush = true;
685 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb);
686 }
687
688 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
689 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_NOTHING;
690
691 if (pVM->hm.s.svm.fAlwaysFlushTLB)
692 {
693 /*
694 * This is the AMD erratum 170. We need to flush the entire TLB for each world switch. Sad.
695 */
696 pCpu->uCurrentAsid = 1;
697 pVCpu->hm.s.uCurrentAsid = 1;
698 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
699 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
700 }
701 else if (pVCpu->hm.s.fForceTLBFlush)
702 {
703 if (fNewAsid)
704 {
705 ++pCpu->uCurrentAsid;
706 bool fHitASIDLimit = false;
707 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
708 {
709 pCpu->uCurrentAsid = 1; /* Wraparound at 1; host uses 0 */
710 pCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */
711 fHitASIDLimit = true;
712
713 if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
714 {
715 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
716 pCpu->fFlushAsidBeforeUse = true;
717 }
718 else
719 {
720 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
721 pCpu->fFlushAsidBeforeUse = false;
722 }
723 }
724
725 if ( !fHitASIDLimit
726 && pCpu->fFlushAsidBeforeUse)
727 {
728 if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
729 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
730 else
731 {
732 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
733 pCpu->fFlushAsidBeforeUse = false;
734 }
735 }
736
737 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
738 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
739 }
740 else
741 {
742 if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
743 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
744 else
745 pVmcb->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
746 }
747
748 pVCpu->hm.s.fForceTLBFlush = false;
749 }
750 else
751 {
752 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
753 * not be executed. See hmQueueInvlPage() where it is commented
754 * out. Support individual entry flushing someday. */
755 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
756 {
757 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
758 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
759 for (uint32_t i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
760 SVMR0InvlpgA(pVCpu->hm.s.TlbShootdown.aPages[i], pVmcb->ctrl.TLBCtrl.n.u32ASID);
761 }
762 }
763
764 pVCpu->hm.s.TlbShootdown.cPages = 0;
765 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
766
767 /* Update VMCB with the ASID. */
768 if (pVmcb->ctrl.TLBCtrl.n.u32ASID != pVCpu->hm.s.uCurrentAsid)
769 {
770 pVmcb->ctrl.TLBCtrl.n.u32ASID = pVCpu->hm.s.uCurrentAsid;
771 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_ASID;
772 }
773
774 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
775 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
776 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
777 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
778 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
779 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
780
781#ifdef VBOX_WITH_STATISTICS
782 if (pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_NOTHING)
783 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
784 else if ( pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT
785 || pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT_RETAIN_GLOBALS)
786 {
787 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
788 }
789 else
790 Assert(pVmcb->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_ENTIRE);
791#endif
792}
793
794
795/** @name 64-bit guest on 32-bit host OS helper functions.
796 *
797 * The host CPU is still 64-bit capable but the host OS is running in 32-bit
798 * mode (code segment, paging). These wrappers/helpers perform the necessary
799 * bits for the 32->64 switcher.
800 *
801 * @{ */
802#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
803/**
804 * Prepares for and executes VMRUN (64-bit guests on a 32-bit host).
805 *
806 * @returns VBox status code.
807 * @param HCPhysVmcbHost Physical address of host VMCB.
808 * @param HCPhysVmcb Physical address of the VMCB.
809 * @param pCtx Pointer to the guest-CPU context.
810 * @param pVM Pointer to the VM.
811 * @param pVCpu Pointer to the VMCPU.
812 */
813DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS HCPhysVmcbHost, RTHCPHYS HCPhysVmcb, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu)
814{
815 uint32_t aParam[4];
816 aParam[0] = (uint32_t)(HCPhysVmcbHost); /* Param 1: HCPhysVmcbHost - Lo. */
817 aParam[1] = (uint32_t)(HCPhysVmcbHost >> 32); /* Param 1: HCPhysVmcbHost - Hi. */
818 aParam[2] = (uint32_t)(HCPhysVmcb); /* Param 2: HCPhysVmcb - Lo. */
819 aParam[3] = (uint32_t)(HCPhysVmcb >> 32); /* Param 2: HCPhysVmcb - Hi. */
820
821 return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, HM64ON32OP_SVMRCVMRun64, 4, &aParam[0]);
822}
823
824
825/**
826 * Executes the specified VMRUN handler in 64-bit mode.
827 *
828 * @returns VBox status code.
829 * @param pVM Pointer to the VM.
830 * @param pVCpu Pointer to the VMCPU.
831 * @param pCtx Pointer to the guest-CPU context.
832 * @param enmOp The operation to perform.
833 * @param cbParam Number of parameters.
834 * @param paParam Array of 32-bit parameters.
835 */
836VMMR0DECL(int) SVMR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, HM64ON32OP enmOp, uint32_t cbParam,
837 uint32_t *paParam)
838{
839 AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER);
840 Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END);
841
842 /* Disable interrupts. */
843 RTHCUINTREG uOldEFlags = ASMIntDisableFlags();
844
845#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
846 RTCPUID idHostCpu = RTMpCpuId();
847 CPUMR0SetLApic(pVM, idHostCpu);
848#endif
849
850 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
851 CPUMSetHyperEIP(pVCpu, enmOp);
852 for (int i = (int)cbParam - 1; i >= 0; i--)
853 CPUMPushHyper(pVCpu, paParam[i]);
854
855 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
856 /* Call the switcher. */
857 int rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
858 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
859
860 /* Restore interrupts. */
861 ASMSetFlags(uOldEFlags);
862 return rc;
863}
864
865#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */
866/** @} */
867
868
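/**
 * Adds an exception to the intercept-exception bitmap in the VMCB and updates
 * the corresponding VMCB Clean bit.
 *
 * @param pVmcb Pointer to the VMCB.
 * @param u32Xcpt The exception (X86_XCPT_*) to intercept.
 */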
869DECLINLINE(void) hmR0SvmAddXcptIntercept(PSVMVMCB pVmcb, uint32_t u32Xcpt)
870{
871 if (!(pVmcb->ctrl.u32InterceptException & RT_BIT(u32Xcpt)))
872 {
873 pVmcb->ctrl.u32InterceptException |= RT_BIT(u32Xcpt);
874 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
875 }
876}
877
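/**
 * Removes an exception from the intercept-exception bitmap in the VMCB and
 * updates the corresponding VMCB Clean bit.
 *
 * @param pVmcb Pointer to the VMCB.
 * @param u32Xcpt The exception (X86_XCPT_*) to stop intercepting.
 */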
878DECLINLINE(void) hmR0SvmRemoveXcptIntercept(PSVMVMCB pVmcb, uint32_t u32Xcpt)
879{
880#ifndef HMSVM_ALWAYS_TRAP_ALL_XCPTS
881 if (pVmcb->ctrl.u32InterceptException & RT_BIT(u32Xcpt))
882 {
883 pVmcb->ctrl.u32InterceptException &= ~RT_BIT(u32Xcpt);
884 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
885 }
886#endif
887}
888
889
890/**
891 * Loads the guest control registers (CR0, CR2, CR3, CR4) into the VMCB.
892 *
893 * @returns VBox status code.
894 * @param pVCpu Pointer to the VMCPU.
895 * @param pVmcb Pointer to the VMCB.
896 * @param pCtx Pointer the guest-CPU context.
897 *
898 * @remarks No-long-jump zone!!!
899 */
900DECLINLINE(int) hmR0SvmLoadGuestControlRegs(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx)
901{
 PVM pVM = pVCpu->CTX_SUFF(pVM);
902 /*
903 * Guest CR0.
904 */
905 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0)
906 {
907 uint64_t u64GuestCR0 = pCtx->cr0;
908
909 /* Always enable caching. */
910 u64GuestCR0 &= ~(X86_CR0_CD | X86_CR0_NW);
911
912 /*
913 * When Nested Paging is not available use shadow page tables and intercept #PFs (the latter done in SVMR0SetupVM()).
914 */
915 if (!pVM->hm.s.fNestedPaging)
916 {
917 u64GuestCR0 |= X86_CR0_PG; /* When Nested Paging is not available, use shadow page tables. */
918 u64GuestCR0 |= X86_CR0_WP; /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */
919 }
920
921 /*
922 * Guest FPU bits.
923 */
924 bool fInterceptNM = false;
925 bool fInterceptMF = false;
926 u64GuestCR0 |= X86_CR0_NE; /* Use internal x87 FPU exceptions handling rather than external interrupts. */
927 if (CPUMIsGuestFPUStateActive(pVCpu))
928 {
929 /* Catch floating point exceptions if we need to report them to the guest in a different way. */
930 if (!(u64GuestCR0 & X86_CR0_NE))
931 {
932 Log4(("hmR0SvmLoadGuestControlRegs: Intercepting Guest CR0.MP Old-style FPU handling!!!\n"));
933 pVmcb->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_MF);
934 fInterceptMF = true;
935 }
936 }
937 else
938 {
939 fInterceptNM = true; /* Guest FPU inactive, VM-exit on #NM for lazy FPU loading. */
940 u64GuestCR0 |= X86_CR0_TS /* Guest can task switch quickly and do lazy FPU syncing. */
941 | X86_CR0_MP; /* FWAIT/WAIT should not ignore CR0.TS and should generate #NM. */
942 }
943
944 /*
945 * Update the exception intercept bitmap.
946 */
947 if (fInterceptNM)
948 hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_NM);
949 else
950 hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_NM);
951
952 if (fInterceptMF)
953 hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_MF);
954 else
955 hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_MF);
956
957 pVmcb->guest.u64CR0 = u64GuestCR0;
958 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
959 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_CR0;
960 }
961
962 /*
963 * Guest CR2.
964 */
965 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR2)
966 {
967 pVmcb->guest.u64CR2 = pCtx->cr2;
968 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CR2;
969 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_CR2;
970 }
971
972 /*
973 * Guest CR3.
974 */
975 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR3)
976 {
977 if (pVM->hm.s.fNestedPaging)
978 {
979 PGMMODE enmShwPagingMode;
980#if HC_ARCH_BITS == 32
981 if (CPUMIsGuestInLongModeEx(pCtx))
982 enmShwPagingMode = PGMMODE_AMD64_NX;
983 else
984#endif
985 enmShwPagingMode = PGMGetHostMode(pVM);
986
987 pVmcb->ctrl.u64NestedPagingCR3 = PGMGetNestedCR3(pVCpu, enmShwPagingMode);
988 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_NP;
989 Assert(pVmcb->ctrl.u64NestedPagingCR3);
990 pVmcb->guest.u64CR3 = pCtx->cr3;
991 }
992 else
993 pVmcb->guest.u64CR3 = PGMGetHyperCR3(pVCpu);
994
995 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
996 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_CR3;
997 }
998
999 /*
1000 * Guest CR4.
1001 */
1002 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR4)
1003 {
1004 uint64_t u64GuestCR4 = pCtx->cr4;
1005 if (!pVM->hm.s.fNestedPaging)
1006 {
1007 switch (pVCpu->hm.s.enmShadowMode)
1008 {
1009 case PGMMODE_REAL:
1010 case PGMMODE_PROTECTED: /* Protected mode, no paging. */
1011 AssertFailed();
1012 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1013
1014 case PGMMODE_32_BIT: /* 32-bit paging. */
1015 u64GuestCR4 &= ~X86_CR4_PAE;
1016 break;
1017
1018 case PGMMODE_PAE: /* PAE paging. */
1019 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
1020 /** Must use PAE paging as we could use physical memory > 4 GB */
1021 u64GuestCR4 |= X86_CR4_PAE;
1022 break;
1023
1024 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
1025 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
1026#ifdef VBOX_ENABLE_64_BITS_GUESTS
1027 break;
1028#else
1029 AssertFailed();
1030 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1031#endif
1032
1033 default: /* shut up gcc */
1034 AssertFailed();
1035 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1036 }
1037 }
1038
1039 pVmcb->guest.u64CR4 = u64GuestCR4;
1040 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
1041 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_CR4;
1042 }
1043
1044 return VINF_SUCCESS;
1045}
1046
1047
1048/**
1049 * Loads the guest segment registers into the VMCB.
1050 *
1051 * @returns VBox status code.
1052 * @param pVCpu Pointer to the VMCPU.
1053 * @param pVmcb Pointer to the VMCB.
1054 * @param pCtx Pointer to the guest-CPU context.
1055 *
1056 * @remarks No-long-jump zone!!!
1057 */
1058DECLINLINE(void) hmR0SvmLoadGuestSegmentRegs(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx)
1059{
1060 /* Guest Segment registers: CS, SS, DS, ES, FS, GS. */
1061 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SEGMENT_REGS)
1062 {
1063 HMSVM_LOAD_SEG_REG(CS, cs);
1064 HMSVM_LOAD_SEG_REG(SS, ss);
1065 HMSVM_LOAD_SEG_REG(DS, ds);
1066 HMSVM_LOAD_SEG_REG(ES, es);
1067 HMSVM_LOAD_SEG_REG(FS, fs);
1068 HMSVM_LOAD_SEG_REG(GS, gs);
1069
1070 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_SEG;
1071 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_SEGMENT_REGS;
1072 }
1073
1074 /* Guest TR. */
1075 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_TR)
1076 {
1077 HMSVM_LOAD_SEG_REG(TR, tr);
1078 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_TR;
1079 }
1080
1081 /* Guest LDTR. */
1082 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_LDTR)
1083 {
1084 HMSVM_LOAD_SEG_REG(LDTR, ldtr);
1085 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_LDTR;
1086 }
1087
1088 /* Guest GDTR. */
1089 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_GDTR)
1090 {
1091 pVmcb->guest.GDTR.u32Limit = pCtx->gdtr.cbGdt;
1092 pVmcb->guest.GDTR.u64Base = pCtx->gdtr.pGdt;
1093 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DT;
1094 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_GDTR;
1095 }
1096
1097 /* Guest IDTR. */
1098 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_IDTR)
1099 {
1100 pVmcb->guest.IDTR.u32Limit = pCtx->idtr.cbIdt;
1101 pVmcb->guest.IDTR.u64Base = pCtx->idtr.pIdt;
1102 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DT;
1103 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_IDTR;
1104 }
1105}
1106
1107
1108/**
1109 * Loads the guest MSRs into the VMCB.
1110 *
1111 * @param pVCpu Pointer to the VMCPU.
1112 * @param pVmcb Pointer to the VMCB.
1113 * @param pCtx Pointer to the guest-CPU context.
1114 *
1115 * @remarks No-long-jump zone!!!
1116 */
1117DECLINLINE(void) hmR0SvmLoadGuestMsrs(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx)
1118{
1119 /* Guest Sysenter MSRs. */
1120 pVmcb->guest.u64SysEnterCS = pCtx->SysEnter.cs;
1121 pVmcb->guest.u64SysEnterEIP = pCtx->SysEnter.eip;
1122 pVmcb->guest.u64SysEnterESP = pCtx->SysEnter.esp;
1123
1124 /*
1125 * Guest EFER MSR.
1126 * AMD-V requires guest EFER.SVME to be set. Weird.
1127 * See AMD spec. 15.5.1 "Basic Operation" | "Canonicalization and Consistency Checks".
1128 */
1129 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_SVM_GUEST_EFER_MSR)
1130 {
1131 pVmcb->guest.u64EFER = pCtx->msrEFER | MSR_K6_EFER_SVME;
1132 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
1133 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_SVM_GUEST_EFER_MSR;
1134 }
1135
1136 /* 64-bit MSRs. */
1137 if (CPUMIsGuestInLongModeEx(pCtx))
1138 {
1139 pVmcb->guest.FS.u64Base = pCtx->fs.u64Base;
1140 pVmcb->guest.GS.u64Base = pCtx->gs.u64Base;
1141 }
1142 else
1143 {
1144 /* If the guest isn't in 64-bit mode, clear the MSR_K6_EFER_LME bit from the guest EFER, otherwise AMD-V expects amd64 shadow paging. */
1145 if (pCtx->msrEFER & MSR_K6_EFER_LME)
1146 {
1147 pVmcb->guest.u64EFER &= ~MSR_K6_EFER_LME;
1148 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_CRX_EFER;
1149 }
1150 }
1151
1152
1153 /** @todo The following are used in 64-bit only (SYSCALL/SYSRET) but they might
1154 * be writable in 32-bit mode. Clarify with AMD spec. */
1155 pVmcb->guest.u64STAR = pCtx->msrSTAR;
1156 pVmcb->guest.u64LSTAR = pCtx->msrLSTAR;
1157 pVmcb->guest.u64CSTAR = pCtx->msrCSTAR;
1158 pVmcb->guest.u64SFMASK = pCtx->msrSFMASK;
1159 pVmcb->guest.u64KernelGSBase = pCtx->msrKERNELGSBASE;
1160}
1161
1162
1163/**
1164 * Loads the guest debug registers into the VMCB.
1165 *
1166 * @param pVCpu Pointer to the VMCPU.
 * @param pVmcb Pointer to the VMCB.
1167 * @param pCtx Pointer to the guest-CPU context.
1168 *
1169 * @remarks No-long-jump zone!!!
1170 * @remarks Requires EFLAGS to be up-to-date in the VMCB!
1171 */
1172DECLINLINE(void) hmR0SvmLoadGuestDebugRegs(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx)
1173{
 PVM pVM = pVCpu->CTX_SUFF(pVM);
1174 if (!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_DEBUG))
1175 return;
1176
1177 /** @todo Turn these into assertions if possible. */
1178 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* Set reserved bits to 1. */
1179 pCtx->dr[6] &= ~RT_BIT(12); /* MBZ. */
1180
1181 pCtx->dr[7] &= 0xffffffff; /* Upper 32 bits MBZ. */
1182 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* MBZ. */
1183 pCtx->dr[7] |= 0x400; /* MB1. */
1184
1185 /* Update DR6, DR7 with the guest values. */
1186 pVmcb->guest.u64DR7 = pCtx->dr[7];
1187 pVmcb->guest.u64DR6 = pCtx->dr[6];
1188 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
1189
1190 bool fInterceptDB = false;
1191 bool fInterceptMovDRx = false;
1192 if (DBGFIsStepping(pVCpu))
1193 {
1194 /* AMD-V doesn't have any monitor-trap flag equivalent. Instead, enable tracing in the guest and trap #DB. */
1195 pVmcb->guest.u64RFlags |= X86_EFL_TF;
1196 fInterceptDB = true;
1197 }
1198
1199 if (CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
1200 {
1201 if (!CPUMIsHyperDebugStateActive(pVCpu))
1202 {
1203 int rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1204 AssertRC(rc);
1205
1206 /* Update DR6, DR7 with the hypervisor values. */
1207 pVmcb->guest.u64DR7 = CPUMGetHyperDR7(pVCpu);
1208 pVmcb->guest.u64DR6 = CPUMGetHyperDR6(pVCpu);
1209 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
1210 }
1211 Assert(CPUMIsHyperDebugStateActive(pVCpu));
1212 fInterceptMovDRx = true;
1213 }
1214 else if (pCtx->dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD))
1215 {
1216 if (!CPUMIsGuestDebugStateActive(pVCpu))
1217 {
1218 int rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
1219 AssertRC(rc);
1220 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
1221 }
1222 Assert(CPUMIsGuestDebugStateActive(pVCpu));
1223 Assert(fInterceptMovDRx == false);
1224 }
1225 else if (!CPUMIsGuestDebugStateActive(pVCpu))
1226 {
1227 /* For the first time we would need to intercept MOV DRx accesses even when the guest debug registers aren't loaded. */
1228 fInterceptMovDRx = true;
1229 }
1230
1231 if (fInterceptDB)
1232 hmR0SvmAddXcptIntercept(pVmcb, X86_XCPT_DB);
1233 else
1234 hmR0SvmRemoveXcptIntercept(pVmcb, X86_XCPT_DB);
1235
1236 if (fInterceptMovDRx)
1237 {
1238 if ( pVmcb->ctrl.u16InterceptRdDRx != 0xffff
1239 || pVmcb->ctrl.u16InterceptWrDRx != 0xffff)
1240 {
1241 pVmcb->ctrl.u16InterceptRdDRx = 0xffff;
1242 pVmcb->ctrl.u16InterceptWrDRx = 0xffff;
1243 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
1244 }
1245 }
1246 else
1247 {
1248 if ( pVmcb->ctrl.u16InterceptRdDRx
1249 || pVmcb->ctrl.u16InterceptWrDRx)
1250 {
1251 pVmcb->ctrl.u16InterceptRdDRx = 0;
1252 pVmcb->ctrl.u16InterceptWrDRx = 0;
1253 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
1254 }
1255 }
1256
1257 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_GUEST_DEBUG;
1258}
1259
1260
1261/**
1262 * Loads the guest APIC state (currently just the TPR).
1263 *
1264 * @returns VBox status code.
1265 * @param pVCpu Pointer to the VMCPU.
1266 * @param pVmcb Pointer to the VMCB.
1267 * @param pCtx Pointer to the guest-CPU context.
1268 */
1269DECLINLINE(int) hmR0SvmLoadGuestApicState(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx)
1270{
1271 if (!(pVCpu->hm.s.fContextUseFlags & HM_CHANGED_SVM_GUEST_APIC_STATE))
1272 return VINF_SUCCESS;
1273
1274 bool fPendingIntr;
1275 uint8_t u8Tpr;
1276 int rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPendingIntr, NULL /* pu8PendingIrq */);
1277 AssertRCReturn(rc, rc);
1278
1279 /** Assume that we need to trap all TPR accesses and thus need not check on
1280 * every #VMEXIT if we should update the TPR. */
1281 Assert(pVmcb->ctrl.IntCtrl.n.u1VIrqMasking);
1282 pVCpu->hm.s.svm.fSyncVTpr = false;
1283
1284 /* 32-bit guests use the LSTAR MSR for patching guest code that touches the TPR. */
1285 if (pVCpu->CTX_SUFF(pVM)->hm.s.fTPRPatchingActive)
1286 {
1287 pCtx->msrLSTAR = u8Tpr;
1288
1289 /* If there are interrupts pending, intercept LSTAR writes, otherwise don't intercept reads or writes. */
1290 if (fPendingIntr)
1291 hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_INTERCEPT_WRITE);
1292 else
1293 {
1294 hmR0SvmSetMsrPermission(pVCpu, MSR_K8_LSTAR, SVMMSREXIT_PASSTHRU_READ, SVMMSREXIT_PASSTHRU_WRITE);
1295 pVCpu->hm.s.svm.fSyncVTpr = true;
1296 }
1297
1298 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_IOPM_MSRPM;
1299 }
1300 else
1301 {
1302 /* Bits 3-0 of the VTPR field correspond to bits 7-4 of the TPR (which is the Task-Priority Class). */
1303 pVmcb->ctrl.IntCtrl.n.u8VTPR = (u8Tpr >> 4);
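 /* E.g. (illustrative): a guest TPR of 0xB0 (task-priority class 0xB) yields a VTPR field of 0xB here. */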
1304
1305 /* If there are interrupts pending, intercept CR8 writes to evaluate ASAP if we can deliver the interrupt to the guest. */
1306 if (fPendingIntr)
1307 pVmcb->ctrl.u16InterceptWrCRx |= RT_BIT(8);
1308 else
1309 {
1310 pVmcb->ctrl.u16InterceptWrCRx &= ~RT_BIT(8);
1311 pVCpu->hm.s.svm.fSyncVTpr = true;
1312 }
1313
1314 pVmcb->ctrl.u64VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_INTERCEPTS | HMSVM_VMCB_CLEAN_TPR);
1315 }
1316
1317 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_SVM_GUEST_APIC_STATE;
1318 return rc;
1319}
1320
1321
1322/**
1323 * Sets up the appropriate function to run guest code.
1324 *
1325 * @returns VBox status code.
1326 * @param pVCpu Pointer to the VMCPU.
1327 * @param pCtx Pointer to the guest-CPU context.
1328 *
1329 * @remarks No-long-jump zone!!!
1330 */
1331static int hmR0SvmSetupVMRunHandler(PVMCPU pVCpu, PCPUMCTX pCtx)
1332{
1333 if (CPUMIsGuestInLongModeEx(pCtx))
1334 {
1335#ifndef VBOX_ENABLE_64_BITS_GUESTS
1336 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1337#endif
1338 Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */
1339#if HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1340 /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */
1341 pVCpu->hm.s.svm.pfnVMRun = SVMR0VMSwitcherRun64;
1342#else
1343 /* 64-bit host or hybrid host. */
1344 pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun64;
1345#endif
1346 }
1347 else
1348 {
1349 /* Guest is not in long mode, use the 32-bit handler. */
1350 pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun;
1351 }
1352 return VINF_SUCCESS;
1353}
1354
1355
1356/**
1357 * Enters the AMD-V session.
1358 *
1359 * @returns VBox status code.
1360 * @param pVM Pointer to the VM.
1361 * @param pVCpu Pointer to the VMCPU.
1362 * @param pCpu Pointer to the CPU info struct.
1363 */
1364VMMR0DECL(int) SVMR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
1365{
1366 AssertPtr(pVM);
1367 AssertPtr(pVCpu);
1368 Assert(pVM->hm.s.svm.fSupported);
1369 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1370 NOREF(pCpu);
1371
1372 LogFlowFunc(("pVM=%p pVCpu=%p\n", pVM, pVCpu));
1373
1374 /* Nothing to do here. */
1375 return VINF_SUCCESS;
1376}
1377
1378
1379/**
1380 * Leaves the AMD-V session.
1381 *
1382 * @returns VBox status code.
1383 * @param pVM Pointer to the VM.
1384 * @param pVCpu Pointer to the VMCPU.
1385 * @param pCtx Pointer to the guest-CPU context.
1386 */
1387VMMR0DECL(int) SVMR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1388{
1389 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1390 NOREF(pVM);
1391 NOREF(pVCpu);
1392 NOREF(pCtx);
1393
1394 /* Nothing to do here. Everything is taken care of in hmR0SvmLongJmpToRing3(). */
1395 return VINF_SUCCESS;
1396}
1397
1398
1399/**
1400 * Saves the host state.
1401 *
1402 * @returns VBox status code.
1403 * @param pVM Pointer to the VM.
1404 * @param pVCpu Pointer to the VMCPU.
1405 *
1406 * @remarks No-long-jump zone!!!
1407 */
1408VMMR0DECL(int) SVMR0SaveHostState(PVM pVM, PVMCPU pVCpu)
1409{
1410 NOREF(pVM);
1411 NOREF(pVCpu);
1412 /* Nothing to do here. AMD-V does this for us automatically during the world-switch. */
1413 return VINF_SUCCESS;
1414}
1415
1416
1417/**
1418 * Loads the guest state.
1419 *
1420 * @returns VBox status code.
1421 * @param pVM Pointer to the VM.
1422 * @param pVCpu Pointer to the VMCPU.
1423 * @param pCtx Pointer to the guest-CPU context.
1424 *
1425 * @remarks No-long-jump zone!!!
1426 */
1427VMMR0DECL(int) SVMR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1428{
1429 AssertPtr(pVM);
1430 AssertPtr(pVCpu);
1431 AssertPtr(pCtx);
1432 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1433
1434 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
1435 AssertMsgReturn(pVmcb, ("Invalid pVmcb\n"), VERR_SVM_INVALID_PVMCB);
1436
1437 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestState, x);
1438
1439 int rc = hmR0SvmLoadGuestControlRegs(pVCpu, pVmcb, pCtx);
1440 AssertLogRelMsgRCReturn(rc, ("hmR0SvmLoadGuestControlRegs! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
1441
1442 hmR0SvmLoadGuestSegmentRegs(pVCpu, pVmcb, pCtx);
1443 hmR0SvmLoadGuestMsrs(pVCpu, pVmcb, pCtx);
1444
1445 pVmcb->guest.u64RIP = pCtx->rip;
1446 pVmcb->guest.u64RSP = pCtx->rsp;
1447 pVmcb->guest.u64RFlags = pCtx->eflags.u32;
1448 pVmcb->guest.u8CPL = pCtx->ss.Attr.n.u2Dpl;
1449 pVmcb->guest.u64RAX = pCtx->rax;
1450
1451 /* hmR0SvmLoadGuestDebugRegs() must be called -after- updating guest RFLAGS as the RFLAGS may need to be changed. */
1452 hmR0SvmLoadGuestDebugRegs(pVCpu, pVmcb, pCtx);
1453
1454 rc = hmR0SvmLoadGuestApicState(pVCpu, pVmcb, pCtx);
1455 AssertLogRelMsgRCReturn(rc, ("hmR0SvmLoadGuestApicState! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
1456
1457 rc = hmR0SvmSetupVMRunHandler(pVCpu, pCtx);
1458 AssertLogRelMsgRCReturn(rc, ("hmR0SvmSetupVMRunHandler! rc=%Rrc (pVM=%p pVCpu=%p)\n", rc, pVM, pVCpu), rc);
1459
1460 /* Clear any unused and reserved bits. */
1461 pVCpu->hm.s.fContextUseFlags &= ~( HM_CHANGED_GUEST_SYSENTER_CS_MSR
1462 | HM_CHANGED_GUEST_SYSENTER_EIP_MSR
1463 | HM_CHANGED_GUEST_SYSENTER_ESP_MSR);
1464
1465 AssertMsg(!pVCpu->hm.s.fContextUseFlags,
1466 ("Missed updating flags while loading guest state. pVM=%p pVCpu=%p fContextUseFlags=%#RX32\n",
1467 pVM, pVCpu, pVCpu->hm.s.fContextUseFlags));
1468
1469 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestState, x);
1470
1471 return rc;
1472}
1473
1474
1475
1476/**
1477 * Saves the entire guest state from the VMCB into the
1478 * guest-CPU context. Currently there is no residual state left in the CPU that
1479 * is not updated in the VMCB.
1480 *
1481 * @returns VBox status code.
1482 * @param pVCpu Pointer to the VMCPU.
1483 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
1484 * out-of-sync. Make sure to update the required fields
1485 * before using them.
1486 */
1487static void hmR0SvmSaveGuestState(PVMCPU pVCpu, PCPUMCTX pMixedCtx)
1488{
1489 Assert(VMMRZCallRing3IsEnabled(pVCpu));
1490
1491 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
1492
1493 pMixedCtx->rip = pVmcb->guest.u64RIP;
1494 pMixedCtx->rsp = pVmcb->guest.u64RSP;
1495 pMixedCtx->eflags.u32 = pVmcb->guest.u64RFlags;
1496 pMixedCtx->rax = pVmcb->guest.u64RAX;
1497
1498 /*
1499 * Guest interrupt shadow.
1500 */
1501 if (pVmcb->ctrl.u64IntShadow & SVM_INTERRUPT_SHADOW_ACTIVE)
1502 EMSetInhibitInterruptsPC(pVCpu, pMixedCtx->rip);
1503 else
1504 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
1505
1506 /*
1507 * Guest Control registers: CR2, CR3 (handled at the end) - accesses to other control registers are always intercepted.
1508 */
1509 pMixedCtx->cr2 = pVmcb->guest.u64CR2;
1510
1511 /*
1512 * Guest MSRs.
1513 */
1514 pMixedCtx->msrSTAR = pVmcb->guest.u64STAR; /* legacy syscall eip, cs & ss */
1515 pMixedCtx->msrLSTAR = pVmcb->guest.u64LSTAR; /* 64-bit mode syscall rip */
1516 pMixedCtx->msrCSTAR = pVmcb->guest.u64CSTAR; /* compatibility mode syscall rip */
1517 pMixedCtx->msrSFMASK = pVmcb->guest.u64SFMASK; /* syscall flag mask */
1518 pMixedCtx->msrKERNELGSBASE = pVmcb->guest.u64KernelGSBase; /* swapgs exchange value */
1519 pMixedCtx->SysEnter.cs = pVmcb->guest.u64SysEnterCS;
1520 pMixedCtx->SysEnter.eip = pVmcb->guest.u64SysEnterEIP;
1521 pMixedCtx->SysEnter.esp = pVmcb->guest.u64SysEnterESP;
1522
1523 /*
1524 * Guest segment registers (includes FS, GS base MSRs for 64-bit guests).
1525 */
1526 HMSVM_SAVE_SEG_REG(CS, cs);
1527 HMSVM_SAVE_SEG_REG(SS, ss);
1528 HMSVM_SAVE_SEG_REG(DS, ds);
1529 HMSVM_SAVE_SEG_REG(ES, es);
1530 HMSVM_SAVE_SEG_REG(FS, fs);
1531 HMSVM_SAVE_SEG_REG(GS, gs);
1532
1533 /*
1534 * Correct the hidden CS granularity flag. Haven't seen it being wrong in any other
1535 * register (yet).
1536 */
1537 /** @todo Verify this. */
1538 if ( !pMixedCtx->cs.Attr.n.u1Granularity
1539 && pMixedCtx->cs.Attr.n.u1Present
1540 && pMixedCtx->cs.u32Limit > UINT32_C(0xfffff))
1541 {
1542 Assert((pMixedCtx->cs.u32Limit & 0xfff) == 0xfff);
1543 pMixedCtx->cs.Attr.n.u1Granularity = 1;
1544 }
1545#ifdef VBOX_STRICT
1546# define HMSVM_ASSERT_SEG_GRANULARITY(reg) \
1547 AssertMsg( !pMixedCtx->reg.Attr.n.u1Present \
1548 || ( pMixedCtx->reg.Attr.n.u1Granularity \
1549 ? (pMixedCtx->reg.u32Limit & 0xfff) == 0xfff \
1550 : pMixedCtx->reg.u32Limit <= UINT32_C(0xfffff)), \
1551 ("Invalid Segment Attributes %#x %#x %#llx\n", pMixedCtx->reg.u32Limit,
1552 pMixedCtx->reg.Attr.u, pMixedCtx->reg.u64Base))
1553
1554 HMSVM_ASSERT_SEG_GRANULARITY(cs);
1555 HMSVM_ASSERT_SEG_GRANULARITY(ss);
1556 HMSVM_ASSERT_SEG_GRANULARITY(ds);
1557 HMSVM_ASSERT_SEG_GRANULARITY(es);
1558 HMSVM_ASSERT_SEG_GRANULARITY(fs);
1559 HMSVM_ASSERT_SEG_GRANULARITY(gs);
1560
1561# undef HMSVM_ASSERT_SEG_GRANULARITY
1562#endif
1563
1564 /*
1565 * Sync the hidden SS DPL field. AMD CPUs have a separate CPL field in the VMCB and use that,
1566 * so it's possible that when the CPL changes during guest execution the SS DPL
1567 * isn't updated by AMD-V. Observed on some AMD Fusion CPUs with 64-bit guests.
1568 * See AMD spec. 15.5.1 "Basic operation".
1569 */
1570 Assert(!(pVmcb->guest.u8CPL & ~0x3));
1571 pMixedCtx->ss.Attr.n.u2Dpl = pVmcb->guest.u8CPL & 0x3;
1572
1573 /*
1574 * Guest Descriptor-Table registers.
1575 */
1576 HMSVM_SAVE_SEG_REG(TR, tr);
1577 HMSVM_SAVE_SEG_REG(LDTR, ldtr);
1578 pMixedCtx->gdtr.cbGdt = pVmcb->guest.GDTR.u32Limit;
1579 pMixedCtx->gdtr.pGdt = pVmcb->guest.GDTR.u64Base;
1580
1581 pMixedCtx->idtr.cbIdt = pVmcb->guest.IDTR.u32Limit;
1582 pMixedCtx->idtr.pIdt = pVmcb->guest.IDTR.u64Base;
1583
1584 /*
1585 * Guest Debug registers.
1586 */
1587 pMixedCtx->dr[6] = pVmcb->guest.u64DR6;
1588 pMixedCtx->dr[7] = pVmcb->guest.u64DR7;
1589
1590 /*
1591 * With Nested Paging, CR3 changes are not intercepted. Therefore, sync. it now.
1592 * This is done as the very last step of syncing the guest state, as PGMUpdateCR3() may cause longjmp's to ring-3.
1593 */
1594 if ( pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging
1595 && pMixedCtx->cr3 != pVmcb->guest.u64CR3)
1596 {
1597 CPUMSetGuestCR3(pVCpu, pVmcb->guest.u64CR3);
1598 PGMUpdateCR3(pVCpu, pVmcb->guest.u64CR3);
1599 }
1600}
1601
1602
1603/**
1604 * Does the necessary state syncing before doing a longjmp to ring-3.
1605 *
1606 * @param pVM Pointer to the VM.
1607 * @param pVCpu Pointer to the VMCPU.
1608 * @param pCtx Pointer to the guest-CPU context.
1609 * @param rcExit The reason for exiting to ring-3. Can be
1610 * VINF_VMM_UNKNOWN_RING3_CALL.
1611 *
1612 * @remarks No-long-jmp zone!!!
1613 */
1614static void hmR0SvmLongJmpToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, int rcExit)
1615{
1616 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
1617 Assert(VMMR0IsLogFlushDisabled(pVCpu));
1618
1619 /* Restore host FPU state if necessary and resync on next R0 reentry. */
1620 if (CPUMIsGuestFPUStateActive(pVCpu))
1621 {
1622 CPUMR0SaveGuestFPU(pVM, pVCpu, pCtx);
1623 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
1624 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
1625 }
1626
1627 /* Restore host debug registers if necessary and resync on next R0 reentry. */
1628 if (CPUMIsGuestDebugStateActive(pVCpu))
1629 {
1630 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, true /* save DR6 */);
1631 Assert(!CPUMIsGuestDebugStateActive(pVCpu));
1632 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
1633 }
1634 else if (CPUMIsHyperDebugStateActive(pVCpu))
1635 {
1636 CPUMR0LoadHostDebugState(pVM, pVCpu);
1637 Assert(!CPUMIsHyperDebugStateActive(pVCpu));
1638 Assert(((PSVMVMCB)pVCpu->hm.s.svm.pvVmcb)->ctrl.u16InterceptRdDRx == 0xffff);
1639 Assert(((PSVMVMCB)pVCpu->hm.s.svm.pvVmcb)->ctrl.u16InterceptWrDRx == 0xffff);
1640 }
1641
1642 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
1643 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC);
1644}
1645
1646
1647/**
1648 * VMMRZCallRing3() callback wrapper which saves the guest state (or restores
1649 * any remaining host state) before we longjump to ring-3 and possibly get
1650 * preempted.
1651 *
1652 * @param pVCpu Pointer to the VMCPU.
1653 * @param enmOperation The operation causing the ring-3 longjump.
1654 * @param pvUser The user argument (pointer to the possibly
1655 * out-of-date guest-CPU context).
1656 *
1657 * @remarks Must never be called with @a enmOperation ==
1658 * VMMCALLRING3_VM_R0_ASSERTION.
1659 */
1660DECLCALLBACK(void) hmR0SvmCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser)
1661{
1662 /* VMMRZCallRing3() already makes sure we never get called as a result of a longjmp due to an assertion. */
1663 Assert(pVCpu);
1664 Assert(pvUser);
1665 Assert(VMMRZCallRing3IsEnabled(pVCpu));
1666 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1667
1668 VMMRZCallRing3Disable(pVCpu);
1669 Assert(VMMR0IsLogFlushDisabled(pVCpu));
1670 Log4(("hmR0SvmCallRing3Callback->hmR0SvmLongJmpToRing3\n"));
1671 hmR0SvmLongJmpToRing3(pVCpu->CTX_SUFF(pVM), pVCpu, (PCPUMCTX)pvUser, VINF_VMM_UNKNOWN_RING3_CALL);
1672 VMMRZCallRing3Enable(pVCpu);
1673}
1674
1675
1676/**
1677 * An action requires us to go back to ring-3. This function does the necessary
1678 * steps before we can safely return to ring-3. This is not the same as a longjmp
1679 * to ring-3; returning here is voluntary.
1680 *
1681 * @param pVM Pointer to the VM.
1682 * @param pVCpu Pointer to the VMCPU.
1683 * @param pCtx Pointer to the guest-CPU context.
1684 * @param rcExit The reason for exiting to ring-3. Can be
1685 * VINF_VMM_UNKNOWN_RING3_CALL.
1686 */
1687static void hmR0SvmExitToRing3(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, int rcExit)
1688{
1689 Assert(pVM);
1690 Assert(pVCpu);
1691 Assert(pCtx);
1692 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
1693
1694 if (RT_UNLIKELY(rcExit == VERR_SVM_INVALID_GUEST_STATE))
1695 {
1696 /* We don't need to do any syncing here, we're not going to come back to execute anything again. */
1697 return;
1698 }
1699
1700 /* Please, no longjumps here (any logging shouldn't flush and thus jump back to ring-3). NO LOGGING BEFORE THIS POINT! */
1701 VMMRZCallRing3Disable(pVCpu);
1702 Log4(("hmR0SvmExitToRing3: rcExit=%d\n", rcExit));
1703
1704 /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring3. */
1705 if (pVCpu->hm.s.Event.fPending)
1706 {
1707 hmR0SvmPendingEventToTrpmTrap(pVCpu);
1708 Assert(!pVCpu->hm.s.Event.fPending);
1709 }
1710
1711 /* Sync. the guest state. */
1712 hmR0SvmLongJmpToRing3(pVM, pVCpu, pCtx, rcExit);
1713 STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3);
1714
1715 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
1716 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR
1717 | CPUM_CHANGED_LDTR
1718 | CPUM_CHANGED_GDTR
1719 | CPUM_CHANGED_IDTR
1720 | CPUM_CHANGED_TR
1721 | CPUM_CHANGED_HIDDEN_SEL_REGS);
1722
1723 /* On our way back from ring-3 the following needs to be done. */
1724 /** @todo This can change with preemption hooks. */
1725 if (rcExit == VINF_EM_RAW_INTERRUPT)
1726 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT;
1727 else
1728 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT | HM_CHANGED_ALL_GUEST;
1729
1730 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3);
1731 VMMRZCallRing3Enable(pVCpu);
1732}
1733
1734
1735/**
1736 * Sets up the usage of TSC offsetting for the VCPU.
1737 *
1738 * @param pVCpu Pointer to the VMCPU.
1739 *
1740 * @remarks No-long-jump zone!!!
1741 */
1742static void hmR0SvmSetupTscOffsetting(PVMCPU pVCpu)
1743{
1744 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
1745 if (TMCpuTickCanUseRealTSC(pVCpu, &pVmcb->ctrl.u64TSCOffset))
1746 {
1747 uint64_t u64CurTSC = ASMReadTSC();
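 /* Only use offsetting if the guest TSC (host TSC + offset) won't appear to go backwards w.r.t. the last value the guest saw; otherwise intercept RDTSC/RDTSCP. */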
1748 if (u64CurTSC + pVmcb->ctrl.u64TSCOffset > TMCpuTickGetLastSeen(pVCpu))
1749 {
1750 pVmcb->ctrl.u32InterceptCtrl1 &= ~SVM_CTRL1_INTERCEPT_RDTSC;
1751 pVmcb->ctrl.u32InterceptCtrl2 &= ~SVM_CTRL2_INTERCEPT_RDTSCP;
1752 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
1753 }
1754 else
1755 {
1756 pVmcb->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC;
1757 pVmcb->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP;
1758 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
1759 }
1760 }
1761 else
1762 {
1763 pVmcb->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC;
1764 pVmcb->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP;
1765 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
1766 }
1767
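 /* The intercept masks may have changed above; clear the intercepts clean bit so the CPU reloads them from the VMCB. */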
1768 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
1769}
1770
1771
1772/**
1773 * Sets an event as a pending event to be injected into the guest.
1774 *
1775 * @param pVCpu Pointer to the VMCPU.
1776 * @param pEvent Pointer to the SVM event.
1777 * @param GCPtrFaultAddress The fault-address (CR2) in case it's a
1778 * page-fault.
1779 */
1780DECLINLINE(void) hmR0SvmSetPendingEvent(PVMCPU pVCpu, PSVMEVENT pEvent, RTGCUINTPTR GCPtrFaultAddress)
1781{
1782 Assert(!pVCpu->hm.s.Event.fPending);
1783
1784 pVCpu->hm.s.Event.u64IntrInfo = pEvent->u;
1785 pVCpu->hm.s.Event.fPending = true;
1786 pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress;
1787
1788#ifdef VBOX_STRICT
1789 if (GCPtrFaultAddress)
1790 {
1791 AssertMsg( pEvent->n.u8Vector == X86_XCPT_PF
1792 && pEvent->n.u3Type == SVM_EVENT_EXCEPTION,
1793 ("hmR0SvmSetPendingEvent: Setting fault-address for non-#PF. u8Vector=%#x Type=%#RX32 GCPtrFaultAddr=%#RGx\n",
1794 pEvent->n.u8Vector, (uint32_t)pEvent->n.u3Type, GCPtrFaultAddress));
1795 Assert(GCPtrFaultAddress == CPUMGetGuestCR2(pVCpu));
1796 }
1797#endif
1798
1799 Log4(("hmR0SvmSetPendingEvent: u=%#RX64 u8Vector=%#x Type=%#x ErrorCodeValid=%#x ErrorCode=%#RX32\n", pEvent->u,
1800 pEvent->n.u8Vector, pEvent->n.u3Type, (uint8_t)pEvent->n.u1ErrorCodeValid, pEvent->n.u32ErrorCode));
1801}
1802
1803
1804/**
1805 * Injects an event into the guest upon VMRUN by updating the relevant field
1806 * in the VMCB.
1807 *
1808 * @param pVCpu Pointer to the VMCPU.
1809 * @param pVmcb Pointer to the guest VMCB.
1810 * @param pCtx Pointer to the guest-CPU context.
1811 * @param pEvent Pointer to the event.
1812 *
1813 * @remarks No-long-jump zone!!!
1814 * @remarks Requires CR0!
1815 */
1816DECLINLINE(void) hmR0SvmInjectEventVmcb(PVMCPU pVCpu, PSVMVMCB pVmcb, PCPUMCTX pCtx, PSVMEVENT pEvent)
1817{
1818 pVmcb->ctrl.EventInject.u = pEvent->u;
1819 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[pEvent->n.u8Vector & MASK_INJECT_IRQ_STAT]);
1820}
1821
1822
1823/**
1824 * Converts any TRPM trap into a pending SVM event. This is typically used when
1825 * entering from ring-3 (not longjmp returns).
1826 *
1827 * @param pVCpu Pointer to the VMCPU.
1828 */
1829static void hmR0SvmTrpmTrapToPendingEvent(PVMCPU pVCpu)
1830{
1831 Assert(TRPMHasTrap(pVCpu));
1832 Assert(!pVCpu->hm.s.Event.fPending);
1833
1834 uint8_t uVector;
1835 TRPMEVENT enmTrpmEvent;
1836 RTGCUINT uErrCode;
1837 RTGCUINTPTR GCPtrFaultAddress;
1838 uint8_t cbInstr;
1839
1840 int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr);
1841 AssertRC(rc);
1842
1843 PSVMEVENT pEvent = &pVCpu->hm.s.Event;
1844 pEvent->u = 0;
1845 pEvent->n.u1Valid = 1;
1846
1847 /* Refer AMD spec. 15.20 "Event Injection" for the format. */
1848 if (enmTrpmEvent == TRPM_TRAP)
1849 {
1850 pEvent->n.u3Type = SVM_EVENT_EXCEPTION;
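 /* Only the following exception vectors push an error code; mark it valid so AMD-V injects it along with the event. */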
1851 switch (uVector)
1852 {
1853 case X86_XCPT_PF:
1854 case X86_XCPT_DF:
1855 case X86_XCPT_TS:
1856 case X86_XCPT_NP:
1857 case X86_XCPT_SS:
1858 case X86_XCPT_GP:
1859 case X86_XCPT_AC:
1860 {
1861 pEvent->n.u32ErrorCode = uErrCode;
1862 pEvent->n.u1ErrorCodeValid = 1;
1863 break;
1864 }
1865 }
1866 }
1867 else if (enmTrpmEvent == TRPM_HARDWARE_INT)
1868 {
1869 if (uVector == X86_XCPT_NMI)
1870 pEvent->n.u3Type = SVM_EVENT_NMI;
1871 else
1872 pEvent->n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
1873 }
1874 else if (enmTrpmEvent == TRPM_SOFTWARE_INT)
1875 pEvent->n.u3Type = SVM_EVENT_SOFTWARE_INT;
1876 else
1877 AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent));
1878
1879 rc = TRPMResetTrap(pVCpu);
1880 AssertRC(rc);
1881
1882 Log4(("TRPM->HM event: u=%#RX64 u8Vector=%#x uErrorCodeValid=%#x uErrorCode=%#RX32\n", pEvent->u, pEvent->n.u8Vector,
1883 pEvent->n.u1ErrorCodeValid, pEvent->n.u32ErrorCode));
1884}
1885
1886
1887/**
1888 * Converts any pending SVM event into a TRPM trap. Typically used when leaving
1889 * AMD-V to execute any instruction.
1890 *
1891 * @param pVCpu Pointer to the VMCPU.
1892 */
1893static void hmR0SvmPendingEventToTrpmTrap(PVMCPU pVCpu)
1894{
1895 Assert(pVCpu->hm.s.Event.fPending);
1896 Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP);
1897
1898 PSVMEVENT pEvent = &pVCpu->hm.s.Event;
1899 uint8_t uVector = pEvent->n.u8Vector;
1900 uint8_t uVectorType = pEvent->n.u3Type;
1901
1902 TRPMEVENT enmTrapType;
1903 switch (uVectorType)
1904 {
1905 case SVM_EVENT_EXTERNAL_IRQ:
1906 case SVM_EVENT_NMI:
1907 enmTrapType = TRPM_HARDWARE_INT;
1908 break;
1909 case SVM_EVENT_SOFTWARE_INT:
1910 enmTrapType = TRPM_SOFTWARE_INT;
1911 break;
1912 case SVM_EVENT_EXCEPTION:
1913 enmTrapType = TRPM_TRAP;
1914 break;
1915 default:
1916 AssertMsgFailed(("Invalid pending-event type %#x\n", uVectorType));
1917 enmTrapType = TRPM_32BIT_HACK;
1918 break;
1919 }
1920
1921 Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType));
1922
1923 int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType);
1924 AssertRC(rc);
1925
1926 if (pEvent->n.u1ErrorCodeValid)
1927 TRPMSetErrorCode(pVCpu, pEvent->n.u32ErrorCode);
1928
1929 if ( uVectorType == SVM_EVENT_EXCEPTION
1930 && uVector == X86_XCPT_PF)
1931 {
1932 TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress);
1933 Assert(pVCpu->hm.s.Event.GCPtrFaultAddress == CPUMGetGuestCR2(pVCpu));
1934 }
1935 else if (uVectorType == SVM_EVENT_SOFTWARE_INT)
1936 {
1937 AssertMsg( uVectorType == SVM_EVENT_SOFTWARE_INT
1938 || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF),
1939 ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType));
1940 TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr);
1941 }
1942 pVCpu->hm.s.Event.fPending = false;
1943}
1944
1945
1946/**
1947 * Gets the guest's interrupt-shadow.
1948 *
1949 * @returns The guest's interrupt-shadow.
1950 * @param pVCpu Pointer to the VMCPU.
1951 * @param pCtx Pointer to the guest-CPU context.
1952 *
1953 * @remarks No-long-jump zone!!!
1954 * @remarks Has side-effects with VMCPU_FF_INHIBIT_INTERRUPTS force-flag.
1955 */
1956DECLINLINE(uint32_t) hmR0SvmGetGuestIntrShadow(PVMCPU pVCpu, PCPUMCTX pCtx)
1957{
1958 /*
1959 * Instructions like STI and MOV SS inhibit interrupts till the next instruction completes. Check if we should
1960 * inhibit interrupts or clear any existing interrupt-inhibition.
1961 */
1962 uint32_t uIntrState = 0;
1963 if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1964 {
1965 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
1966 {
1967 /*
1968 * We can clear the inhibit force flag as even if we go back to the recompiler without executing guest code in
1969 * AMD-V, the flag's condition to be cleared is met and thus the cleared state is correct.
1970 */
1971 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
1972 }
1973 else
1974 uIntrState = SVM_INTERRUPT_SHADOW_ACTIVE;
1975 }
1976 return uIntrState;
1977}
1978
1979
1980/**
1981 * Sets the virtual interrupt intercept control in the VMCB which
1982 * instructs AMD-V to cause a #VMEXIT as soon as the guest is in a state to
1983 * receive interrupts.
1984 *
1985 * @param pVmcb Pointer to the VMCB.
1986 */
1987DECLINLINE(void) hmR0SvmSetVirtIntrIntercept(PSVMVMCB pVmcb)
1988{
1989 if (!(pVmcb->ctrl.u32InterceptCtrl1 & SVM_CTRL1_INTERCEPT_VINTR))
1990 {
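 /* Flag a pending virtual interrupt (V_IRQ) and intercept VINTR: the CPU will #VMEXIT as soon as the guest can take interrupts (IF=1, no interrupt shadow), at which point we inject the real event. */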
1991 pVmcb->ctrl.IntCtrl.n.u1VIrqValid = 1; /* A virtual interrupt is pending. */
1992 pVmcb->ctrl.IntCtrl.n.u8VIrqVector = 0; /* Not necessary as we #VMEXIT for delivering the interrupt. */
1993 pVmcb->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_VINTR;
1994 pVmcb->ctrl.u64VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_INTERCEPTS | HMSVM_VMCB_CLEAN_TPR);
1995 }
1996}
1997
1998
1999/**
2000 * Injects any pending events into the guest if the guest is in a state to
2001 * receive them.
2002 *
2003 * @param pVCpu Pointer to the VMCPU.
2004 * @param pCtx Pointer to the guest-CPU context.
2005 */
2006static void hmR0SvmInjectPendingEvent(PVMCPU pVCpu, PCPUMCTX pCtx)
2007{
2008 Assert(!TRPMHasTrap(pVCpu));
2009
2010 const bool fIntShadow = !!hmR0SvmGetGuestIntrShadow(pVCpu, pCtx);
2011 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
2012
2013 SVMEVENT Event;
2014 Event.u = 0;
2015 if (pVCpu->hm.s.Event.fPending) /* First, inject any pending HM events. */
2016 {
2017 Event.u = pVCpu->hm.s.Event.u64IntrInfo;
2018 Assert(Event.n.u1Valid);
2019 bool fInject = true;
2020 if ( fIntShadow
2021 && ( Event.n.u3Type == SVM_EVENT_EXTERNAL_IRQ
2022 || Event.n.u3Type == SVM_EVENT_NMI))
2023 {
2024 fInject = false;
2025 }
2026
2027 if (fInject)
2028 {
2029 pVCpu->hm.s.Event.fPending = false;
2030 hmR0SvmInjectEventVmcb(pVCpu, pVmcb, pCtx, &Event);
2031 }
2032 else
2033 hmR0SvmSetVirtIntrIntercept(pVmcb);
2034 } /** @todo SMI. SMIs take priority over NMIs. */
2035 else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts. */
2036 {
2037 if (!fIntShadow)
2038 {
2039 Log4(("Injecting NMI\n"));
2040
2041 Event.n.u1Valid = 1;
2042 Event.n.u8Vector = X86_XCPT_NMI;
2043 Event.n.u3Type = SVM_EVENT_NMI;
2044
2045 hmR0SvmInjectEventVmcb(pVCpu, pVmcb, pCtx, &Event);
2046 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
2047 }
2048 else
2049 hmR0SvmSetVirtIntrIntercept(pVmcb);
2050 }
2051 else if (VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)))
2052 {
2053 /* Check if there are guest external interrupts (PIC/APIC) pending and inject them, if the guest can receive them. */
2054 const bool fBlockInt = !(pCtx->eflags.u32 & X86_EFL_IF);
2055 if ( !fBlockInt
2056 && !fIntShadow)
2057 {
2058 uint8_t u8Interrupt;
2059 int rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
2060 if (RT_SUCCESS(rc))
2061 {
2062 Log4(("Injecting external interrupt u8Interrupt=%#x\n", u8Interrupt));
2063
2064 Event.n.u1Valid = 1;
2065 Event.n.u8Vector = u8Interrupt;
2066 Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
2067
2068 hmR0SvmInjectEventVmcb(pVCpu, pVmcb, pCtx, &Event);
2069 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntInject);
2070 }
2071 else
2072 {
2073 /** @todo Does this actually happen? If not turn it into an assertion. */
2074 Assert(!VMCPU_FF_IS_PENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC)));
2075 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
2076 }
2077 }
2078 else
2079 hmR0SvmSetVirtIntrIntercept(pVmcb);
2080 }
2081
2082 /* Update the guest interrupt shadow in the VMCB. */
2083 pVmcb->ctrl.u64IntShadow = !!fIntShadow;
2084}
2085
2086
2087/**
2088 * Reports world-switch error and dumps some useful debug info.
2089 *
2090 * @param pVM Pointer to the VM.
2091 * @param pVCpu Pointer to the VMCPU.
2092 * @param rcVMRun The return code from VMRUN (or
2093 * VERR_SVM_INVALID_GUEST_STATE for invalid
2094 * guest-state).
2095 * @param pCtx Pointer to the guest-CPU context.
2096 */
2097static void hmR0SvmReportWorldSwitchError(PVM pVM, PVMCPU pVCpu, int rcVMRun, PCPUMCTX pCtx)
2098{
2099 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2100 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
2101
2102 if (rcVMRun == VERR_SVM_INVALID_GUEST_STATE)
2103 {
2104 HMDumpRegs(pVM, pVCpu, pCtx);
2105#ifdef VBOX_STRICT
2106 Log4(("ctrl.u64VmcbCleanBits %#RX64\n", pVmcb->ctrl.u64VmcbCleanBits));
2107 Log4(("ctrl.u16InterceptRdCRx %#x\n", pVmcb->ctrl.u16InterceptRdCRx));
2108 Log4(("ctrl.u16InterceptWrCRx %#x\n", pVmcb->ctrl.u16InterceptWrCRx));
2109 Log4(("ctrl.u16InterceptRdDRx %#x\n", pVmcb->ctrl.u16InterceptRdDRx));
2110 Log4(("ctrl.u16InterceptWrDRx %#x\n", pVmcb->ctrl.u16InterceptWrDRx));
2111 Log4(("ctrl.u32InterceptException %#x\n", pVmcb->ctrl.u32InterceptException));
2112 Log4(("ctrl.u32InterceptCtrl1 %#x\n", pVmcb->ctrl.u32InterceptCtrl1));
2113 Log4(("ctrl.u32InterceptCtrl2 %#x\n", pVmcb->ctrl.u32InterceptCtrl2));
2114 Log4(("ctrl.u64IOPMPhysAddr %#RX64\n", pVmcb->ctrl.u64IOPMPhysAddr));
2115 Log4(("ctrl.u64MSRPMPhysAddr %#RX64\n", pVmcb->ctrl.u64MSRPMPhysAddr));
2116 Log4(("ctrl.u64TSCOffset %#RX64\n", pVmcb->ctrl.u64TSCOffset));
2117
2118 Log4(("ctrl.TLBCtrl.u32ASID %#x\n", pVmcb->ctrl.TLBCtrl.n.u32ASID));
2119 Log4(("ctrl.TLBCtrl.u8TLBFlush %#x\n", pVmcb->ctrl.TLBCtrl.n.u8TLBFlush));
2120 Log4(("ctrl.TLBCtrl.u24Reserved %#x\n", pVmcb->ctrl.TLBCtrl.n.u24Reserved));
2121
2122 Log4(("ctrl.IntCtrl.u8VTPR %#x\n", pVmcb->ctrl.IntCtrl.n.u8VTPR));
2123 Log4(("ctrl.IntCtrl.u1VIrqValid %#x\n", pVmcb->ctrl.IntCtrl.n.u1VIrqValid));
2124 Log4(("ctrl.IntCtrl.u7Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u7Reserved));
2125 Log4(("ctrl.IntCtrl.u4VIrqPriority %#x\n", pVmcb->ctrl.IntCtrl.n.u4VIrqPriority));
2126 Log4(("ctrl.IntCtrl.u1IgnoreTPR %#x\n", pVmcb->ctrl.IntCtrl.n.u1IgnoreTPR));
2127 Log4(("ctrl.IntCtrl.u3Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u3Reserved));
2128 Log4(("ctrl.IntCtrl.u1VIrqMasking %#x\n", pVmcb->ctrl.IntCtrl.n.u1VIrqMasking));
2129 Log4(("ctrl.IntCtrl.u6Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u6Reserved));
2130 Log4(("ctrl.IntCtrl.u8VIrqVector %#x\n", pVmcb->ctrl.IntCtrl.n.u8VIrqVector));
2131 Log4(("ctrl.IntCtrl.u24Reserved %#x\n", pVmcb->ctrl.IntCtrl.n.u24Reserved));
2132
2133 Log4(("ctrl.u64IntShadow %#RX64\n", pVmcb->ctrl.u64IntShadow));
2134 Log4(("ctrl.u64ExitCode %#RX64\n", pVmcb->ctrl.u64ExitCode));
2135 Log4(("ctrl.u64ExitInfo1 %#RX64\n", pVmcb->ctrl.u64ExitInfo1));
2136 Log4(("ctrl.u64ExitInfo2 %#RX64\n", pVmcb->ctrl.u64ExitInfo2));
2137 Log4(("ctrl.ExitIntInfo.u8Vector %#x\n", pVmcb->ctrl.ExitIntInfo.n.u8Vector));
2138 Log4(("ctrl.ExitIntInfo.u3Type %#x\n", pVmcb->ctrl.ExitIntInfo.n.u3Type));
2139 Log4(("ctrl.ExitIntInfo.u1ErrorCodeValid %#x\n", pVmcb->ctrl.ExitIntInfo.n.u1ErrorCodeValid));
2140 Log4(("ctrl.ExitIntInfo.u19Reserved %#x\n", pVmcb->ctrl.ExitIntInfo.n.u19Reserved));
2141 Log4(("ctrl.ExitIntInfo.u1Valid %#x\n", pVmcb->ctrl.ExitIntInfo.n.u1Valid));
2142 Log4(("ctrl.ExitIntInfo.u32ErrorCode %#x\n", pVmcb->ctrl.ExitIntInfo.n.u32ErrorCode));
2143 Log4(("ctrl.NestedPaging %#RX64\n", pVmcb->ctrl.NestedPaging.u));
2144 Log4(("ctrl.EventInject.u8Vector %#x\n", pVmcb->ctrl.EventInject.n.u8Vector));
2145 Log4(("ctrl.EventInject.u3Type %#x\n", pVmcb->ctrl.EventInject.n.u3Type));
2146 Log4(("ctrl.EventInject.u1ErrorCodeValid %#x\n", pVmcb->ctrl.EventInject.n.u1ErrorCodeValid));
2147 Log4(("ctrl.EventInject.u19Reserved %#x\n", pVmcb->ctrl.EventInject.n.u19Reserved));
2148 Log4(("ctrl.EventInject.u1Valid %#x\n", pVmcb->ctrl.EventInject.n.u1Valid));
2149 Log4(("ctrl.EventInject.u32ErrorCode %#x\n", pVmcb->ctrl.EventInject.n.u32ErrorCode));
2150
2151 Log4(("ctrl.u64NestedPagingCR3 %#RX64\n", pVmcb->ctrl.u64NestedPagingCR3));
2152 Log4(("ctrl.u64LBRVirt %#RX64\n", pVmcb->ctrl.u64LBRVirt));
2153
2154 Log4(("guest.CS.u16Sel %RTsel\n", pVmcb->guest.CS.u16Sel));
2155 Log4(("guest.CS.u16Attr %#x\n", pVmcb->guest.CS.u16Attr));
2156 Log4(("guest.CS.u32Limit %#RX32\n", pVmcb->guest.CS.u32Limit));
2157 Log4(("guest.CS.u64Base %#RX64\n", pVmcb->guest.CS.u64Base));
2158 Log4(("guest.DS.u16Sel %#RTsel\n", pVmcb->guest.DS.u16Sel));
2159 Log4(("guest.DS.u16Attr %#x\n", pVmcb->guest.DS.u16Attr));
2160 Log4(("guest.DS.u32Limit %#RX32\n", pVmcb->guest.DS.u32Limit));
2161 Log4(("guest.DS.u64Base %#RX64\n", pVmcb->guest.DS.u64Base));
2162 Log4(("guest.ES.u16Sel %RTsel\n", pVmcb->guest.ES.u16Sel));
2163 Log4(("guest.ES.u16Attr %#x\n", pVmcb->guest.ES.u16Attr));
2164 Log4(("guest.ES.u32Limit %#RX32\n", pVmcb->guest.ES.u32Limit));
2165 Log4(("guest.ES.u64Base %#RX64\n", pVmcb->guest.ES.u64Base));
2166 Log4(("guest.FS.u16Sel %RTsel\n", pVmcb->guest.FS.u16Sel));
2167 Log4(("guest.FS.u16Attr %#x\n", pVmcb->guest.FS.u16Attr));
2168 Log4(("guest.FS.u32Limit %#RX32\n", pVmcb->guest.FS.u32Limit));
2169 Log4(("guest.FS.u64Base %#RX64\n", pVmcb->guest.FS.u64Base));
2170 Log4(("guest.GS.u16Sel %RTsel\n", pVmcb->guest.GS.u16Sel));
2171 Log4(("guest.GS.u16Attr %#x\n", pVmcb->guest.GS.u16Attr));
2172 Log4(("guest.GS.u32Limit %#RX32\n", pVmcb->guest.GS.u32Limit));
2173 Log4(("guest.GS.u64Base %#RX64\n", pVmcb->guest.GS.u64Base));
2174
2175 Log4(("guest.GDTR.u32Limit %#RX32\n", pVmcb->guest.GDTR.u32Limit));
2176 Log4(("guest.GDTR.u64Base %#RX64\n", pVmcb->guest.GDTR.u64Base));
2177
2178 Log4(("guest.LDTR.u16Sel %RTsel\n", pVmcb->guest.LDTR.u16Sel));
2179 Log4(("guest.LDTR.u16Attr %#x\n", pVmcb->guest.LDTR.u16Attr));
2180 Log4(("guest.LDTR.u32Limit %#RX32\n", pVmcb->guest.LDTR.u32Limit));
2181 Log4(("guest.LDTR.u64Base %#RX64\n", pVmcb->guest.LDTR.u64Base));
2182
2183 Log4(("guest.IDTR.u32Limit %#RX32\n", pVmcb->guest.IDTR.u32Limit));
2184 Log4(("guest.IDTR.u64Base %#RX64\n", pVmcb->guest.IDTR.u64Base));
2185
2186 Log4(("guest.TR.u16Sel %RTsel\n", pVmcb->guest.TR.u16Sel));
2187 Log4(("guest.TR.u16Attr %#x\n", pVmcb->guest.TR.u16Attr));
2188 Log4(("guest.TR.u32Limit %#RX32\n", pVmcb->guest.TR.u32Limit));
2189 Log4(("guest.TR.u64Base %#RX64\n", pVmcb->guest.TR.u64Base));
2190
2191 Log4(("guest.u8CPL %#x\n", pVmcb->guest.u8CPL));
2192 Log4(("guest.u64CR0 %#RX64\n", pVmcb->guest.u64CR0));
2193 Log4(("guest.u64CR2 %#RX64\n", pVmcb->guest.u64CR2));
2194 Log4(("guest.u64CR3 %#RX64\n", pVmcb->guest.u64CR3));
2195 Log4(("guest.u64CR4 %#RX64\n", pVmcb->guest.u64CR4));
2196 Log4(("guest.u64DR6 %#RX64\n", pVmcb->guest.u64DR6));
2197 Log4(("guest.u64DR7 %#RX64\n", pVmcb->guest.u64DR7));
2198
2199 Log4(("guest.u64RIP %#RX64\n", pVmcb->guest.u64RIP));
2200 Log4(("guest.u64RSP %#RX64\n", pVmcb->guest.u64RSP));
2201 Log4(("guest.u64RAX %#RX64\n", pVmcb->guest.u64RAX));
2202 Log4(("guest.u64RFlags %#RX64\n", pVmcb->guest.u64RFlags));
2203
2204 Log4(("guest.u64SysEnterCS %#RX64\n", pVmcb->guest.u64SysEnterCS));
2205 Log4(("guest.u64SysEnterEIP %#RX64\n", pVmcb->guest.u64SysEnterEIP));
2206 Log4(("guest.u64SysEnterESP %#RX64\n", pVmcb->guest.u64SysEnterESP));
2207
2208 Log4(("guest.u64EFER %#RX64\n", pVmcb->guest.u64EFER));
2209 Log4(("guest.u64STAR %#RX64\n", pVmcb->guest.u64STAR));
2210 Log4(("guest.u64LSTAR %#RX64\n", pVmcb->guest.u64LSTAR));
2211 Log4(("guest.u64CSTAR %#RX64\n", pVmcb->guest.u64CSTAR));
2212 Log4(("guest.u64SFMASK %#RX64\n", pVmcb->guest.u64SFMASK));
2213 Log4(("guest.u64KernelGSBase %#RX64\n", pVmcb->guest.u64KernelGSBase));
2214 Log4(("guest.u64GPAT %#RX64\n", pVmcb->guest.u64GPAT));
2215 Log4(("guest.u64DBGCTL %#RX64\n", pVmcb->guest.u64DBGCTL));
2216 Log4(("guest.u64BR_FROM %#RX64\n", pVmcb->guest.u64BR_FROM));
2217 Log4(("guest.u64BR_TO %#RX64\n", pVmcb->guest.u64BR_TO));
2218 Log4(("guest.u64LASTEXCPFROM %#RX64\n", pVmcb->guest.u64LASTEXCPFROM));
2219 Log4(("guest.u64LASTEXCPTO %#RX64\n", pVmcb->guest.u64LASTEXCPTO));
2220#endif
2221 }
2222 else
2223 Log4(("hmR0SvmReportWorldSwitchError: rcVMRun=%d\n", rcVMRun));
2224}
2225
2226
2227/**
2228 * Check per-VM and per-VCPU force flag actions that require us to go back to
2229 * ring-3 for one reason or another.
2230 *
2231 * @returns VBox status code (information status code included).
2232 * @retval VINF_SUCCESS if we don't have any actions that require going back to
2233 * ring-3.
2234 * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync.
2235 * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware
2236 * interrupts)
2237 * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires
2238 * all EMTs to be in ring-3.
2239 * @retval VINF_EM_RAW_TO_R3 if there is pending DMA requests.
2240 * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return
2241 * to the EM loop.
2242 *
2243 * @param pVM Pointer to the VM.
2244 * @param pVCpu Pointer to the VMCPU.
2245 * @param pCtx Pointer to the guest-CPU context.
2246 */
2247static int hmR0SvmCheckForceFlags(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2248{
2249 Assert(VMMRZCallRing3IsEnabled(pVCpu));
2250 int rc = VINF_SUCCESS;
2251 if ( VM_FF_IS_PENDING(pVM, VM_FF_HM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
2252 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
2253 | VMCPU_FF_REQUEST | VMCPU_FF_HM_UPDATE_CR3))
2254 {
2255 /* Pending HM CR3 sync. No PAE PDPEs (VMCPU_FF_HM_UPDATE_PAE_PDPES) on AMD-V. */
2256 if (VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3))
2257 {
2258 rc = PGMUpdateCR3(pVCpu, pCtx->cr3);
2259 Assert(rc == VINF_SUCCESS || rc == VINF_PGM_SYNC_CR3);
2260 Assert(!VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_UPDATE_CR3));
2261 }
2262
2263 /* Pending PGM CR3 sync. */
2264 if (VMCPU_FF_IS_PENDING(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
2265 {
2266 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
2267 if (rc != VINF_SUCCESS)
2268 {
2269 AssertRC(rc);
2270 Log4(("hmR0SvmCheckForceFlags: PGMSyncCR3 forcing us back to ring-3. rc=%d\n", rc));
2271 return rc;
2272 }
2273 }
2274
2275 /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */
2276 /* -XXX- what was that about single stepping? */
2277 if ( VM_FF_IS_PENDING(pVM, VM_FF_HM_TO_R3_MASK)
2278 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
2279 {
2280 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF);
2281 rc = RT_UNLIKELY(VM_FF_IS_PENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
2282 Log4(("hmR0SvmCheckForceFlags: HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc));
2283 return rc;
2284 }
2285
2286 /* Pending VM request packets, such as hardware interrupts. */
2287 if ( VM_FF_IS_PENDING(pVM, VM_FF_REQUEST)
2288 || VMCPU_FF_IS_PENDING(pVCpu, VMCPU_FF_REQUEST))
2289 {
2290 Log4(("hmR0SvmCheckForceFlags: Pending VM request forcing us back to ring-3\n"));
2291 return VINF_EM_PENDING_REQUEST;
2292 }
2293
2294 /* Pending PGM pool flushes. */
2295 if (VM_FF_IS_PENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
2296 {
2297 Log4(("hmR0SvmCheckForceFlags: PGM pool flush pending forcing us back to ring-3\n"));
2298 return VINF_PGM_POOL_FLUSH_PENDING;
2299 }
2300
2301 /* Pending DMA requests. */
2302 if (VM_FF_IS_PENDING(pVM, VM_FF_PDM_DMA))
2303 {
2304 Log4(("hmR0SvmCheckForceFlags: Pending DMA request forcing us back to ring-3\n"));
2305 return VINF_EM_RAW_TO_R3;
2306 }
2307 }
2308
2309 /* Paranoia. */
2310 Assert(rc != VERR_EM_INTERPRETER);
2311 return VINF_SUCCESS;
2312}
2313
2314
2315/**
2316 * Does the preparations before executing guest code in AMD-V.
2317 *
2318 * This may cause longjmps to ring-3 and may even result in rescheduling to the
2319 * recompiler. We must be cautious what we do here regarding committing
2320 * guest-state information into the VMCB assuming we assuredly execute the
2321 * guest in AMD-V. If we fall back to the recompiler after updating the VMCB and
2322 * clearing the common-state (TRPM/forceflags), we must undo those changes so
2323 * that the recompiler can (and should) use them when it resumes guest
2324 * execution. Otherwise such operations must be done when we can no longer
2325 * exit to ring-3.
2326 *
2327 * @returns VBox status code (informational status codes included).
2328 * @retval VINF_SUCCESS if we can proceed with running the guest.
2329 * @retval VINF_* scheduling changes, we have to go back to ring-3.
2330 *
2331 * @param pVCpu Pointer to the VMCPU.
2332 * @param pCtx Pointer to the guest-CPU context.
2333 * @param pSvmTransient Pointer to the SVM transient structure.
2334 */
2335 DECLINLINE(int) hmR0SvmPreRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
2336{
2337 /* Check force flag actions that might require us to go back to ring-3. */
2338 int rc = hmR0SvmCheckForceFlags(pVM, pVCpu, pCtx);
2339 if (rc != VINF_SUCCESS)
2340 return rc;
2341
2342#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2343 /* We disable interrupts so that we don't miss any interrupts that would flag preemption (IPI/timers etc.) */
2344 pSvmTransient->uEFlags = ASMIntDisableFlags();
2345 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
2346 {
2347 ASMSetFlags(pSvmTransient->uEFlags);
2348 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
2349 /* Don't use VINF_EM_RAW_INTERRUPT_HYPER as we can't assume the host does kernel preemption. Maybe some day? */
2350 return VINF_EM_RAW_INTERRUPT;
2351 }
2352 VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
2353 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2354#endif
2355
2356 /* Convert any pending TRPM traps to HM events for injection. */
2357 /** @todo Optimization: move this before disabling interrupts, restore state
2358 * using pVmcb->ctrl.EventInject.u. */
2359 if (TRPMHasTrap(pVCpu))
2360 hmR0SvmTrpmTrapToPendingEvent(pVCpu);
2361
2362 hmR0SvmInjectPendingEvent(pVCpu, pCtx);
2363
2364 return VINF_SUCCESS;
2365}
2366
2367
2368/**
2369 * Prepares to run guest code in VT-x and we've committed to doing so. This
2370 * means there is no backing out to ring-3 or anywhere else at this
2371 * point.
2372 *
2373 * @param pVM Pointer to the VM.
2374 * @param pVCpu Pointer to the VMCPU.
2375 * @param pCtx Pointer to the guest-CPU context.
2376 * @param pSvmTransient Pointer to the SVM transient structure.
2377 *
2378 * @remarks Called with preemption disabled.
2379 * @remarks No-long-jump zone!!!
2380 */
2381DECLINLINE(void) hmR0SvmPreRunGuestCommitted(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
2382{
2383 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
2384 Assert(VMMR0IsLogFlushDisabled(pVCpu));
2385 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
2386#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2387 /** @todo I don't see the point of this, VMMR0EntryFast() already disables interrupts for the entire period. */
2388 pSvmTransient->uEFlags = ASMIntDisableFlags();
2389 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
2390#endif
2391
2392 /*
2393 * Re-enable nested paging (automatically disabled on every VM-exit). See AMD spec. 15.25.3 "Enabling Nested Paging".
2394 * We avoid changing the corresponding VMCB Clean Bit as we're not changing it to a different value since the previous run.
2395 */
2396 /** @todo The above assumption could be wrong. It's not documented what
2397 * should be done wrt to the VMCB Clean Bit, but we'll find out the
2398 * hard way. */
2399 pVmcb->ctrl.NestedPaging.n.u1NestedPaging = pVM->hm.s.fNestedPaging;
2400
2401 /* Load the guest state. */
2402 int rc = SVMR0LoadGuestState(pVM, pVCpu, pCtx);
2403 AssertRC(rc);
2404 AssertMsg(!pVCpu->hm.s.fContextUseFlags, ("fContextUseFlags =%#x\n", pVCpu->hm.s.fContextUseFlags));
2405 STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadFull);
2406
2407 /*
2408 * If we're not intercepting TPR changes in the guest, save the guest TPR before the world-switch
2409 * so we can update it on the way back if the guest changed the TPR.
2410 */
2411 if (pVCpu->hm.s.svm.fSyncVTpr)
2412 {
2413 if (pVM->hm.s.fTPRPatchingActive)
2414 pSvmTransient->u8GuestTpr = pCtx->msrLSTAR;
2415 else
2416 pSvmTransient->u8GuestTpr = pVmcb->ctrl.IntCtrl.n.u8VTPR;
2417 }
2418
2419 /* Flush the appropriate tagged-TLB entries. */
2420 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB-shootdowns, set this across the world switch. */
2421 hmR0SvmFlushTaggedTlb(pVCpu);
2422 Assert(HMR0GetCurrentCpu()->idCpu == pVCpu->hm.s.idLastCpu);
2423
2424 TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about
2425 to start executing. */
2426
2427 /*
2428 * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that
2429 * RDTSCPs (that don't cause exits) read the guest MSR. See @bugref{3324}.
2430 *
2431 * This should be done -after- any RDTSCPs for obtaining the host timestamp (TM, STAM etc).
2432 */
2433 if ( (pVM->hm.s.cpuid.u32AMDFeatureEDX & X86_CPUID_EXT_FEATURE_EDX_RDTSCP)
2434 && !(pVmcb->ctrl.u32InterceptCtrl2 & SVM_CTRL2_INTERCEPT_RDTSCP))
2435 {
2436 pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX);
2437 uint64_t u64GuestTscAux = 0;
2438 int rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTscAux);
2439 AssertRC(rc2);
2440 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTscAux);
2441 }
2442}
2443
2444
2445/**
2446 * Wrapper for running the guest code in AMD-V.
2447 *
2448 * @returns VBox strict status code.
2449 * @param pVM Pointer to the VM.
2450 * @param pVCpu Pointer to the VMCPU.
2451 * @param pCtx Pointer to the guest-CPU context.
2452 *
2453 * @remarks No-long-jump zone!!!
2454 */
2455DECLINLINE(int) hmR0SvmRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2456{
2457 /*
2458 * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses floating-point operations
2459 * using SSE instructions. Some XMM registers (XMM6-XMM15) are callee-saved and thus the need for this XMM wrapper.
2460 * Refer MSDN docs. "Configuring Programs for 64-bit / x64 Software Conventions / Register Usage" for details.
2461 */
2462#ifdef VBOX_WITH_KERNEL_USING_XMM
2463 return HMR0SVMRunWrapXMM(pVCpu->hm.s.svm.HCPhysVmcbHost, pVCpu->hm.s.svm.HCPhysVmcb, pCtx, pVM, pVCpu,
2464 pVCpu->hm.s.svm.pfnVMRun);
2465#else
2466 return pVCpu->hm.s.svm.pfnVMRun(pVCpu->hm.s.svm.HCPhysVmcbHost, pVCpu->hm.s.svm.HCPhysVmcb, pCtx, pVM, pVCpu);
2467#endif
2468}
2469
2470
2471/**
2472 * Performs some essential restoration of state after running guest code in
2473 * AMD-V.
2474 *
2475 * @param pVM Pointer to the VM.
2476 * @param pVCpu Pointer to the VMCPU.
2477 * @param pMixedCtx Pointer to the guest-CPU context. The data may be
2478 * out-of-sync. Make sure to update the required fields
2479 * before using them.
2480 * @param pSvmTransient Pointer to the SVM transient structure.
2481 * @param rcVMRun Return code of VMRUN.
2482 *
2483 * @remarks Called with interrupts disabled.
2484 * @remarks No-long-jump zone!!! This function will however re-enable longjmps
2485 * unconditionally when it is safe to do so.
2486 */
2487 DECLINLINE(void) hmR0SvmPostRunGuest(PVM pVM, PVMCPU pVCpu, PCPUMCTX pMixedCtx, PSVMTRANSIENT pSvmTransient, int rcVMRun)
2488{
2489 Assert(!VMMRZCallRing3IsEnabled(pVCpu));
2490
2491 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB-shootdowns. */
2492 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for TLB-shootdowns. */
2493
2494 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
2495 pVmcb->ctrl.u64VmcbCleanBits = HMSVM_VMCB_CLEAN_ALL; /* Mark the VMCB-state cache as unmodified by VMM. */
2496
2497 /* Restore host's TSC_AUX if required. */
2498 if (!(pVmcb->ctrl.u32InterceptCtrl1 & SVM_CTRL1_INTERCEPT_RDTSC))
2499 {
2500 if (pVM->hm.s.cpuid.u32AMDFeatureEDX & X86_CPUID_EXT_FEATURE_EDX_RDTSCP)
2501 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux);
2502
2503 /** @todo Find a way to fix hardcoding a guestimate. */
2504 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() +
2505 pVmcb->ctrl.u64TSCOffset - 0x400);
2506 }
2507
2508 TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */
2509 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM);
2510
2511 Assert(!(ASMGetFlags() & X86_EFL_IF));
2512 ASMSetFlags(pSvmTransient->uEFlags); /* Enable interrupts. */
2513
2514 VMMRZCallRing3SetNotification(pVCpu, hmR0SvmCallRing3Callback, pMixedCtx);
2515 VMMRZCallRing3Enable(pVCpu); /* It is now safe to do longjmps to ring-3!!! */
2516
2517 /* If VMRUN failed, we can bail out early. This does -not- cover SVM_EXIT_INVALID. */
2518 if (RT_UNLIKELY(rcVMRun != VINF_SUCCESS))
2519 {
2520 Log4(("VMRUN failure: rcVMRun=%Rrc\n", rcVMRun));
2521 return;
2522 }
2523
2524 pSvmTransient->u64ExitCode = pVmcb->ctrl.u64ExitCode;
2525 hmR0SvmSaveGuestState(pVCpu, pMixedCtx); /* Save the guest state from the VMCB to the guest-CPU context. */
2526
2527 if (RT_LIKELY(pSvmTransient->u64ExitCode != SVM_EXIT_INVALID))
2528 {
2529 if (pVCpu->hm.s.svm.fSyncVTpr)
2530 {
2531 /* TPR patching (for 32-bit guests) uses LSTAR MSR for holding the TPR value, otherwise uses the VTPR. */
2532 if ( pVM->hm.s.fTPRPatchingActive
2533 && (pMixedCtx->msrLSTAR & 0xff) != pSvmTransient->u8GuestTpr)
2534 {
2535 int rc = PDMApicSetTPR(pVCpu, pMixedCtx->msrLSTAR & 0xff);
2536 AssertRC(rc);
2537 }
2538 else if ((uint8_t)(pSvmTransient->u8GuestTpr >> 4) != pVmcb->ctrl.IntCtrl.n.u8VTPR)
2539 {
2540 int rc = PDMApicSetTPR(pVCpu, (pVmcb->ctrl.IntCtrl.n.u8VTPR << 4));
2541 AssertRC(rc);
2542 }
2543 }
2544
2545 /* -XXX- premature interruption during event injection */
2546 }
2547}
2548
2549
2550/**
2551 * Runs the guest code using AMD-V.
2552 *
2553 * @returns VBox status code.
2554 * @param pVM Pointer to the VM.
2555 * @param pVCpu Pointer to the VMCPU.
2556 * @param pCtx Pointer to the guest-CPU context.
2557 */
2558VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2559{
2560 Assert(VMMRZCallRing3IsEnabled(pVCpu));
2561 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2562
2563 SVMTRANSIENT SvmTransient;
2564 uint32_t cLoops = 0;
2565 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
2566 int rc = VERR_INTERNAL_ERROR_5;
2567
2568 for (;; cLoops++)
2569 {
2570 Assert(!HMR0SuspendPending());
2571 AssertMsg(pVCpu->hm.s.idEnteredCpu == RTMpCpuId(),
2572 ("Illegal migration! Entered on CPU %u Current %u cLoops=%u\n", (unsigned)pVCpu->hm.s.idEnteredCpu,
2573 (unsigned)RTMpCpuId(), cLoops));
2574
2575 /* Preparatory work for running guest code, this may return to ring-3 for some last minute updates. */
2576 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
2577 rc = hmR0SvmPreRunGuest(pVM, pVCpu, pCtx, &SvmTransient);
2578 if (rc != VINF_SUCCESS)
2579 break;
2580
2581 /*
2582 * No longjmps to ring-3 from this point on!!!
2583 * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic.
2584 * This also disables flushing of the R0-logger instance (if any).
2585 */
2586 VMMRZCallRing3Disable(pVCpu);
2587 VMMRZCallRing3RemoveNotification(pVCpu);
2588 hmR0SvmPreRunGuestCommitted(pVM, pVCpu, pCtx, &SvmTransient);
2589
2590 rc = hmR0SvmRunGuest(pVM, pVCpu, pCtx);
2591
2592 /*
2593 * Restore any residual host-state and save any bits shared between host and guest into the guest-CPU state.
2594 * This will also re-enable longjmps to ring-3 when it has reached a safe point!!!
2595 */
2596 hmR0SvmPostRunGuest(pVM, pVCpu, pCtx, &SvmTransient, rc);
2597 if (RT_UNLIKELY( rc != VINF_SUCCESS /* Check for errors with running the VM (VMRUN). */
2598 || SvmTransient.u64ExitCode == SVM_EXIT_INVALID)) /* Check for errors due to invalid guest state. */
2599 {
2600 if (rc == VINF_SUCCESS)
2601 rc = VERR_SVM_INVALID_GUEST_STATE;
2602 hmR0SvmReportWorldSwitchError(pVM, pVCpu, rc, pCtx);
2603 return rc;
2604 }
2605
2606 /* Handle the #VMEXIT. */
2607 AssertMsg(SvmTransient.u64ExitCode != SVM_EXIT_INVALID, ("%#x\n", SvmTransient.u64ExitCode));
2608 HMSVM_EXITCODE_STAM_COUNTER_INC(SvmTransient.u64ExitCode);
2609 rc = hmR0SvmHandleExit(pVCpu, pCtx, &SvmTransient);
2610 if (rc != VINF_SUCCESS)
2611 break;
2612 else if (cLoops > pVM->hm.s.cMaxResumeLoops)
2613 {
2614 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
2615 rc = VINF_EM_RAW_INTERRUPT;
2616 break;
2617 }
2618 }
2619
2620 if (rc == VERR_EM_INTERPRETER)
2621 rc = VINF_EM_RAW_EMULATE_INSTR;
2622 else if (rc == VINF_EM_RESET)
2623 rc = VINF_EM_TRIPLE_FAULT;
2624 hmR0SvmExitToRing3(pVM, pVCpu, pCtx, rc);
2625 return rc;
2626}
2627
2628
2629/**
2630 * Handles a #VMEXIT (for all EXITCODE values except SVM_EXIT_INVALID).
2631 *
2632 * @returns VBox status code (informational status codes included).
2633 * @param pVCpu Pointer to the VMCPU.
2634 * @param pCtx Pointer to the guest-CPU context.
2635 * @param pSvmTransient Pointer to the SVM transient structure.
2636 */
2637DECLINLINE(int) hmR0SvmHandleExit(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
2638{
2639 Assert(pSvmTransient->u64ExitCode > 0);
2640 Assert(pSvmTransient->u64ExitCode <= SVM_EXIT_MAX);
2641
2642 /*
2643 * The ordering of the case labels is based on most-frequently-occurring VM-exits for most guests under
2644 * normal workloads (for some definition of "normal").
2645 */
2646 uint32_t u32ExitCode = pSvmTransient->u64ExitCode;
2647 switch (pSvmTransient->u64ExitCode)
2648 {
2649 case SVM_EXIT_NPF:
2650 return hmR0SvmExitNestedPF(pVCpu, pCtx, pSvmTransient);
2651
2652 case SVM_EXIT_IOIO:
2653 return hmR0SvmExitIOInstr(pVCpu, pCtx, pSvmTransient);
2654
2655 case SVM_EXIT_RDTSC:
2656 return hmR0SvmExitRdtsc(pVCpu, pCtx, pSvmTransient);
2657
2658 case SVM_EXIT_RDTSCP:
2659 return hmR0SvmExitRdtscp(pVCpu, pCtx, pSvmTransient);
2660
2661 case SVM_EXIT_CPUID:
2662 return hmR0SvmExitCpuid(pVCpu, pCtx, pSvmTransient);
2663
2664 case SVM_EXIT_EXCEPTION_E: /* X86_XCPT_PF */
2665 return hmR0SvmExitXcptPF(pVCpu, pCtx, pSvmTransient);
2666
2667 case SVM_EXIT_EXCEPTION_7: /* X86_XCPT_NM */
2668 return hmR0SvmExitXcptNM(pVCpu, pCtx, pSvmTransient);
2669
2670 case SVM_EXIT_EXCEPTION_10: /* X86_XCPT_MF */
2671 return hmR0SvmExitXcptMF(pVCpu, pCtx, pSvmTransient);
2672
2673 case SVM_EXIT_EXCEPTION_1: /* X86_XCPT_DB */
2674 return hmR0SvmExitXcptDB(pVCpu, pCtx, pSvmTransient);
2675
2676 case SVM_EXIT_MONITOR:
2677 return hmR0SvmExitMonitor(pVCpu, pCtx, pSvmTransient);
2678
2679 case SVM_EXIT_MWAIT:
2680 return hmR0SvmExitMwait(pVCpu, pCtx, pSvmTransient);
2681
2682 case SVM_EXIT_READ_CR0:
2683 case SVM_EXIT_READ_CR3:
2684 case SVM_EXIT_READ_CR4:
2685 return hmR0SvmExitReadCRx(pVCpu, pCtx, pSvmTransient);
2686
2687 case SVM_EXIT_WRITE_CR0:
2688 case SVM_EXIT_WRITE_CR3:
2689 case SVM_EXIT_WRITE_CR4:
2690 case SVM_EXIT_WRITE_CR8:
2691 return hmR0SvmExitWriteCRx(pVCpu, pCtx, pSvmTransient);
2692
2693 case SVM_EXIT_VINTR:
2694 return hmR0SvmExitVIntr(pVCpu, pCtx, pSvmTransient);
2695
2696 case SVM_EXIT_INTR:
2697 case SVM_EXIT_FERR_FREEZE:
2698 case SVM_EXIT_NMI:
2699 case SVM_EXIT_INIT:
2700 return hmR0SvmExitIntr(pVCpu, pCtx, pSvmTransient);
2701
2702 case SVM_EXIT_MSR:
2703 return hmR0SvmExitMsr(pVCpu, pCtx, pSvmTransient);
2704
2705 case SVM_EXIT_INVLPG:
2706 return hmR0SvmExitInvlpg(pVCpu, pCtx, pSvmTransient);
2707
2708 case SVM_EXIT_WBINVD:
2709 return hmR0SvmExitWbinvd(pVCpu, pCtx, pSvmTransient);
2710
2711 case SVM_EXIT_INVD:
2712 return hmR0SvmExitInvd(pVCpu, pCtx, pSvmTransient);
2713
2714 case SVM_EXIT_RDPMC:
2715 return hmR0SvmExitRdpmc(pVCpu, pCtx, pSvmTransient);
2716
2719 case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3:
2720 case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7: case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9:
2721 case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11: case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13:
2722 case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15:
2723 return hmR0SvmExitReadDRx(pVCpu, pCtx, pSvmTransient);
2724
2725 case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3:
2726 case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7: case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9:
2727 case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11: case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13:
2728 case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15:
2729 return hmR0SvmExitWriteDRx(pVCpu, pCtx, pSvmTransient);
2730
2731 case SVM_EXIT_TASK_SWITCH:
2732 return hmR0SvmExitTaskSwitch(pVCpu, pCtx, pSvmTransient);
2733
2734 case SVM_EXIT_VMMCALL:
2735 return hmR0SvmExitVmmCall(pVCpu, pCtx, pSvmTransient);
2736
2737 case SVM_EXIT_INVLPGA:
2738 case SVM_EXIT_RSM:
2739 case SVM_EXIT_VMRUN:
2740 case SVM_EXIT_VMLOAD:
2741 case SVM_EXIT_VMSAVE:
2742 case SVM_EXIT_STGI:
2743 case SVM_EXIT_CLGI:
2744 case SVM_EXIT_SKINIT:
2745 return hmR0SvmExitSetPendingXcptUD(pVCpu, pCtx, pSvmTransient);
2746
2747#ifdef HMSVM_ALWAYS_TRAP_ALL_XCPTS
2748 case SVM_EXIT_EXCEPTION_0: /* X86_XCPT_DE */
2749 case SVM_EXIT_EXCEPTION_3: /* X86_XCPT_BP */
2750 case SVM_EXIT_EXCEPTION_6: /* X86_XCPT_UD */
2751 case SVM_EXIT_EXCEPTION_B: /* X86_XCPT_NP */
2752 case SVM_EXIT_EXCEPTION_C: /* X86_XCPT_SS */
2753 case SVM_EXIT_EXCEPTION_D: /* X86_XCPT_GP */
2754 return hmR0SvmExitXcptGeneric(pVCpu, pCtx, pSvmTransient); /** @todo Assumes a generic hmR0SvmExitXcptGeneric() catch-all handler exists in this file; adjust if it is named differently. */
2755#endif
2756
2757 default:
2758 {
2759 AssertMsgFailed(("hmR0SvmHandleExit: Unexpected exit code %#x\n", u32ExitCode));
2760 return VERR_SVM_UNEXPECTED_EXIT;
2761 }
2762 }
2764 return VERR_INTERNAL_ERROR_5; /* Should never happen. */
2765}
2766
2767
2768#ifdef DEBUG
2769/* Is there some generic IPRT define for this that are not in Runtime/internal/\* ?? */
2770# define HMSVM_ASSERT_PREEMPT_CPUID_VAR() \
2771 RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId()
2772
2773# define HMSVM_ASSERT_PREEMPT_CPUID() \
2774 do \
2775 { \
2776 RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \
2777 AssertMsg(idAssertCpu == idAssertCpuNow, ("SVM %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \
2778 } while (0)
2779
2780# define HMSVM_VALIDATE_EXIT_HANDLER_PARAMS() \
2781 do { \
2782 AssertPtr(pVCpu); \
2783 AssertPtr(pMixedCtx); \
2784 AssertPtr(pSvmTransient); \
2785 Assert(ASMIntAreEnabled()); \
2786 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); \
2787 HMSVM_ASSERT_PREEMPT_CPUID_VAR(); \
2788 Log4Func(("vcpu[%u] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-\n", (uint32_t)pVCpu->idCpu)); \
2789 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); \
2790 if (VMMR0IsLogFlushDisabled(pVCpu)) \
2791 HMSVM_ASSERT_PREEMPT_CPUID(); \
2792 HMSVM_STOP_EXIT_DISPATCH_PROF(); \
2793 } while (0)
2794#else /* Release builds */
2795# define HMSVM_VALIDATE_EXIT_HANDLER_PARAMS() do { } while(0)
2796#endif
2797
2798
2799/**
2800 * Worker for hmR0SvmInterpretInvlpg().
2801 *
2802 * @return VBox status code.
2803 * @param pVCpu Pointer to the VMCPU.
2804 * @param pCpu Pointer to the disassembler state.
2805 * @param pRegFrame Pointer to the register frame.
2806 */
2807static int hmR0SvmInterpretInvlPgEx(PVMCPU pVCpu, PDISCPUSTATE pCpu, PCPUMCTXCORE pRegFrame)
2808{
2809 DISQPVPARAMVAL Param1;
2810 RTGCPTR GCPtrPage;
2811
2812 int rc = DISQueryParamVal(pRegFrame, pCpu, &pCpu->Param1, &Param1, DISQPVWHICH_SRC);
2813 if (RT_FAILURE(rc))
2814 return VERR_EM_INTERPRETER;
2815
2816 if ( Param1.type == DISQPV_TYPE_IMMEDIATE
2817 || Param1.type == DISQPV_TYPE_ADDRESS)
2818 {
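 /* Only 32/64-bit operands are handled here; anything else falls back to the interpreter. */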
2819 if (!(Param1.flags & (DISQPV_FLAG_32 | DISQPV_FLAG_64)))
2820 return VERR_EM_INTERPRETER;
2821
2822 GCPtrPage = Param1.val.val64;
2823 rc = EMInterpretInvlpg(pVCpu->CTX_SUFF(pVM), pVCpu, pRegFrame, GCPtrPage);
2824 }
2825 else
2826 {
2827 Log4(("hmR0SvmInterpretInvlPgEx invalid parameter type %#x\n", Param1.type));
2828 rc = VERR_EM_INTERPRETER;
2829 }
2830
2831 return rc;
2832}
2833
2834
2835/**
2836 * Interprets INVLPG.
2837 *
2838 * @returns VBox status code.
2839 * @retval VINF_* Scheduling instructions.
2840 * @retval VERR_EM_INTERPRETER Something we can't cope with.
2841 * @retval VERR_* Fatal errors.
2842 *
2843 * @param pVM Pointer to the VM.
2844 * @param pRegFrame Pointer to the register frame.
2845 *
2846 * @remarks Updates the RIP if the instruction was executed successfully.
2847 */
2848static int hmR0SvmInterpretInvlpg(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame)
2849{
2850 /* Only allow 32 & 64 bit code. */
2851 if (CPUMGetGuestCodeBits(pVCpu) != 16)
2852 {
2853 PDISSTATE pDis = &pVCpu->hm.s.DisState;
2854 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL /* pcbInstr */);
2855 if ( RT_SUCCESS(rc)
2856 && pDis->pCurInstr->uOpcode == OP_INVLPG)
2857 {
2858 rc = hmR0SvmInterpretInvlPgEx(pVCpu, pDis, pRegFrame);
2859 if (RT_SUCCESS(rc))
2860 pRegFrame->rip += pDis->cbInstr;
2861 return rc;
2862 }
2863 else
2864 Log4(("hmR0SvmInterpretInvlpg: EMInterpretDisasCurrent returned %Rrc uOpCode=%#x\n", rc, pDis->pCurInstr->uOpcode));
2865 }
2866 return VERR_EM_INTERPRETER;
2867}
2868
2869
2870/**
2871 * Sets an invalid-opcode (#UD) exception as pending-for-injection into the VM.
2872 *
2873 * @param pVCpu Pointer to the VMCPU.
2874 */
2875DECLINLINE(void) hmR0SvmSetPendingXcptUD(PVMCPU pVCpu)
2876{
2877 SVMEVENT Event;
2878 Event.u = 0;
2879 Event.n.u1Valid = 1;
2880 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2881 Event.n.u8Vector = X86_XCPT_UD;
2882 hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
2883}
2884
2885
2886/**
2887 * Sets a debug (#DB) exception as pending-for-injection into the VM.
2888 *
2889 * @param pVCpu Pointer to the VMCPU.
2890 */
2891DECLINLINE(void) hmR0SvmSetPendingXcptDB(PVMCPU pVCpu)
2892{
2893 SVMEVENT Event;
2894 Event.u = 0;
2895 Event.n.u1Valid = 1;
2896 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2897 Event.n.u8Vector = X86_XCPT_DB;
2898 hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
2899}
2900
2901
2902/**
2903 * Sets a page fault (#PF) exception as pending-for-injection into the VM.
2904 *
2905 * @param pVCpu Pointer to the VMCPU.
2906 * @param pCtx Pointer to the guest-CPU context.
2907 * @param u32ErrCode The error-code for the page-fault.
2908 * @param uFaultAddress The page fault address (CR2).
2909 *
2910 * @remarks This updates the guest CR2 with @a uFaultAddress!
2911 */
2912DECLINLINE(void) hmR0SvmSetPendingXcptPF(PVMCPU pVCpu, PCPUMCTX pCtx, uint32_t u32ErrCode, RTGCUINTPTR uFaultAddress)
2913{
2914 SVMEVENT Event;
2915 Event.u = 0;
2916 Event.n.u1Valid = 1;
2917 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2918 Event.n.u8Vector = X86_XCPT_PF;
2919 Event.n.u1ErrorCodeValid = 1;
2920 Event.n.u32ErrorCode = u32ErrCode;
2921
2922 /* Update CR2 of the guest. */
2923 pCtx->cr2 = uFaultAddress;
2924
2925 hmR0SvmSetPendingEvent(pVCpu, &Event, uFaultAddress);
2926 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
2927}
2928
2929
2930/**
2931 * Sets a device-not-available (#NM) exception as pending-for-injection into the
2932 * VM.
2933 *
2934 * @param pVCpu Pointer to the VMCPU.
2935 */
2936DECLINLINE(void) hmR0SvmSetPendingXcptNM(PVMCPU pVCpu)
2937{
2938 SVMEVENT Event;
2939 Event.u = 0;
2940 Event.n.u1Valid = 1;
2941 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2942 Event.n.u8Vector = X86_XCPT_NM;
2943 hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
2944}
2945
2946
2947/**
2948 * Sets a math-fault (#MF) exception as pending-for-injection into the VM.
2949 *
2950 * @param pVCpu Pointer to the VMCPU.
2951 */
2952DECLINLINE(void) hmR0SvmSetPendingXcptMF(PVMCPU pVCpu)
2953{
2954 SVMEVENT Event;
2955 Event.u = 0;
2956 Event.n.u1Valid = 1;
2957 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2958 Event.n.u8Vector = X86_XCPT_MF;
2959 hmR0SvmSetPendingEvent(pVCpu, &Event, 0 /* GCPtrFaultAddress */);
2960}
2961
2962
2963/**
2964 * Emulates a simple MOV TPR (CR8) instruction, used for TPR patching on 32-bit
2965 * guests. This simply looks up the patch record at EIP and does the required modification.
2966 *
2967 * This VMMCALL is used as a fallback mechanism when mov to/from cr8 isn't exactly
2968 * like how we want it to be (e.g. not followed by shr 4 as is usually done for
2969 * TPR). See hmR3ReplaceTprInstr() for the details.
2970 *
2971 * @returns VBox status code.
2972 * @param pVM Pointer to the VM.
2973 * @param pVCpu Pointer to the VMCPU.
2974 * @param pCtx Pointer to the guest-CPU context.
2975 */
2976static int hmR0SvmEmulateMovTpr(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2977{
2978 Log4(("Emulated VMMCall TPR access replacement at RIP=%RGv\n", pCtx->rip));
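 /* Process consecutive patched TPR instructions at EIP until an unpatched instruction is reached. */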
2979 for (;;)
2980 {
2981 bool fPending;
2982 uint8_t u8Tpr;
2983
2984 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
2985 if (!pPatch)
2986 break;
2987
2988 switch (pPatch->enmType)
2989 {
2990 case HMTPRINSTR_READ:
2991 {
2992 int rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPending, NULL /* pu8PendingIrq */);
2993 AssertRC(rc);
2994
2995 rc = DISWriteReg32(CPUMCTX2CORE(pCtx), pPatch->uDstOperand, u8Tpr);
2996 AssertRC(rc);
2997 pCtx->rip += pPatch->cbOp;
2998 break;
2999 }
3000
3001 case HMTPRINSTR_WRITE_REG:
3002 case HMTPRINSTR_WRITE_IMM:
3003 {
3004 if (pPatch->enmType == HMTPRINSTR_WRITE_REG)
3005 {
3006 uint32_t u32Val;
3007 int rc = DISFetchReg32(CPUMCTX2CORE(pCtx), pPatch->uSrcOperand, &u32Val);
3008 AssertRC(rc);
3009 u8Tpr = u32Val;
3010 }
3011 else
3012 u8Tpr = (uint8_t)pPatch->uSrcOperand;
3013
3014 int rc2 = PDMApicSetTPR(pVCpu, u8Tpr);
3015 AssertRC(rc2);
3016 pCtx->rip += pPatch->cbOp;
3017 break;
3018 }
3019
3020 default:
3021 AssertMsgFailedReturn(("Unexpected patch type %d\n", pPatch->enmType), VERR_SVM_UNEXPECTED_PATCH_TYPE);
3022 break;
3023 }
3024 }
3025
3026 return VINF_SUCCESS;
3027}
3028
3029
3030/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
3031/* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- #VMEXIT handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */
3032/* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */
3033
3034/**
3035 * #VMEXIT handler for external interrupts, NMIs, FPU assertion freeze and INIT
3036 * signals (SVM_EXIT_INTR, SVM_EXIT_NMI, SVM_EXIT_FERR_FREEZE, SVM_EXIT_INIT).
3037 */
3038HMSVM_EXIT_DECL hmR0SvmExitIntr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3039{
3040 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3041 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt);
3042 /* 32-bit Windows hosts (4 cores) have trouble with this on Intel; it causes higher interrupt latency. Assuming the
3043 same for AMD-V. */
3044#if HC_ARCH_BITS == 64 && defined(VBOX_WITH_VMMR0_DISABLE_PREEMPTION)
3045 Assert(ASMIntAreEnabled());
3046 return VINF_SUCCESS;
3047#else
3048 return VINF_EM_RAW_INTERRUPT;
3049#endif
3050}
3051
3052
3053/**
3054 * #VMEXIT handler for WBINVD (SVM_EXIT_WBINVD). Conditional #VMEXIT.
3055 */
3056HMSVM_EXIT_DECL hmR0SvmExitWbinvd(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3057{
3058 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3059 pCtx->rip += 2; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3060 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWbinvd);
3061 return VINF_SUCCESS;
3062}
3063
3064
3065/**
3066 * #VMEXIT handler for INVD (SVM_EXIT_INVD). Unconditional #VMEXIT.
3067 */
3068HMSVM_EXIT_DECL hmR0SvmExitInvd(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3069{
3070 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3071 pCtx->rip += 2; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3072 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
3073 return VINF_SUCCESS;
3074}
3075
3076
3077/**
3078 * #VMEXIT handler for CPUID (SVM_EXIT_CPUID). Conditional #VMEXIT.
3079 */
3080HMSVM_EXIT_DECL hmR0SvmExitCpuid(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3081{
3082 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3083 PVM pVM = pVCpu->CTX_SUFF(pVM);
3084 int rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3085 if (RT_LIKELY(rc == VINF_SUCCESS))
3086 pCtx->rip += 2; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3087 else
3088 {
3089 AssertMsgFailed(("hmR0SvmExitCpuid: EMInterpretCpuId failed with %Rrc\n", rc));
3090 rc = VERR_EM_INTERPRETER;
3091 }
3092 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
3093 return rc;
3094}
3095
3096
3097/**
3098 * #VMEXIT handler for RDTSC (SVM_EXIT_RDTSC). Conditional #VMEXIT.
3099 */
3100HMSVM_EXIT_DECL hmR0SvmExitRdtsc(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3101{
3102 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3103 PVM pVM = pVCpu->CTX_SUFF(pVM);
3104 int rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3105 if (RT_LIKELY(rc == VINF_SUCCESS))
3106 pCtx->rip += 2; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3107 else
3108 {
3109 AssertMsgFailed(("hmR0SvmExitRdtsc: EMInterpretRdtsc failed with %Rrc\n", rc));
3110 rc = VERR_EM_INTERPRETER;
3111 }
3112 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
3113 return rc;
3114}
3115
3116
3117/**
3118 * #VMEXIT handler for RDTSCP (SVM_EXIT_RDTSCP). Conditional #VMEXIT.
3119 */
3120HMSVM_EXIT_DECL hmR0SvmExitRdtscp(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3121{
3122 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3123 int rc = EMInterpretRdtscp(pVCpu->CTX_SUFF(pVM), pVCpu, pCtx);
3124 if (RT_LIKELY(rc == VINF_SUCCESS))
3125 pCtx->rip += 3; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3126 else
3127 {
3128 AssertMsgFailed(("hmR0SvmExitRdtsc: EMInterpretRdtscp failed with %Rrc\n", rc));
3129 rc = VERR_EM_INTERPRETER;
3130 }
3131 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtscp);
3132 return rc;
3133}
3134
3135
3136/**
3137 * #VMEXIT handler for RDPMC (SVM_EXIT_RDPMC). Conditional #VMEXIT.
3138 */
3139HMSVM_EXIT_DECL hmR0SvmExitRdpmc(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3140{
3141 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
        PVM pVM = pVCpu->CTX_SUFF(pVM);
3142    int rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3143 if (RT_LIKELY(rc == VINF_SUCCESS))
3144 pCtx->rip += 2; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3145 else
3146 {
3147 AssertMsgFailed(("hmR0SvmExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc));
3148 rc = VERR_EM_INTERPRETER;
3149 }
3150 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
3151 return rc;
3152}
3153
3154
3155/**
3156 * #VMEXIT handler for INVLPG (SVM_EXIT_INVLPG). Conditional #VMEXIT.
3157 */
3158HMSVM_EXIT_DECL hmR0SvmExitInvlpg(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3159{
3160 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
        PVM pVM = pVCpu->CTX_SUFF(pVM);
3161    Assert(!pVM->hm.s.fNestedPaging);
3162
3163 /** @todo Decode Assist. */
3164 int rc = hmR0SvmInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx)); /* Updates RIP if successful. */
3165 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
3166 Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER);
3167 return rc;
3168}
3169
3170
3171/**
3172 * #VMEXIT handler for HLT (SVM_EXIT_HLT). Conditional #VMEXIT.
3173 */
3174HMSVM_EXIT_DECL hmR0SvmExitHlt(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3175{
3176 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3177 pCtx->rip++; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3178 int rc = EMShouldContinueAfterHalt(pVCpu, pCtx) ? VINF_SUCCESS : VINF_EM_HALT;
3179 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
3180 return rc;
3181}
3182
3183
3184/**
3185 * #VMEXIT handler for MONITOR (SVM_EXIT_MONITOR). Conditional #VMEXIT.
3186 */
3187HMSVM_EXIT_DECL hmR0SvmExitMonitor(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3188{
3189 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
        PVM pVM = pVCpu->CTX_SUFF(pVM);
3190    int rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3191 if (RT_LIKELY(rc == VINF_SUCCESS))
3192 pCtx->rip += 3; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3193 else
3194 {
3195 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc));
3196 rc = VERR_EM_INTERPRETER;
3197 }
3198 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
3199 return rc;
3200}
3201
3202
3203/**
3204 * #VMEXIT handler for MWAIT (SVM_EXIT_MWAIT). Conditional #VMEXIT.
3205 */
3206HMSVM_EXIT_DECL hmR0SvmExitMwait(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3207{
3208 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
        PVM pVM = pVCpu->CTX_SUFF(pVM);
3209    int rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
3210 if ( rc == VINF_EM_HALT
3211 || rc == VINF_SUCCESS)
3212 {
3213 pCtx->rip += 3; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3214
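        /* If the interpreter wants to halt but EM decides execution should continue (e.g. an interrupt is pending),
           treat the MWAIT as completed and carry on. */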
3215 if ( rc == VINF_EM_HALT
3216            && EMShouldContinueAfterHalt(pVCpu, pCtx))
3217 {
3218 rc = VINF_SUCCESS;
3219 }
3220 }
3221 else
3222 {
3223 AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMwait: EMInterpretMWait failed with %Rrc\n", rc));
3224 rc = VERR_EM_INTERPRETER;
3225 }
3226 AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER,
3227 ("hmR0SvmExitMwait: EMInterpretMWait failed rc=%Rrc\n", rc));
3228 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
3229 return rc;
3230}
3231
3232
3233/**
3234 * #VMEXIT handler for shutdown (triple-fault) (SVM_EXIT_SHUTDOWN).
3235 * Conditional #VMEXIT.
3236 */
3237HMSVM_EXIT_DECL hmR0SvmExitShutdown(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3238{
3239 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3240 return VINF_EM_RESET;
3241}
3242
3243
3244/**
3245 * #VMEXIT handler for CRx reads (SVM_EXIT_READ_CR*). Conditional #VMEXIT.
3246 */
3247HMSVM_EXIT_DECL hmR0SvmExitReadCRx(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3248{
3249 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3250 /** @todo Decode Assist. */
3251 int rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */);
3252    Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
3253 Assert((pSvmTransient->u64ExitCode - SVM_EXIT_READ_CR0) <= 15);
3254 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[pSvmTransient->u64ExitCode - SVM_EXIT_READ_CR0]);
3255 return rc;
3256}
3257
3258
3259/**
3260 * #VMEXIT handler for CRx writes (SVM_EXIT_WRITE_CR*). Conditional #VMEXIT.
3261 */
3262HMSVM_EXIT_DECL hmR0SvmExitWriteCRx(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3263{
3264 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3265 /** @todo Decode Assist. */
3266 int rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */);
3267    if (rc == VINF_SUCCESS)
3268 {
3269 /* RIP has been updated by EMInterpretInstruction(). */
3270 Assert((pSvmTransient->u64ExitCode - SVM_EXIT_WRITE_CR0) <= 15);
3271 switch (pSvmTransient->u64ExitCode - SVM_EXIT_WRITE_CR0)
3272 {
3273 case 0: /* CR0. */
3274 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
3275 break;
3276
3277 case 3: /* CR3. */
3278            Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging);
3279 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR3;
3280 break;
3281
3282 case 4: /* CR4. */
3283 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR4;
3284 break;
3285
3286 case 8: /* CR8 (TPR). */
3287 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_SVM_GUEST_APIC_STATE;
3288 break;
3289
3290 default:
3291            AssertMsgFailed(("hmR0SvmExitWriteCRx: Invalid/Unexpected Write-CRx exit. u64ExitCode=%#RX64 CRx=%#x\n",
3292                             pSvmTransient->u64ExitCode, pSvmTransient->u64ExitCode - SVM_EXIT_WRITE_CR0));
3293 break;
3294 }
3295 }
3296 else
3297 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
3298 return rc;
3299}
3300
3301
3302/**
3303 * #VMEXIT handler for instructions that result in a #UD exception delivered to
3304 * the guest.
3305 */
3306HMSVM_EXIT_DECL hmR0SvmExitSetPendingXcptUD(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3307{
3308 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3309    hmR0SvmSetPendingXcptUD(pVCpu);
        return VINF_SUCCESS;
3310}
3311
3312
3313/**
3314 * #VMEXIT handler for MSR read and writes (SVM_EXIT_MSR). Conditional #VMEXIT.
3315 */
3316HMSVM_EXIT_DECL hmR0SvmExitMsr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3317{
3318 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3319 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
3320
3321 int rc;
3322 if (pVmcb->ctrl.u64ExitInfo1 == SVM_EXIT1_MSR_WRITE)
3323 {
3324 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr);
3325
3326 /* Handle TPR patching; intercepted LSTAR write. */
3327        if (   pVCpu->CTX_SUFF(pVM)->hm.s.fTPRPatchingActive
3328 && pCtx->ecx == MSR_K8_LSTAR)
3329 {
3330 if ((pCtx->eax & 0xff) != pSvmTransient->u8GuestTpr)
3331 {
3332 /* Our patch code uses LSTAR for TPR caching for 32-bit guests. */
3333 int rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
3334 AssertRC(rc2);
3335 }
3336 pCtx->rip += 2; /* Hardcoded opcode, AMD-V doesn't give us this information. */
3337 return VINF_SUCCESS;
3338 }
3339
3340 rc = EMInterpretWrmsr(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx));
3341 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMsr: EMInterpretWrmsr failed rc=%Rrc\n", rc));
3342
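        /* If the guest changed EFER we need to re-sync the VMCB copy before the next VM-entry, hence the context flag below. */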
3343 if (pCtx->ecx == MSR_K6_EFER)
3344 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_SVM_GUEST_EFER_MSR;
3345 }
3346 else
3347 {
3348 /* MSR Read access. */
3349 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr);
3350        rc = EMInterpretRdmsr(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx));
3351 AssertMsg(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER, ("hmR0SvmExitMsr: EMInterpretRdmsr failed rc=%Rrc\n", rc));
3352 }
3353
3354 /* RIP has been updated by EMInterpret[Rd|Wr]msr(). */
3355 return rc;
3356}
3357
3358
3359/**
3360 * #VMEXIT handler for DRx read (SVM_EXIT_READ_DRx). Conditional #VMEXIT.
3361 */
3362HMSVM_EXIT_DECL hmR0SvmExitReadDRx(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3363{
3364 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3365 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
3366
3367 /* We should -not- get this VM-exit if the guest is debugging. */
3368 if (CPUMIsGuestDebugStateActive(pVCpu))
3369 {
3370 AssertMsgFailed(("hmR0SvmExitReadDRx: Unexpected exit. pVCpu=%p pCtx=%p\n", pVCpu, pCtx));
3371 return VERR_SVM_UNEXPECTED_EXIT;
3372 }
3373
3374 if ( !DBGFIsStepping(pVCpu)
3375 && !CPUMIsHyperDebugStateActive(pVCpu))
3376 {
3377 /* Don't intercept DRx read and writes. */
3378 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
3379 pVmcb->ctrl.u16InterceptRdDRx = 0;
3380 pVmcb->ctrl.u16InterceptWrDRx = 0;
3381 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_INTERCEPTS;
3382
3383 /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */
3384 PVM pVM = pVCpu->CTX_SUFF(pVM);
3385        int rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, true /* include DR6 */);
3386 AssertRC(rc);
3387 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3388
3389 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
3390 return VINF_SUCCESS;
3391 }
3392
3393 /** @todo Decode assist. */
3394 int rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0 /* pvFault */);
3395 if (RT_LIKELY(rc == VINF_SUCCESS))
3396 {
3397        /* Not strictly necessary for read accesses, but it doesn't hurt for now; will be fixed with decode assist. */
3398 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
3399 }
3400 else
3401        Assert(rc == VERR_EM_INTERPRETER);
3402 return rc;
3403}
3404
3405
3406/**
3407 * #VMEXIT handler for DRx write (SVM_EXIT_WRITE_DRx). Conditional #VMEXIT.
3408 */
3409HMSVM_EXIT_DECL hmR0SvmExitWriteDRx(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3410{
3411 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3412 /* For now it's the same since we interpret the instruction anyway. Will change when using of Decode Assist is implemented. */
3413 int rc = hmR0SvmExitReadDRx(pVCpu, pCtx, pSvmTransient);
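    /* The shared handler above already bumped the DRx-read counter; undo that and account this exit as a write instead. */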
3414 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
3415 STAM_COUNTER_DEC(&pVCpu->hm.s.StatExitDRxRead);
3416 return rc;
3417}
3418
3419
3420/**
3421 * #VMEXIT handler for I/O instructions (SVM_EXIT_IOIO). Conditional #VMEXIT.
3422 */
3423HMSVM_EXIT_DECL hmR0SvmExitIOInstr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3424{
3425    HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
        PVM      pVM   = pVCpu->CTX_SUFF(pVM);
        PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
3426
3427 /* I/O operation lookup arrays. */
3428 static uint32_t const s_aIOSize[8] = { 0, 1, 2, 0, 4, 0, 0, 0 }; /* Size of the I/O accesses in bytes. */
3429 static uint32_t const s_aIOOpAnd[8] = { 0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0 }; /* AND masks for saving
3430 the result (in AL/AX/EAX). */
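    /* Per the AMD spec's EXITINFO1 layout, the SZ8/SZ16/SZ32 bits (bits 4..6) form a one-hot size field, so uIOWidth
       below is 1, 2 or 4 and indexes straight into these tables; anything else is treated as a malformed exit. */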
3431
3432 /* Refer AMD spec. 15.10.2 "IN and OUT Behaviour" and Figure 15-2. "EXITINFO1 for IOIO Intercept" for the format. */
3433 SVMIOIOEXIT IoExitInfo;
3434 IoExitInfo.u = (uint32_t)pVmcb->ctrl.u64ExitInfo1;
3435 uint32_t uIOWidth = (IoExitInfo.u >> 4) & 0x7;
3436 uint32_t uIOSize = s_aIOSize[uIOWidth];
3437 uint32_t uAndVal = s_aIOOpAnd[uIOWidth];
3438
3439 if (RT_UNLIKELY(!uIOSize))
3440 {
3441 AssertMsgFailed(("hmR0SvmExitIOInstr: Invalid IO operation. uIOWidth=%u\n", uIOWidth));
3442 return VERR_EM_INTERPRETER;
3443 }
3444
3445 int rc;
3446 if (IoExitInfo.n.u1STR)
3447 {
3448 /* INS/OUTS - I/O String instruction. */
3449 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
3450
3451 /** @todo Huh? why can't we use the segment prefix information given by AMD-V
3452 * in EXITINFO1? Investigate once this thing is up and running. */
3453
3454 rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
3455 if (rc == VINF_SUCCESS)
3456 {
3457 if (IoExitInfo.n.u1Type == 0) /* OUT */
3458 {
3459 rc = IOMInterpretOUTSEx(pVM, pVCpu, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->fPrefix,
3460 (DISCPUMODE)pDis->uAddrMode, uIOSize);
3461 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
3462 }
3463 else
3464 {
3465 rc = IOMInterpretINSEx(pVM, pVCpu, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->fPrefix,
3466 (DISCPUMODE)pDis->uAddrMode, uIOSize);
3467 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
3468 }
3469 }
3470 else
3471 rc = VINF_EM_RAW_EMULATE_INSTR;
3472 }
3473 else
3474 {
3475 /* IN/OUT - I/O instruction. */
3476 Assert(!IoExitInfo.n.u1REP);
3477
3478 if (IoExitInfo.n.u1Type == 0) /* OUT */
3479 {
3480 rc = IOMIOPortWrite(pVM, pVCpu, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize);
3481 if (rc == VINF_IOM_R3_IOPORT_WRITE)
3482 HMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pVmcb->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, uAndVal, uIOSize);
3483
3484 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
3485 }
3486 else
3487 {
3488 uint32_t u32Val = 0;
3489
3490 rc = IOMIOPortRead(pVM, pVCpu, IoExitInfo.n.u16Port, &u32Val, uIOSize);
3491 if (IOM_SUCCESS(rc))
3492 {
3493 /* Save result of I/O IN instr. in AL/AX/EAX. */
3494 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
3495 }
3496 else if (rc == VINF_IOM_R3_IOPORT_READ)
3497 HMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pVmcb->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, uAndVal, uIOSize);
3498
3499 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
3500 }
3501 }
3502
3503 if (IOM_SUCCESS(rc))
3504 {
3505 /* AMD-V saves the RIP of the instruction following the IO instruction in EXITINFO2. */
3506 pCtx->rip = pVmcb->ctrl.u64ExitInfo2;
3507
3508 if (RT_LIKELY(rc == VINF_SUCCESS))
3509 {
3510 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
3511 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
3512 {
3513 /* I/O breakpoint length, in bytes. */
3514 static uint32_t const s_aIOBPLen[4] = { 1, 2, 0, 4 };
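                /* The DR7 LEN encoding 10b maps to 0 here so such a breakpoint never matches; it presumably isn't
                   meaningful for I/O breakpoints. */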
3515
3516 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
3517 for (unsigned i = 0; i < 4; i++)
3518 {
3519 unsigned uBPLen = s_aIOBPLen[X86_DR7_GET_LEN(pCtx->dr[7], i)];
3520
3521 if ( IoExitInfo.n.u16Port >= pCtx->dr[i]
3522 && IoExitInfo.n.u16Port < pCtx->dr[i] + uBPLen
3523 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
3524 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
3525 {
3526 Assert(CPUMIsGuestDebugStateActive(pVCpu));
3527
3528 /* Clear all breakpoint status flags and set the one we just hit. */
3529 pCtx->dr[6] &= ~(X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3);
3530 pCtx->dr[6] |= (uint64_t)RT_BIT(i);
3531
3532 /*
3533 * Note: AMD64 Architecture Programmer's Manual 13.1:
3534                     * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared
3535 * by software after the contents have been read.
3536 */
3537 pVmcb->guest.u64DR6 = pCtx->dr[6];
3538
3539 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
3540 pCtx->dr[7] &= ~X86_DR7_GD;
3541
3542 /* Paranoia. */
3543                    pCtx->dr[7] &= 0xffffffff;                                           /* Upper 32 bits MBZ. */
3544                    pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* MBZ. */
3545                    pCtx->dr[7] |= 0x400;                                                /* MB1. */
3546
3547 pVmcb->guest.u64DR7 = pCtx->dr[7];
3548 pVmcb->ctrl.u64VmcbCleanBits &= ~HMSVM_VMCB_CLEAN_DRX;
3549
3550 /* Inject the debug exception. */
3551 hmR0SvmSetPendingXcptDB(pVCpu);
3552 break;
3553 }
3554 }
3555 }
3556 }
3557 }
3558
3559#ifdef DEBUG
3560 if (rc == VINF_IOM_R3_IOPORT_READ)
3561 Assert(IoExitInfo.n.u1Type != 0);
3562 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
3563 Assert(IoExitInfo.n.u1Type == 0);
3564 else
3565 {
3566 AssertMsg( RT_FAILURE(rc)
3567 || rc == VINF_SUCCESS
3568 || rc == VINF_EM_RAW_EMULATE_INSTR
3569 || rc == VINF_EM_RAW_GUEST_TRAP
3570 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", rc));
3571 }
3572#endif
3573 return rc;
3574}
3575
3576
3577/**
3578 * #VMEXIT handler for Nested Page-faults (SVM_EXIT_NPF). Conditional
3579 * #VMEXIT.
3580 */
3581HMSVM_EXIT_DECL hmR0SvmExitNestedPF(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3582{
3583 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3584 PVM pVM = pVCpu->CTX_SUFF(pVM);
3585 Assert(pVM->hm.s.fNestedPaging);
3586
3587 /* See AMD spec. 15.25.6 "Nested versus Guest Page Faults, Fault Ordering" for VMCB details for #NPF. */
3588 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
3589 uint32_t u32ErrCode = pVmcb->ctrl.u64ExitInfo1;
3590 RTGCPHYS GCPhysFaultAddr = pVmcb->ctrl.u64ExitInfo2;
3591
3592 Log4(("#NPF at CS:RIP=%04x:%#RX64 faultaddr=%RGp errcode=%#x \n", pCtx->cs.Sel, pCtx->rip, GCPhysFaultAddr, u32ErrCode));
3593
3594#ifdef VBOX_HM_WITH_GUEST_PATCHING
3595 /* TPR patching for 32-bit guests, using the reserved bit in the page tables for MMIO regions. */
3596 if ( pVM->hm.s.fTRPPatchingAllowed
3597 && (GCPhysFaultAddr & PAGE_OFFSET_MASK) == 0x80
3598 && ( !(u32ErrCode & X86_TRAP_PF_P) /* Not present */
3599 || (u32ErrCode & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) == (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) /* MMIO page. */
3600 && !CPUMGetGuestCPL(pVCpu)
3601 && !CPUMIsGuestInLongModeEx(pCtx)
3602 && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
3603 {
3604 RTGCPHYS GCPhysApicBase = pCtx->msrApicBase;
3605 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3606
3607 if (GCPhysFaultAddr == GCPhysApicBase + 0x80)
3608 {
3609 /* Only attempt to patch the instruction once. */
3610 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3611 if (!pPatch)
3612 {
3613                return VINF_EM_HM_PATCH_TPR_INSTR;
3615 }
3616 }
3617 }
3618#endif
3619
3620 /*
3621 * Determine the nested paging mode.
3622 */
3623 PGMMODE enmNestedPagingMode;
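    /* On a 32-bit host the host paging mode cannot describe a long-mode guest; the nested tables are assumed to use
       the AMD64 format in that case (hence PGMMODE_AMD64_NX below), otherwise they follow the host paging mode. */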
3624#if HC_ARCH_BITS == 32
3625 if (CPUMIsGuestInLongModeEx(pCtx))
3626 enmNestedPagingMode = PGMMODE_AMD64_NX;
3627 else
3628#endif
3629 enmNestedPagingMode = PGMGetHostMode(pVM);
3630
3631 /*
3632 * MMIO optimization using the reserved (RSVD) bit in the guest page tables for MMIO pages.
3633 */
3634 int rc;
3635 Assert((u32ErrCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != X86_TRAP_PF_RSVD);
3636 if ((u32ErrCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) == (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
3637 {
3638 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, enmNestedPagingMode, CPUMCTX2CORE(pCtx), GCPhysFaultAddr, u32ErrCode);
3639
3640 /*
3641 * If we succeed, resume guest execution.
3642 * If we fail in interpreting the instruction because we couldn't get the guest physical address
3643 * of the page containing the instruction via the guest's page tables (we would invalidate the guest page
3644 * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this
3645 * weird case. See @bugref{6043}.
3646 */
3647 if ( rc == VINF_SUCCESS
3648 || rc == VERR_PAGE_TABLE_NOT_PRESENT
3649 || rc == VERR_PAGE_NOT_PRESENT)
3650 {
3651 /* Successfully handled MMIO operation. */
3652 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_SVM_GUEST_APIC_STATE;
3653 rc = VINF_SUCCESS;
3654 }
3655 return rc;
3656 }
3657
3658 TRPMAssertXcptPF(pVCpu, GCPhysFaultAddr, u32ErrCode);
3659 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmNestedPagingMode, u32ErrCode, CPUMCTX2CORE(pCtx), GCPhysFaultAddr);
3660 TRPMResetTrap(pVCpu);
3661
3662 Log2(("#NPF: PGMR0Trap0eHandlerNestedPaging returned %Rrc\n", rc));
3663
3664 /*
3665 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}.
3666 */
3667 if ( rc == VINF_SUCCESS
3668 || rc == VERR_PAGE_TABLE_NOT_PRESENT
3669 || rc == VERR_PAGE_NOT_PRESENT)
3670 {
3671 /* We've successfully synced our shadow page tables. */
3672 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
3673 rc = VINF_SUCCESS;
3674 }
3675
3676 return rc;
3677}
3678
3679
3680/**
3681 * #VMEXIT handler for virtual interrupt (SVM_EXIT_VINTR). Conditional #VMEXIT.
3682 */
3683HMSVM_EXIT_DECL hmR0SvmExitVIntr(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3684{
3685 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3686
        PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
3687    pVmcb->ctrl.IntCtrl.n.u1VIrqValid = 0;  /* No virtual interrupts pending, we'll inject the current one before reentry. */
3688 pVmcb->ctrl.IntCtrl.n.u8VIrqVector = 0;
3689
3690 /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts, it is now ready. */
3691 pVmcb->ctrl.u32InterceptCtrl1 &= ~SVM_CTRL1_INTERCEPT_VINTR;
3692 pVmcb->ctrl.u64VmcbCleanBits &= ~(HMSVM_VMCB_CLEAN_INTERCEPTS | HMSVM_VMCB_CLEAN_TPR);
3693
3694 /* Deliver the pending interrupt via hmR0SvmPreRunGuest()->hmR0SvmInjectEvent() and resume guest execution. */
3695 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow);
3696 return VINF_SUCCESS;
3697}
3698
3699
3700/**
3701 * #VMEXIT handler for task switches (SVM_EXIT_TASK_SWITCH). Conditional #VMEXIT.
3702 */
3703HMSVM_EXIT_DECL hmR0SvmExitTaskSwitch(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3704{
3705 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3706
3707    /* Check if this task-switch occurred while delivering an event through the guest IDT. */
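    /* EXITINFO2 has the IRET/JMP bits set when the task switch was triggered by an IRET or far jump rather than by
       event delivery (per the AMD spec's task-switch intercept description). */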
3708 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
3709 if ( !(pVmcb->ctrl.u64ExitInfo2 & (SVM_EXIT2_TASK_SWITCH_IRET | SVM_EXIT2_TASK_SWITCH_JMP))
3710 && pVCpu->hm.s.Event.fPending)
3711 {
3712 /*
3713 * AMD-V does not provide us with the original exception but we have it in u64IntrInfo since we
3714 * injected the event during VM-entry. Software interrupts and exceptions will be regenerated
3715 * when the recompiler restarts the instruction.
3716 */
3717 SVMEVENT Event;
3718 Event.u = pVCpu->hm.s.Event.u64IntrInfo;
3719 if ( Event.n.u3Type == SVM_EVENT_EXCEPTION
3720 || Event.n.u3Type == SVM_EVENT_SOFTWARE_INT)
3721 {
3722 pVCpu->hm.s.Event.fPending = false;
3723 }
3724 else
3725 Log4(("hmR0SvmExitTaskSwitch: TS occurred during event delivery. Kept pending u8Vector=%#x\n", Event.n.u8Vector));
3726 }
3727
3728 /** @todo Emulate task switch someday, currently just going back to ring-3 for
3729 * emulation. */
3730 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch);
3731 return VERR_EM_INTERPRETER;
3732}
3733
3734
3735/**
3736 * #VMEXIT handler for VMMCALL (SVM_EXIT_VMMCALL). Conditional #VMEXIT.
3737 */
3738HMSVM_EXIT_DECL hmR0SvmExitVmmCall(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3739{
3740 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3741
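    /* VMMCALL exits are expected to come only from our 32-bit guest TPR patch code; if the MOV-TPR emulation does not
       recognise the call site we raise #UD in the guest instead. */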
3742    int rc = hmR0SvmEmulateMovTpr(pVCpu->CTX_SUFF(pVM), pVCpu, pCtx);
3743 if (RT_UNLIKELY(rc != VINF_SUCCESS))
3744 hmR0SvmSetPendingXcptUD(pVCpu);
3745 return VINF_SUCCESS;
3746}
3747
3748
3749/**
3750 * #VMEXIT handler for page faults (SVM_EXIT_PF). Conditional #VMEXIT.
3751 */
3752HMSVM_EXIT_DECL hmR0SvmExitXcptPF(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3753{
3754    HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
        PVM pVM = pVCpu->CTX_SUFF(pVM);
3755
3756 /* -XXX- @todo Vectoring pagefaults!! */
3757
3758 /* See AMD spec. 15.12.15 "#PF (Page Fault)". */
3759 PSVMVMCB pVmcb = (PSVMVMCB)pVCpu->hm.s.svm.pvVmcb;
3760 uint32_t u32ErrCode = pVmcb->ctrl.u64ExitInfo1;
3761 RTGCUINTPTR uFaultAddress = pVmcb->ctrl.u64ExitInfo2;
3762
3763#if defined(HMSVM_ALWAYS_TRAP_ALL_XCPTS) || defined(HMSVM_ALWAYS_TRAP_PF)
3764 if (pVM->hm.s.fNestedPaging)
3765 {
3766 /* A genuine guest #PF, reflect it to the guest. */
3767        Log4(("#PF: Guest page fault at %04X:%RGv FaultAddr=%RGv ErrCode=%#x\n", pCtx->cs.Sel, (RTGCPTR)pCtx->rip, uFaultAddress,
3768              u32ErrCode));
3769 hmR0SvmSetPendingXcptPF(pVCpu, pCtx, u32ErrCode, uFaultAddress);
3770 return VINF_SUCCESS;
3771 }
3772#endif
3773
3774 Assert(!pVM->hm.s.fNestedPaging);
3775
3776#ifdef VBOX_HM_WITH_GUEST_PATCHING
3777 /* Shortcut for APIC TPR reads and writes; only applicable to 32-bit guests. */
3778 if ( pVM->hm.s.fTRPPatchingAllowed
3779 && (uFaultAddress & 0xfff) == 0x80 /* TPR offset. */
3780 && !(u32ErrCode & X86_TRAP_PF_P) /* Not present */
3781 && !CPUMGetGuestCPL(pVCpu)
3782 && !CPUMIsGuestInLongModeEx(pCtx)
3783 && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
3784 {
3785 RTGCPHYS GCPhysApicBase;
3786 GCPhysApicBase = pCtx->msrApicBase;
3787 GCPhysApicBase &= PAGE_BASE_GC_MASK;
3788
3789 /* Check if the page at the fault-address is the APIC base. */
3790 RTGCPHYS GCPhysPage;
3791        int rc2 = PGMGstGetPage(pVCpu, (RTGCPTR)uFaultAddress, NULL /* pfFlags */, &GCPhysPage);
3792        if (   rc2 == VINF_SUCCESS
3793            && GCPhysPage == GCPhysApicBase)
3794 {
3795 /* Only attempt to patch the instruction once. */
3796 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
3797 if (!pPatch)
3798 return VINF_EM_HM_PATCH_TPR_INSTR;
3799 }
3800 }
3801#endif
3802
3803 Log4(("#PF: uFaultAddress=%#RX64 cs:rip=%#04x:%#RX64 u32ErrCode %#RX32 cr3=%#RX64\n", uFaultAddress, pCtx->cs.Sel,
3804 pCtx->rip, u32ErrCode, pCtx->cr3));
3805
3806 TRPMAssertXcptPF(pVCpu, uFaultAddress, u32ErrCode);
3807    int rc = PGMTrap0eHandler(pVCpu, u32ErrCode, CPUMCTX2CORE(pCtx), (RTGCPTR)uFaultAddress);
3808
3809 Log2(("#PF rc=%Rrc\n", rc));
3810 if (rc == VINF_SUCCESS)
3811 {
3812 /* Successfully synced shadow pages tables or emulated an MMIO instruction. */
3813 TRPMResetTrap(pVCpu);
3814 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
3815 return rc;
3816 }
3817 else if (rc == VINF_EM_RAW_GUEST_TRAP)
3818 {
3819 /* It's a guest page fault and needs to be reflected to the guest. */
3820
3821 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
3822 u32ErrCode = TRPMGetErrorCode(pVCpu); /* The error code might have been changed. */
3823 TRPMResetTrap(pVCpu);
3824
3825 hmR0SvmSetPendingXcptPF(pVCpu, pCtx, u32ErrCode, uFaultAddress);
3827 return VINF_SUCCESS;
3828 }
3829
3830 TRPMResetTrap(pVCpu);
3831 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM);
3832 return rc;
3833}
3834
3835
3836/**
3837 * #VMEXIT handler for device-not-available exception (SVM_EXIT_NM). Conditional
3838 * #VMEXIT.
3839 */
3840HMSVM_EXIT_DECL hmR0SvmExitXcptNM(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3841{
3842 HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3843
3844#ifndef HMSVM_ALWAYS_TRAP_ALL_XCPTS
3845 Assert(!CPUMIsGuestFPUStateActive(pVCpu));
3846#endif
3847
3848 /* Lazy FPU loading; load the guest-FPU state transparently and continue execution of the guest. */
3849 PVM pVM = pVCpu->CTX_SUFF(pVM);
3850    int rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
3851 if (rc == VINF_SUCCESS)
3852 {
3853 Assert(CPUMIsGuestFPUStateActive(pVCpu));
3854 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
3855 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
3856 return VINF_SUCCESS;
3857 }
3858
3859 /* Forward #NM to the guest. */
3860 Assert(rc == VINF_EM_RAW_GUEST_TRAP);
3861 hmR0SvmSetPendingXcptNM(pVCpu);
3862 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
3863 return VINF_SUCCESS;
3864}
3865
3866
3867/**
3868 * #VMEXIT handler for math-fault (SVM_EXIT_MF). Conditional #VMEXIT.
3869 */
3870HMSVM_EXIT_DECL hmR0SvmExitXcptMF(PVMCPU pVCpu, PCPUMCTX pCtx, PSVMTRANSIENT pSvmTransient)
3871{
        HMSVM_VALIDATE_EXIT_HANDLER_PARAMS();
3872    int rc;
3873    if (!(pCtx->cr0 & X86_CR0_NE))
3874 {
3875 /* Old-style FPU error reporting needs some extra work. */
3876 /** @todo don't fall back to the recompiler, but do it manually. */
3877 rc = VERR_EM_INTERPRETER;
3878 }
3879 else
3880 {
3881 hmR0SvmSetPendingXcptMF(pVCpu);
3882 rc = VINF_SUCCESS;
3883 }
3884 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
3885 return rc;
3886}
3887