VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWSVMR0.cpp@43867

Last change on this file since 43867 was 43867, checked in by vboxsync, 12 years ago

Do not inject events into VMCB in VM exit path. Fixes hangs immediately after VM reset.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 123.9 KB
1/* $Id: HWSVMR0.cpp 43867 2012-11-13 17:23:34Z vboxsync $ */
2/** @file
3 * HM SVM (AMD-V) - Host Context Ring-0.
4 */
5
6/*
7 * Copyright (C) 2006-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_HM
22#include <VBox/vmm/hm.h>
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/selm.h>
25#include <VBox/vmm/iom.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/tm.h>
29#include <VBox/vmm/pdmapi.h>
30#include "HMInternal.h"
31#include <VBox/vmm/vm.h>
32#include <VBox/vmm/hm_svm.h>
33#include <VBox/err.h>
34#include <VBox/log.h>
35#include <VBox/dis.h>
36#include <VBox/disopcode.h>
37#include <iprt/param.h>
38#include <iprt/assert.h>
39#include <iprt/asm.h>
40#include <iprt/asm-amd64-x86.h>
41#include <iprt/cpuset.h>
42#include <iprt/mp.h>
43#include <iprt/time.h>
44#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
45# include <iprt/thread.h>
46#endif
47#include <iprt/x86.h>
48#include "HWSVMR0.h"
49
50#include "dtrace/VBoxVMM.h"
51
52
53/*******************************************************************************
54* Internal Functions *
55*******************************************************************************/
56static int hmR0SvmInterpretInvlpg(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame);
57static int hmR0SvmEmulateTprVMMCall(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
58static void hmR0SvmSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
59
60/*******************************************************************************
61* Defined Constants And Macros *
62*******************************************************************************/
63/** Convert hidden selector attribute word between VMX and SVM formats. */
64#define SVM_HIDSEGATTR_VMX2SVM(a) (((a) & 0xFF) | (((a) & 0xF000) >> 4))
65#define SVM_HIDSEGATTR_SVM2VMX(a) (((a) & 0xFF) | (((a) & 0x0F00) << 4))
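/*
 * Worked example for the attribute conversion macros above (illustrative value): the VMX-style
 * hidden attribute word keeps the AVL/L/D/G bits in bits 15:12, while the SVM VMCB packs them
 * into bits 11:8 right after the type/S/DPL/P byte. A flat 32-bit code segment attribute of
 * 0xC09B therefore converts as (0xC09B & 0xFF) | ((0xC09B & 0xF000) >> 4) = 0x9B | 0x0C00 = 0x0C9B,
 * and SVM_HIDSEGATTR_SVM2VMX(0x0C9B) maps it back to 0xC09B.
 */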
66
67#define SVM_WRITE_SELREG(REG, reg) \
68 do \
69 { \
70 Assert(pCtx->reg.fFlags & CPUMSELREG_FLAGS_VALID); \
71 Assert(pCtx->reg.ValidSel == pCtx->reg.Sel); \
72 pvVMCB->guest.REG.u16Sel = pCtx->reg.Sel; \
73 pvVMCB->guest.REG.u32Limit = pCtx->reg.u32Limit; \
74 pvVMCB->guest.REG.u64Base = pCtx->reg.u64Base; \
75 pvVMCB->guest.REG.u16Attr = SVM_HIDSEGATTR_VMX2SVM(pCtx->reg.Attr.u); \
76 } while (0)
77
78#define SVM_READ_SELREG(REG, reg) \
79 do \
80 { \
81 pCtx->reg.Sel = pvVMCB->guest.REG.u16Sel; \
82 pCtx->reg.ValidSel = pvVMCB->guest.REG.u16Sel; \
83 pCtx->reg.fFlags = CPUMSELREG_FLAGS_VALID; \
84 pCtx->reg.u32Limit = pvVMCB->guest.REG.u32Limit; \
85 pCtx->reg.u64Base = pvVMCB->guest.REG.u64Base; \
86 pCtx->reg.Attr.u = SVM_HIDSEGATTR_SVM2VMX(pvVMCB->guest.REG.u16Attr); \
87 } while (0)
88
89/*******************************************************************************
90* Global Variables *
91*******************************************************************************/
92/* IO operation lookup arrays. */
93static uint32_t const g_aIOSize[8] = {0, 1, 2, 0, 4, 0, 0, 0};
94static uint32_t const g_aIOOpAnd[8] = {0, 0xff, 0xffff, 0, 0xffffffff, 0, 0, 0};
95
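/*
 * Note on the tables above: the SVM IOIO exit information encodes the access width as a one-hot
 * size field (1 = byte, 2 = word, 4 = dword), which is why only indices 1, 2 and 4 are populated.
 * g_aIOSize maps that field to the operand size in bytes, and g_aIOOpAnd gives the corresponding
 * operand mask (e.g. 0xffff for word accesses) used when merging the result into the guest's EAX.
 * The zero entries are filler for encodings that cannot occur.
 */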
96
97/**
98 * Sets up and activates AMD-V on the current CPU.
99 *
100 * @returns VBox status code.
101 * @param pCpu Pointer to the CPU info struct.
102 * @param pVM Pointer to the VM (can be NULL after a resume!).
103 * @param pvCpuPage Pointer to the global CPU page.
104 * @param HCPhysCpuPage Physical address of the global CPU page.
105 * @param fEnabledByHost Whether AMD-V was already enabled by the host (must be false).
 */
106VMMR0DECL(int) SVMR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost)
107{
108 AssertReturn(!fEnabledByHost, VERR_INVALID_PARAMETER);
109 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
110 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
111
112 /*
113 * We must turn on AMD-V and setup the host state physical address, as those MSRs are per cpu/core.
114 */
115 uint64_t fEfer = ASMRdMsr(MSR_K6_EFER);
116 if (fEfer & MSR_K6_EFER_SVME)
117 {
118 /*
119 * If the VBOX_HWVIRTEX_IGNORE_SVM_IN_USE is active, then we blindly use AMD-V.
120 */
121 if ( pVM
122 && pVM->hm.s.svm.fIgnoreInUseError)
123 {
124 pCpu->fIgnoreAMDVInUseError = true;
125 }
126
127 if (!pCpu->fIgnoreAMDVInUseError)
128 return VERR_SVM_IN_USE;
129 }
130
131 /* Turn on AMD-V in the EFER MSR. */
132 ASMWrMsr(MSR_K6_EFER, fEfer | MSR_K6_EFER_SVME);
133
134 /* Write the physical page address where the CPU will store the host state while executing the VM. */
135 ASMWrMsr(MSR_K8_VM_HSAVE_PA, HCPhysCpuPage);
136
137 /*
138 * Theoretically, other hypervisors may have used ASIDs; ideally we should flush all non-zero ASIDs
139 * when enabling SVM. AMD doesn't have an SVM instruction to flush all ASIDs (flushing is done
140 * upon VMRUN). Therefore, just set the fFlushAsidBeforeUse flag which instructs hmR0SvmSetupTLB()
141 * to flush the TLB before using a new ASID.
142 */
143 pCpu->fFlushAsidBeforeUse = true;
144
145 /*
146 * Ensure each VCPU scheduled on this CPU gets a new ASID on resume. See @bugref{6255}.
147 */
148 ++pCpu->cTlbFlushes;
149
150 return VINF_SUCCESS;
151}
152
153
154/**
155 * Deactivates AMD-V on the current CPU.
156 *
157 * @returns VBox status code.
158 * @param pCpu Pointer to the CPU info struct.
159 * @param pvCpuPage Pointer to the global CPU page.
160 * @param HCPhysCpuPage Physical address of the global CPU page.
161 */
162VMMR0DECL(int) SVMR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
163{
164 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
165 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
166 NOREF(pCpu);
167
168 /* Turn off AMD-V in the EFER MSR. */
169 uint64_t fEfer = ASMRdMsr(MSR_K6_EFER);
170 ASMWrMsr(MSR_K6_EFER, fEfer & ~MSR_K6_EFER_SVME);
171
172 /* Invalidate host state physical address. */
173 ASMWrMsr(MSR_K8_VM_HSAVE_PA, 0);
174
175 return VINF_SUCCESS;
176}
177
178
179/**
180 * Does Ring-0 per VM AMD-V init.
181 *
182 * @returns VBox status code.
183 * @param pVM Pointer to the VM.
184 */
185VMMR0DECL(int) SVMR0InitVM(PVM pVM)
186{
187 int rc;
188
189 pVM->hm.s.svm.hMemObjIOBitmap = NIL_RTR0MEMOBJ;
190
191 /* Allocate 12 KB for the IO bitmap (doesn't seem to be a way to convince SVM not to use it) */
192 rc = RTR0MemObjAllocCont(&pVM->hm.s.svm.hMemObjIOBitmap, 3 << PAGE_SHIFT, false /* fExecutable */);
193 if (RT_FAILURE(rc))
194 return rc;
195
196 pVM->hm.s.svm.pvIOBitmap = RTR0MemObjAddress(pVM->hm.s.svm.hMemObjIOBitmap);
197 pVM->hm.s.svm.HCPhysIOBitmap = RTR0MemObjGetPagePhysAddr(pVM->hm.s.svm.hMemObjIOBitmap, 0);
198 /* Set all bits to intercept all IO accesses. */
199 ASMMemFill32(pVM->hm.s.svm.pvIOBitmap, 3 << PAGE_SHIFT, 0xffffffff);
200
201 /*
202 * Erratum 170 which requires a forced TLB flush for each world switch:
203 * See http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/33610.pdf
204 *
205 * All BH-G1/2 and DH-G1/2 models include a fix:
206 * Athlon X2: 0x6b 1/2
207 * 0x68 1/2
208 * Athlon 64: 0x7f 1
209 * 0x6f 2
210 * Sempron: 0x7f 1/2
211 * 0x6f 2
212 * 0x6c 2
213 * 0x7c 2
214 * Turion 64: 0x68 2
215 */
216 uint32_t u32Dummy;
217 uint32_t u32Version, u32Family, u32Model, u32Stepping, u32BaseFamily;
218 ASMCpuId(1, &u32Version, &u32Dummy, &u32Dummy, &u32Dummy);
219 u32BaseFamily = (u32Version >> 8) & 0xf;
220 u32Family = u32BaseFamily + (u32BaseFamily == 0xf ? ((u32Version >> 20) & 0x7f) : 0);
221 u32Model = ((u32Version >> 4) & 0xf);
222 u32Model = u32Model | ((u32BaseFamily == 0xf ? (u32Version >> 16) & 0x0f : 0) << 4);
223 u32Stepping = u32Version & 0xf;
224 if ( u32Family == 0xf
225 && !((u32Model == 0x68 || u32Model == 0x6b || u32Model == 0x7f) && u32Stepping >= 1)
226 && !((u32Model == 0x6f || u32Model == 0x6c || u32Model == 0x7c) && u32Stepping >= 2))
227 {
228 Log(("SVMR0InitVM: AMD cpu with erratum 170 family %x model %x stepping %x\n", u32Family, u32Model, u32Stepping));
229 pVM->hm.s.svm.fAlwaysFlushTLB = true;
230 }
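/*
 * Worked example of the decoding above (hypothetical CPUID leaf-1 EAX value): for
 * u32Version = 0x00040F12 the base family ((EAX >> 8) & 0xf) is 0xF and the extended family
 * ((EAX >> 20) & 0x7f) is 0, so u32Family = 0xF; the base model is 0x1 and the extended model
 * is 0x4, giving u32Model = 0x41; u32Stepping = 0x2. Model 0x41 is not in the fixed BH-G1/2 /
 * DH-G1/2 list, so such a CPU would get fAlwaysFlushTLB set here.
 */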
231
232 /* Allocate VMCBs for all guest CPUs. */
233 for (VMCPUID i = 0; i < pVM->cCpus; i++)
234 {
235 PVMCPU pVCpu = &pVM->aCpus[i];
236
237 pVCpu->hm.s.svm.hMemObjVMCBHost = NIL_RTR0MEMOBJ;
238 pVCpu->hm.s.svm.hMemObjVMCB = NIL_RTR0MEMOBJ;
239 pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
240
241 /* Allocate one page for the host context */
242 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVMCBHost, 1 << PAGE_SHIFT, false /* fExecutable */);
243 if (RT_FAILURE(rc))
244 return rc;
245
246 pVCpu->hm.s.svm.pvVMCBHost = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVMCBHost);
247 pVCpu->hm.s.svm.HCPhysVMCBHost = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVMCBHost, 0);
248 Assert(pVCpu->hm.s.svm.HCPhysVMCBHost < _4G);
249 ASMMemZeroPage(pVCpu->hm.s.svm.pvVMCBHost);
250
251 /* Allocate one page for the VM control block (VMCB). */
252 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjVMCB, 1 << PAGE_SHIFT, false /* fExecutable */);
253 if (RT_FAILURE(rc))
254 return rc;
255
256 pVCpu->hm.s.svm.pvVMCB = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjVMCB);
257 pVCpu->hm.s.svm.HCPhysVMCB = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjVMCB, 0);
258 Assert(pVCpu->hm.s.svm.HCPhysVMCB < _4G);
259 ASMMemZeroPage(pVCpu->hm.s.svm.pvVMCB);
260
261 /* Allocate 8 KB for the MSR bitmap (doesn't seem to be a way to convince SVM not to use it) */
262 rc = RTR0MemObjAllocCont(&pVCpu->hm.s.svm.hMemObjMsrBitmap, 2 << PAGE_SHIFT, false /* fExecutable */);
263 if (RT_FAILURE(rc))
264 return rc;
265
266 pVCpu->hm.s.svm.pvMsrBitmap = RTR0MemObjAddress(pVCpu->hm.s.svm.hMemObjMsrBitmap);
267 pVCpu->hm.s.svm.HCPhysMsrBitmap = RTR0MemObjGetPagePhysAddr(pVCpu->hm.s.svm.hMemObjMsrBitmap, 0);
268 /* Set all bits to intercept all MSR accesses. */
269 ASMMemFill32(pVCpu->hm.s.svm.pvMsrBitmap, 2 << PAGE_SHIFT, 0xffffffff);
270 }
271
272 return VINF_SUCCESS;
273}
274
275
276/**
277 * Does Ring-0 per VM AMD-V termination.
278 *
279 * @returns VBox status code.
280 * @param pVM Pointer to the VM.
281 */
282VMMR0DECL(int) SVMR0TermVM(PVM pVM)
283{
284 for (VMCPUID i = 0; i < pVM->cCpus; i++)
285 {
286 PVMCPU pVCpu = &pVM->aCpus[i];
287
288 if (pVCpu->hm.s.svm.hMemObjVMCBHost != NIL_RTR0MEMOBJ)
289 {
290 RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVMCBHost, false);
291 pVCpu->hm.s.svm.pvVMCBHost = 0;
292 pVCpu->hm.s.svm.HCPhysVMCBHost = 0;
293 pVCpu->hm.s.svm.hMemObjVMCBHost = NIL_RTR0MEMOBJ;
294 }
295
296 if (pVCpu->hm.s.svm.hMemObjVMCB != NIL_RTR0MEMOBJ)
297 {
298 RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjVMCB, false);
299 pVCpu->hm.s.svm.pvVMCB = 0;
300 pVCpu->hm.s.svm.HCPhysVMCB = 0;
301 pVCpu->hm.s.svm.hMemObjVMCB = NIL_RTR0MEMOBJ;
302 }
303 if (pVCpu->hm.s.svm.hMemObjMsrBitmap != NIL_RTR0MEMOBJ)
304 {
305 RTR0MemObjFree(pVCpu->hm.s.svm.hMemObjMsrBitmap, false);
306 pVCpu->hm.s.svm.pvMsrBitmap = 0;
307 pVCpu->hm.s.svm.HCPhysMsrBitmap = 0;
308 pVCpu->hm.s.svm.hMemObjMsrBitmap = NIL_RTR0MEMOBJ;
309 }
310 }
311 if (pVM->hm.s.svm.hMemObjIOBitmap != NIL_RTR0MEMOBJ)
312 {
313 RTR0MemObjFree(pVM->hm.s.svm.hMemObjIOBitmap, false);
314 pVM->hm.s.svm.pvIOBitmap = 0;
315 pVM->hm.s.svm.HCPhysIOBitmap = 0;
316 pVM->hm.s.svm.hMemObjIOBitmap = NIL_RTR0MEMOBJ;
317 }
318 return VINF_SUCCESS;
319}
320
321
322/**
323 * Sets up AMD-V for the specified VM.
324 *
325 * @returns VBox status code.
326 * @param pVM Pointer to the VM.
327 */
328VMMR0DECL(int) SVMR0SetupVM(PVM pVM)
329{
330 int rc = VINF_SUCCESS;
331
332 AssertReturn(pVM, VERR_INVALID_PARAMETER);
333 Assert(pVM->hm.s.svm.fSupported);
334
335 for (VMCPUID i = 0; i < pVM->cCpus; i++)
336 {
337 PVMCPU pVCpu = &pVM->aCpus[i];
338 SVM_VMCB *pvVMCB = (SVM_VMCB *)pVM->aCpus[i].hm.s.svm.pvVMCB;
339
340 AssertMsgReturn(pvVMCB, ("Invalid pvVMCB\n"), VERR_SVM_INVALID_PVMCB);
341
342 /*
343 * Program the control fields. Most of them never have to be changed again.
344 * CR0/4 reads must be intercepted, our shadow values are not necessarily the same as the guest's.
345 * Note: CR0 & CR4 can be safely read when guest and shadow copies are identical.
346 */
347 pvVMCB->ctrl.u16InterceptRdCRx = RT_BIT(0) | RT_BIT(4);
348
349 /* CR0/4 writes must be intercepted for obvious reasons. */
350 pvVMCB->ctrl.u16InterceptWrCRx = RT_BIT(0) | RT_BIT(4);
351
352 /* Intercept all DRx reads and writes by default. Changed later on. */
353 pvVMCB->ctrl.u16InterceptRdDRx = 0xFFFF;
354 pvVMCB->ctrl.u16InterceptWrDRx = 0xFFFF;
355
356 /* Intercept traps; only #NM is always intercepted. */
357 pvVMCB->ctrl.u32InterceptException = RT_BIT(X86_XCPT_NM);
358#ifdef VBOX_ALWAYS_TRAP_PF
359 pvVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
360#endif
361#ifdef VBOX_STRICT
362 pvVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_BP)
363 | RT_BIT(X86_XCPT_DB)
364 | RT_BIT(X86_XCPT_DE)
365 | RT_BIT(X86_XCPT_UD)
366 | RT_BIT(X86_XCPT_NP)
367 | RT_BIT(X86_XCPT_SS)
368 | RT_BIT(X86_XCPT_GP)
369 | RT_BIT(X86_XCPT_MF)
370 ;
371#endif
372
373 /* Set up instruction and miscellaneous intercepts. */
374 pvVMCB->ctrl.u32InterceptCtrl1 = SVM_CTRL1_INTERCEPT_INTR
375 | SVM_CTRL1_INTERCEPT_VINTR
376 | SVM_CTRL1_INTERCEPT_NMI
377 | SVM_CTRL1_INTERCEPT_SMI
378 | SVM_CTRL1_INTERCEPT_INIT
379 | SVM_CTRL1_INTERCEPT_RDPMC
380 | SVM_CTRL1_INTERCEPT_CPUID
381 | SVM_CTRL1_INTERCEPT_RSM
382 | SVM_CTRL1_INTERCEPT_HLT
383 | SVM_CTRL1_INTERCEPT_INOUT_BITMAP
384 | SVM_CTRL1_INTERCEPT_MSR_SHADOW
385 | SVM_CTRL1_INTERCEPT_INVLPGA /* AMD only */
386 | SVM_CTRL1_INTERCEPT_SHUTDOWN /* fatal */
387 | SVM_CTRL1_INTERCEPT_FERR_FREEZE; /* Legacy FPU FERR handling. */
388
389 pvVMCB->ctrl.u32InterceptCtrl2 = SVM_CTRL2_INTERCEPT_VMRUN /* required */
390 | SVM_CTRL2_INTERCEPT_VMMCALL
391 | SVM_CTRL2_INTERCEPT_VMLOAD
392 | SVM_CTRL2_INTERCEPT_VMSAVE
393 | SVM_CTRL2_INTERCEPT_STGI
394 | SVM_CTRL2_INTERCEPT_CLGI
395 | SVM_CTRL2_INTERCEPT_SKINIT
396 | SVM_CTRL2_INTERCEPT_WBINVD
397 | SVM_CTRL2_INTERCEPT_MONITOR
398 | SVM_CTRL2_INTERCEPT_MWAIT_UNCOND; /* don't execute mwait or else we'll idle inside the
399 guest (host thinks the cpu load is high) */
400
401 Log(("pvVMCB->ctrl.u32InterceptException = %x\n", pvVMCB->ctrl.u32InterceptException));
402 Log(("pvVMCB->ctrl.u32InterceptCtrl1 = %x\n", pvVMCB->ctrl.u32InterceptCtrl1));
403 Log(("pvVMCB->ctrl.u32InterceptCtrl2 = %x\n", pvVMCB->ctrl.u32InterceptCtrl2));
404
405 /* Virtualize masking of INTR interrupts. (reads/writes from/to CR8 go to the V_TPR register) */
406 pvVMCB->ctrl.IntCtrl.n.u1VIrqMasking = 1;
407
408 /* Ignore the priority in the TPR; just deliver it when we tell it to. */
409 pvVMCB->ctrl.IntCtrl.n.u1IgnoreTPR = 1;
410
411 /* Set IO and MSR bitmap addresses. */
412 pvVMCB->ctrl.u64IOPMPhysAddr = pVM->hm.s.svm.HCPhysIOBitmap;
413 pvVMCB->ctrl.u64MSRPMPhysAddr = pVCpu->hm.s.svm.HCPhysMsrBitmap;
414
415 /* No LBR virtualization. */
416 pvVMCB->ctrl.u64LBRVirt = 0;
417
418 /* The ASID must start at 1; the host uses 0. */
419 pvVMCB->ctrl.TLBCtrl.n.u32ASID = 1;
420
421 /*
422 * Setup the PAT MSR (nested paging only)
423 * The default value should be 0x0007040600070406ULL, but we want to treat all guest memory as WB,
424 * so choose type 6 for all PAT slots.
425 */
426 pvVMCB->guest.u64GPAT = 0x0006060606060606ULL;
427
428 /* If nested paging is not in use, additional intercepts have to be set up. */
429 if (!pVM->hm.s.fNestedPaging)
430 {
431 /* CR3 reads/writes must be intercepted; our shadow values are different from guest's. */
432 pvVMCB->ctrl.u16InterceptRdCRx |= RT_BIT(3);
433 pvVMCB->ctrl.u16InterceptWrCRx |= RT_BIT(3);
434
435 /*
436 * We must also intercept:
437 * - INVLPG (must go through shadow paging)
438 * - task switches (may change CR3/EFLAGS/LDT)
439 */
440 pvVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_INVLPG
441 | SVM_CTRL1_INTERCEPT_TASK_SWITCH;
442
443 /* Page faults must be intercepted to implement shadow paging. */
444 pvVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
445 }
446
447 /*
448 * The following MSRs are saved automatically by vmload/vmsave, so we allow the guest
449 * to modify them directly.
450 */
451 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
452 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_CSTAR, true, true);
453 hmR0SvmSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
454 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
455 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
456 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
457 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
458 hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
459 hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
460 hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
461 }
462
463 return rc;
464}
465
466
467/**
468 * Sets the permission bits for the specified MSR.
469 *
470 * @param pVCpu Pointer to the VMCPU.
471 * @param ulMSR MSR value.
472 * @param fRead Whether reading is allowed.
473 * @param fWrite Whether writing is allowed.
474 */
475static void hmR0SvmSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
476{
477 unsigned ulBit;
478 uint8_t *pvMsrBitmap = (uint8_t *)pVCpu->hm.s.svm.pvMsrBitmap;
479
480 if (ulMSR <= 0x00001FFF)
481 {
482 /* Pentium-compatible MSRs */
483 ulBit = ulMSR * 2;
484 }
485 else if ( ulMSR >= 0xC0000000
486 && ulMSR <= 0xC0001FFF)
487 {
488 /* AMD Sixth Generation x86 Processor MSRs and SYSCALL */
489 ulBit = (ulMSR - 0xC0000000) * 2;
490 pvMsrBitmap += 0x800;
491 }
492 else if ( ulMSR >= 0xC0010000
493 && ulMSR <= 0xC0011FFF)
494 {
495 /* AMD Seventh and Eighth Generation Processor MSRs */
496 ulBit = (ulMSR - 0xC0001000) * 2;
497 pvMsrBitmap += 0x1000;
498 }
499 else
500 {
501 AssertFailed();
502 return;
503 }
504 Assert(ulBit < 16 * 1024 - 1);
505 if (fRead)
506 ASMBitClear(pvMsrBitmap, ulBit);
507 else
508 ASMBitSet(pvMsrBitmap, ulBit);
509
510 if (fWrite)
511 ASMBitClear(pvMsrBitmap, ulBit + 1);
512 else
513 ASMBitSet(pvMsrBitmap, ulBit + 1);
514}
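/*
 * Worked example for the bitmap layout above: MSR_K8_LSTAR (0xC0000082) falls into the second
 * range, so the code uses the 2K region at offset 0x800 and bit index
 * (0xC0000082 - 0xC0000000) * 2 = 0x104 for the read intercept, 0x105 for the write intercept.
 * A cleared bit allows direct guest access; a set bit (the default established by ASMMemFill32
 * in SVMR0InitVM) causes the MSR access to #VMEXIT.
 */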
515
516/**
517 * Posts a pending event (trap or external interrupt). An injected event should only
518 * be written to the VMCB immediately before VMRUN, otherwise we might have stale events
519 * injected across VM resets and suchlike. See @bugref{6220}.
520 *
521 * @param pVCpu Pointer to the VMCPU.
522 * @param pEvent Pointer to the SVM event to post; it is written into the VMCB
523 * just before the next VMRUN.
524 */
525DECLINLINE(void) hmR0SvmSetPendingEvent(PVMCPU pVCpu, SVM_EVENT *pEvent)
526{
527#ifdef VBOX_STRICT
528 Log(("SVM: Set pending event: intInfo=%016llx\n", pEvent->au64[0]));
529#endif
530
531 /* If there's an event pending already, we're in trouble... */
532 Assert(!pVCpu->hm.s.Event.fPending);
533
534 /* Set pending event state. */
535 pVCpu->hm.s.Event.intInfo = pEvent->au64[0];
536 pVCpu->hm.s.Event.fPending = true;
537}
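/*
 * Illustrative sketch (not live code): queueing a #GP(0) through this API would use the same
 * SVM_EVENT bit-fields the exit handlers below use, e.g.:
 *
 *   SVM_EVENT Event;
 *   Event.au64[0]            = 0;
 *   Event.n.u8Vector         = X86_XCPT_GP;
 *   Event.n.u3Type           = SVM_EVENT_EXCEPTION;
 *   Event.n.u1ErrorCodeValid = 1;
 *   Event.n.u32ErrorCode     = 0;
 *   Event.n.u1Valid          = 1;
 *   hmR0SvmSetPendingEvent(pVCpu, &Event);
 *
 * hmR0SvmCheckPendingInterrupt() later picks the pending event up and hmR0SvmInjectEvent()
 * writes it into the VMCB immediately before VMRUN, per the r43867 change described above.
 */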
538
539/**
540 * Injects an event (trap or external interrupt).
541 *
542 * @param pVCpu Pointer to the VMCPU.
543 * @param pvVMCB Pointer to the VMCB.
544 * @param pCtx Pointer to the guest CPU context.
545 * @param pEvent Pointer to the SVM event to inject.
546 */
547DECLINLINE(void) hmR0SvmInjectEvent(PVMCPU pVCpu, SVM_VMCB *pvVMCB, CPUMCTX *pCtx, SVM_EVENT *pEvent)
548{
549#ifdef VBOX_WITH_STATISTICS
550 STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[pEvent->n.u8Vector & MASK_INJECT_IRQ_STAT]);
551#endif
552
553#ifdef VBOX_STRICT
554 if (pEvent->n.u8Vector == 0xE)
555 {
556 Log(("SVM: Inject int %d at %RGv error code=%02x CR2=%RGv intInfo=%08x\n", pEvent->n.u8Vector,
557 (RTGCPTR)pCtx->rip, pEvent->n.u32ErrorCode, (RTGCPTR)pCtx->cr2, pEvent->au64[0]));
558 }
559 else if (pEvent->n.u8Vector < 0x20)
560 Log(("SVM: Inject int %d at %RGv error code=%08x\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip, pEvent->n.u32ErrorCode));
561 else
562 {
563 Log(("INJ-EI: %x at %RGv\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip));
564 Assert(!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
565 Assert(pCtx->eflags.u32 & X86_EFL_IF);
566 }
567#endif
568
569 /* Set event injection state. */
570 pvVMCB->ctrl.EventInject.au64[0] = pEvent->au64[0];
571}
572
573
574/**
575 * Checks for pending guest interrupts and injects them.
576 *
577 * @returns VBox status code.
578 * @param pVM Pointer to the VM.
579 * @param pVCpu Pointer to the VMCPU.
580 * @param pvVMCB Pointer to the VMCB.
581 * @param pCtx Pointer to the guest CPU Context.
582 */
583static int hmR0SvmCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, SVM_VMCB *pvVMCB, CPUMCTX *pCtx)
584{
585 int rc;
586 NOREF(pVM);
587
588 /*
589 * Dispatch any pending interrupts (injected before, but a VM-exit occurred prematurely).
590 */
591 if (pVCpu->hm.s.Event.fPending)
592 {
593 SVM_EVENT Event;
594
595 Log(("Reinjecting event %08x %08x at %RGv\n", pVCpu->hm.s.Event.intInfo, pVCpu->hm.s.Event.errCode,
596 (RTGCPTR)pCtx->rip));
597 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntReinject);
598 Event.au64[0] = pVCpu->hm.s.Event.intInfo;
599 hmR0SvmInjectEvent(pVCpu, pvVMCB, pCtx, &Event);
600
601 pVCpu->hm.s.Event.fPending = false;
602 return VINF_SUCCESS;
603 }
604
605 /*
606 * If an active trap is already pending, we must forward it first!
607 */
608 if (!TRPMHasTrap(pVCpu))
609 {
610 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
611 {
612 SVM_EVENT Event;
613
614 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
615 Event.n.u8Vector = X86_XCPT_NMI;
616 Event.n.u1Valid = 1;
617 Event.n.u32ErrorCode = 0;
618 Event.n.u3Type = SVM_EVENT_NMI;
619
620 hmR0SvmInjectEvent(pVCpu, pvVMCB, pCtx, &Event);
621 return VINF_SUCCESS;
622 }
623
624 /** @todo SMI interrupts. */
625
626 /*
627 * When external interrupts are pending, we should exit the VM when IF is set.
628 */
629 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
630 {
631 if ( !(pCtx->eflags.u32 & X86_EFL_IF)
632 || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
633 {
634 if (!pvVMCB->ctrl.IntCtrl.n.u1VIrqValid)
635 {
636 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
637 LogFlow(("Enable irq window exit!\n"));
638 else
639 {
640 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS -> irq window exit\n",
641 (RTGCPTR)pCtx->rip));
642 }
643
644 /** @todo Use virtual interrupt method to inject a pending IRQ; dispatched as
645 * soon as guest.IF is set. */
646 pvVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_VINTR;
647 pvVMCB->ctrl.IntCtrl.n.u1VIrqValid = 1;
648 pvVMCB->ctrl.IntCtrl.n.u8VIrqVector = 0; /* don't care */
649 }
650 }
651 else
652 {
653 uint8_t u8Interrupt;
654
655 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
656 Log(("Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc\n", u8Interrupt, u8Interrupt, rc));
657 if (RT_SUCCESS(rc))
658 {
659 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
660 AssertRC(rc);
661 }
662 else
663 {
664 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
665 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
666 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq);
667 /* Just continue */
668 }
669 }
670 }
671 }
672
673#ifdef VBOX_STRICT
674 if (TRPMHasTrap(pVCpu))
675 {
676 uint8_t u8Vector;
677 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
678 AssertRC(rc);
679 }
680#endif
681
682 if ( (pCtx->eflags.u32 & X86_EFL_IF)
683 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
684 && TRPMHasTrap(pVCpu)
685 )
686 {
687 uint8_t u8Vector;
688 TRPMEVENT enmType;
689 SVM_EVENT Event;
690 RTGCUINT u32ErrorCode;
691
692 Event.au64[0] = 0;
693
694 /* If a new event is pending, then dispatch it now. */
695 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &u32ErrorCode, 0);
696 AssertRC(rc);
697 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
698 Assert(enmType != TRPM_SOFTWARE_INT);
699
700 /* Clear the pending trap. */
701 rc = TRPMResetTrap(pVCpu);
702 AssertRC(rc);
703
704 Event.n.u8Vector = u8Vector;
705 Event.n.u1Valid = 1;
706 Event.n.u32ErrorCode = u32ErrorCode;
707
708 if (enmType == TRPM_TRAP)
709 {
710 switch (u8Vector)
711 {
712 case X86_XCPT_DF:
713 case X86_XCPT_TS:
714 case X86_XCPT_NP:
715 case X86_XCPT_SS:
716 case X86_XCPT_GP:
717 case X86_XCPT_PF:
718 case X86_XCPT_AC:
719 /* Valid error codes. */
720 Event.n.u1ErrorCodeValid = 1;
721 break;
722 default:
723 break;
724 }
725 if (u8Vector == X86_XCPT_NMI)
726 Event.n.u3Type = SVM_EVENT_NMI;
727 else
728 Event.n.u3Type = SVM_EVENT_EXCEPTION;
729 }
730 else
731 Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
732
733 STAM_COUNTER_INC(&pVCpu->hm.s.StatIntInject);
734 hmR0SvmInjectEvent(pVCpu, pvVMCB, pCtx, &Event);
735 } /* if (interrupts can be dispatched) */
736
737 return VINF_SUCCESS;
738}
739
740
741/**
742 * Saves the host state.
743 *
744 * @returns VBox status code.
745 * @param pVM Pointer to the VM.
746 * @param pVCpu Pointer to the VMCPU.
747 */
748VMMR0DECL(int) SVMR0SaveHostState(PVM pVM, PVMCPU pVCpu)
749{
750 NOREF(pVM);
751 NOREF(pVCpu);
752 /* Nothing to do here. */
753 return VINF_SUCCESS;
754}
755
756
757/**
758 * Loads the guest state.
759 *
760 * NOTE: Don't do anything here that can cause a jump back to ring-3!!!
761 *
762 * @returns VBox status code.
763 * @param pVM Pointer to the VM.
764 * @param pVCpu Pointer to the VMCPU.
765 * @param pCtx Pointer to the guest CPU context.
766 */
767VMMR0DECL(int) SVMR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
768{
769 RTGCUINTPTR val;
770 SVM_VMCB *pvVMCB;
771
772 if (pVM == NULL)
773 return VERR_INVALID_PARAMETER;
774
775 /* Setup AMD SVM. */
776 Assert(pVM->hm.s.svm.fSupported);
777
778 pvVMCB = (SVM_VMCB *)pVCpu->hm.s.svm.pvVMCB;
779 AssertMsgReturn(pvVMCB, ("Invalid pvVMCB\n"), VERR_SVM_INVALID_PVMCB);
780
781 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
782 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_SEGMENT_REGS)
783 {
784 SVM_WRITE_SELREG(CS, cs);
785 SVM_WRITE_SELREG(SS, ss);
786 SVM_WRITE_SELREG(DS, ds);
787 SVM_WRITE_SELREG(ES, es);
788 SVM_WRITE_SELREG(FS, fs);
789 SVM_WRITE_SELREG(GS, gs);
790 }
791
792 /* Guest CPU context: LDTR. */
793 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_LDTR)
794 {
795 SVM_WRITE_SELREG(LDTR, ldtr);
796 }
797
798 /* Guest CPU context: TR. */
799 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_TR)
800 {
801 SVM_WRITE_SELREG(TR, tr);
802 }
803
804 /* Guest CPU context: GDTR. */
805 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_GDTR)
806 {
807 pvVMCB->guest.GDTR.u32Limit = pCtx->gdtr.cbGdt;
808 pvVMCB->guest.GDTR.u64Base = pCtx->gdtr.pGdt;
809 }
810
811 /* Guest CPU context: IDTR. */
812 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_IDTR)
813 {
814 pvVMCB->guest.IDTR.u32Limit = pCtx->idtr.cbIdt;
815 pvVMCB->guest.IDTR.u64Base = pCtx->idtr.pIdt;
816 }
817
818 /*
819 * Sysenter MSRs (unconditional)
820 */
821 pvVMCB->guest.u64SysEnterCS = pCtx->SysEnter.cs;
822 pvVMCB->guest.u64SysEnterEIP = pCtx->SysEnter.eip;
823 pvVMCB->guest.u64SysEnterESP = pCtx->SysEnter.esp;
824
825 /* Control registers */
826 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR0)
827 {
828 val = pCtx->cr0;
829 if (!CPUMIsGuestFPUStateActive(pVCpu))
830 {
831 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
832 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
833 }
834 else
835 {
836 /** @todo check if we support the old style mess correctly. */
837 if (!(val & X86_CR0_NE))
838 {
839 Log(("Forcing X86_CR0_NE!!!\n"));
840
841 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
842 if (!pVCpu->hm.s.fFPUOldStyleOverride)
843 {
844 pvVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_MF);
845 pVCpu->hm.s.fFPUOldStyleOverride = true;
846 }
847 }
848 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
849 }
850 /* Always enable caching. */
851 val &= ~(X86_CR0_CD|X86_CR0_NW);
852
853 /*
854 * Note: WP is not relevant in nested paging mode as we catch accesses on the (guest) physical level.
855 * Note: In nested paging mode, the guest is allowed to run with paging disabled; the guest-physical to host-physical
856 * translation will remain active.
857 */
858 if (!pVM->hm.s.fNestedPaging)
859 {
860 val |= X86_CR0_PG; /* Paging is always enabled; even when the guest is running in real mode or PE without paging. */
861 val |= X86_CR0_WP; /* Must set this as we rely on protecting various pages and supervisor writes must be caught. */
862 }
863 pvVMCB->guest.u64CR0 = val;
864 }
865 /* CR2 as well */
866 pvVMCB->guest.u64CR2 = pCtx->cr2;
867
868 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR3)
869 {
870 /* Save our shadow CR3 register. */
871 if (pVM->hm.s.fNestedPaging)
872 {
873 PGMMODE enmShwPagingMode;
874
875#if HC_ARCH_BITS == 32
876 if (CPUMIsGuestInLongModeEx(pCtx))
877 enmShwPagingMode = PGMMODE_AMD64_NX;
878 else
879#endif
880 enmShwPagingMode = PGMGetHostMode(pVM);
881
882 pvVMCB->ctrl.u64NestedPagingCR3 = PGMGetNestedCR3(pVCpu, enmShwPagingMode);
883 Assert(pvVMCB->ctrl.u64NestedPagingCR3);
884 pvVMCB->guest.u64CR3 = pCtx->cr3;
885 }
886 else
887 {
888 pvVMCB->guest.u64CR3 = PGMGetHyperCR3(pVCpu);
889 Assert(pvVMCB->guest.u64CR3 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
890 }
891 }
892
893 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_CR4)
894 {
895 val = pCtx->cr4;
896 if (!pVM->hm.s.fNestedPaging)
897 {
898 switch (pVCpu->hm.s.enmShadowMode)
899 {
900 case PGMMODE_REAL:
901 case PGMMODE_PROTECTED: /* Protected mode, no paging. */
902 AssertFailed();
903 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
904
905 case PGMMODE_32_BIT: /* 32-bit paging. */
906 val &= ~X86_CR4_PAE;
907 break;
908
909 case PGMMODE_PAE: /* PAE paging. */
910 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
911 /** Must use PAE paging as we could use physical memory > 4 GB */
912 val |= X86_CR4_PAE;
913 break;
914
915 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
916 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
917#ifdef VBOX_ENABLE_64_BITS_GUESTS
918 break;
919#else
920 AssertFailed();
921 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
922#endif
923
924 default: /* shut up gcc */
925 AssertFailed();
926 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
927 }
928 }
929 pvVMCB->guest.u64CR4 = val;
930 }
931
932 /* Debug registers. */
933 if (pVCpu->hm.s.fContextUseFlags & HM_CHANGED_GUEST_DEBUG)
934 {
935 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
936 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
937
938 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
939 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
940 pCtx->dr[7] |= 0x400; /* must be one */
941
942 pvVMCB->guest.u64DR7 = pCtx->dr[7];
943 pvVMCB->guest.u64DR6 = pCtx->dr[6];
944
945#ifdef DEBUG
946 /* Sync the hypervisor debug state now if any breakpoint is armed. */
947 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
948 && !CPUMIsHyperDebugStateActive(pVCpu)
949 && !DBGFIsStepping(pVCpu))
950 {
951 /* Save the host and load the hypervisor debug state. */
952 int rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */);
953 AssertRC(rc);
954
955 /* DRx intercepts remain enabled. */
956
957 /* Override dr6 & dr7 with the hypervisor values. */
958 pvVMCB->guest.u64DR7 = CPUMGetHyperDR7(pVCpu);
959 pvVMCB->guest.u64DR6 = CPUMGetHyperDR6(pVCpu);
960 }
961 else
962#endif
963 /* Sync the debug state now if any breakpoint is armed. */
964 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
965 && !CPUMIsGuestDebugStateActive(pVCpu)
966 && !DBGFIsStepping(pVCpu))
967 {
968 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed);
969
970 /* Disable drx move intercepts. */
971 pvVMCB->ctrl.u16InterceptRdDRx = 0;
972 pvVMCB->ctrl.u16InterceptWrDRx = 0;
973
974 /* Save the host and load the guest debug state. */
975 int rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */);
976 AssertRC(rc);
977 }
978 }
979
980 /* EIP, ESP and EFLAGS */
981 pvVMCB->guest.u64RIP = pCtx->rip;
982 pvVMCB->guest.u64RSP = pCtx->rsp;
983 pvVMCB->guest.u64RFlags = pCtx->eflags.u32;
984
985 /* Set CPL */
986 pvVMCB->guest.u8CPL = pCtx->ss.Attr.n.u2Dpl;
987
988 /* RAX/EAX too, as VMRUN uses RAX as an implicit parameter. */
989 pvVMCB->guest.u64RAX = pCtx->rax;
990
991 /* vmrun will fail without MSR_K6_EFER_SVME. */
992 pvVMCB->guest.u64EFER = pCtx->msrEFER | MSR_K6_EFER_SVME;
993
994 /* 64-bit guest mode? */
995 if (CPUMIsGuestInLongModeEx(pCtx))
996 {
997#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
998 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
999#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
1000 pVCpu->hm.s.svm.pfnVMRun = SVMR0VMSwitcherRun64;
1001#else
1002# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
1003 if (!pVM->hm.s.fAllow64BitGuests)
1004 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
1005# endif
1006 pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun64;
1007#endif
1008 /* Unconditionally update these as wrmsr might have changed them. (HM_CHANGED_GUEST_SEGMENT_REGS will not be set) */
1009 pvVMCB->guest.FS.u64Base = pCtx->fs.u64Base;
1010 pvVMCB->guest.GS.u64Base = pCtx->gs.u64Base;
1011 }
1012 else
1013 {
1014 /* Filter out the MSR_K6_LME bit or else AMD-V expects amd64 shadow paging. */
1015 pvVMCB->guest.u64EFER &= ~MSR_K6_EFER_LME;
1016
1017 pVCpu->hm.s.svm.pfnVMRun = SVMR0VMRun;
1018 }
1019
1020 /* TSC offset. */
1021 if (TMCpuTickCanUseRealTSC(pVCpu, &pvVMCB->ctrl.u64TSCOffset))
1022 {
1023 uint64_t u64CurTSC = ASMReadTSC();
1024 if (u64CurTSC + pvVMCB->ctrl.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
1025 {
1026 pvVMCB->ctrl.u32InterceptCtrl1 &= ~SVM_CTRL1_INTERCEPT_RDTSC;
1027 pvVMCB->ctrl.u32InterceptCtrl2 &= ~SVM_CTRL2_INTERCEPT_RDTSCP;
1028 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset);
1029 }
1030 else
1031 {
1032 /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */
1033 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC,
1034 pvVMCB->ctrl.u64TSCOffset, u64CurTSC + pvVMCB->ctrl.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu),
1035 TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pvVMCB->ctrl.u64TSCOffset, TMCpuTickGet(pVCpu)));
1036 pvVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC;
1037 pvVMCB->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP;
1038 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscInterceptOverFlow);
1039 }
1040 }
1041 else
1042 {
1043 pvVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC;
1044 pvVMCB->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP;
1045 STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept);
1046 }
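/*
 * The reasoning behind the checks above: with offsetting active the guest observes
 * guest_tsc = host_tsc + u64TSCOffset on every RDTSC. The test
 * u64CurTSC + u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu) ensures that the next value the guest
 * can read is not below the last TSC value TM already handed out; if it could be, RDTSC/RDTSCP
 * are intercepted and emulated instead so guest time never appears to move backwards.
 */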
1047
1048 /* Sync the various MSRs for 64-bit mode. */
1049 pvVMCB->guest.u64STAR = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
1050 pvVMCB->guest.u64LSTAR = pCtx->msrLSTAR; /* 64-bit mode syscall rip */
1051 pvVMCB->guest.u64CSTAR = pCtx->msrCSTAR; /* compatibility mode syscall rip */
1052 pvVMCB->guest.u64SFMASK = pCtx->msrSFMASK; /* syscall flag mask */
1053 pvVMCB->guest.u64KernelGSBase = pCtx->msrKERNELGSBASE; /* SWAPGS exchange value */
1054
1055#ifdef DEBUG
1056 /* Intercept X86_XCPT_DB if stepping is enabled */
1057 if ( DBGFIsStepping(pVCpu)
1058 || CPUMIsHyperDebugStateActive(pVCpu))
1059 pvVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_DB);
1060 else
1061 pvVMCB->ctrl.u32InterceptException &= ~RT_BIT(X86_XCPT_DB);
1062#endif
1063
1064 /* Done. */
1065 pVCpu->hm.s.fContextUseFlags &= ~HM_CHANGED_ALL_GUEST;
1066
1067 return VINF_SUCCESS;
1068}
1069
1070
1071/**
1072 * Sets up the TLB control and ASID in the VMCB.
1073 *
1074 * @param pVM Pointer to the VM.
1075 * @param pVCpu Pointer to the VMCPU.
1076 */
1077static void hmR0SvmSetupTLB(PVM pVM, PVMCPU pVCpu)
1078{
1079 PHMGLOBLCPUINFO pCpu;
1080
1081 AssertPtr(pVM);
1082 AssertPtr(pVCpu);
1083
1084 SVM_VMCB *pvVMCB = (SVM_VMCB *)pVCpu->hm.s.svm.pvVMCB;
1085 pCpu = HMR0GetCurrentCpu();
1086
1087 /*
1088 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
1089 * This can happen both for start & resume due to long jumps back to ring-3.
1090 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
1091 * so we cannot reuse the ASIDs without flushing.
1092 */
1093 bool fNewAsid = false;
1094 if ( pVCpu->hm.s.idLastCpu != pCpu->idCpu
1095 || pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1096 {
1097 pVCpu->hm.s.fForceTLBFlush = true;
1098 fNewAsid = true;
1099 }
1100
1101 /*
1102 * Set TLB flush state as checked until we return from the world switch.
1103 */
1104 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true);
1105
1106 /*
1107 * Check for TLB shootdown flushes.
1108 */
1109 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1110 pVCpu->hm.s.fForceTLBFlush = true;
1111
1112 pVCpu->hm.s.idLastCpu = pCpu->idCpu;
1113 pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_NOTHING;
1114
1115 if (RT_UNLIKELY(pVM->hm.s.svm.fAlwaysFlushTLB))
1116 {
1117 /*
1118 * This is the AMD erratum 170. We need to flush the entire TLB for each world switch. Sad.
1119 */
1120 pCpu->uCurrentAsid = 1;
1121 pVCpu->hm.s.uCurrentAsid = 1;
1122 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1123 pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
1124 }
1125 else if (pVCpu->hm.s.fForceTLBFlush)
1126 {
1127 if (fNewAsid)
1128 {
1129 ++pCpu->uCurrentAsid;
1130 bool fHitASIDLimit = false;
1131 if (pCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid)
1132 {
1133 pCpu->uCurrentAsid = 1; /* start at 1; host uses 0 */
1134 pCpu->cTlbFlushes++;
1135 fHitASIDLimit = true;
1136
1137 if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
1138 {
1139 pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
1140 pCpu->fFlushAsidBeforeUse = true;
1141 }
1142 else
1143 {
1144 pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
1145 pCpu->fFlushAsidBeforeUse = false;
1146 }
1147 }
1148
1149 if ( !fHitASIDLimit
1150 && pCpu->fFlushAsidBeforeUse)
1151 {
1152 if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
1153 pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
1154 else
1155 {
1156 pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
1157 pCpu->fFlushAsidBeforeUse = false;
1158 }
1159 }
1160
1161 pVCpu->hm.s.uCurrentAsid = pCpu->uCurrentAsid;
1162 pVCpu->hm.s.cTlbFlushes = pCpu->cTlbFlushes;
1163 }
1164 else
1165 {
1166 if (pVM->hm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
1167 pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
1168 else
1169 pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
1170 }
1171
1172 pVCpu->hm.s.fForceTLBFlush = false;
1173 }
1174 else
1175 {
1176 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
1177 * not be executed. See hmQueueInvlPage() where it is commented
1178 * out. Support individual entry flushing someday. */
1179 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
1180 {
1181 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
1182 STAM_COUNTER_INC(&pVCpu->hm.s.StatTlbShootdown);
1183 for (unsigned i = 0; i < pVCpu->hm.s.TlbShootdown.cPages; i++)
1184 SVMR0InvlpgA(pVCpu->hm.s.TlbShootdown.aPages[i], pvVMCB->ctrl.TLBCtrl.n.u32ASID);
1185 }
1186 }
1187
1188 pVCpu->hm.s.TlbShootdown.cPages = 0;
1189 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1190
1191 /* Update VMCB with the ASID. */
1192 pvVMCB->ctrl.TLBCtrl.n.u32ASID = pVCpu->hm.s.uCurrentAsid;
1193
1194 AssertMsg(pVCpu->hm.s.cTlbFlushes == pCpu->cTlbFlushes,
1195 ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
1196 AssertMsg(pCpu->uCurrentAsid >= 1 && pCpu->uCurrentAsid < pVM->hm.s.uMaxAsid,
1197 ("cpu%d uCurrentAsid = %x\n", pCpu->idCpu, pCpu->uCurrentAsid));
1198 AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid,
1199 ("cpu%d VM uCurrentAsid = %x\n", pCpu->idCpu, pVCpu->hm.s.uCurrentAsid));
1200
1201#ifdef VBOX_WITH_STATISTICS
1202 if (pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_NOTHING)
1203 STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch);
1204 else if ( pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT
1205 || pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT_RETAIN_GLOBALS)
1206 {
1207 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid);
1208 }
1209 else
1210 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch);
1211#endif
1212}
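/*
 * Summary of the flush decisions made above:
 * - fAlwaysFlushTLB (erratum 170): ASID fixed to 1, SVM_TLB_FLUSH_ENTIRE on every world switch.
 * - Forced flush with a new ASID: bump the per-CPU ASID; on wrap-around past uMaxAsid restart
 *   at 1, bump cTlbFlushes and flush (by ASID if the CPU supports it, otherwise the entire TLB).
 * - Forced flush, same ASID: flush by ASID if supported, otherwise the entire TLB.
 * - No flush needed: SVM_TLB_FLUSH_NOTHING, only replaying any queued TLB-shootdown pages
 *   with SVMR0InvlpgA().
 */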
1213
1214
1215/**
1216 * Runs guest code in an AMD-V VM.
1217 *
1218 * @returns VBox status code.
1219 * @param pVM Pointer to the VM.
1220 * @param pVCpu Pointer to the VMCPU.
1221 * @param pCtx Pointer to the guest CPU context.
1222 */
1223VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1224{
1225 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x);
1226 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit1);
1227 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExit2);
1228
1229 VBOXSTRICTRC rc = VINF_SUCCESS;
1230 int rc2;
1231 uint64_t exitCode = (uint64_t)SVM_EXIT_INVALID;
1232 SVM_VMCB *pvVMCB = NULL;
1233 bool fSyncTPR = false;
1234 unsigned cResume = 0;
1235 uint8_t u8LastTPR = 0; /* Initialized for potentially stupid compilers. */
1236 uint32_t u32HostExtFeatures = 0;
1237 PHMGLOBLCPUINFO pCpu = 0;
1238 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
1239#ifdef VBOX_STRICT
1240 RTCPUID idCpuCheck;
1241#endif
1242#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
1243 uint64_t u64LastTime = RTTimeMilliTS();
1244#endif
1245
1246 pvVMCB = (SVM_VMCB *)pVCpu->hm.s.svm.pvVMCB;
1247 AssertMsgReturn(pvVMCB, ("Invalid pvVMCB\n"), VERR_SVM_INVALID_PVMCB);
1248
1249 /*
1250 * We can jump to this point to resume execution after determining that a VM-exit is innocent.
1251 */
1252ResumeExecution:
1253 if (!STAM_PROFILE_ADV_IS_RUNNING(&pVCpu->hm.s.StatEntry))
1254 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit2, &pVCpu->hm.s.StatEntry, x);
1255 Assert(!HMR0SuspendPending());
1256
1257 /*
1258 * Safety precaution; looping for too long here can have a very bad effect on the host.
1259 */
1260 if (RT_UNLIKELY(++cResume > pVM->hm.s.cMaxResumeLoops))
1261 {
1262 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMaxResume);
1263 rc = VINF_EM_RAW_INTERRUPT;
1264 goto end;
1265 }
1266
1267 /*
1268 * Check for IRQ inhibition due to instruction fusing (sti, mov ss).
1269 */
1270 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1271 {
1272 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
1273 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
1274 {
1275 /*
1276 * Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
1277 * Before we are able to execute this instruction in raw mode (iret to guest code), an external interrupt might
1278 * force a world switch again, possibly allowing a guest interrupt to be dispatched in the process. This could
1279 * break the guest. It sounds very unlikely, but such timing-sensitive problems are not as rare as you might think.
1280 */
1281 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
1282 /* Irq inhibition is no longer active; clear the corresponding SVM state. */
1283 pvVMCB->ctrl.u64IntShadow = 0;
1284 }
1285 }
1286 else
1287 {
1288 /* Irq inhibition is no longer active; clear the corresponding SVM state. */
1289 pvVMCB->ctrl.u64IntShadow = 0;
1290 }
1291
1292#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
1293 if (RT_UNLIKELY((cResume & 0xf) == 0))
1294 {
1295 uint64_t u64CurTime = RTTimeMilliTS();
1296
1297 if (RT_UNLIKELY(u64CurTime > u64LastTime))
1298 {
1299 u64LastTime = u64CurTime;
1300 TMTimerPollVoid(pVM, pVCpu);
1301 }
1302 }
1303#endif
1304
1305 /*
1306 * Check for pending actions that force us to go back to ring-3.
1307 */
1308 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
1309 || VMCPU_FF_ISPENDING(pVCpu,
1310 VMCPU_FF_HM_TO_R3_MASK
1311 | VMCPU_FF_PGM_SYNC_CR3
1312 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL
1313 | VMCPU_FF_REQUEST))
1314 {
1315 /* Check if a sync operation is pending. */
1316 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
1317 {
1318 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
1319 AssertRC(VBOXSTRICTRC_VAL(rc));
1320 if (rc != VINF_SUCCESS)
1321 {
1322 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
1323 goto end;
1324 }
1325 }
1326
1327#ifdef DEBUG
1328 /* Intercept X86_XCPT_DB if stepping is enabled */
1329 if (!DBGFIsStepping(pVCpu))
1330#endif
1331 {
1332 if ( VM_FF_ISPENDING(pVM, VM_FF_HM_TO_R3_MASK)
1333 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
1334 {
1335 STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchToR3);
1336 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
1337 goto end;
1338 }
1339 }
1340
1341 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
1342 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
1343 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
1344 {
1345 rc = VINF_EM_PENDING_REQUEST;
1346 goto end;
1347 }
1348
1349 /* Check if a pgm pool flush is in progress. */
1350 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
1351 {
1352 rc = VINF_PGM_POOL_FLUSH_PENDING;
1353 goto end;
1354 }
1355
1356 /* Check if DMA work is pending (2nd+ run). */
1357 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
1358 {
1359 rc = VINF_EM_RAW_TO_R3;
1360 goto end;
1361 }
1362 }
1363
1364#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
1365 /*
1366 * Exit to ring-3 if preemption or other work is pending.
1367 *
1368 * Interrupts are disabled before the call to make sure we don't miss any interrupt
1369 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
1370 * further down, but hmR0SvmCheckPendingInterrupt makes that impossible.)
1371 *
1372 * Note! Interrupts must be disabled *before* we check for TLB flushes; TLB
1373 * shootdowns rely on this.
1374 */
1375 uOldEFlags = ASMIntDisableFlags();
1376 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
1377 {
1378 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptPending);
1379 rc = VINF_EM_RAW_INTERRUPT;
1380 goto end;
1381 }
1382 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
1383#endif
1384
1385 /*
1386 * When external interrupts are pending, we should exit the VM when IF is set.
1387 * Note: *After* VM_FF_INHIBIT_INTERRUPTS check!!
1388 */
1389 rc = hmR0SvmCheckPendingInterrupt(pVM, pVCpu, pvVMCB, pCtx);
1390 if (RT_FAILURE(rc))
1391 goto end;
1392
1393 /*
1394 * TPR caching using CR8 is only available in 64-bit mode or with 32-bit guests when X86_CPUID_AMD_FEATURE_ECX_CR8L is
1395 * supported.
1396 * Note: we can't do this in SVMR0LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)! (no longer true)
1397 */
1398 /** @todo query and update the TPR only when it could have been changed (mmio access)
1399 */
1400 if (pVM->hm.s.fHasIoApic)
1401 {
1402 /* TPR caching in CR8 */
1403 bool fPending;
1404 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
1405 AssertRC(rc2);
1406
1407 if (pVM->hm.s.fTPRPatchingActive)
1408 {
1409 /* Our patch code uses LSTAR for TPR caching. */
1410 pCtx->msrLSTAR = u8LastTPR;
1411
1412 if (fPending)
1413 {
1414 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
1415 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
1416 }
1417 else
1418 {
1419 /*
1420 * No interrupts are pending, so we don't need to be explicitly notified.
1421 * There are enough world switches for detecting pending interrupts.
1422 */
1423 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
1424 }
1425 }
1426 else
1427 {
1428 /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
1429 pvVMCB->ctrl.IntCtrl.n.u8VTPR = (u8LastTPR >> 4);
1430
1431 if (fPending)
1432 {
1433 /* A TPR change could activate a pending interrupt, so catch cr8 writes. */
1434 pvVMCB->ctrl.u16InterceptWrCRx |= RT_BIT(8);
1435 }
1436 else
1437 {
1438 /*
1439 * No interrupts are pending, so we don't need to be explicitly notified.
1440 * There are enough world switches for detecting pending interrupts.
1441 */
1442 pvVMCB->ctrl.u16InterceptWrCRx &= ~RT_BIT(8);
1443 }
1444 }
1445 fSyncTPR = !fPending;
1446 }
1447
1448 /* All done! Let's start VM execution. */
1449
1450 /* Enable nested paging if necessary (disabled each time after #VMEXIT). */
1451 pvVMCB->ctrl.NestedPaging.n.u1NestedPaging = pVM->hm.s.fNestedPaging;
1452
1453#ifdef LOG_ENABLED
1454 pCpu = HMR0GetCurrentCpu();
1455 if (pVCpu->hm.s.idLastCpu != pCpu->idCpu)
1456 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hm.s.idLastCpu, pCpu->idCpu));
1457 else if (pVCpu->hm.s.cTlbFlushes != pCpu->cTlbFlushes)
1458 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hm.s.cTlbFlushes, pCpu->cTlbFlushes));
1459 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
1460 LogFlow(("Manual TLB flush\n"));
1461#endif
1462
1463 /*
1464 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
1465 * (until the actual world switch)
1466 */
1467#ifdef VBOX_STRICT
1468 idCpuCheck = RTMpCpuId();
1469#endif
1470 VMMR0LogFlushDisable(pVCpu);
1471
1472 /*
1473 * Load the guest state; *must* be here as it sets up the shadow CR0 for lazy FPU syncing!
1474 */
1475 rc = SVMR0LoadGuestState(pVM, pVCpu, pCtx);
1476 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1477 {
1478 VMMR0LogFlushEnable(pVCpu);
1479 goto end;
1480 }
1481
1482#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
1483 /*
1484 * Disable interrupts to make sure a poke will interrupt execution.
1485 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
1486 */
1487 uOldEFlags = ASMIntDisableFlags();
1488 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
1489#endif
1490 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x);
1491
1492 /* Setup TLB control and ASID in the VMCB. */
1493 hmR0SvmSetupTLB(pVM, pVCpu);
1494
1495 /* In case we execute a goto ResumeExecution later on. */
1496 pVCpu->hm.s.fResumeVM = true;
1497 pVCpu->hm.s.fForceTLBFlush = pVM->hm.s.svm.fAlwaysFlushTLB;
1498
1499 Assert(sizeof(pVCpu->hm.s.svm.HCPhysVMCB) == 8);
1500 Assert(pvVMCB->ctrl.IntCtrl.n.u1VIrqMasking);
1501 Assert(pvVMCB->ctrl.u64IOPMPhysAddr == pVM->hm.s.svm.HCPhysIOBitmap);
1502 Assert(pvVMCB->ctrl.u64MSRPMPhysAddr == pVCpu->hm.s.svm.HCPhysMsrBitmap);
1503 Assert(pvVMCB->ctrl.u64LBRVirt == 0);
1504
1505#ifdef VBOX_STRICT
1506 Assert(idCpuCheck == RTMpCpuId());
1507#endif
1508 TMNotifyStartOfExecution(pVCpu);
1509
1510 /*
1511 * Save the current Host TSC_AUX and write the guest TSC_AUX to the host, so that
1512 * RDTSCPs (that don't cause exits) read the guest MSR. See @bugref{3324}.
1513 */
1514 u32HostExtFeatures = pVM->hm.s.cpuid.u32AMDFeatureEDX;
1515 if ( (u32HostExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP)
1516 && !(pvVMCB->ctrl.u32InterceptCtrl2 & SVM_CTRL2_INTERCEPT_RDTSCP))
1517 {
1518 pVCpu->hm.s.u64HostTscAux = ASMRdMsr(MSR_K8_TSC_AUX);
1519 uint64_t u64GuestTscAux = 0;
1520 rc2 = CPUMQueryGuestMsr(pVCpu, MSR_K8_TSC_AUX, &u64GuestTscAux);
1521 AssertRC(rc2);
1522 ASMWrMsr(MSR_K8_TSC_AUX, u64GuestTscAux);
1523 }
1524
1525#ifdef VBOX_WITH_KERNEL_USING_XMM
1526 hmR0SVMRunWrapXMM(pVCpu->hm.s.svm.HCPhysVMCBHost, pVCpu->hm.s.svm.HCPhysVMCB, pCtx, pVM, pVCpu,
1527 pVCpu->hm.s.svm.pfnVMRun);
1528#else
1529 pVCpu->hm.s.svm.pfnVMRun(pVCpu->hm.s.svm.HCPhysVMCBHost, pVCpu->hm.s.svm.HCPhysVMCB, pCtx, pVM, pVCpu);
1530#endif
1531
1532 ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false);
1533 ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits);
1534 /* Possibly the last TSC value seen by the guest (too high) (only when we're in TSC offset mode). */
1535 if (!(pvVMCB->ctrl.u32InterceptCtrl1 & SVM_CTRL1_INTERCEPT_RDTSC))
1536 {
1537 /* Restore host's TSC_AUX. */
1538 if (u32HostExtFeatures & X86_CPUID_EXT_FEATURE_EDX_RDTSCP)
1539 ASMWrMsr(MSR_K8_TSC_AUX, pVCpu->hm.s.u64HostTscAux);
1540
1541 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() +
1542 pvVMCB->ctrl.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
1543 }
1544
1545 TMNotifyEndOfExecution(pVCpu);
1546 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
1547 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatExit1, x);
1548 ASMSetFlags(uOldEFlags);
1549#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
1550 uOldEFlags = ~(RTCCUINTREG)0;
1551#endif
1552
1553 /*
1554 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1555 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING-3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
1556 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1557 */
1558
1559 /* Reason for the VM exit */
1560 exitCode = pvVMCB->ctrl.u64ExitCode;
1561
1562 if (RT_UNLIKELY(exitCode == (uint64_t)SVM_EXIT_INVALID)) /* Invalid guest state. */
1563 {
1564 HMDumpRegs(pVM, pVCpu, pCtx);
1565#ifdef DEBUG
1566 Log(("ctrl.u16InterceptRdCRx %x\n", pvVMCB->ctrl.u16InterceptRdCRx));
1567 Log(("ctrl.u16InterceptWrCRx %x\n", pvVMCB->ctrl.u16InterceptWrCRx));
1568 Log(("ctrl.u16InterceptRdDRx %x\n", pvVMCB->ctrl.u16InterceptRdDRx));
1569 Log(("ctrl.u16InterceptWrDRx %x\n", pvVMCB->ctrl.u16InterceptWrDRx));
1570 Log(("ctrl.u32InterceptException %x\n", pvVMCB->ctrl.u32InterceptException));
1571 Log(("ctrl.u32InterceptCtrl1 %x\n", pvVMCB->ctrl.u32InterceptCtrl1));
1572 Log(("ctrl.u32InterceptCtrl2 %x\n", pvVMCB->ctrl.u32InterceptCtrl2));
1573 Log(("ctrl.u64IOPMPhysAddr %RX64\n", pvVMCB->ctrl.u64IOPMPhysAddr));
1574 Log(("ctrl.u64MSRPMPhysAddr %RX64\n", pvVMCB->ctrl.u64MSRPMPhysAddr));
1575 Log(("ctrl.u64TSCOffset %RX64\n", pvVMCB->ctrl.u64TSCOffset));
1576
1577 Log(("ctrl.TLBCtrl.u32ASID %x\n", pvVMCB->ctrl.TLBCtrl.n.u32ASID));
1578 Log(("ctrl.TLBCtrl.u8TLBFlush %x\n", pvVMCB->ctrl.TLBCtrl.n.u8TLBFlush));
1579 Log(("ctrl.TLBCtrl.u24Reserved %x\n", pvVMCB->ctrl.TLBCtrl.n.u24Reserved));
1580
1581 Log(("ctrl.IntCtrl.u8VTPR %x\n", pvVMCB->ctrl.IntCtrl.n.u8VTPR));
1582 Log(("ctrl.IntCtrl.u1VIrqValid %x\n", pvVMCB->ctrl.IntCtrl.n.u1VIrqValid));
1583 Log(("ctrl.IntCtrl.u7Reserved %x\n", pvVMCB->ctrl.IntCtrl.n.u7Reserved));
1584 Log(("ctrl.IntCtrl.u4VIrqPriority %x\n", pvVMCB->ctrl.IntCtrl.n.u4VIrqPriority));
1585 Log(("ctrl.IntCtrl.u1IgnoreTPR %x\n", pvVMCB->ctrl.IntCtrl.n.u1IgnoreTPR));
1586 Log(("ctrl.IntCtrl.u3Reserved %x\n", pvVMCB->ctrl.IntCtrl.n.u3Reserved));
1587 Log(("ctrl.IntCtrl.u1VIrqMasking %x\n", pvVMCB->ctrl.IntCtrl.n.u1VIrqMasking));
1588 Log(("ctrl.IntCtrl.u7Reserved2 %x\n", pvVMCB->ctrl.IntCtrl.n.u7Reserved2));
1589 Log(("ctrl.IntCtrl.u8VIrqVector %x\n", pvVMCB->ctrl.IntCtrl.n.u8VIrqVector));
1590 Log(("ctrl.IntCtrl.u24Reserved %x\n", pvVMCB->ctrl.IntCtrl.n.u24Reserved));
1591
1592 Log(("ctrl.u64IntShadow %RX64\n", pvVMCB->ctrl.u64IntShadow));
1593 Log(("ctrl.u64ExitCode %RX64\n", pvVMCB->ctrl.u64ExitCode));
1594 Log(("ctrl.u64ExitInfo1 %RX64\n", pvVMCB->ctrl.u64ExitInfo1));
1595 Log(("ctrl.u64ExitInfo2 %RX64\n", pvVMCB->ctrl.u64ExitInfo2));
1596 Log(("ctrl.ExitIntInfo.u8Vector %x\n", pvVMCB->ctrl.ExitIntInfo.n.u8Vector));
1597 Log(("ctrl.ExitIntInfo.u3Type %x\n", pvVMCB->ctrl.ExitIntInfo.n.u3Type));
1598 Log(("ctrl.ExitIntInfo.u1ErrorCodeValid %x\n", pvVMCB->ctrl.ExitIntInfo.n.u1ErrorCodeValid));
1599 Log(("ctrl.ExitIntInfo.u19Reserved %x\n", pvVMCB->ctrl.ExitIntInfo.n.u19Reserved));
1600 Log(("ctrl.ExitIntInfo.u1Valid %x\n", pvVMCB->ctrl.ExitIntInfo.n.u1Valid));
1601 Log(("ctrl.ExitIntInfo.u32ErrorCode %x\n", pvVMCB->ctrl.ExitIntInfo.n.u32ErrorCode));
1602 Log(("ctrl.NestedPaging %RX64\n", pvVMCB->ctrl.NestedPaging.au64));
1603 Log(("ctrl.EventInject.u8Vector %x\n", pvVMCB->ctrl.EventInject.n.u8Vector));
1604 Log(("ctrl.EventInject.u3Type %x\n", pvVMCB->ctrl.EventInject.n.u3Type));
1605 Log(("ctrl.EventInject.u1ErrorCodeValid %x\n", pvVMCB->ctrl.EventInject.n.u1ErrorCodeValid));
1606 Log(("ctrl.EventInject.u19Reserved %x\n", pvVMCB->ctrl.EventInject.n.u19Reserved));
1607 Log(("ctrl.EventInject.u1Valid %x\n", pvVMCB->ctrl.EventInject.n.u1Valid));
1608 Log(("ctrl.EventInject.u32ErrorCode %x\n", pvVMCB->ctrl.EventInject.n.u32ErrorCode));
1609
1610 Log(("ctrl.u64NestedPagingCR3 %RX64\n", pvVMCB->ctrl.u64NestedPagingCR3));
1611 Log(("ctrl.u64LBRVirt %RX64\n", pvVMCB->ctrl.u64LBRVirt));
1612
1613 Log(("guest.CS.u16Sel %04X\n", pvVMCB->guest.CS.u16Sel));
1614 Log(("guest.CS.u16Attr %04X\n", pvVMCB->guest.CS.u16Attr));
1615 Log(("guest.CS.u32Limit %X\n", pvVMCB->guest.CS.u32Limit));
1616 Log(("guest.CS.u64Base %RX64\n", pvVMCB->guest.CS.u64Base));
1617 Log(("guest.DS.u16Sel %04X\n", pvVMCB->guest.DS.u16Sel));
1618 Log(("guest.DS.u16Attr %04X\n", pvVMCB->guest.DS.u16Attr));
1619 Log(("guest.DS.u32Limit %X\n", pvVMCB->guest.DS.u32Limit));
1620 Log(("guest.DS.u64Base %RX64\n", pvVMCB->guest.DS.u64Base));
1621 Log(("guest.ES.u16Sel %04X\n", pvVMCB->guest.ES.u16Sel));
1622 Log(("guest.ES.u16Attr %04X\n", pvVMCB->guest.ES.u16Attr));
1623 Log(("guest.ES.u32Limit %X\n", pvVMCB->guest.ES.u32Limit));
1624 Log(("guest.ES.u64Base %RX64\n", pvVMCB->guest.ES.u64Base));
1625 Log(("guest.FS.u16Sel %04X\n", pvVMCB->guest.FS.u16Sel));
1626 Log(("guest.FS.u16Attr %04X\n", pvVMCB->guest.FS.u16Attr));
1627 Log(("guest.FS.u32Limit %X\n", pvVMCB->guest.FS.u32Limit));
1628 Log(("guest.FS.u64Base %RX64\n", pvVMCB->guest.FS.u64Base));
1629 Log(("guest.GS.u16Sel %04X\n", pvVMCB->guest.GS.u16Sel));
1630 Log(("guest.GS.u16Attr %04X\n", pvVMCB->guest.GS.u16Attr));
1631 Log(("guest.GS.u32Limit %X\n", pvVMCB->guest.GS.u32Limit));
1632 Log(("guest.GS.u64Base %RX64\n", pvVMCB->guest.GS.u64Base));
1633
1634 Log(("guest.GDTR.u32Limit %X\n", pvVMCB->guest.GDTR.u32Limit));
1635 Log(("guest.GDTR.u64Base %RX64\n", pvVMCB->guest.GDTR.u64Base));
1636
1637 Log(("guest.LDTR.u16Sel %04X\n", pvVMCB->guest.LDTR.u16Sel));
1638 Log(("guest.LDTR.u16Attr %04X\n", pvVMCB->guest.LDTR.u16Attr));
1639 Log(("guest.LDTR.u32Limit %X\n", pvVMCB->guest.LDTR.u32Limit));
1640 Log(("guest.LDTR.u64Base %RX64\n", pvVMCB->guest.LDTR.u64Base));
1641
1642 Log(("guest.IDTR.u32Limit %X\n", pvVMCB->guest.IDTR.u32Limit));
1643 Log(("guest.IDTR.u64Base %RX64\n", pvVMCB->guest.IDTR.u64Base));
1644
1645 Log(("guest.TR.u16Sel %04X\n", pvVMCB->guest.TR.u16Sel));
1646 Log(("guest.TR.u16Attr %04X\n", pvVMCB->guest.TR.u16Attr));
1647 Log(("guest.TR.u32Limit %X\n", pvVMCB->guest.TR.u32Limit));
1648 Log(("guest.TR.u64Base %RX64\n", pvVMCB->guest.TR.u64Base));
1649
1650 Log(("guest.u8CPL %X\n", pvVMCB->guest.u8CPL));
1651 Log(("guest.u64CR0 %RX64\n", pvVMCB->guest.u64CR0));
1652 Log(("guest.u64CR2 %RX64\n", pvVMCB->guest.u64CR2));
1653 Log(("guest.u64CR3 %RX64\n", pvVMCB->guest.u64CR3));
1654 Log(("guest.u64CR4 %RX64\n", pvVMCB->guest.u64CR4));
1655 Log(("guest.u64DR6 %RX64\n", pvVMCB->guest.u64DR6));
1656 Log(("guest.u64DR7 %RX64\n", pvVMCB->guest.u64DR7));
1657
1658 Log(("guest.u64RIP %RX64\n", pvVMCB->guest.u64RIP));
1659 Log(("guest.u64RSP %RX64\n", pvVMCB->guest.u64RSP));
1660 Log(("guest.u64RAX %RX64\n", pvVMCB->guest.u64RAX));
1661 Log(("guest.u64RFlags %RX64\n", pvVMCB->guest.u64RFlags));
1662
1663 Log(("guest.u64SysEnterCS %RX64\n", pvVMCB->guest.u64SysEnterCS));
1664 Log(("guest.u64SysEnterEIP %RX64\n", pvVMCB->guest.u64SysEnterEIP));
1665 Log(("guest.u64SysEnterESP %RX64\n", pvVMCB->guest.u64SysEnterESP));
1666
1667 Log(("guest.u64EFER %RX64\n", pvVMCB->guest.u64EFER));
1668 Log(("guest.u64STAR %RX64\n", pvVMCB->guest.u64STAR));
1669 Log(("guest.u64LSTAR %RX64\n", pvVMCB->guest.u64LSTAR));
1670 Log(("guest.u64CSTAR %RX64\n", pvVMCB->guest.u64CSTAR));
1671 Log(("guest.u64SFMASK %RX64\n", pvVMCB->guest.u64SFMASK));
1672 Log(("guest.u64KernelGSBase %RX64\n", pvVMCB->guest.u64KernelGSBase));
1673 Log(("guest.u64GPAT %RX64\n", pvVMCB->guest.u64GPAT));
1674 Log(("guest.u64DBGCTL %RX64\n", pvVMCB->guest.u64DBGCTL));
1675 Log(("guest.u64BR_FROM %RX64\n", pvVMCB->guest.u64BR_FROM));
1676 Log(("guest.u64BR_TO %RX64\n", pvVMCB->guest.u64BR_TO));
1677 Log(("guest.u64LASTEXCPFROM %RX64\n", pvVMCB->guest.u64LASTEXCPFROM));
1678 Log(("guest.u64LASTEXCPTO %RX64\n", pvVMCB->guest.u64LASTEXCPTO));
1679#endif
1680 rc = VERR_SVM_UNABLE_TO_START_VM;
1681 VMMR0LogFlushEnable(pVCpu);
1682 goto end;
1683 }
1684
1685 /* Let's first sync back EIP, ESP, and EFLAGS. */
1686 pCtx->rip = pvVMCB->guest.u64RIP;
1687 pCtx->rsp = pvVMCB->guest.u64RSP;
1688 pCtx->eflags.u32 = pvVMCB->guest.u64RFlags;
1689    /* RAX is saved/restored across the vmrun instruction. */
1690 pCtx->rax = pvVMCB->guest.u64RAX;
1691
1692 /*
1693 * Save all the MSRs that can be changed by the guest without causing a world switch.
1694 * FS & GS base are saved with SVM_READ_SELREG.
1695 */
1696 pCtx->msrSTAR = pvVMCB->guest.u64STAR; /* legacy syscall eip, cs & ss */
1697 pCtx->msrLSTAR = pvVMCB->guest.u64LSTAR; /* 64-bit mode syscall rip */
1698 pCtx->msrCSTAR = pvVMCB->guest.u64CSTAR; /* compatibility mode syscall rip */
1699 pCtx->msrSFMASK = pvVMCB->guest.u64SFMASK; /* syscall flag mask */
1700 pCtx->msrKERNELGSBASE = pvVMCB->guest.u64KernelGSBase; /* swapgs exchange value */
1701 pCtx->SysEnter.cs = pvVMCB->guest.u64SysEnterCS;
1702 pCtx->SysEnter.eip = pvVMCB->guest.u64SysEnterEIP;
1703 pCtx->SysEnter.esp = pvVMCB->guest.u64SysEnterESP;
1704
1705 /* Can be updated behind our back in the nested paging case. */
1706 pCtx->cr2 = pvVMCB->guest.u64CR2;
1707
1708 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1709 SVM_READ_SELREG(SS, ss);
1710 SVM_READ_SELREG(CS, cs);
1711 SVM_READ_SELREG(DS, ds);
1712 SVM_READ_SELREG(ES, es);
1713 SVM_READ_SELREG(FS, fs);
1714 SVM_READ_SELREG(GS, gs);
1715
1716 /*
1717 * Correct the hidden CS granularity flag. Haven't seen it being wrong in any other
1718 * register (yet).
1719 */
1720 if ( !pCtx->cs.Attr.n.u1Granularity
1721 && pCtx->cs.Attr.n.u1Present
1722 && pCtx->cs.u32Limit > UINT32_C(0xfffff))
1723 {
1724 Assert((pCtx->cs.u32Limit & 0xfff) == 0xfff);
1725 pCtx->cs.Attr.n.u1Granularity = 1;
1726 }
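    /* Note: an effective limit above 0xfffff is only possible with page granularity (G=1), in which
       case the low 12 bits of the effective limit are all ones; e.g. a flat 4 GB code segment has
       u32Limit=0xffffffff and G=1, which is exactly what the fix-up above restores. */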
1727#define SVM_ASSERT_SEL_GRANULARITY(reg) \
1728 AssertMsg( !pCtx->reg.Attr.n.u1Present \
1729 || ( pCtx->reg.Attr.n.u1Granularity \
1730 ? (pCtx->reg.u32Limit & 0xfff) == 0xfff \
1731 : pCtx->reg.u32Limit <= 0xfffff), \
1732 ("%#x %#x %#llx\n", pCtx->reg.u32Limit, pCtx->reg.Attr.u, pCtx->reg.u64Base))
1733 SVM_ASSERT_SEL_GRANULARITY(ss);
1734 SVM_ASSERT_SEL_GRANULARITY(cs);
1735 SVM_ASSERT_SEL_GRANULARITY(ds);
1736 SVM_ASSERT_SEL_GRANULARITY(es);
1737 SVM_ASSERT_SEL_GRANULARITY(fs);
1738 SVM_ASSERT_SEL_GRANULARITY(gs);
1739#undef SVM_ASSERT_SEL_GRANULARITY
1740
1741 /*
1742 * Correct the hidden SS DPL field. It can be wrong on certain CPUs
1743 * sometimes (seen it on AMD Fusion CPUs with 64-bit guests). The CPU
1744 * always uses the CPL field in the VMCB instead of the DPL in the hidden
1745     * SS (AMD spec. chapter 15.5.1, Basic Operation).
1746 */
1747 Assert(!(pvVMCB->guest.u8CPL & ~0x3));
1748 pCtx->ss.Attr.n.u2Dpl = pvVMCB->guest.u8CPL & 0x3;
1749
1750 /*
1751 * Remaining guest CPU context: TR, IDTR, GDTR, LDTR;
1752 * must sync everything otherwise we can get out of sync when jumping back to ring-3.
1753 */
1754 SVM_READ_SELREG(LDTR, ldtr);
1755 SVM_READ_SELREG(TR, tr);
1756
1757 pCtx->gdtr.cbGdt = pvVMCB->guest.GDTR.u32Limit;
1758 pCtx->gdtr.pGdt = pvVMCB->guest.GDTR.u64Base;
1759
1760 pCtx->idtr.cbIdt = pvVMCB->guest.IDTR.u32Limit;
1761 pCtx->idtr.pIdt = pvVMCB->guest.IDTR.u64Base;
1762
1763 /*
1764 * No reason to sync back the CRx and DRx registers as they cannot be changed by the guest
1765 * unless in the nested paging case where CR3 can be changed by the guest.
1766 */
1767 if ( pVM->hm.s.fNestedPaging
1768 && pCtx->cr3 != pvVMCB->guest.u64CR3)
1769 {
1770 CPUMSetGuestCR3(pVCpu, pvVMCB->guest.u64CR3);
1771 PGMUpdateCR3(pVCpu, pvVMCB->guest.u64CR3);
1772 }
1773
1774 /* Note! NOW IT'S SAFE FOR LOGGING! */
1775 VMMR0LogFlushEnable(pVCpu);
1776
1777    /* Take care of the interrupt shadow that follows sti / mov ss (see AMD spec. 15.20.5 Interrupt Shadows). */
1778 if (pvVMCB->ctrl.u64IntShadow & SVM_INTERRUPT_SHADOW_ACTIVE)
1779 {
1780 Log(("uInterruptState %x rip=%RGv\n", pvVMCB->ctrl.u64IntShadow, (RTGCPTR)pCtx->rip));
1781 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
1782 }
1783 else
1784 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
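    /* Note: the interrupt-shadow bit means the guest executed STI or MOV SS immediately before the
       #VMEXIT; recording the RIP via EMSetInhibitInterruptsPC keeps interrupts inhibited for exactly
       one more guest instruction, after which the force-action flag is dropped. */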
1785
1786 Log2(("exitCode = %x\n", exitCode));
1787
1788 /* Sync back DR6 as it could have been changed by hitting breakpoints. */
1789 pCtx->dr[6] = pvVMCB->guest.u64DR6;
1790 /* DR7.GD can be cleared by debug exceptions, so sync it back as well. */
1791 pCtx->dr[7] = pvVMCB->guest.u64DR7;
1792
1793 /* Check if an injected event was interrupted prematurely. */
1794 pVCpu->hm.s.Event.intInfo = pvVMCB->ctrl.ExitIntInfo.au64[0];
1795 if ( pvVMCB->ctrl.ExitIntInfo.n.u1Valid
1796 /* we don't care about 'int xx' as the instruction will be restarted. */
1797 && pvVMCB->ctrl.ExitIntInfo.n.u3Type != SVM_EVENT_SOFTWARE_INT)
1798 {
1799 Log(("Pending inject %RX64 at %RGv exit=%08x\n", pVCpu->hm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitCode));
1800
1801#ifdef LOG_ENABLED
1802 SVM_EVENT Event;
1803 Event.au64[0] = pVCpu->hm.s.Event.intInfo;
1804
1805 if ( exitCode == SVM_EXIT_EXCEPTION_E
1806 && Event.n.u8Vector == 0xE)
1807 {
1808 Log(("Double fault!\n"));
1809 }
1810#endif
1811
1812 pVCpu->hm.s.Event.fPending = true;
1813 /* Error code present? (redundant) */
1814 if (pvVMCB->ctrl.ExitIntInfo.n.u1ErrorCodeValid)
1815 pVCpu->hm.s.Event.errCode = pvVMCB->ctrl.ExitIntInfo.n.u32ErrorCode;
1816 else
1817 pVCpu->hm.s.Event.errCode = 0;
1818 }
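    /* Note: a pending event recorded here (from EXITINTINFO) is expected to be re-injected via the
       VMCB EVENTINJ field on the next VM entry, so the interrupted delivery is retried rather than
       lost. */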
1819#ifdef VBOX_WITH_STATISTICS
1820 if (exitCode == SVM_EXIT_NPF)
1821 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf);
1822 else
1823 STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[exitCode & MASK_EXITREASON_STAT]);
1824#endif
1825
1826 /* Sync back the TPR if it was changed. */
1827 if (fSyncTPR)
1828 {
1829 if (pVM->hm.s.fTPRPatchingActive)
1830 {
1831 if ((pCtx->msrLSTAR & 0xff) != u8LastTPR)
1832 {
1833 /* Our patch code uses LSTAR for TPR caching. */
1834 rc2 = PDMApicSetTPR(pVCpu, pCtx->msrLSTAR & 0xff);
1835 AssertRC(rc2);
1836 }
1837 }
1838 else
1839 {
1840 if ((uint8_t)(u8LastTPR >> 4) != pvVMCB->ctrl.IntCtrl.n.u8VTPR)
1841 {
1842 /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
1843 rc2 = PDMApicSetTPR(pVCpu, pvVMCB->ctrl.IntCtrl.n.u8VTPR << 4);
1844 AssertRC(rc2);
1845 }
1846 }
1847 }
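    /* Example: if the guest raised its task priority to 0x40 via the APIC TPR register, the VMCB
       VTPR field holds 0x4 (TPR bits 7-4) and the PDMApicSetTPR call above shifts it left by 4 to
       restore the full 8-bit value. */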
1848
1849#ifdef DBGFTRACE_ENABLED /** @todo DTrace */
1850 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x at %04:%08RX64 %RX64 %RX64 %RX64",
1851 exitCode, pCtx->cs.Sel, pCtx->rip,
1852 pvVMCB->ctrl.u64ExitInfo1, pvVMCB->ctrl.u64ExitInfo2, pvVMCB->ctrl.ExitIntInfo.au64[0]);
1853#endif
1854#if ARCH_BITS == 64 /* for the time being */
1855 VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, exitCode, pvVMCB->ctrl.u64ExitInfo1, pvVMCB->ctrl.u64ExitInfo2,
1856 pvVMCB->ctrl.ExitIntInfo.au64[0], UINT64_MAX);
1857#endif
1858 STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatExit1, &pVCpu->hm.s.StatExit2, x);
1859
1860 /* Deal with the reason of the VM-exit. */
1861 switch (exitCode)
1862 {
1863 case SVM_EXIT_EXCEPTION_0: case SVM_EXIT_EXCEPTION_1: case SVM_EXIT_EXCEPTION_2: case SVM_EXIT_EXCEPTION_3:
1864 case SVM_EXIT_EXCEPTION_4: case SVM_EXIT_EXCEPTION_5: case SVM_EXIT_EXCEPTION_6: case SVM_EXIT_EXCEPTION_7:
1865 case SVM_EXIT_EXCEPTION_8: case SVM_EXIT_EXCEPTION_9: case SVM_EXIT_EXCEPTION_A: case SVM_EXIT_EXCEPTION_B:
1866 case SVM_EXIT_EXCEPTION_C: case SVM_EXIT_EXCEPTION_D: case SVM_EXIT_EXCEPTION_E: case SVM_EXIT_EXCEPTION_F:
1867 case SVM_EXIT_EXCEPTION_10: case SVM_EXIT_EXCEPTION_11: case SVM_EXIT_EXCEPTION_12: case SVM_EXIT_EXCEPTION_13:
1868 case SVM_EXIT_EXCEPTION_14: case SVM_EXIT_EXCEPTION_15: case SVM_EXIT_EXCEPTION_16: case SVM_EXIT_EXCEPTION_17:
1869 case SVM_EXIT_EXCEPTION_18: case SVM_EXIT_EXCEPTION_19: case SVM_EXIT_EXCEPTION_1A: case SVM_EXIT_EXCEPTION_1B:
1870 case SVM_EXIT_EXCEPTION_1C: case SVM_EXIT_EXCEPTION_1D: case SVM_EXIT_EXCEPTION_1E: case SVM_EXIT_EXCEPTION_1F:
1871 {
1872 /* Pending trap. */
1873 SVM_EVENT Event;
1874 uint32_t vector = exitCode - SVM_EXIT_EXCEPTION_0;
1875
1876            Log2(("Guest exception %d\n", vector));
1877 switch (vector)
1878 {
1879 case X86_XCPT_DB:
1880 {
1881 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB);
1882
1883 /* Note that we don't support guest and host-initiated debugging at the same time. */
1884 Assert(DBGFIsStepping(pVCpu) || CPUMIsHyperDebugStateActive(pVCpu));
1885
1886 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), pCtx->dr[6]);
1887 if (rc == VINF_EM_RAW_GUEST_TRAP)
1888 {
1889 Log(("Trap %x (debug) at %016RX64\n", vector, pCtx->rip));
1890
1891 /* Reinject the exception. */
1892 Event.au64[0] = 0;
1893 Event.n.u3Type = SVM_EVENT_EXCEPTION; /* trap or fault */
1894 Event.n.u1Valid = 1;
1895 Event.n.u8Vector = X86_XCPT_DB;
1896
1897 hmR0SvmSetPendingEvent(pVCpu, &Event);
1898 goto ResumeExecution;
1899 }
1900 /* Return to ring 3 to deal with the debug exit code. */
1901 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs.Sel, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
1902 break;
1903 }
1904
1905 case X86_XCPT_NM:
1906 {
1907 Log(("#NM fault at %RGv\n", (RTGCPTR)pCtx->rip));
1908
1909 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
1910 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
1911 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
1912 if (rc == VINF_SUCCESS)
1913 {
1914 Assert(CPUMIsGuestFPUStateActive(pVCpu));
1915 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowNM);
1916
1917 /* Continue execution. */
1918 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
1919
1920 goto ResumeExecution;
1921 }
1922
1923 Log(("Forward #NM fault to the guest\n"));
1924 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM);
1925
1926 Event.au64[0] = 0;
1927 Event.n.u3Type = SVM_EVENT_EXCEPTION;
1928 Event.n.u1Valid = 1;
1929 Event.n.u8Vector = X86_XCPT_NM;
1930
1931 hmR0SvmSetPendingEvent(pVCpu, &Event);
1932 goto ResumeExecution;
1933 }
1934
1935 case X86_XCPT_PF: /* Page fault */
1936 {
1937 uint32_t errCode = pvVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
1938 RTGCUINTPTR uFaultAddress = pvVMCB->ctrl.u64ExitInfo2; /* EXITINFO2 = fault address */
1939
1940#ifdef VBOX_ALWAYS_TRAP_PF
1941 if (pVM->hm.s.fNestedPaging)
1942 {
1943 /*
1944 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
1945 */
1946 Log(("Guest page fault at %04X:%RGv cr2=%RGv error code %x rsp=%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip,
1947 uFaultAddress, errCode, (RTGCPTR)pCtx->rsp));
1948 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
1949
1950 /* Now we must update CR2. */
1951 pCtx->cr2 = uFaultAddress;
1952
1953 Event.au64[0] = 0;
1954 Event.n.u3Type = SVM_EVENT_EXCEPTION;
1955 Event.n.u1Valid = 1;
1956 Event.n.u8Vector = X86_XCPT_PF;
1957 Event.n.u1ErrorCodeValid = 1;
1958 Event.n.u32ErrorCode = errCode;
1959
1960 hmR0SvmSetPendingEvent(pVCpu, &Event);
1961 goto ResumeExecution;
1962 }
1963#endif
1964 Assert(!pVM->hm.s.fNestedPaging);
1965
1966#ifdef VBOX_HM_WITH_GUEST_PATCHING
1967                /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
1968 if ( pVM->hm.s.fTRPPatchingAllowed
1969 && (uFaultAddress & 0xfff) == 0x080
1970 && !(errCode & X86_TRAP_PF_P) /* not present */
1971 && CPUMGetGuestCPL(pVCpu) == 0
1972 && !CPUMIsGuestInLongModeEx(pCtx)
1973 && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
1974 {
1975 RTGCPHYS GCPhysApicBase, GCPhys;
1976 GCPhysApicBase = pCtx->msrApicBase;
1977 GCPhysApicBase &= PAGE_BASE_GC_MASK;
1978
1979 rc = PGMGstGetPage(pVCpu, (RTGCPTR)uFaultAddress, NULL, &GCPhys);
1980 if ( rc == VINF_SUCCESS
1981 && GCPhys == GCPhysApicBase)
1982 {
1983 /* Only attempt to patch the instruction once. */
1984 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
1985 if (!pPatch)
1986 {
1987 rc = VINF_EM_HM_PATCH_TPR_INSTR;
1988 break;
1989 }
1990 }
1991 }
1992#endif
1993
1994 Log2(("Page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode));
1995                /* EXITINFO2 contains the faulting linear address. */
1996 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
1997 TRPMSetErrorCode(pVCpu, errCode);
1998 TRPMSetFaultAddress(pVCpu, uFaultAddress);
1999
2000 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
2001 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)uFaultAddress);
2002 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
2003 if (rc == VINF_SUCCESS)
2004 {
2005 /* We've successfully synced our shadow pages, so let's just continue execution. */
2006 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode));
2007 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
2008
2009 TRPMResetTrap(pVCpu);
2010 goto ResumeExecution;
2011 }
2012 else if (rc == VINF_EM_RAW_GUEST_TRAP)
2013 {
2014 /*
2015 * A genuine pagefault. Forward the trap to the guest by injecting the exception and resuming execution.
2016 */
2017 Log2(("Forward page fault to the guest\n"));
2018 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF);
2019 /* The error code might have been changed. */
2020 errCode = TRPMGetErrorCode(pVCpu);
2021
2022 TRPMResetTrap(pVCpu);
2023
2024 /* Now we must update CR2. */
2025 pCtx->cr2 = uFaultAddress;
2026
2027 Event.au64[0] = 0;
2028 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2029 Event.n.u1Valid = 1;
2030 Event.n.u8Vector = X86_XCPT_PF;
2031 Event.n.u1ErrorCodeValid = 1;
2032 Event.n.u32ErrorCode = errCode;
2033
2034 hmR0SvmSetPendingEvent(pVCpu, &Event);
2035 goto ResumeExecution;
2036 }
2037#ifdef VBOX_STRICT
2038 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
2039 LogFlow(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
2040#endif
2041 /* Need to go back to the recompiler to emulate the instruction. */
2042 TRPMResetTrap(pVCpu);
2043 break;
2044 }
2045
2046 case X86_XCPT_MF: /* Floating point exception. */
2047 {
2048 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF);
2049 if (!(pCtx->cr0 & X86_CR0_NE))
2050 {
2051 /* old style FPU error reporting needs some extra work. */
2052 /** @todo don't fall back to the recompiler, but do it manually. */
2053 rc = VINF_EM_RAW_EMULATE_INSTR;
2054 break;
2055 }
2056 Log(("Trap %x at %RGv\n", vector, (RTGCPTR)pCtx->rip));
2057
2058 Event.au64[0] = 0;
2059 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2060 Event.n.u1Valid = 1;
2061 Event.n.u8Vector = X86_XCPT_MF;
2062
2063 hmR0SvmSetPendingEvent(pVCpu, &Event);
2064 goto ResumeExecution;
2065 }
2066
2067#ifdef VBOX_STRICT
2068 case X86_XCPT_BP: /* Breakpoint. */
2069            case X86_XCPT_GP:   /* General protection fault exception. */
2070            case X86_XCPT_UD:   /* Invalid opcode exception. */
2071 case X86_XCPT_DE: /* Divide error. */
2072 case X86_XCPT_SS: /* Stack segment exception. */
2073 case X86_XCPT_NP: /* Segment not present exception. */
2074 {
2075 Event.au64[0] = 0;
2076 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2077 Event.n.u1Valid = 1;
2078 Event.n.u8Vector = vector;
2079
2080 switch (vector)
2081 {
2082 case X86_XCPT_GP:
2083 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP);
2084 Event.n.u1ErrorCodeValid = 1;
2085 Event.n.u32ErrorCode = pvVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
2086 break;
2087 case X86_XCPT_BP:
2088                        /* Note: this saves the wrong EIP on the stack (pointing to the int3 instead of the next instruction). */
2089 break;
2090 case X86_XCPT_DE:
2091 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE);
2092 break;
2093 case X86_XCPT_UD:
2094 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD);
2095 break;
2096 case X86_XCPT_SS:
2097 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS);
2098 Event.n.u1ErrorCodeValid = 1;
2099 Event.n.u32ErrorCode = pvVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
2100 break;
2101 case X86_XCPT_NP:
2102 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP);
2103 Event.n.u1ErrorCodeValid = 1;
2104 Event.n.u32ErrorCode = pvVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
2105 break;
2106 }
2107 Log(("Trap %x at %04x:%RGv esi=%x\n", vector, pCtx->cs.Sel, (RTGCPTR)pCtx->rip, pCtx->esi));
2108 hmR0SvmSetPendingEvent(pVCpu, &Event);
2109 goto ResumeExecution;
2110 }
2111#endif
2112 default:
2113 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
2114 rc = VERR_SVM_UNEXPECTED_XCPT_EXIT;
2115 break;
2116
2117 } /* switch (vector) */
2118 break;
2119 }
2120
2121 case SVM_EXIT_NPF:
2122 {
2123            /* EXITINFO1 contains the fault error code; EXITINFO2 contains the guest physical address causing the fault. */
2124 uint32_t errCode = pvVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
2125 RTGCPHYS GCPhysFault = pvVMCB->ctrl.u64ExitInfo2; /* EXITINFO2 = fault address */
2126 PGMMODE enmShwPagingMode;
2127
2128 Assert(pVM->hm.s.fNestedPaging);
2129 LogFlow(("Nested page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, GCPhysFault, errCode));
2130
2131#ifdef VBOX_HM_WITH_GUEST_PATCHING
2132            /* Shortcut for APIC TPR reads and writes; 32-bit guests only. */
2133 if ( pVM->hm.s.fTRPPatchingAllowed
2134 && (GCPhysFault & PAGE_OFFSET_MASK) == 0x080
2135 && ( !(errCode & X86_TRAP_PF_P) /* not present */
2136 || (errCode & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) == (X86_TRAP_PF_P | X86_TRAP_PF_RSVD) /* mmio optimization */)
2137 && CPUMGetGuestCPL(pVCpu) == 0
2138 && !CPUMIsGuestInLongModeEx(pCtx)
2139 && pVM->hm.s.cPatches < RT_ELEMENTS(pVM->hm.s.aPatches))
2140 {
2141 RTGCPHYS GCPhysApicBase = pCtx->msrApicBase;
2142 GCPhysApicBase &= PAGE_BASE_GC_MASK;
2143
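            /* Note: offset 0x80 within the local APIC MMIO page is the Task Priority Register, the
               only access the TPR patching below is interested in. */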
2144 if (GCPhysFault == GCPhysApicBase + 0x80)
2145 {
2146 /* Only attempt to patch the instruction once. */
2147 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
2148 if (!pPatch)
2149 {
2150 rc = VINF_EM_HM_PATCH_TPR_INSTR;
2151 break;
2152 }
2153 }
2154 }
2155#endif
2156
2157 /* Handle the pagefault trap for the nested shadow table. */
2158#if HC_ARCH_BITS == 32 /** @todo shadow this in a variable. */
2159 if (CPUMIsGuestInLongModeEx(pCtx))
2160 enmShwPagingMode = PGMMODE_AMD64_NX;
2161 else
2162#endif
2163 enmShwPagingMode = PGMGetHostMode(pVM);
2164
2165 /* MMIO optimization */
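            /* Note: MMIO pages are (presumably) marked in the nested page tables with reserved bits,
               so an access to them faults with both P and RSVD set and can be routed straight to
               PGMR0Trap0eHandlerNPMisconfig below without walking the guest page tables. */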
2166 Assert((errCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != X86_TRAP_PF_RSVD);
2167 if ((errCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) == (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2168 {
2169 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, enmShwPagingMode, CPUMCTX2CORE(pCtx), GCPhysFault, errCode);
2170
2171 /*
2172 * If we succeed, resume execution.
2173                 * Or, if we failed to interpret the instruction because we couldn't get the guest physical address
2174                 * of the page containing the instruction via the guest's page tables (we would have invalidated the
2175                 * guest page in the host TLB), resume execution anyway; that causes a guest page fault and lets the
2176                 * guest handle this odd case itself. See @bugref{6043}.
2177 */
2178 if ( rc == VINF_SUCCESS
2179 || rc == VERR_PAGE_TABLE_NOT_PRESENT
2180 || rc == VERR_PAGE_NOT_PRESENT)
2181 {
2182 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhysFault, (RTGCPTR)pCtx->rip));
2183 goto ResumeExecution;
2184 }
2185                Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> %Rrc\n", GCPhysFault, (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
2186 break;
2187 }
2188
2189            /* EXITINFO2 contains the guest physical address of the fault. */
2190 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
2191 TRPMSetErrorCode(pVCpu, errCode);
2192 TRPMSetFaultAddress(pVCpu, GCPhysFault);
2193
2194 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmShwPagingMode, errCode, CPUMCTX2CORE(pCtx), GCPhysFault);
2195 Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
2196
2197 /*
2198 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}.
2199 */
2200 if ( rc == VINF_SUCCESS
2201 || rc == VERR_PAGE_TABLE_NOT_PRESENT
2202 || rc == VERR_PAGE_NOT_PRESENT)
2203 {
2204 /* We've successfully synced our shadow pages, so let's just continue execution. */
2205 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, GCPhysFault, errCode));
2206 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF);
2207
2208 TRPMResetTrap(pVCpu);
2209 goto ResumeExecution;
2210 }
2211
2212#ifdef VBOX_STRICT
2213 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2214 LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", VBOXSTRICTRC_VAL(rc)));
2215#endif
2216 /* Need to go back to the recompiler to emulate the instruction. */
2217 TRPMResetTrap(pVCpu);
2218 break;
2219 }
2220
2221 case SVM_EXIT_VINTR:
2222 /* A virtual interrupt is about to be delivered, which means IF=1. */
2223 Log(("SVM_EXIT_VINTR IF=%d\n", pCtx->eflags.Bits.u1IF));
2224 pvVMCB->ctrl.IntCtrl.n.u1VIrqValid = 0;
2225 pvVMCB->ctrl.IntCtrl.n.u8VIrqVector = 0;
2226 goto ResumeExecution;
2227
2228 case SVM_EXIT_FERR_FREEZE:
2229 case SVM_EXIT_INTR:
2230 case SVM_EXIT_NMI:
2231 case SVM_EXIT_SMI:
2232 case SVM_EXIT_INIT:
2233 /* External interrupt; leave to allow it to be dispatched again. */
2234 rc = VINF_EM_RAW_INTERRUPT;
2235 break;
2236
2237 case SVM_EXIT_WBINVD:
2238 case SVM_EXIT_INVD: /* Guest software attempted to execute INVD. */
2239 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvd);
2240 /* Skip instruction and continue directly. */
2241 pCtx->rip += 2; /* Note! hardcoded opcode size! */
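            /* Note: both WBINVD (0F 09) and INVD (0F 08) are two-byte instructions, hence the fixed +2. */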
2242 /* Continue execution.*/
2243 goto ResumeExecution;
2244
2245 case SVM_EXIT_CPUID: /* Guest software attempted to execute CPUID. */
2246 {
2247 Log2(("SVM: Cpuid at %RGv for %x\n", (RTGCPTR)pCtx->rip, pCtx->eax));
2248 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCpuid);
2249 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2250 if (rc == VINF_SUCCESS)
2251 {
2252 /* Update EIP and continue execution. */
2253 pCtx->rip += 2; /* Note! hardcoded opcode size! */
2254 goto ResumeExecution;
2255 }
2256 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
2257 rc = VINF_EM_RAW_EMULATE_INSTR;
2258 break;
2259 }
2260
2261 case SVM_EXIT_RDTSC: /* Guest software attempted to execute RDTSC. */
2262 {
2263 Log2(("SVM: Rdtsc\n"));
2264 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtsc);
2265 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2266 if (rc == VINF_SUCCESS)
2267 {
2268 /* Update EIP and continue execution. */
2269 pCtx->rip += 2; /* Note! hardcoded opcode size! */
2270 goto ResumeExecution;
2271 }
2272 rc = VINF_EM_RAW_EMULATE_INSTR;
2273 break;
2274 }
2275
2276 case SVM_EXIT_RDPMC: /* Guest software attempted to execute RDPMC. */
2277 {
2278 Log2(("SVM: Rdpmc %x\n", pCtx->ecx));
2279 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdpmc);
2280 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2281 if (rc == VINF_SUCCESS)
2282 {
2283 /* Update EIP and continue execution. */
2284 pCtx->rip += 2; /* Note! hardcoded opcode size! */
2285 goto ResumeExecution;
2286 }
2287 rc = VINF_EM_RAW_EMULATE_INSTR;
2288 break;
2289 }
2290
2291 case SVM_EXIT_RDTSCP: /* Guest software attempted to execute RDTSCP. */
2292 {
2293 Log2(("SVM: Rdtscp\n"));
2294 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdtscp);
2295 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
2296 if (rc == VINF_SUCCESS)
2297 {
2298 /* Update EIP and continue execution. */
2299 pCtx->rip += 3; /* Note! hardcoded opcode size! */
2300 goto ResumeExecution;
2301 }
2302 AssertMsgFailed(("EMU: rdtscp failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
2303 rc = VINF_EM_RAW_EMULATE_INSTR;
2304 break;
2305 }
2306
2307 case SVM_EXIT_INVLPG: /* Guest software attempted to execute INVLPG. */
2308 {
2309 Log2(("SVM: invlpg\n"));
2310 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitInvlpg);
2311
2312 Assert(!pVM->hm.s.fNestedPaging);
2313
2314 /* Truly a pita. Why can't SVM give the same information as VT-x? */
2315 rc = hmR0SvmInterpretInvlpg(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2316 if (rc == VINF_SUCCESS)
2317 {
2318 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushPageInvlpg);
2319 goto ResumeExecution; /* eip already updated */
2320 }
2321 break;
2322 }
2323
2324 case SVM_EXIT_WRITE_CR0: case SVM_EXIT_WRITE_CR1: case SVM_EXIT_WRITE_CR2: case SVM_EXIT_WRITE_CR3:
2325 case SVM_EXIT_WRITE_CR4: case SVM_EXIT_WRITE_CR5: case SVM_EXIT_WRITE_CR6: case SVM_EXIT_WRITE_CR7:
2326 case SVM_EXIT_WRITE_CR8: case SVM_EXIT_WRITE_CR9: case SVM_EXIT_WRITE_CR10: case SVM_EXIT_WRITE_CR11:
2327 case SVM_EXIT_WRITE_CR12: case SVM_EXIT_WRITE_CR13: case SVM_EXIT_WRITE_CR14: case SVM_EXIT_WRITE_CR15:
2328 {
2329            Log2(("SVM: %RGv mov cr%d, x\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_WRITE_CR0));
2330 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxWrite[exitCode - SVM_EXIT_WRITE_CR0]);
2331 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2332
2333 switch (exitCode - SVM_EXIT_WRITE_CR0)
2334 {
2335 case 0:
2336 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR0;
2337 break;
2338 case 2:
2339 break;
2340 case 3:
2341 Assert(!pVM->hm.s.fNestedPaging);
2342 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR3;
2343 break;
2344 case 4:
2345 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_CR4;
2346 break;
2347 case 8:
2348 break;
2349 default:
2350 AssertFailed();
2351 }
2352 if (rc == VINF_SUCCESS)
2353 {
2354 /* EIP has been updated already. */
2355 /* Only resume if successful. */
2356 goto ResumeExecution;
2357 }
2358 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2359 break;
2360 }
2361
2362 case SVM_EXIT_READ_CR0: case SVM_EXIT_READ_CR1: case SVM_EXIT_READ_CR2: case SVM_EXIT_READ_CR3:
2363 case SVM_EXIT_READ_CR4: case SVM_EXIT_READ_CR5: case SVM_EXIT_READ_CR6: case SVM_EXIT_READ_CR7:
2364 case SVM_EXIT_READ_CR8: case SVM_EXIT_READ_CR9: case SVM_EXIT_READ_CR10: case SVM_EXIT_READ_CR11:
2365 case SVM_EXIT_READ_CR12: case SVM_EXIT_READ_CR13: case SVM_EXIT_READ_CR14: case SVM_EXIT_READ_CR15:
2366 {
2367 Log2(("SVM: %RGv mov x, cr%d\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_READ_CR0));
2368 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCRxRead[exitCode - SVM_EXIT_READ_CR0]);
2369 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2370 if (rc == VINF_SUCCESS)
2371 {
2372 /* EIP has been updated already. */
2373 /* Only resume if successful. */
2374 goto ResumeExecution;
2375 }
2376 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2377 break;
2378 }
2379
2380 case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3:
2381 case SVM_EXIT_WRITE_DR4: case SVM_EXIT_WRITE_DR5: case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7:
2382 case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9: case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11:
2383 case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13: case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15:
2384 {
2385 Log2(("SVM: %RGv mov dr%d, x\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_WRITE_DR0));
2386 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite);
2387
2388 if ( !DBGFIsStepping(pVCpu)
2389 && !CPUMIsHyperDebugStateActive(pVCpu))
2390 {
2391 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
2392
2393 /* Disable drx move intercepts. */
2394 pvVMCB->ctrl.u16InterceptRdDRx = 0;
2395 pvVMCB->ctrl.u16InterceptWrDRx = 0;
2396
2397 /* Save the host and load the guest debug state. */
2398 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */);
2399 AssertRC(rc2);
2400 goto ResumeExecution;
2401 }
2402
2403 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2404 if (rc == VINF_SUCCESS)
2405 {
2406 /* EIP has been updated already. */
2407 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
2408
2409 /* Only resume if successful. */
2410 goto ResumeExecution;
2411 }
2412 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2413 break;
2414 }
2415
2416 case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3:
2417 case SVM_EXIT_READ_DR4: case SVM_EXIT_READ_DR5: case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7:
2418 case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9: case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11:
2419 case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13: case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15:
2420 {
2421 Log2(("SVM: %RGv mov x, dr%d\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_READ_DR0));
2422 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead);
2423
2424 if (!DBGFIsStepping(pVCpu))
2425 {
2426 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch);
2427
2428 /* Disable DRx move intercepts. */
2429 pvVMCB->ctrl.u16InterceptRdDRx = 0;
2430 pvVMCB->ctrl.u16InterceptWrDRx = 0;
2431
2432 /* Save the host and load the guest debug state. */
2433 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */);
2434 AssertRC(rc2);
2435 goto ResumeExecution;
2436 }
2437
2438 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2439 if (rc == VINF_SUCCESS)
2440 {
2441 /* EIP has been updated already. */
2442 /* Only resume if successful. */
2443 goto ResumeExecution;
2444 }
2445 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2446 break;
2447 }
2448
2449 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
2450 case SVM_EXIT_IOIO: /* I/O instruction. */
2451 {
2452 SVM_IOIO_EXIT IoExitInfo;
2453
2454 IoExitInfo.au32[0] = pvVMCB->ctrl.u64ExitInfo1;
2455 unsigned uIdx = (IoExitInfo.au32[0] >> 4) & 0x7;
2456 uint32_t uIOSize = g_aIOSize[uIdx];
2457 uint32_t uAndVal = g_aIOOpAnd[uIdx];
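            /* Note: per the AMD manual's IOIO EXITINFO1 layout, bits 4-6 are the SZ8/SZ16/SZ32
               operand-size flags, so (EXITINFO1 >> 4) & 7 yields 1, 2 or 4 and indexes the size and
               operand-mask lookup tables; e.g. a 16-bit OUT sets bit 5 and gives uIdx = 2. */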
2458 if (RT_UNLIKELY(!uIOSize))
2459 {
2460 AssertFailed(); /* should be fatal. */
2461 rc = VINF_EM_RAW_EMULATE_INSTR; /** @todo r=ramshankar: would this really fall back to the recompiler and work? */
2462 break;
2463 }
2464
2465 if (IoExitInfo.n.u1STR)
2466 {
2467 /* ins/outs */
2468 PDISCPUSTATE pDis = &pVCpu->hm.s.DisState;
2469
2470 /* Disassemble manually to deal with segment prefixes. */
2471 rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
2472 if (rc == VINF_SUCCESS)
2473 {
2474 if (IoExitInfo.n.u1Type == 0)
2475 {
2476 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize));
2477 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringWrite);
2478 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->fPrefix,
2479 (DISCPUMODE)pDis->uAddrMode, uIOSize);
2480 }
2481 else
2482 {
2483 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize));
2484 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOStringRead);
2485 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->fPrefix,
2486 (DISCPUMODE)pDis->uAddrMode, uIOSize);
2487 }
2488 }
2489 else
2490 rc = VINF_EM_RAW_EMULATE_INSTR;
2491 }
2492 else
2493 {
2494 /* Normal in/out */
2495 Assert(!IoExitInfo.n.u1REP);
2496
2497 if (IoExitInfo.n.u1Type == 0)
2498 {
2499 Log2(("IOMIOPortWrite %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, pCtx->eax & uAndVal,
2500 uIOSize));
2501 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite);
2502 rc = IOMIOPortWrite(pVM, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize);
2503 if (rc == VINF_IOM_R3_IOPORT_WRITE)
2504 {
2505 HMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pvVMCB->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port,
2506 uAndVal, uIOSize);
2507 }
2508 }
2509 else
2510 {
2511 uint32_t u32Val = 0;
2512
2513 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead);
2514 rc = IOMIOPortRead(pVM, IoExitInfo.n.u16Port, &u32Val, uIOSize);
2515 if (IOM_SUCCESS(rc))
2516 {
2517 /* Write back to the EAX register. */
2518 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
2519 Log2(("IOMIOPortRead %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, u32Val & uAndVal,
2520 uIOSize));
2521 }
2522 else if (rc == VINF_IOM_R3_IOPORT_READ)
2523 {
2524 HMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pvVMCB->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port,
2525 uAndVal, uIOSize);
2526 }
2527 }
2528 }
2529
2530 /*
2531             * Handle the I/O return codes.
2532 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
2533 */
2534 if (IOM_SUCCESS(rc))
2535 {
2536 /* Update EIP and continue execution. */
2537 pCtx->rip = pvVMCB->ctrl.u64ExitInfo2; /* RIP/EIP of the next instruction is saved in EXITINFO2. */
2538 if (RT_LIKELY(rc == VINF_SUCCESS))
2539 {
2540 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
2541 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
2542 {
2543 /* IO operation lookup arrays. */
2544 static uint32_t const aIOSize[4] = { 1, 2, 0, 4 };
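                        /* Note: the DR7 LEN encoding is 00 = 1 byte, 01 = 2 bytes, 10 = 8 bytes
                           (not usable for I/O breakpoints, hence the 0 entry) and 11 = 4 bytes. */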
2545
2546 STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck);
2547 for (unsigned i = 0; i < 4; i++)
2548 {
2549 unsigned uBPLen = aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
2550
2551 if ( (IoExitInfo.n.u16Port >= pCtx->dr[i] && IoExitInfo.n.u16Port < pCtx->dr[i] + uBPLen)
2552 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
2553 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
2554 {
2555 SVM_EVENT Event;
2556
2557 Assert(CPUMIsGuestDebugStateActive(pVCpu));
2558
2559 /* Clear all breakpoint status flags and set the one we just hit. */
2560 pCtx->dr[6] &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
2561 pCtx->dr[6] |= (uint64_t)RT_BIT(i);
2562
2563 /*
2564 * Note: AMD64 Architecture Programmer's Manual 13.1:
2565                                 * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared
2566 * by software after the contents have been read.
2567 */
2568 pvVMCB->guest.u64DR6 = pCtx->dr[6];
2569
2570 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2571 pCtx->dr[7] &= ~X86_DR7_GD;
2572
2573 /* Paranoia. */
2574 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2575 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2576 pCtx->dr[7] |= 0x400; /* must be one */
2577
2578 pvVMCB->guest.u64DR7 = pCtx->dr[7];
2579
2580 /* Inject the exception. */
2581 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
2582
2583 Event.au64[0] = 0;
2584 Event.n.u3Type = SVM_EVENT_EXCEPTION; /* trap or fault */
2585 Event.n.u1Valid = 1;
2586 Event.n.u8Vector = X86_XCPT_DB;
2587
2588 hmR0SvmSetPendingEvent(pVCpu, &Event);
2589 goto ResumeExecution;
2590 }
2591 }
2592 }
2593 goto ResumeExecution;
2594 }
2595 Log2(("EM status from IO at %RGv %x size %d: %Rrc\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize,
2596 VBOXSTRICTRC_VAL(rc)));
2597 break;
2598 }
2599
2600#ifdef VBOX_STRICT
2601 if (rc == VINF_IOM_R3_IOPORT_READ)
2602 Assert(IoExitInfo.n.u1Type != 0);
2603 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
2604 Assert(IoExitInfo.n.u1Type == 0);
2605 else
2606 {
2607 AssertMsg( RT_FAILURE(rc)
2608 || rc == VINF_EM_RAW_EMULATE_INSTR
2609 || rc == VINF_EM_RAW_GUEST_TRAP
2610 || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
2611 }
2612#endif
2613 Log2(("Failed IO at %RGv %x size %d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize));
2614 break;
2615 }
2616
2617 case SVM_EXIT_HLT:
2618 /* Check if external interrupts are pending; if so, don't switch back. */
2619 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt);
2620 pCtx->rip++; /* skip hlt */
2621 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
2622 goto ResumeExecution;
2623
2624 rc = VINF_EM_HALT;
2625 break;
2626
2627 case SVM_EXIT_MWAIT_UNCOND:
2628 Log2(("SVM: mwait\n"));
2629 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait);
2630 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2631 if ( rc == VINF_EM_HALT
2632 || rc == VINF_SUCCESS)
2633 {
2634 /* Update EIP and continue execution. */
2635 pCtx->rip += 3; /* Note: hardcoded opcode size assumption! */
2636
2637 /* Check if external interrupts are pending; if so, don't switch back. */
2638 if ( rc == VINF_SUCCESS
2639 || ( rc == VINF_EM_HALT
2640 && EMShouldContinueAfterHalt(pVCpu, pCtx))
2641 )
2642 goto ResumeExecution;
2643 }
2644 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
2645 break;
2646
2647 case SVM_EXIT_MONITOR:
2648 {
2649 Log2(("SVM: monitor\n"));
2650
2651 STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor);
2652 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2653 if (rc == VINF_SUCCESS)
2654 {
2655 /* Update EIP and continue execution. */
2656 pCtx->rip += 3; /* Note: hardcoded opcode size assumption! */
2657 goto ResumeExecution;
2658 }
2659 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
2660 break;
2661 }
2662
2663 case SVM_EXIT_VMMCALL:
2664 rc = hmR0SvmEmulateTprVMMCall(pVM, pVCpu, pCtx);
2665 if (rc == VINF_SUCCESS)
2666 {
2667 goto ResumeExecution; /* rip already updated. */
2668 }
2669 /* no break */
2670
2671 case SVM_EXIT_RSM:
2672 case SVM_EXIT_INVLPGA:
2673 case SVM_EXIT_VMRUN:
2674 case SVM_EXIT_VMLOAD:
2675 case SVM_EXIT_VMSAVE:
2676 case SVM_EXIT_STGI:
2677 case SVM_EXIT_CLGI:
2678 case SVM_EXIT_SKINIT:
2679 {
2680 /* Unsupported instructions. */
2681 SVM_EVENT Event;
2682
2683 Event.au64[0] = 0;
2684 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2685 Event.n.u1Valid = 1;
2686 Event.n.u8Vector = X86_XCPT_UD;
2687
2688 Log(("Forced #UD trap at %RGv\n", (RTGCPTR)pCtx->rip));
2689 hmR0SvmSetPendingEvent(pVCpu, &Event);
2690 goto ResumeExecution;
2691 }
2692
2693 /* Emulate in ring-3. */
2694 case SVM_EXIT_MSR:
2695 {
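            /* Note: for SVM_EXIT_MSR, EXITINFO1 distinguishes the access type: 0 = RDMSR, 1 = WRMSR. */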
2696 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
2697 if ( pVM->hm.s.fTPRPatchingActive
2698 && pCtx->ecx == MSR_K8_LSTAR
2699 && pvVMCB->ctrl.u64ExitInfo1 == 1 /* wrmsr */)
2700 {
2701 if ((pCtx->eax & 0xff) != u8LastTPR)
2702 {
2703 Log(("SVM: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
2704
2705 /* Our patch code uses LSTAR for TPR caching. */
2706 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
2707 AssertRC(rc2);
2708 }
2709
2710 /* Skip the instruction and continue. */
2711 pCtx->rip += 2; /* wrmsr = [0F 30] */
2712
2713 /* Only resume if successful. */
2714 goto ResumeExecution;
2715 }
2716
2717 /*
2718             * The Intel spec. claims there's a REX version of RDMSR that's slightly different,
2719 * so we play safe by completely disassembling the instruction.
2720 */
2721 STAM_COUNTER_INC((pvVMCB->ctrl.u64ExitInfo1 == 0) ? &pVCpu->hm.s.StatExitRdmsr : &pVCpu->hm.s.StatExitWrmsr);
2722 Log(("SVM: %s\n", (pvVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr"));
2723 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2724 if (rc == VINF_SUCCESS)
2725 {
2726 /* EIP has been updated already. */
2727 /* Only resume if successful. */
2728 goto ResumeExecution;
2729 }
2730 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (pvVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr",
2731 VBOXSTRICTRC_VAL(rc)));
2732 break;
2733 }
2734
2735 case SVM_EXIT_TASK_SWITCH: /* too complicated to emulate, so fall back to the recompiler */
2736 Log(("SVM_EXIT_TASK_SWITCH: exit2=%RX64\n", pvVMCB->ctrl.u64ExitInfo2));
2737 if ( !(pvVMCB->ctrl.u64ExitInfo2 & (SVM_EXIT2_TASK_SWITCH_IRET | SVM_EXIT2_TASK_SWITCH_JMP))
2738 && pVCpu->hm.s.Event.fPending)
2739 {
2740 SVM_EVENT Event;
2741 Event.au64[0] = pVCpu->hm.s.Event.intInfo;
2742
2743 /* Caused by an injected interrupt. */
2744 pVCpu->hm.s.Event.fPending = false;
2745 switch (Event.n.u3Type)
2746 {
2747 case SVM_EVENT_EXTERNAL_IRQ:
2748 case SVM_EVENT_NMI:
2749 Log(("SVM_EXIT_TASK_SWITCH: reassert trap %d\n", Event.n.u8Vector));
2750 Assert(!Event.n.u1ErrorCodeValid);
2751 rc2 = TRPMAssertTrap(pVCpu, Event.n.u8Vector, TRPM_HARDWARE_INT);
2752 AssertRC(rc2);
2753 break;
2754
2755 default:
2756 /* Exceptions and software interrupts can just be restarted. */
2757 break;
2758 }
2759 }
2760 rc = VERR_EM_INTERPRETER;
2761 break;
2762
2763 case SVM_EXIT_PAUSE:
2764 case SVM_EXIT_MWAIT_ARMED:
2765 rc = VERR_EM_INTERPRETER;
2766 break;
2767
2768 case SVM_EXIT_SHUTDOWN:
2769 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
2770 break;
2771
2772 case SVM_EXIT_IDTR_READ:
2773 case SVM_EXIT_GDTR_READ:
2774 case SVM_EXIT_LDTR_READ:
2775 case SVM_EXIT_TR_READ:
2776 case SVM_EXIT_IDTR_WRITE:
2777 case SVM_EXIT_GDTR_WRITE:
2778 case SVM_EXIT_LDTR_WRITE:
2779 case SVM_EXIT_TR_WRITE:
2780 case SVM_EXIT_CR0_SEL_WRITE:
2781 default:
2782 /* Unexpected exit codes. */
2783 rc = VERR_SVM_UNEXPECTED_EXIT;
2784 AssertMsgFailed(("Unexpected exit code %x\n", exitCode)); /* Can't happen. */
2785 break;
2786 }
2787
2788end:
2789
2790 /*
2791 * We are now going back to ring-3, so clear the forced action flag.
2792 */
2793 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
2794
2795 /*
2796 * Signal changes to the recompiler.
2797 */
2798 CPUMSetChangedFlags(pVCpu,
2799 CPUM_CHANGED_SYSENTER_MSR
2800 | CPUM_CHANGED_LDTR
2801 | CPUM_CHANGED_GDTR
2802 | CPUM_CHANGED_IDTR
2803 | CPUM_CHANGED_TR
2804 | CPUM_CHANGED_HIDDEN_SEL_REGS);
2805
2806 /*
2807 * If we executed vmrun and an external IRQ was pending, then we don't have to do a full sync the next time.
2808 */
2809 if (exitCode == SVM_EXIT_INTR)
2810 {
2811 STAM_COUNTER_INC(&pVCpu->hm.s.StatPendingHostIrq);
2812 /* On the next entry we'll only sync the host context. */
2813 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_HOST_CONTEXT;
2814 }
2815 else
2816 {
2817 /* On the next entry we'll sync everything. */
2818 /** @todo we can do better than this */
2819 /* Not in the VINF_PGM_CHANGE_MODE though! */
2820 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_ALL;
2821 }
2822
2823 /* Translate into a less severe return code */
2824 if (rc == VERR_EM_INTERPRETER)
2825 rc = VINF_EM_RAW_EMULATE_INSTR;
2826
2827 /* Just set the correct state here instead of trying to catch every goto above. */
2828 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
2829
2830#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2831    /* Restore interrupts if we exited after disabling them. */
2832 if (uOldEFlags != ~(RTCCUINTREG)0)
2833 ASMSetFlags(uOldEFlags);
2834#endif
2835
2836 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit2, x);
2837 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExit1, x);
2838 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x);
2839 return VBOXSTRICTRC_TODO(rc);
2840}
2841
2842
2843/**
2844 * Emulate simple mov tpr instruction.
2845 *
2846 * @returns VBox status code.
2847 * @param pVM Pointer to the VM.
2848 * @param pVCpu Pointer to the VMCPU.
2849 * @param pCtx Pointer to the guest CPU context.
2850 */
2851static int hmR0SvmEmulateTprVMMCall(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2852{
2853 int rc;
2854
2855 LogFlow(("Emulated VMMCall TPR access replacement at %RGv\n", pCtx->rip));
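    /* Note: the loop below replays consecutive patched TPR instructions: each iteration looks up the
       patch record for the current RIP, emulates the original read or write of the TPR, and advances
       RIP by the patched instruction length, stopping once no record exists for the new RIP. */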
2856
2857 for (;;)
2858 {
2859 bool fPending;
2860 uint8_t u8Tpr;
2861
2862 PHMTPRPATCH pPatch = (PHMTPRPATCH)RTAvloU32Get(&pVM->hm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
2863 if (!pPatch)
2864 break;
2865
2866 switch (pPatch->enmType)
2867 {
2868 case HMTPRINSTR_READ:
2869 /* TPR caching in CR8 */
2870 rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPending);
2871 AssertRC(rc);
2872
2873 rc = DISWriteReg32(CPUMCTX2CORE(pCtx), pPatch->uDstOperand, u8Tpr);
2874 AssertRC(rc);
2875
2876 LogFlow(("Emulated read successfully\n"));
2877 pCtx->rip += pPatch->cbOp;
2878 break;
2879
2880 case HMTPRINSTR_WRITE_REG:
2881 case HMTPRINSTR_WRITE_IMM:
2882 /* Fetch the new TPR value */
2883 if (pPatch->enmType == HMTPRINSTR_WRITE_REG)
2884 {
2885 uint32_t val;
2886
2887 rc = DISFetchReg32(CPUMCTX2CORE(pCtx), pPatch->uSrcOperand, &val);
2888 AssertRC(rc);
2889 u8Tpr = val;
2890 }
2891 else
2892 u8Tpr = (uint8_t)pPatch->uSrcOperand;
2893
2894 rc = PDMApicSetTPR(pVCpu, u8Tpr);
2895 AssertRC(rc);
2896 LogFlow(("Emulated write successfully\n"));
2897 pCtx->rip += pPatch->cbOp;
2898 break;
2899
2900 default:
2901 AssertMsgFailedReturn(("Unexpected type %d\n", pPatch->enmType), VERR_SVM_UNEXPECTED_PATCH_TYPE);
2902 }
2903 }
2904 return VINF_SUCCESS;
2905}
2906
2907
2908/**
2909 * Enters the AMD-V session.
2910 *
2911 * @returns VBox status code.
2912 * @param pVM Pointer to the VM.
2913 * @param pVCpu Pointer to the VMCPU.
2914 * @param pCpu Pointer to the CPU info struct.
2915 */
2916VMMR0DECL(int) SVMR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
2917{
2918 Assert(pVM->hm.s.svm.fSupported);
2919
2920 LogFlow(("SVMR0Enter cpu%d last=%d asid=%d\n", pCpu->idCpu, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.uCurrentAsid));
2921 pVCpu->hm.s.fResumeVM = false;
2922
2923    /* Force a reload of LDTR, so we'll execute VMLOAD to load additional guest state. */
2924 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_LDTR;
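    /* Note: VMLOAD pulls in the guest state that VMRUN does not load itself: FS, GS, TR and LDTR
       (including their hidden parts), KernelGSBase, STAR/LSTAR/CSTAR/SFMASK and the SYSENTER MSRs. */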
2925
2926 return VINF_SUCCESS;
2927}
2928
2929
2930/**
2931 * Leaves the AMD-V session.
2932 *
2933 * @returns VBox status code.
2934 * @param pVM Pointer to the VM.
2935 * @param pVCpu Pointer to the VMCPU.
2936 * @param pCtx Pointer to the guest CPU context.
2937 */
2938VMMR0DECL(int) SVMR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2939{
2940 SVM_VMCB *pvVMCB = (SVM_VMCB *)pVCpu->hm.s.svm.pvVMCB;
2941
2942 Assert(pVM->hm.s.svm.fSupported);
2943
2944#ifdef DEBUG
2945 if (CPUMIsHyperDebugStateActive(pVCpu))
2946 {
2947 CPUMR0LoadHostDebugState(pVM, pVCpu);
2948 }
2949 else
2950#endif
2951 /* Save the guest debug state if necessary. */
2952 if (CPUMIsGuestDebugStateActive(pVCpu))
2953 {
2954 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, false /* skip DR6 */);
2955
2956 /* Intercept all DRx reads and writes again. Changed later on. */
2957 pvVMCB->ctrl.u16InterceptRdDRx = 0xFFFF;
2958 pvVMCB->ctrl.u16InterceptWrDRx = 0xFFFF;
2959
2960 /* Resync the debug registers the next time. */
2961 pVCpu->hm.s.fContextUseFlags |= HM_CHANGED_GUEST_DEBUG;
2962 }
2963 else
2964 Assert(pvVMCB->ctrl.u16InterceptRdDRx == 0xFFFF && pvVMCB->ctrl.u16InterceptWrDRx == 0xFFFF);
2965
2966 return VINF_SUCCESS;
2967}
2968
2969
2970/**
2971 * Worker for interpreting INVLPG.
2972 *
2973 * @return VBox status code.
2974 * @param pVCpu Pointer to the VMCPU.
2975 * @param pCpu Pointer to the CPU info struct.
2976 * @param pRegFrame Pointer to the register frame.
2977 */
2978static int hmR0svmInterpretInvlPgEx(PVMCPU pVCpu, PDISCPUSTATE pCpu, PCPUMCTXCORE pRegFrame)
2979{
2980 DISQPVPARAMVAL param1;
2981 RTGCPTR addr;
2982
2983 int rc = DISQueryParamVal(pRegFrame, pCpu, &pCpu->Param1, &param1, DISQPVWHICH_SRC);
2984 if (RT_FAILURE(rc))
2985 return VERR_EM_INTERPRETER;
2986
2987 switch (param1.type)
2988 {
2989 case DISQPV_TYPE_IMMEDIATE:
2990 case DISQPV_TYPE_ADDRESS:
2991 if (!(param1.flags & (DISQPV_FLAG_32 | DISQPV_FLAG_64)))
2992 return VERR_EM_INTERPRETER;
2993 addr = param1.val.val64;
2994 break;
2995
2996 default:
2997 return VERR_EM_INTERPRETER;
2998 }
2999
3000    /** @todo Is addr always a flat linear address, or is it DS-based
3001     *        (in the absence of segment override prefixes)?
3002 */
3003 rc = PGMInvalidatePage(pVCpu, addr);
3004 if (RT_SUCCESS(rc))
3005 return VINF_SUCCESS;
3006
3007 AssertRC(rc);
3008 return rc;
3009}
3010
3011
3012/**
3013 * Interprets INVLPG.
3014 *
3015 * @returns VBox status code.
3016 * @retval VINF_* Scheduling instructions.
3017 * @retval VERR_EM_INTERPRETER Something we can't cope with.
3018 * @retval VERR_* Fatal errors.
3019 *
3020 * @param pVM Pointer to the VM.
3021 * @param pRegFrame Pointer to the register frame.
3022 *
3023 * @remarks Updates the EIP if an instruction was executed successfully.
3024 */
3025static int hmR0SvmInterpretInvlpg(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame)
3026{
3027 /*
3028     * Only allow 32-bit & 64-bit code.
3029 */
3030 if (CPUMGetGuestCodeBits(pVCpu) != 16)
3031 {
3032 PDISSTATE pDis = &pVCpu->hm.s.DisState;
3033 int rc = EMInterpretDisasCurrent(pVM, pVCpu, pDis, NULL);
3034 if (RT_SUCCESS(rc) && pDis->pCurInstr->uOpcode == OP_INVLPG)
3035 {
3036 rc = hmR0svmInterpretInvlPgEx(pVCpu, pDis, pRegFrame);
3037 if (RT_SUCCESS(rc))
3038 pRegFrame->rip += pDis->cbInstr; /* Move on to the next instruction. */
3039 return rc;
3040 }
3041 }
3042 return VERR_EM_INTERPRETER;
3043}
3044
3045
3046/**
3047 * Invalidates a guest page by guest virtual address.
3048 *
3049 * @returns VBox status code.
3050 * @param pVM Pointer to the VM.
3051 * @param pVCpu Pointer to the VMCPU.
3052 * @param GCVirt Guest virtual address of the page to invalidate.
3053 */
3054VMMR0DECL(int) SVMR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
3055{
3056 bool fFlushPending = pVM->hm.s.svm.fAlwaysFlushTLB | VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
3057
3058 /* Skip it if a TLB flush is already pending. */
3059 if (!fFlushPending)
3060 {
3061 SVM_VMCB *pvVMCB;
3062
3063 Log2(("SVMR0InvalidatePage %RGv\n", GCVirt));
3064 AssertReturn(pVM, VERR_INVALID_PARAMETER);
3065 Assert(pVM->hm.s.svm.fSupported);
3066
3067 pvVMCB = (SVM_VMCB *)pVCpu->hm.s.svm.pvVMCB;
3068 AssertMsgReturn(pvVMCB, ("Invalid pvVMCB\n"), VERR_SVM_INVALID_PVMCB);
3069
3070#if HC_ARCH_BITS == 32
3071        /* If we get a flush in 64-bit guest mode, then force a full TLB flush. Invlpga takes only 32-bit addresses. */
3072 if (CPUMIsGuestInLongMode(pVCpu))
3073 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
3074 else
3075#endif
3076 SVMR0InvlpgA(GCVirt, pvVMCB->ctrl.TLBCtrl.n.u32ASID);
3077 }
3078 return VINF_SUCCESS;
3079}
3080
3081
3082#if 0 /* obsolete, but left here for clarification. */
3083/**
3084 * Invalidates a guest page by physical address.
3085 *
3086 * @returns VBox status code.
3087 * @param pVM Pointer to the VM.
3088 * @param pVCpu Pointer to the VMCPU.
3089 * @param GCPhys Guest physical address of the page to invalidate.
3090 */
3091VMMR0DECL(int) SVMR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
3092{
3093 Assert(pVM->hm.s.fNestedPaging);
3094 /* invlpga only invalidates TLB entries for guest virtual addresses; we have no choice but to force a TLB flush here. */
3095 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
3096 STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpga);
3097 return VINF_SUCCESS;
3098}
3099#endif
3100
3101
3102#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
3103/**
3104 * Prepares for and executes VMRUN (64-bit guests from a 32-bit host).
3105 *
3106 * @returns VBox status code.
3107 * @param HCPhysVMCBHost Physical address of host VMCB.
3108 * @param HCPhysVMCB Physical address of the VMCB.
3109 * @param pCtx Pointer to the guest CPU context.
3110 * @param pVM Pointer to the VM.
3111 * @param pVCpu Pointer to the VMCPU.
3112 */
3113DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS HCPhysVMCBHost, RTHCPHYS HCPhysVMCB, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu)
3114{
3115 uint32_t aParam[4];
3116
3117 aParam[0] = (uint32_t)(HCPhysVMCBHost); /* Param 1: HCPhysVMCBHost - Lo. */
3118 aParam[1] = (uint32_t)(HCPhysVMCBHost >> 32); /* Param 1: HCPhysVMCBHost - Hi. */
3119 aParam[2] = (uint32_t)(HCPhysVMCB); /* Param 2: HCPhysVMCB - Lo. */
3120 aParam[3] = (uint32_t)(HCPhysVMCB >> 32); /* Param 2: HCPhysVMCB - Hi. */
3121
3122 return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hm.s.pfnSVMGCVMRun64, 4, &aParam[0]);
3123}
3124
3125
3126/**
3127 * Executes the specified handler in 64-bit mode.
3128 *
3129 * @returns VBox status code.
3130 * @param pVM Pointer to the VM.
3131 * @param pVCpu Pointer to the VMCPU.
3132 * @param pCtx Pointer to the guest CPU context.
3133 * @param pfnHandler Pointer to the RC handler function.
3134 * @param cbParam Number of parameters.
3135 * @param paParam Array of 32-bit parameters.
3136 */
3137VMMR0DECL(int) SVMR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam,
3138 uint32_t *paParam)
3139{
3140 int rc;
3141 RTHCUINTREG uOldEFlags;
3142
3143 Assert(pfnHandler);
3144
3145 /* Disable interrupts. */
3146 uOldEFlags = ASMIntDisableFlags();
3147
3148#ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI
3149 RTCPUID idHostCpu = RTMpCpuId();
3150 CPUMR0SetLApic(pVM, idHostCpu);
3151#endif
3152
3153 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
3154 CPUMSetHyperEIP(pVCpu, pfnHandler);
3155 for (int i = (int)cbParam - 1; i >= 0; i--)
3156 CPUMPushHyper(pVCpu, paParam[i]);
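    /* Note: the parameters are pushed in reverse order so that paParam[0] ends up on top of the
       hypervisor stack when the 64-bit handler starts executing. */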
3157
3158 STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z);
3159 /* Call switcher. */
3160 rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
3161 STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z);
3162
3163 ASMSetFlags(uOldEFlags);
3164 return rc;
3165}
3166
3167#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */
3168