VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/HWSVMR0.cpp@ 41318

Last change on this file since 41318 was 41312, checked in by vboxsync, 13 years ago

VMM/VMMR0: Remove superfluous fFlushTLB flag in HMGLOBLCPUINFO. Fixes some TLB flush statistics.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
File size: 119.8 KB
Line 
1/* $Id: HWSVMR0.cpp 41312 2012-05-15 13:43:43Z vboxsync $ */
2/** @file
3 * HM SVM (AMD-V) - Host Context Ring 0.
4 */
5
6/*
7 * Copyright (C) 2006-2011 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_HWACCM
22#include <VBox/vmm/hwaccm.h>
23#include <VBox/vmm/pgm.h>
24#include <VBox/vmm/selm.h>
25#include <VBox/vmm/iom.h>
26#include <VBox/vmm/dbgf.h>
27#include <VBox/vmm/dbgftrace.h>
28#include <VBox/vmm/tm.h>
29#include <VBox/vmm/pdmapi.h>
30#include "HWACCMInternal.h"
31#include <VBox/vmm/vm.h>
32#include <VBox/vmm/hwacc_svm.h>
33#include <VBox/err.h>
34#include <VBox/log.h>
35#include <VBox/dis.h>
36#include <VBox/disopcode.h>
37#include <iprt/param.h>
38#include <iprt/assert.h>
39#include <iprt/asm.h>
40#include <iprt/asm-amd64-x86.h>
41#include <iprt/cpuset.h>
42#include <iprt/mp.h>
43#include <iprt/time.h>
44#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
45# include <iprt/thread.h>
46#endif
47#include <iprt/x86.h>
48#include "HWSVMR0.h"
49
50#include "dtrace/VBoxVMM.h"
51
52
53/*******************************************************************************
54* Internal Functions *
55*******************************************************************************/
56static int hmR0SvmInterpretInvpg(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, uint32_t uASID);
57static int hmR0SvmEmulateTprVMMCall(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx);
58static void hmR0SvmSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite);
59
60/*******************************************************************************
61* Global Variables *
62*******************************************************************************/
63
64/**
65 * Sets up and activates AMD-V on the current CPU
66 *
67 * @returns VBox status code.
68 * @param pCpu CPU info struct
69 * @param pVM The VM to operate on. (can be NULL after a resume!!)
70 * @param pvCpuPage Pointer to the global cpu page.
71 * @param HCPhysCpuPage Physical address of the global cpu page.
72 */
73VMMR0DECL(int) SVMR0EnableCpu(PHMGLOBLCPUINFO pCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
74{
75 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
76 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
77
78 /* We must turn on AMD-V and setup the host state physical address, as
79 those MSRs are per-cpu/core. */
80 uint64_t fEfer = ASMRdMsr(MSR_K6_EFER);
81 if (fEfer & MSR_K6_EFER_SVME)
82 {
83 /* If the VBOX_HWVIRTEX_IGNORE_SVM_IN_USE hack is active, then we
84 blindly use AMD-V. */
85 if ( pVM
86 && pVM->hwaccm.s.svm.fIgnoreInUseError)
87 pCpu->fIgnoreAMDVInUseError = true;
88 if (!pCpu->fIgnoreAMDVInUseError)
89 return VERR_SVM_IN_USE;
90 }
91
92 /* Turn on AMD-V in the EFER MSR. */
93 ASMWrMsr(MSR_K6_EFER, fEfer | MSR_K6_EFER_SVME);
94
95 /* Write the physical page address where the CPU will store the host state
96 while executing the VM. */
97 ASMWrMsr(MSR_K8_VM_HSAVE_PA, HCPhysCpuPage);
98
99 /*
100 * Theoretically, other hypervisors may have used ASIDs, ideally we should flush all non-zero ASIDs
101 * when enabling SVM. AMD doesn't have an SVM instruction to flush all ASIDs (flushing is done
102 * upon VMRUN). Therefore, just set the fFlushASIDBeforeUse flag which instructs hmR0SvmSetupTLB()
103 * to flush the TLB with before using a new ASID.
104 */
105 pCpu->fFlushASIDBeforeUse = true;
106 return VINF_SUCCESS;
107}
108
109/**
110 * Deactivates AMD-V on the current CPU
111 *
112 * @returns VBox status code.
113 * @param pCpu CPU info struct
114 * @param pvCpuPage Pointer to the global cpu page.
115 * @param HCPhysCpuPage Physical address of the global cpu page.
116 */
117VMMR0DECL(int) SVMR0DisableCpu(PHMGLOBLCPUINFO pCpu, void *pvCpuPage, RTHCPHYS HCPhysCpuPage)
118{
119 AssertReturn(HCPhysCpuPage != 0 && HCPhysCpuPage != NIL_RTHCPHYS, VERR_INVALID_PARAMETER);
120 AssertReturn(pvCpuPage, VERR_INVALID_PARAMETER);
121 NOREF(pCpu);
122
123 /* Turn off AMD-V in the EFER MSR. */
124 uint64_t fEfer = ASMRdMsr(MSR_K6_EFER);
125 ASMWrMsr(MSR_K6_EFER, fEfer & ~MSR_K6_EFER_SVME);
126
127 /* Invalidate host state physical address. */
128 ASMWrMsr(MSR_K8_VM_HSAVE_PA, 0);
129
130 return VINF_SUCCESS;
131}
132
133/**
134 * Does Ring-0 per VM AMD-V init.
135 *
136 * @returns VBox status code.
137 * @param pVM The VM to operate on.
138 */
139VMMR0DECL(int) SVMR0InitVM(PVM pVM)
140{
141 int rc;
142
143 pVM->hwaccm.s.svm.pMemObjIOBitmap = NIL_RTR0MEMOBJ;
144
145 /* Allocate 12 KB for the IO bitmap (doesn't seem to be a way to convince SVM not to use it) */
146 rc = RTR0MemObjAllocCont(&pVM->hwaccm.s.svm.pMemObjIOBitmap, 3 << PAGE_SHIFT, true /* executable R0 mapping */);
147 if (RT_FAILURE(rc))
148 return rc;
149
150 pVM->hwaccm.s.svm.pIOBitmap = RTR0MemObjAddress(pVM->hwaccm.s.svm.pMemObjIOBitmap);
151 pVM->hwaccm.s.svm.pIOBitmapPhys = RTR0MemObjGetPagePhysAddr(pVM->hwaccm.s.svm.pMemObjIOBitmap, 0);
152 /* Set all bits to intercept all IO accesses. */
153 ASMMemFill32(pVM->hwaccm.s.svm.pIOBitmap, PAGE_SIZE*3, 0xffffffff);
154
155 /* Erratum 170 which requires a forced TLB flush for each world switch:
156 * See http://www.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/33610.pdf
157 *
158 * All BH-G1/2 and DH-G1/2 models include a fix:
159 * Athlon X2: 0x6b 1/2
160 * 0x68 1/2
161 * Athlon 64: 0x7f 1
162 * 0x6f 2
163 * Sempron: 0x7f 1/2
164 * 0x6f 2
165 * 0x6c 2
166 * 0x7c 2
167 * Turion 64: 0x68 2
168 *
169 */
170 uint32_t u32Dummy;
171 uint32_t u32Version, u32Family, u32Model, u32Stepping, u32BaseFamily;
172 ASMCpuId(1, &u32Version, &u32Dummy, &u32Dummy, &u32Dummy);
173 u32BaseFamily= (u32Version >> 8) & 0xf;
174 u32Family = u32BaseFamily + (u32BaseFamily == 0xf ? ((u32Version >> 20) & 0x7f) : 0);
175 u32Model = ((u32Version >> 4) & 0xf);
176 u32Model = u32Model | ((u32BaseFamily == 0xf ? (u32Version >> 16) & 0x0f : 0) << 4);
177 u32Stepping = u32Version & 0xf;
178 if ( u32Family == 0xf
179 && !((u32Model == 0x68 || u32Model == 0x6b || u32Model == 0x7f) && u32Stepping >= 1)
180 && !((u32Model == 0x6f || u32Model == 0x6c || u32Model == 0x7c) && u32Stepping >= 2))
181 {
182 Log(("SVMR0InitVM: AMD cpu with erratum 170 family %x model %x stepping %x\n", u32Family, u32Model, u32Stepping));
183 pVM->hwaccm.s.svm.fAlwaysFlushTLB = true;
184 }
185
186 /* Allocate VMCBs for all guest CPUs. */
187 for (VMCPUID i = 0; i < pVM->cCpus; i++)
188 {
189 PVMCPU pVCpu = &pVM->aCpus[i];
190
191 pVCpu->hwaccm.s.svm.pMemObjVMCBHost = NIL_RTR0MEMOBJ;
192 pVCpu->hwaccm.s.svm.pMemObjVMCB = NIL_RTR0MEMOBJ;
193 pVCpu->hwaccm.s.svm.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
194
195 /* Allocate one page for the host context */
196 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.svm.pMemObjVMCBHost, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
197 if (RT_FAILURE(rc))
198 return rc;
199
200 pVCpu->hwaccm.s.svm.pVMCBHost = RTR0MemObjAddress(pVCpu->hwaccm.s.svm.pMemObjVMCBHost);
201 pVCpu->hwaccm.s.svm.pVMCBHostPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.svm.pMemObjVMCBHost, 0);
202 Assert(pVCpu->hwaccm.s.svm.pVMCBHostPhys < _4G);
203 ASMMemZeroPage(pVCpu->hwaccm.s.svm.pVMCBHost);
204
205 /* Allocate one page for the VM control block (VMCB). */
206 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.svm.pMemObjVMCB, 1 << PAGE_SHIFT, true /* executable R0 mapping */);
207 if (RT_FAILURE(rc))
208 return rc;
209
210 pVCpu->hwaccm.s.svm.pVMCB = RTR0MemObjAddress(pVCpu->hwaccm.s.svm.pMemObjVMCB);
211 pVCpu->hwaccm.s.svm.pVMCBPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.svm.pMemObjVMCB, 0);
212 Assert(pVCpu->hwaccm.s.svm.pVMCBPhys < _4G);
213 ASMMemZeroPage(pVCpu->hwaccm.s.svm.pVMCB);
214
215 /* Allocate 8 KB for the MSR bitmap (doesn't seem to be a way to convince SVM not to use it) */
216 rc = RTR0MemObjAllocCont(&pVCpu->hwaccm.s.svm.pMemObjMSRBitmap, 2 << PAGE_SHIFT, true /* executable R0 mapping */);
217 if (RT_FAILURE(rc))
218 return rc;
219
220 pVCpu->hwaccm.s.svm.pMSRBitmap = RTR0MemObjAddress(pVCpu->hwaccm.s.svm.pMemObjMSRBitmap);
221 pVCpu->hwaccm.s.svm.pMSRBitmapPhys = RTR0MemObjGetPagePhysAddr(pVCpu->hwaccm.s.svm.pMemObjMSRBitmap, 0);
222 /* Set all bits to intercept all MSR accesses. */
223 ASMMemFill32(pVCpu->hwaccm.s.svm.pMSRBitmap, PAGE_SIZE*2, 0xffffffff);
224 }
225
226 return VINF_SUCCESS;
227}
228
229/**
230 * Does Ring-0 per VM AMD-V termination.
231 *
232 * @returns VBox status code.
233 * @param pVM The VM to operate on.
234 */
235VMMR0DECL(int) SVMR0TermVM(PVM pVM)
236{
237 for (VMCPUID i = 0; i < pVM->cCpus; i++)
238 {
239 PVMCPU pVCpu = &pVM->aCpus[i];
240
241 if (pVCpu->hwaccm.s.svm.pMemObjVMCBHost != NIL_RTR0MEMOBJ)
242 {
243 RTR0MemObjFree(pVCpu->hwaccm.s.svm.pMemObjVMCBHost, false);
244 pVCpu->hwaccm.s.svm.pVMCBHost = 0;
245 pVCpu->hwaccm.s.svm.pVMCBHostPhys = 0;
246 pVCpu->hwaccm.s.svm.pMemObjVMCBHost = NIL_RTR0MEMOBJ;
247 }
248
249 if (pVCpu->hwaccm.s.svm.pMemObjVMCB != NIL_RTR0MEMOBJ)
250 {
251 RTR0MemObjFree(pVCpu->hwaccm.s.svm.pMemObjVMCB, false);
252 pVCpu->hwaccm.s.svm.pVMCB = 0;
253 pVCpu->hwaccm.s.svm.pVMCBPhys = 0;
254 pVCpu->hwaccm.s.svm.pMemObjVMCB = NIL_RTR0MEMOBJ;
255 }
256 if (pVCpu->hwaccm.s.svm.pMemObjMSRBitmap != NIL_RTR0MEMOBJ)
257 {
258 RTR0MemObjFree(pVCpu->hwaccm.s.svm.pMemObjMSRBitmap, false);
259 pVCpu->hwaccm.s.svm.pMSRBitmap = 0;
260 pVCpu->hwaccm.s.svm.pMSRBitmapPhys = 0;
261 pVCpu->hwaccm.s.svm.pMemObjMSRBitmap = NIL_RTR0MEMOBJ;
262 }
263 }
264 if (pVM->hwaccm.s.svm.pMemObjIOBitmap != NIL_RTR0MEMOBJ)
265 {
266 RTR0MemObjFree(pVM->hwaccm.s.svm.pMemObjIOBitmap, false);
267 pVM->hwaccm.s.svm.pIOBitmap = 0;
268 pVM->hwaccm.s.svm.pIOBitmapPhys = 0;
269 pVM->hwaccm.s.svm.pMemObjIOBitmap = NIL_RTR0MEMOBJ;
270 }
271 return VINF_SUCCESS;
272}
273
274/**
275 * Sets up AMD-V for the specified VM
276 *
277 * @returns VBox status code.
278 * @param pVM The VM to operate on.
279 */
280VMMR0DECL(int) SVMR0SetupVM(PVM pVM)
281{
282 int rc = VINF_SUCCESS;
283
284 AssertReturn(pVM, VERR_INVALID_PARAMETER);
285
286 Assert(pVM->hwaccm.s.svm.fSupported);
287
288 for (VMCPUID i = 0; i < pVM->cCpus; i++)
289 {
290 PVMCPU pVCpu = &pVM->aCpus[i];
291 SVM_VMCB *pVMCB = (SVM_VMCB *)pVM->aCpus[i].hwaccm.s.svm.pVMCB;
292
293 AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_HMSVM_INVALID_PVMCB);
294
295 /* Program the control fields. Most of them never have to be changed again.
296 * CR0/4 reads must be intercepted, our shadow values are not necessarily the same as the guest's.
297 * Note: CR0 & CR4 can be safely read when guest and shadow copies are identical.
298 */
299 pVMCB->ctrl.u16InterceptRdCRx = RT_BIT(0) | RT_BIT(4);
300
301 /* CR0/4 writes must be intercepted for obvious reasons. */
302 pVMCB->ctrl.u16InterceptWrCRx = RT_BIT(0) | RT_BIT(4);
303
304 /* Intercept all DRx reads and writes by default. Changed later on. */
305 pVMCB->ctrl.u16InterceptRdDRx = 0xFFFF;
306 pVMCB->ctrl.u16InterceptWrDRx = 0xFFFF;
307
308 /* Intercept traps; only #NM is always intercepted. */
309 pVMCB->ctrl.u32InterceptException = RT_BIT(X86_XCPT_NM);
310#ifdef VBOX_ALWAYS_TRAP_PF
311 pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
312#endif
313#ifdef VBOX_STRICT
314 pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_BP)
315 | RT_BIT(X86_XCPT_DB)
316 | RT_BIT(X86_XCPT_DE)
317 | RT_BIT(X86_XCPT_UD)
318 | RT_BIT(X86_XCPT_NP)
319 | RT_BIT(X86_XCPT_SS)
320 | RT_BIT(X86_XCPT_GP)
321 | RT_BIT(X86_XCPT_MF)
322 ;
323#endif
324
325 /* Set up instruction and miscellaneous intercepts. */
326 pVMCB->ctrl.u32InterceptCtrl1 = SVM_CTRL1_INTERCEPT_INTR
327 | SVM_CTRL1_INTERCEPT_VINTR
328 | SVM_CTRL1_INTERCEPT_NMI
329 | SVM_CTRL1_INTERCEPT_SMI
330 | SVM_CTRL1_INTERCEPT_INIT
331 | SVM_CTRL1_INTERCEPT_RDPMC
332 | SVM_CTRL1_INTERCEPT_CPUID
333 | SVM_CTRL1_INTERCEPT_RSM
334 | SVM_CTRL1_INTERCEPT_HLT
335 | SVM_CTRL1_INTERCEPT_INOUT_BITMAP
336 | SVM_CTRL1_INTERCEPT_MSR_SHADOW
337 | SVM_CTRL1_INTERCEPT_INVLPGA /* AMD only */
338 | SVM_CTRL1_INTERCEPT_SHUTDOWN /* fatal */
339 | SVM_CTRL1_INTERCEPT_FERR_FREEZE; /* Legacy FPU FERR handling. */
340 ;
341 pVMCB->ctrl.u32InterceptCtrl2 = SVM_CTRL2_INTERCEPT_VMRUN /* required */
342 | SVM_CTRL2_INTERCEPT_VMMCALL
343 | SVM_CTRL2_INTERCEPT_VMLOAD
344 | SVM_CTRL2_INTERCEPT_VMSAVE
345 | SVM_CTRL2_INTERCEPT_STGI
346 | SVM_CTRL2_INTERCEPT_CLGI
347 | SVM_CTRL2_INTERCEPT_SKINIT
348 | SVM_CTRL2_INTERCEPT_WBINVD
349 | SVM_CTRL2_INTERCEPT_MONITOR
350 | SVM_CTRL2_INTERCEPT_MWAIT_UNCOND; /* don't execute mwait or else we'll idle inside the guest (host thinks the cpu load is high) */
351 ;
352 Log(("pVMCB->ctrl.u32InterceptException = %x\n", pVMCB->ctrl.u32InterceptException));
353 Log(("pVMCB->ctrl.u32InterceptCtrl1 = %x\n", pVMCB->ctrl.u32InterceptCtrl1));
354 Log(("pVMCB->ctrl.u32InterceptCtrl2 = %x\n", pVMCB->ctrl.u32InterceptCtrl2));
355
356 /* Virtualize masking of INTR interrupts. (reads/writes from/to CR8 go to the V_TPR register) */
357 pVMCB->ctrl.IntCtrl.n.u1VIrqMasking = 1;
358 /* Ignore the priority in the TPR; just deliver it when we tell it to. */
359 pVMCB->ctrl.IntCtrl.n.u1IgnoreTPR = 1;
360
361 /* Set IO and MSR bitmap addresses. */
362 pVMCB->ctrl.u64IOPMPhysAddr = pVM->hwaccm.s.svm.pIOBitmapPhys;
363 pVMCB->ctrl.u64MSRPMPhysAddr = pVCpu->hwaccm.s.svm.pMSRBitmapPhys;
364
365 /* No LBR virtualization. */
366 pVMCB->ctrl.u64LBRVirt = 0;
367
368 /* The ASID must start at 1; the host uses 0. */
369 pVMCB->ctrl.TLBCtrl.n.u32ASID = 1;
370
371 /* Setup the PAT msr (nested paging only) */
372 /* The default value should be 0x0007040600070406ULL, but we want to treat all guest memory as WB, so choose type 6 for all PAT slots. */
373 pVMCB->guest.u64GPAT = 0x0006060606060606ULL;
374
375 /* If nested paging is not in use, additional intercepts have to be set up. */
376 if (!pVM->hwaccm.s.fNestedPaging)
377 {
378 /* CR3 reads/writes must be intercepted; our shadow values are different from guest's. */
379 pVMCB->ctrl.u16InterceptRdCRx |= RT_BIT(3);
380 pVMCB->ctrl.u16InterceptWrCRx |= RT_BIT(3);
381
382 /* We must also intercept:
383 * - INVLPG (must go through shadow paging)
384 * - task switches (may change CR3/EFLAGS/LDT)
385 */
386 pVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_INVLPG
387 | SVM_CTRL1_INTERCEPT_TASK_SWITCH
388 ;
389
390 /* Page faults must be intercepted to implement shadow paging. */
391 pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_PF);
392 }
393
394 /* The following MSRs are saved automatically by vmload/vmsave, so we allow the guest
395 * to modify them directly.
396 */
397 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
398 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_CSTAR, true, true);
399 hmR0SvmSetMSRPermission(pVCpu, MSR_K6_STAR, true, true);
400 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_SF_MASK, true, true);
401 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_FS_BASE, true, true);
402 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_GS_BASE, true, true);
403 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, true, true);
404 hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_CS, true, true);
405 hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_ESP, true, true);
406 hmR0SvmSetMSRPermission(pVCpu, MSR_IA32_SYSENTER_EIP, true, true);
407 }
408
409 return rc;
410}
411
412
413/**
414 * Sets the permission bits for the specified MSR
415 *
416 * @param pVCpu The VMCPU to operate on.
417 * @param ulMSR MSR value
418 * @param fRead Reading allowed/disallowed
419 * @param fWrite Writing allowed/disallowed
420 */
421static void hmR0SvmSetMSRPermission(PVMCPU pVCpu, unsigned ulMSR, bool fRead, bool fWrite)
422{
423 unsigned ulBit;
424 uint8_t *pMSRBitmap = (uint8_t *)pVCpu->hwaccm.s.svm.pMSRBitmap;
425
426 if (ulMSR <= 0x00001FFF)
427 {
428 /* Pentium-compatible MSRs */
429 ulBit = ulMSR * 2;
430 }
431 else
432 if ( ulMSR >= 0xC0000000
433 && ulMSR <= 0xC0001FFF)
434 {
435 /* AMD Sixth Generation x86 Processor MSRs and SYSCALL */
436 ulBit = (ulMSR - 0xC0000000) * 2;
437 pMSRBitmap += 0x800;
438 }
439 else
440 if ( ulMSR >= 0xC0010000
441 && ulMSR <= 0xC0011FFF)
442 {
443 /* AMD Seventh and Eighth Generation Processor MSRs */
444 ulBit = (ulMSR - 0xC0001000) * 2;
445 pMSRBitmap += 0x1000;
446 }
447 else
448 {
449 AssertFailed();
450 return;
451 }
452 Assert(ulBit < 16 * 1024 - 1);
453 if (fRead)
454 ASMBitClear(pMSRBitmap, ulBit);
455 else
456 ASMBitSet(pMSRBitmap, ulBit);
457
458 if (fWrite)
459 ASMBitClear(pMSRBitmap, ulBit + 1);
460 else
461 ASMBitSet(pMSRBitmap, ulBit + 1);
462}
463
464/**
465 * Injects an event (trap or external interrupt)
466 *
467 * @param pVCpu The VMCPU to operate on.
468 * @param pVMCB SVM control block
469 * @param pCtx CPU Context
470 * @param pIntInfo SVM interrupt info
471 */
472DECLINLINE(void) hmR0SvmInjectEvent(PVMCPU pVCpu, SVM_VMCB *pVMCB, CPUMCTX *pCtx, SVM_EVENT *pEvent)
473{
474#ifdef VBOX_WITH_STATISTICS
475 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatInjectedIrqsR0[pEvent->n.u8Vector & MASK_INJECT_IRQ_STAT]);
476#endif
477
478#ifdef VBOX_STRICT
479 if (pEvent->n.u8Vector == 0xE)
480 Log(("SVM: Inject int %d at %RGv error code=%02x CR2=%RGv intInfo=%08x\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip, pEvent->n.u32ErrorCode, (RTGCPTR)pCtx->cr2, pEvent->au64[0]));
481 else
482 if (pEvent->n.u8Vector < 0x20)
483 Log(("SVM: Inject int %d at %RGv error code=%08x\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip, pEvent->n.u32ErrorCode));
484 else
485 {
486 Log(("INJ-EI: %x at %RGv\n", pEvent->n.u8Vector, (RTGCPTR)pCtx->rip));
487 Assert(!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS));
488 Assert(pCtx->eflags.u32 & X86_EFL_IF);
489 }
490#endif
491
492 /* Set event injection state. */
493 pVMCB->ctrl.EventInject.au64[0] = pEvent->au64[0];
494}
495
496
497/**
498 * Checks for pending guest interrupts and injects them
499 *
500 * @returns VBox status code.
501 * @param pVM The VM to operate on.
502 * @param pVCpu The VM CPU to operate on.
503 * @param pVMCB SVM control block
504 * @param pCtx CPU Context
505 */
506static int hmR0SvmCheckPendingInterrupt(PVM pVM, PVMCPU pVCpu, SVM_VMCB *pVMCB, CPUMCTX *pCtx)
507{
508 int rc;
509 NOREF(pVM);
510
511 /* Dispatch any pending interrupts. (injected before, but a VM exit occurred prematurely) */
512 if (pVCpu->hwaccm.s.Event.fPending)
513 {
514 SVM_EVENT Event;
515
516 Log(("Reinjecting event %08x %08x at %RGv\n", pVCpu->hwaccm.s.Event.intInfo, pVCpu->hwaccm.s.Event.errCode, (RTGCPTR)pCtx->rip));
517 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntReinject);
518 Event.au64[0] = pVCpu->hwaccm.s.Event.intInfo;
519 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
520
521 pVCpu->hwaccm.s.Event.fPending = false;
522 return VINF_SUCCESS;
523 }
524
525 /* If an active trap is already pending, then we must forward it first! */
526 if (!TRPMHasTrap(pVCpu))
527 {
528 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI))
529 {
530 SVM_EVENT Event;
531
532 Log(("CPU%d: injecting #NMI\n", pVCpu->idCpu));
533 Event.n.u8Vector = X86_XCPT_NMI;
534 Event.n.u1Valid = 1;
535 Event.n.u32ErrorCode = 0;
536 Event.n.u3Type = SVM_EVENT_NMI;
537
538 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
539 return VINF_SUCCESS;
540 }
541
542 /* @todo SMI interrupts. */
543
544 /* When external interrupts are pending, we should exit the VM when IF is set. */
545 if (VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)))
546 {
547 if ( !(pCtx->eflags.u32 & X86_EFL_IF)
548 || VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
549 {
550 if (!pVMCB->ctrl.IntCtrl.n.u1VIrqValid)
551 {
552 if (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
553 LogFlow(("Enable irq window exit!\n"));
554 else
555 Log(("Pending interrupt blocked at %RGv by VM_FF_INHIBIT_INTERRUPTS -> irq window exit\n", (RTGCPTR)pCtx->rip));
556
557 /** @todo use virtual interrupt method to inject a pending irq; dispatched as soon as guest.IF is set. */
558 pVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_VINTR;
559 pVMCB->ctrl.IntCtrl.n.u1VIrqValid = 1;
560 pVMCB->ctrl.IntCtrl.n.u8VIrqVector = 0; /* don't care */
561 }
562 }
563 else
564 {
565 uint8_t u8Interrupt;
566
567 rc = PDMGetInterrupt(pVCpu, &u8Interrupt);
568 Log(("Dispatch interrupt: u8Interrupt=%x (%d) rc=%Rrc\n", u8Interrupt, u8Interrupt, rc));
569 if (RT_SUCCESS(rc))
570 {
571 rc = TRPMAssertTrap(pVCpu, u8Interrupt, TRPM_HARDWARE_INT);
572 AssertRC(rc);
573 }
574 else
575 {
576 /* Can only happen in rare cases where a pending interrupt is cleared behind our back */
577 Assert(!VMCPU_FF_ISPENDING(pVCpu, (VMCPU_FF_INTERRUPT_APIC|VMCPU_FF_INTERRUPT_PIC)));
578 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchGuestIrq);
579 /* Just continue */
580 }
581 }
582 }
583 }
584
585#ifdef VBOX_STRICT
586 if (TRPMHasTrap(pVCpu))
587 {
588 uint8_t u8Vector;
589 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, 0, 0, 0);
590 AssertRC(rc);
591 }
592#endif
593
594 if ( (pCtx->eflags.u32 & X86_EFL_IF)
595 && (!VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
596 && TRPMHasTrap(pVCpu)
597 )
598 {
599 uint8_t u8Vector;
600 TRPMEVENT enmType;
601 SVM_EVENT Event;
602 RTGCUINT u32ErrorCode;
603
604 Event.au64[0] = 0;
605
606 /* If a new event is pending, then dispatch it now. */
607 rc = TRPMQueryTrapAll(pVCpu, &u8Vector, &enmType, &u32ErrorCode, 0);
608 AssertRC(rc);
609 Assert(pCtx->eflags.Bits.u1IF == 1 || enmType == TRPM_TRAP);
610 Assert(enmType != TRPM_SOFTWARE_INT);
611
612 /* Clear the pending trap. */
613 rc = TRPMResetTrap(pVCpu);
614 AssertRC(rc);
615
616 Event.n.u8Vector = u8Vector;
617 Event.n.u1Valid = 1;
618 Event.n.u32ErrorCode = u32ErrorCode;
619
620 if (enmType == TRPM_TRAP)
621 {
622 switch (u8Vector) {
623 case X86_XCPT_DF:
624 case X86_XCPT_TS:
625 case X86_XCPT_NP:
626 case X86_XCPT_SS:
627 case X86_XCPT_GP:
628 case X86_XCPT_PF:
629 case X86_XCPT_AC:
630 /* Valid error codes. */
631 Event.n.u1ErrorCodeValid = 1;
632 break;
633 default:
634 break;
635 }
636 if (u8Vector == X86_XCPT_NMI)
637 Event.n.u3Type = SVM_EVENT_NMI;
638 else
639 Event.n.u3Type = SVM_EVENT_EXCEPTION;
640 }
641 else
642 Event.n.u3Type = SVM_EVENT_EXTERNAL_IRQ;
643
644 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatIntInject);
645 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
646 } /* if (interrupts can be dispatched) */
647
648 return VINF_SUCCESS;
649}
650
651/**
652 * Save the host state
653 *
654 * @returns VBox status code.
655 * @param pVM The VM to operate on.
656 * @param pVCpu The VM CPU to operate on.
657 */
658VMMR0DECL(int) SVMR0SaveHostState(PVM pVM, PVMCPU pVCpu)
659{
660 NOREF(pVM);
661 NOREF(pVCpu);
662 /* Nothing to do here. */
663 return VINF_SUCCESS;
664}
665
666/**
667 * Loads the guest state
668 *
669 * NOTE: Don't do anything here that can cause a jump back to ring 3!!!!!
670 *
671 * @returns VBox status code.
672 * @param pVM The VM to operate on.
673 * @param pVCpu The VM CPU to operate on.
674 * @param pCtx Guest context
675 */
676VMMR0DECL(int) SVMR0LoadGuestState(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
677{
678 RTGCUINTPTR val;
679 SVM_VMCB *pVMCB;
680
681 if (pVM == NULL)
682 return VERR_INVALID_PARAMETER;
683
684 /* Setup AMD SVM. */
685 Assert(pVM->hwaccm.s.svm.fSupported);
686
687 pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB;
688 AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_HMSVM_INVALID_PVMCB);
689
690 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
691 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_SEGMENT_REGS)
692 {
693 SVM_WRITE_SELREG(CS, cs);
694 SVM_WRITE_SELREG(SS, ss);
695 SVM_WRITE_SELREG(DS, ds);
696 SVM_WRITE_SELREG(ES, es);
697 SVM_WRITE_SELREG(FS, fs);
698 SVM_WRITE_SELREG(GS, gs);
699 }
700
701 /* Guest CPU context: LDTR. */
702 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_LDTR)
703 {
704 SVM_WRITE_SELREG(LDTR, ldtr);
705 }
706
707 /* Guest CPU context: TR. */
708 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_TR)
709 {
710 SVM_WRITE_SELREG(TR, tr);
711 }
712
713 /* Guest CPU context: GDTR. */
714 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_GDTR)
715 {
716 pVMCB->guest.GDTR.u32Limit = pCtx->gdtr.cbGdt;
717 pVMCB->guest.GDTR.u64Base = pCtx->gdtr.pGdt;
718 }
719
720 /* Guest CPU context: IDTR. */
721 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_IDTR)
722 {
723 pVMCB->guest.IDTR.u32Limit = pCtx->idtr.cbIdt;
724 pVMCB->guest.IDTR.u64Base = pCtx->idtr.pIdt;
725 }
726
727 /*
728 * Sysenter MSRs (unconditional)
729 */
730 pVMCB->guest.u64SysEnterCS = pCtx->SysEnter.cs;
731 pVMCB->guest.u64SysEnterEIP = pCtx->SysEnter.eip;
732 pVMCB->guest.u64SysEnterESP = pCtx->SysEnter.esp;
733
734 /* Control registers */
735 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR0)
736 {
737 val = pCtx->cr0;
738 if (!CPUMIsGuestFPUStateActive(pVCpu))
739 {
740 /* Always use #NM exceptions to load the FPU/XMM state on demand. */
741 val |= X86_CR0_TS | X86_CR0_ET | X86_CR0_NE | X86_CR0_MP;
742 }
743 else
744 {
745 /** @todo check if we support the old style mess correctly. */
746 if (!(val & X86_CR0_NE))
747 {
748 Log(("Forcing X86_CR0_NE!!!\n"));
749
750 /* Also catch floating point exceptions as we need to report them to the guest in a different way. */
751 if (!pVCpu->hwaccm.s.fFPUOldStyleOverride)
752 {
753 pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_MF);
754 pVCpu->hwaccm.s.fFPUOldStyleOverride = true;
755 }
756 }
757 val |= X86_CR0_NE; /* always turn on the native mechanism to report FPU errors (old style uses interrupts) */
758 }
759 /* Always enable caching. */
760 val &= ~(X86_CR0_CD|X86_CR0_NW);
761
762 /* Note: WP is not relevant in nested paging mode as we catch accesses on the (guest) physical level. */
763 /* Note: In nested paging mode the guest is allowed to run with paging disabled; the guest physical to host physical translation will remain active. */
764 if (!pVM->hwaccm.s.fNestedPaging)
765 {
766 val |= X86_CR0_PG; /* Paging is always enabled; even when the guest is running in real mode or PE without paging. */
767 val |= X86_CR0_WP; /* Must set this as we rely on protecting various pages and supervisor writes must be caught. */
768 }
769 pVMCB->guest.u64CR0 = val;
770 }
771 /* CR2 as well */
772 pVMCB->guest.u64CR2 = pCtx->cr2;
773
774 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR3)
775 {
776 /* Save our shadow CR3 register. */
777 if (pVM->hwaccm.s.fNestedPaging)
778 {
779 PGMMODE enmShwPagingMode;
780
781#if HC_ARCH_BITS == 32
782 if (CPUMIsGuestInLongModeEx(pCtx))
783 enmShwPagingMode = PGMMODE_AMD64_NX;
784 else
785#endif
786 enmShwPagingMode = PGMGetHostMode(pVM);
787
788 pVMCB->ctrl.u64NestedPagingCR3 = PGMGetNestedCR3(pVCpu, enmShwPagingMode);
789 Assert(pVMCB->ctrl.u64NestedPagingCR3);
790 pVMCB->guest.u64CR3 = pCtx->cr3;
791 }
792 else
793 {
794 pVMCB->guest.u64CR3 = PGMGetHyperCR3(pVCpu);
795 Assert(pVMCB->guest.u64CR3 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL));
796 }
797 }
798
799 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_CR4)
800 {
801 val = pCtx->cr4;
802 if (!pVM->hwaccm.s.fNestedPaging)
803 {
804 switch(pVCpu->hwaccm.s.enmShadowMode)
805 {
806 case PGMMODE_REAL:
807 case PGMMODE_PROTECTED: /* Protected mode, no paging. */
808 AssertFailed();
809 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
810
811 case PGMMODE_32_BIT: /* 32-bit paging. */
812 val &= ~X86_CR4_PAE;
813 break;
814
815 case PGMMODE_PAE: /* PAE paging. */
816 case PGMMODE_PAE_NX: /* PAE paging with NX enabled. */
817 /** Must use PAE paging as we could use physical memory > 4 GB */
818 val |= X86_CR4_PAE;
819 break;
820
821 case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */
822 case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */
823#ifdef VBOX_ENABLE_64_BITS_GUESTS
824 break;
825#else
826 AssertFailed();
827 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
828#endif
829
830 default: /* shut up gcc */
831 AssertFailed();
832 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
833 }
834 }
835 pVMCB->guest.u64CR4 = val;
836 }
837
838 /* Debug registers. */
839 if (pVCpu->hwaccm.s.fContextUseFlags & HWACCM_CHANGED_GUEST_DEBUG)
840 {
841 pCtx->dr[6] |= X86_DR6_INIT_VAL; /* set all reserved bits to 1. */
842 pCtx->dr[6] &= ~RT_BIT(12); /* must be zero. */
843
844 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
845 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
846 pCtx->dr[7] |= 0x400; /* must be one */
847
848 pVMCB->guest.u64DR7 = pCtx->dr[7];
849 pVMCB->guest.u64DR6 = pCtx->dr[6];
850
851#ifdef DEBUG
852 /* Sync the hypervisor debug state now if any breakpoint is armed. */
853 if ( CPUMGetHyperDR7(pVCpu) & (X86_DR7_ENABLED_MASK|X86_DR7_GD)
854 && !CPUMIsHyperDebugStateActive(pVCpu)
855 && !DBGFIsStepping(pVCpu))
856 {
857 /* Save the host and load the hypervisor debug state. */
858 int rc = CPUMR0LoadHyperDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */);
859 AssertRC(rc);
860
861 /* DRx intercepts remain enabled. */
862
863 /* Override dr6 & dr7 with the hypervisor values. */
864 pVMCB->guest.u64DR7 = CPUMGetHyperDR7(pVCpu);
865 pVMCB->guest.u64DR6 = CPUMGetHyperDR6(pVCpu);
866 }
867 else
868#endif
869 /* Sync the debug state now if any breakpoint is armed. */
870 if ( (pCtx->dr[7] & (X86_DR7_ENABLED_MASK|X86_DR7_GD))
871 && !CPUMIsGuestDebugStateActive(pVCpu)
872 && !DBGFIsStepping(pVCpu))
873 {
874 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxArmed);
875
876 /* Disable drx move intercepts. */
877 pVMCB->ctrl.u16InterceptRdDRx = 0;
878 pVMCB->ctrl.u16InterceptWrDRx = 0;
879
880 /* Save the host and load the guest debug state. */
881 int rc = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */);
882 AssertRC(rc);
883 }
884 }
885
886 /* EIP, ESP and EFLAGS */
887 pVMCB->guest.u64RIP = pCtx->rip;
888 pVMCB->guest.u64RSP = pCtx->rsp;
889 pVMCB->guest.u64RFlags = pCtx->eflags.u32;
890
891 /* Set CPL */
892 pVMCB->guest.u8CPL = pCtx->ssHid.Attr.n.u2Dpl;
893
894 /* RAX/EAX too, as VMRUN uses RAX as an implicit parameter. */
895 pVMCB->guest.u64RAX = pCtx->rax;
896
897 /* vmrun will fail without MSR_K6_EFER_SVME. */
898 pVMCB->guest.u64EFER = pCtx->msrEFER | MSR_K6_EFER_SVME;
899
900 /* 64 bits guest mode? */
901 if (CPUMIsGuestInLongModeEx(pCtx))
902 {
903#if !defined(VBOX_ENABLE_64_BITS_GUESTS)
904 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
905#elif HC_ARCH_BITS == 32 && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
906 pVCpu->hwaccm.s.svm.pfnVMRun = SVMR0VMSwitcherRun64;
907#else
908# ifdef VBOX_WITH_HYBRID_32BIT_KERNEL
909 if (!pVM->hwaccm.s.fAllow64BitGuests)
910 return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE;
911# endif
912 pVCpu->hwaccm.s.svm.pfnVMRun = SVMR0VMRun64;
913#endif
914 /* Unconditionally update these as wrmsr might have changed them. (HWACCM_CHANGED_GUEST_SEGMENT_REGS will not be set) */
915 pVMCB->guest.FS.u64Base = pCtx->fsHid.u64Base;
916 pVMCB->guest.GS.u64Base = pCtx->gsHid.u64Base;
917 }
918 else
919 {
920 /* Filter out the MSR_K6_LME bit or else AMD-V expects amd64 shadow paging. */
921 pVMCB->guest.u64EFER &= ~MSR_K6_EFER_LME;
922
923 pVCpu->hwaccm.s.svm.pfnVMRun = SVMR0VMRun;
924 }
925
926 /* TSC offset. */
927 if (TMCpuTickCanUseRealTSC(pVCpu, &pVMCB->ctrl.u64TSCOffset))
928 {
929 uint64_t u64CurTSC = ASMReadTSC();
930 if (u64CurTSC + pVMCB->ctrl.u64TSCOffset >= TMCpuTickGetLastSeen(pVCpu))
931 {
932 pVMCB->ctrl.u32InterceptCtrl1 &= ~SVM_CTRL1_INTERCEPT_RDTSC;
933 pVMCB->ctrl.u32InterceptCtrl2 &= ~SVM_CTRL2_INTERCEPT_RDTSCP;
934 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCOffset);
935 }
936 else
937 {
938 /* Fall back to rdtsc emulation as we would otherwise pass decreasing tsc values to the guest. */
939 LogFlow(("TSC %RX64 offset %RX64 time=%RX64 last=%RX64 (diff=%RX64, virt_tsc=%RX64)\n", u64CurTSC, pVMCB->ctrl.u64TSCOffset, u64CurTSC + pVMCB->ctrl.u64TSCOffset, TMCpuTickGetLastSeen(pVCpu), TMCpuTickGetLastSeen(pVCpu) - u64CurTSC - pVMCB->ctrl.u64TSCOffset, TMCpuTickGet(pVCpu)));
940 pVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC;
941 pVMCB->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP;
942 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCInterceptOverFlow);
943 }
944 }
945 else
946 {
947 pVMCB->ctrl.u32InterceptCtrl1 |= SVM_CTRL1_INTERCEPT_RDTSC;
948 pVMCB->ctrl.u32InterceptCtrl2 |= SVM_CTRL2_INTERCEPT_RDTSCP;
949 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTSCIntercept);
950 }
951
952 /* Sync the various msrs for 64 bits mode. */
953 pVMCB->guest.u64STAR = pCtx->msrSTAR; /* legacy syscall eip, cs & ss */
954 pVMCB->guest.u64LSTAR = pCtx->msrLSTAR; /* 64 bits mode syscall rip */
955 pVMCB->guest.u64CSTAR = pCtx->msrCSTAR; /* compatibility mode syscall rip */
956 pVMCB->guest.u64SFMASK = pCtx->msrSFMASK; /* syscall flag mask */
957 pVMCB->guest.u64KernelGSBase = pCtx->msrKERNELGSBASE; /* swapgs exchange value */
958
959#ifdef DEBUG
960 /* Intercept X86_XCPT_DB if stepping is enabled */
961 if ( DBGFIsStepping(pVCpu)
962 || CPUMIsHyperDebugStateActive(pVCpu))
963 pVMCB->ctrl.u32InterceptException |= RT_BIT(X86_XCPT_DB);
964 else
965 pVMCB->ctrl.u32InterceptException &= ~RT_BIT(X86_XCPT_DB);
966#endif
967
968 /* Done. */
969 pVCpu->hwaccm.s.fContextUseFlags &= ~HWACCM_CHANGED_ALL_GUEST;
970
971 return VINF_SUCCESS;
972}
973
974/**
975 * Setup TLB for ASID.
976 *
977 * @param pVM The VM to operate on.
978 * @param pVCpu The VM CPU to operate on.
979 */
980static void hmR0SvmSetupTLB(PVM pVM, PVMCPU pVCpu)
981{
982 PHMGLOBLCPUINFO pCpu;
983
984 AssertPtr(pVM);
985 AssertPtr(pVCpu);
986
987 SVM_VMCB *pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB;
988 pCpu = HWACCMR0GetCurrentCpu();
989
990 /*
991 * Force a TLB flush for the first world switch if the current CPU differs from the one we ran on last.
992 * This can happen both for start & resume due to long jumps back to ring-3.
993 * If the TLB flush count changed, another VM (VCPU rather) has hit the ASID limit while flushing the TLB,
994 * so we cannot reuse the ASIDs without flushing.
995 */
996 bool fNewASID = false;
997 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
998 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
999 {
1000 pVCpu->hwaccm.s.fForceTLBFlush = true;
1001 fNewASID = true;
1002 }
1003
1004 /*
1005 * Set TLB flush state as checked until we return from the world switch.
1006 */
1007 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, true);
1008
1009 /*
1010 * Check for TLB shootdown flushes.
1011 */
1012 if (VMCPU_FF_TESTANDCLEAR(pVCpu, VMCPU_FF_TLB_FLUSH))
1013 pVCpu->hwaccm.s.fForceTLBFlush = true;
1014
1015 pVCpu->hwaccm.s.idLastCpu = pCpu->idCpu;
1016 pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_NOTHING;
1017
1018 if (RT_UNLIKELY(pVM->hwaccm.s.svm.fAlwaysFlushTLB))
1019 {
1020 /*
1021 * This is the AMD erratum 170. We need to flush the entire TLB for each world switch. Sad.
1022 */
1023 pCpu->uCurrentASID = 1;
1024 pVCpu->hwaccm.s.uCurrentASID = 1;
1025 pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
1026 }
1027 else if (pVCpu->hwaccm.s.fForceTLBFlush)
1028 {
1029 if (fNewASID)
1030 {
1031 ++pCpu->uCurrentASID;
1032 bool fHitASIDLimit = false;
1033 if (pCpu->uCurrentASID >= pVM->hwaccm.s.uMaxASID)
1034 {
1035 pCpu->uCurrentASID = 1; /* start at 1; host uses 0 */
1036 pCpu->cTLBFlushes++;
1037 fHitASIDLimit = true;
1038
1039 if (pVM->hwaccm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
1040 {
1041 pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
1042 pCpu->fFlushASIDBeforeUse = true;
1043 }
1044 else
1045 {
1046 pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
1047 pCpu->fFlushASIDBeforeUse = false;
1048 }
1049 }
1050
1051 if ( !fHitASIDLimit
1052 && pCpu->fFlushASIDBeforeUse)
1053 {
1054 if (pVM->hwaccm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
1055 pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
1056 else
1057 {
1058 pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
1059 pCpu->fFlushASIDBeforeUse = false;
1060 }
1061 }
1062
1063 pVCpu->hwaccm.s.uCurrentASID = pCpu->uCurrentASID;
1064 pVCpu->hwaccm.s.cTLBFlushes = pCpu->cTLBFlushes;
1065 }
1066 else
1067 {
1068 if (pVM->hwaccm.s.svm.u32Features & AMD_CPUID_SVM_FEATURE_EDX_FLUSH_BY_ASID)
1069 pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_SINGLE_CONTEXT;
1070 else
1071 pVMCB->ctrl.TLBCtrl.n.u8TLBFlush = SVM_TLB_FLUSH_ENTIRE;
1072 }
1073
1074 pVCpu->hwaccm.s.fForceTLBFlush = false;
1075 }
1076 else
1077 {
1078 /** @todo We never set VMCPU_FF_TLB_SHOOTDOWN anywhere so this path should
1079 * not be executed. See hwaccmQueueInvlPage() where it is commented
1080 * out. Support individual entry flushing someday. */
1081 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_TLB_SHOOTDOWN))
1082 {
1083 /* Deal with pending TLB shootdown actions which were queued when we were not executing code. */
1084 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatTlbShootdown);
1085 for (unsigned i = 0; i < pVCpu->hwaccm.s.TlbShootdown.cPages; i++)
1086 SVMR0InvlpgA(pVCpu->hwaccm.s.TlbShootdown.aPages[i], pVMCB->ctrl.TLBCtrl.n.u32ASID);
1087 }
1088 }
1089
1090 pVCpu->hwaccm.s.TlbShootdown.cPages = 0;
1091 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_SHOOTDOWN);
1092
1093 /* Update VMCB with the ASID. */
1094 pVMCB->ctrl.TLBCtrl.n.u32ASID = pVCpu->hwaccm.s.uCurrentASID;
1095
1096 AssertMsg(pVCpu->hwaccm.s.cTLBFlushes == pCpu->cTLBFlushes, ("Flush count mismatch for cpu %d (%x vs %x)\n", pCpu->idCpu, pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
1097 AssertMsg(pCpu->uCurrentASID >= 1 && pCpu->uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d uCurrentASID = %x\n", pCpu->idCpu, pCpu->uCurrentASID));
1098 AssertMsg(pVCpu->hwaccm.s.uCurrentASID >= 1 && pVCpu->hwaccm.s.uCurrentASID < pVM->hwaccm.s.uMaxASID, ("cpu%d VM uCurrentASID = %x\n", pCpu->idCpu, pVCpu->hwaccm.s.uCurrentASID));
1099
1100#ifdef VBOX_WITH_STATISTICS
1101 if (pVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_NOTHING)
1102 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatNoFlushTLBWorldSwitch);
1103 else if ( pVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT
1104 || pVMCB->ctrl.TLBCtrl.n.u8TLBFlush == SVM_TLB_FLUSH_SINGLE_CONTEXT_RETAIN_GLOBALS)
1105 {
1106 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushASID);
1107 }
1108 else
1109 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBWorldSwitch);
1110#endif
1111}
1112
1113
1114/**
1115 * Runs guest code in an AMD-V VM.
1116 *
1117 * @returns VBox status code.
1118 * @param pVM The VM to operate on.
1119 * @param pVCpu The VM CPU to operate on.
1120 * @param pCtx Guest context
1121 */
1122VMMR0DECL(int) SVMR0RunGuestCode(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
1123{
1124 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatEntry, x);
1125 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit1);
1126 STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hwaccm.s.StatExit2);
1127
1128 VBOXSTRICTRC rc = VINF_SUCCESS;
1129 int rc2;
1130 uint64_t exitCode = (uint64_t)SVM_EXIT_INVALID;
1131 SVM_VMCB *pVMCB;
1132 bool fSyncTPR = false;
1133 unsigned cResume = 0;
1134 uint8_t u8LastTPR = 0; /* Initialized for potentially stupid compilers. */
1135 PHMGLOBLCPUINFO pCpu = 0;
1136 RTCCUINTREG uOldEFlags = ~(RTCCUINTREG)0;
1137#ifdef VBOX_STRICT
1138 RTCPUID idCpuCheck;
1139#endif
1140#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
1141 uint64_t u64LastTime = RTTimeMilliTS();
1142#endif
1143
1144 pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB;
1145 AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_HMSVM_INVALID_PVMCB);
1146
1147 /* We can jump to this point to resume execution after determining that a VM-exit is innocent.
1148 */
1149ResumeExecution:
1150 if (!STAM_PROFILE_ADV_IS_RUNNING(&pVCpu->hwaccm.s.StatEntry))
1151 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit2, &pVCpu->hwaccm.s.StatEntry, x);
1152 Assert(!HWACCMR0SuspendPending());
1153
1154 /* Safety precaution; looping for too long here can have a very bad effect on the host */
1155 if (RT_UNLIKELY(++cResume > pVM->hwaccm.s.cMaxResumeLoops))
1156 {
1157 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMaxResume);
1158 rc = VINF_EM_RAW_INTERRUPT;
1159 goto end;
1160 }
1161
1162 /* Check for irq inhibition due to instruction fusing (sti, mov ss). */
1163 if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS))
1164 {
1165 Log(("VM_FF_INHIBIT_INTERRUPTS at %RGv successor %RGv\n", (RTGCPTR)pCtx->rip, EMGetInhibitInterruptsPC(pVCpu)));
1166 if (pCtx->rip != EMGetInhibitInterruptsPC(pVCpu))
1167 {
1168 /* Note: we intentionally don't clear VM_FF_INHIBIT_INTERRUPTS here.
1169 * Before we are able to execute this instruction in raw mode (iret to guest code) an external interrupt might
1170 * force a world switch again. Possibly allowing a guest interrupt to be dispatched in the process. This could
1171 * break the guest. Sounds very unlikely, but such timing sensitive problems are not as rare as you might think.
1172 */
1173 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
1174 /* Irq inhibition is no longer active; clear the corresponding SVM state. */
1175 pVMCB->ctrl.u64IntShadow = 0;
1176 }
1177 }
1178 else
1179 {
1180 /* Irq inhibition is no longer active; clear the corresponding SVM state. */
1181 pVMCB->ctrl.u64IntShadow = 0;
1182 }
1183
1184#ifdef VBOX_HIGH_RES_TIMERS_HACK_IN_RING0
1185 if (RT_UNLIKELY((cResume & 0xf) == 0))
1186 {
1187 uint64_t u64CurTime = RTTimeMilliTS();
1188
1189 if (RT_UNLIKELY(u64CurTime > u64LastTime))
1190 {
1191 u64LastTime = u64CurTime;
1192 TMTimerPollVoid(pVM, pVCpu);
1193 }
1194 }
1195#endif
1196
1197 /* Check for pending actions that force us to go back to ring 3. */
1198 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK | VM_FF_REQUEST | VM_FF_PGM_POOL_FLUSH_PENDING | VM_FF_PDM_DMA)
1199 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK | VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL | VMCPU_FF_REQUEST))
1200 {
1201 /* Check if a sync operation is pending. */
1202 if (VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
1203 {
1204 rc = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_ISSET(pVCpu, VMCPU_FF_PGM_SYNC_CR3));
1205 AssertRC(VBOXSTRICTRC_VAL(rc));
1206 if (rc != VINF_SUCCESS)
1207 {
1208 Log(("Pending pool sync is forcing us back to ring 3; rc=%d\n", VBOXSTRICTRC_VAL(rc)));
1209 goto end;
1210 }
1211 }
1212
1213#ifdef DEBUG
1214 /* Intercept X86_XCPT_DB if stepping is enabled */
1215 if (!DBGFIsStepping(pVCpu))
1216#endif
1217 {
1218 if ( VM_FF_ISPENDING(pVM, VM_FF_HWACCM_TO_R3_MASK)
1219 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_HWACCM_TO_R3_MASK))
1220 {
1221 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatSwitchToR3);
1222 rc = RT_UNLIKELY(VM_FF_ISPENDING(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_NO_MEMORY : VINF_EM_RAW_TO_R3;
1223 goto end;
1224 }
1225 }
1226
1227 /* Pending request packets might contain actions that need immediate attention, such as pending hardware interrupts. */
1228 if ( VM_FF_ISPENDING(pVM, VM_FF_REQUEST)
1229 || VMCPU_FF_ISPENDING(pVCpu, VMCPU_FF_REQUEST))
1230 {
1231 rc = VINF_EM_PENDING_REQUEST;
1232 goto end;
1233 }
1234
1235 /* Check if a pgm pool flush is in progress. */
1236 if (VM_FF_ISPENDING(pVM, VM_FF_PGM_POOL_FLUSH_PENDING))
1237 {
1238 rc = VINF_PGM_POOL_FLUSH_PENDING;
1239 goto end;
1240 }
1241
1242 /* Check if DMA work is pending (2nd+ run). */
1243 if (VM_FF_ISPENDING(pVM, VM_FF_PDM_DMA) && cResume > 1)
1244 {
1245 rc = VINF_EM_RAW_TO_R3;
1246 goto end;
1247 }
1248 }
1249
1250#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
1251 /*
1252 * Exit to ring-3 preemption/work is pending.
1253 *
1254 * Interrupts are disabled before the call to make sure we don't miss any interrupt
1255 * that would flag preemption (IPI, timer tick, ++). (Would've been nice to do this
1256 * further down, but hmR0SvmCheckPendingInterrupt makes that impossible.)
1257 *
1258 * Note! Interrupts must be disabled done *before* we check for TLB flushes; TLB
1259 * shootdowns rely on this.
1260 */
1261 uOldEFlags = ASMIntDisableFlags();
1262 if (RTThreadPreemptIsPending(NIL_RTTHREAD))
1263 {
1264 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitPreemptPending);
1265 rc = VINF_EM_RAW_INTERRUPT;
1266 goto end;
1267 }
1268 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
1269#endif
1270
1271 /* When external interrupts are pending, we should exit the VM when IF is set. */
1272 /* Note! *After* VM_FF_INHIBIT_INTERRUPTS check!!! */
1273 rc = hmR0SvmCheckPendingInterrupt(pVM, pVCpu, pVMCB, pCtx);
1274 if (RT_FAILURE(rc))
1275 goto end;
1276
1277 /* TPR caching using CR8 is only available in 64 bits mode or with 32 bits guests when X86_CPUID_AMD_FEATURE_ECX_CR8L is supported. */
1278 /* Note: we can't do this in LoadGuestState as PDMApicGetTPR can jump back to ring 3 (lock)!!!!!!!! (no longer true)
1279 */
1280 /** @todo query and update the TPR only when it could have been changed (mmio access)
1281 */
1282 if (pVM->hwaccm.s.fHasIoApic)
1283 {
1284 /* TPR caching in CR8 */
1285 bool fPending;
1286 rc2 = PDMApicGetTPR(pVCpu, &u8LastTPR, &fPending);
1287 AssertRC(rc2);
1288
1289 if (pVM->hwaccm.s.fTPRPatchingActive)
1290 {
1291 /* Our patch code uses LSTAR for TPR caching. */
1292 pCtx->msrLSTAR = u8LastTPR;
1293
1294 if (fPending)
1295 {
1296 /* A TPR change could activate a pending interrupt, so catch lstar writes. */
1297 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, false);
1298 }
1299 else
1300 /* No interrupts are pending, so we don't need to be explicitely notified.
1301 * There are enough world switches for detecting pending interrupts.
1302 */
1303 hmR0SvmSetMSRPermission(pVCpu, MSR_K8_LSTAR, true, true);
1304 }
1305 else
1306 {
1307 pVMCB->ctrl.IntCtrl.n.u8VTPR = (u8LastTPR >> 4); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
1308
1309 if (fPending)
1310 {
1311 /* A TPR change could activate a pending interrupt, so catch cr8 writes. */
1312 pVMCB->ctrl.u16InterceptWrCRx |= RT_BIT(8);
1313 }
1314 else
1315 /* No interrupts are pending, so we don't need to be explicitely notified.
1316 * There are enough world switches for detecting pending interrupts.
1317 */
1318 pVMCB->ctrl.u16InterceptWrCRx &= ~RT_BIT(8);
1319 }
1320 fSyncTPR = !fPending;
1321 }
1322
1323 /* All done! Let's start VM execution. */
1324
1325 /* Enable nested paging if necessary (disabled each time after #VMEXIT). */
1326 pVMCB->ctrl.NestedPaging.n.u1NestedPaging = pVM->hwaccm.s.fNestedPaging;
1327
1328#ifdef LOG_ENABLED
1329 pCpu = HWACCMR0GetCurrentCpu();
1330 if ( pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu
1331 || pVCpu->hwaccm.s.cTLBFlushes != pCpu->cTLBFlushes)
1332 {
1333 if (pVCpu->hwaccm.s.idLastCpu != pCpu->idCpu)
1334 LogFlow(("Force TLB flush due to rescheduling to a different cpu (%d vs %d)\n", pVCpu->hwaccm.s.idLastCpu, pCpu->idCpu));
1335 else
1336 LogFlow(("Force TLB flush due to changed TLB flush count (%x vs %x)\n", pVCpu->hwaccm.s.cTLBFlushes, pCpu->cTLBFlushes));
1337 }
1338 else if (VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH))
1339 LogFlow(("Manual TLB flush\n"));
1340#endif
1341
1342 /*
1343 * NOTE: DO NOT DO ANYTHING AFTER THIS POINT THAT MIGHT JUMP BACK TO RING 3!
1344 * (until the actual world switch)
1345 */
1346#ifdef VBOX_STRICT
1347 idCpuCheck = RTMpCpuId();
1348#endif
1349 VMMR0LogFlushDisable(pVCpu);
1350
1351 /* Load the guest state; *must* be here as it sets up the shadow cr0 for lazy fpu syncing! */
1352 rc = SVMR0LoadGuestState(pVM, pVCpu, pCtx);
1353 if (RT_UNLIKELY(rc != VINF_SUCCESS))
1354 {
1355 VMMR0LogFlushEnable(pVCpu);
1356 goto end;
1357 }
1358
1359#ifndef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
1360 /* Disable interrupts to make sure a poke will interrupt execution.
1361 * This must be done *before* we check for TLB flushes; TLB shootdowns rely on this.
1362 */
1363 uOldEFlags = ASMIntDisableFlags();
1364 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC);
1365#endif
1366 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatEntry, &pVCpu->hwaccm.s.StatInGC, x);
1367
1368 /*
1369 * Setup TLB control and ASID in the VMCB.
1370 */
1371 hmR0SvmSetupTLB(pVM, pVCpu);
1372
1373 /* In case we execute a goto ResumeExecution later on. */
1374 pVCpu->hwaccm.s.fResumeVM = true;
1375 pVCpu->hwaccm.s.fForceTLBFlush = pVM->hwaccm.s.svm.fAlwaysFlushTLB;
1376
1377 Assert(sizeof(pVCpu->hwaccm.s.svm.pVMCBPhys) == 8);
1378 Assert(pVMCB->ctrl.IntCtrl.n.u1VIrqMasking);
1379 Assert(pVMCB->ctrl.u64IOPMPhysAddr == pVM->hwaccm.s.svm.pIOBitmapPhys);
1380 Assert(pVMCB->ctrl.u64MSRPMPhysAddr == pVCpu->hwaccm.s.svm.pMSRBitmapPhys);
1381 Assert(pVMCB->ctrl.u64LBRVirt == 0);
1382
1383#ifdef VBOX_STRICT
1384 Assert(idCpuCheck == RTMpCpuId());
1385#endif
1386 TMNotifyStartOfExecution(pVCpu);
1387#ifdef VBOX_WITH_KERNEL_USING_XMM
1388 hwaccmR0SVMRunWrapXMM(pVCpu->hwaccm.s.svm.pVMCBHostPhys, pVCpu->hwaccm.s.svm.pVMCBPhys, pCtx, pVM, pVCpu, pVCpu->hwaccm.s.svm.pfnVMRun);
1389#else
1390 pVCpu->hwaccm.s.svm.pfnVMRun(pVCpu->hwaccm.s.svm.pVMCBHostPhys, pVCpu->hwaccm.s.svm.pVMCBPhys, pCtx, pVM, pVCpu);
1391#endif
1392 ASMAtomicWriteBool(&pVCpu->hwaccm.s.fCheckedTLBFlush, false);
1393 ASMAtomicIncU32(&pVCpu->hwaccm.s.cWorldSwitchExits);
1394 /* Possibly the last TSC value seen by the guest (too high) (only when we're in tsc offset mode). */
1395 if (!(pVMCB->ctrl.u32InterceptCtrl1 & SVM_CTRL1_INTERCEPT_RDTSC))
1396 TMCpuTickSetLastSeen(pVCpu, ASMReadTSC() + pVMCB->ctrl.u64TSCOffset - 0x400 /* guestimate of world switch overhead in clock ticks */);
1397 TMNotifyEndOfExecution(pVCpu);
1398 VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED);
1399 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatInGC, &pVCpu->hwaccm.s.StatExit1, x);
1400 ASMSetFlags(uOldEFlags);
1401#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
1402 uOldEFlags = ~(RTCCUINTREG)0;
1403#endif
1404
1405 /*
1406 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1407 * IMPORTANT: WE CAN'T DO ANY LOGGING OR OPERATIONS THAT CAN DO A LONGJMP BACK TO RING 3 *BEFORE* WE'VE SYNCED BACK (MOST OF) THE GUEST STATE
1408 * !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1409 */
1410
1411 /* Reason for the VM exit */
1412 exitCode = pVMCB->ctrl.u64ExitCode;
1413
1414 if (RT_UNLIKELY(exitCode == (uint64_t)SVM_EXIT_INVALID)) /* Invalid guest state. */
1415 {
1416 HWACCMDumpRegs(pVM, pVCpu, pCtx);
1417#ifdef DEBUG
1418 Log(("ctrl.u16InterceptRdCRx %x\n", pVMCB->ctrl.u16InterceptRdCRx));
1419 Log(("ctrl.u16InterceptWrCRx %x\n", pVMCB->ctrl.u16InterceptWrCRx));
1420 Log(("ctrl.u16InterceptRdDRx %x\n", pVMCB->ctrl.u16InterceptRdDRx));
1421 Log(("ctrl.u16InterceptWrDRx %x\n", pVMCB->ctrl.u16InterceptWrDRx));
1422 Log(("ctrl.u32InterceptException %x\n", pVMCB->ctrl.u32InterceptException));
1423 Log(("ctrl.u32InterceptCtrl1 %x\n", pVMCB->ctrl.u32InterceptCtrl1));
1424 Log(("ctrl.u32InterceptCtrl2 %x\n", pVMCB->ctrl.u32InterceptCtrl2));
1425 Log(("ctrl.u64IOPMPhysAddr %RX64\n", pVMCB->ctrl.u64IOPMPhysAddr));
1426 Log(("ctrl.u64MSRPMPhysAddr %RX64\n", pVMCB->ctrl.u64MSRPMPhysAddr));
1427 Log(("ctrl.u64TSCOffset %RX64\n", pVMCB->ctrl.u64TSCOffset));
1428
1429 Log(("ctrl.TLBCtrl.u32ASID %x\n", pVMCB->ctrl.TLBCtrl.n.u32ASID));
1430 Log(("ctrl.TLBCtrl.u8TLBFlush %x\n", pVMCB->ctrl.TLBCtrl.n.u8TLBFlush));
1431 Log(("ctrl.TLBCtrl.u24Reserved %x\n", pVMCB->ctrl.TLBCtrl.n.u24Reserved));
1432
1433 Log(("ctrl.IntCtrl.u8VTPR %x\n", pVMCB->ctrl.IntCtrl.n.u8VTPR));
1434 Log(("ctrl.IntCtrl.u1VIrqValid %x\n", pVMCB->ctrl.IntCtrl.n.u1VIrqValid));
1435 Log(("ctrl.IntCtrl.u7Reserved %x\n", pVMCB->ctrl.IntCtrl.n.u7Reserved));
1436 Log(("ctrl.IntCtrl.u4VIrqPriority %x\n", pVMCB->ctrl.IntCtrl.n.u4VIrqPriority));
1437 Log(("ctrl.IntCtrl.u1IgnoreTPR %x\n", pVMCB->ctrl.IntCtrl.n.u1IgnoreTPR));
1438 Log(("ctrl.IntCtrl.u3Reserved %x\n", pVMCB->ctrl.IntCtrl.n.u3Reserved));
1439 Log(("ctrl.IntCtrl.u1VIrqMasking %x\n", pVMCB->ctrl.IntCtrl.n.u1VIrqMasking));
1440 Log(("ctrl.IntCtrl.u7Reserved2 %x\n", pVMCB->ctrl.IntCtrl.n.u7Reserved2));
1441 Log(("ctrl.IntCtrl.u8VIrqVector %x\n", pVMCB->ctrl.IntCtrl.n.u8VIrqVector));
1442 Log(("ctrl.IntCtrl.u24Reserved %x\n", pVMCB->ctrl.IntCtrl.n.u24Reserved));
1443
1444 Log(("ctrl.u64IntShadow %RX64\n", pVMCB->ctrl.u64IntShadow));
1445 Log(("ctrl.u64ExitCode %RX64\n", pVMCB->ctrl.u64ExitCode));
1446 Log(("ctrl.u64ExitInfo1 %RX64\n", pVMCB->ctrl.u64ExitInfo1));
1447 Log(("ctrl.u64ExitInfo2 %RX64\n", pVMCB->ctrl.u64ExitInfo2));
1448 Log(("ctrl.ExitIntInfo.u8Vector %x\n", pVMCB->ctrl.ExitIntInfo.n.u8Vector));
1449 Log(("ctrl.ExitIntInfo.u3Type %x\n", pVMCB->ctrl.ExitIntInfo.n.u3Type));
1450 Log(("ctrl.ExitIntInfo.u1ErrorCodeValid %x\n", pVMCB->ctrl.ExitIntInfo.n.u1ErrorCodeValid));
1451 Log(("ctrl.ExitIntInfo.u19Reserved %x\n", pVMCB->ctrl.ExitIntInfo.n.u19Reserved));
1452 Log(("ctrl.ExitIntInfo.u1Valid %x\n", pVMCB->ctrl.ExitIntInfo.n.u1Valid));
1453 Log(("ctrl.ExitIntInfo.u32ErrorCode %x\n", pVMCB->ctrl.ExitIntInfo.n.u32ErrorCode));
1454 Log(("ctrl.NestedPaging %RX64\n", pVMCB->ctrl.NestedPaging.au64));
1455 Log(("ctrl.EventInject.u8Vector %x\n", pVMCB->ctrl.EventInject.n.u8Vector));
1456 Log(("ctrl.EventInject.u3Type %x\n", pVMCB->ctrl.EventInject.n.u3Type));
1457 Log(("ctrl.EventInject.u1ErrorCodeValid %x\n", pVMCB->ctrl.EventInject.n.u1ErrorCodeValid));
1458 Log(("ctrl.EventInject.u19Reserved %x\n", pVMCB->ctrl.EventInject.n.u19Reserved));
1459 Log(("ctrl.EventInject.u1Valid %x\n", pVMCB->ctrl.EventInject.n.u1Valid));
1460 Log(("ctrl.EventInject.u32ErrorCode %x\n", pVMCB->ctrl.EventInject.n.u32ErrorCode));
1461
1462 Log(("ctrl.u64NestedPagingCR3 %RX64\n", pVMCB->ctrl.u64NestedPagingCR3));
1463 Log(("ctrl.u64LBRVirt %RX64\n", pVMCB->ctrl.u64LBRVirt));
1464
1465 Log(("guest.CS.u16Sel %04X\n", pVMCB->guest.CS.u16Sel));
1466 Log(("guest.CS.u16Attr %04X\n", pVMCB->guest.CS.u16Attr));
1467 Log(("guest.CS.u32Limit %X\n", pVMCB->guest.CS.u32Limit));
1468 Log(("guest.CS.u64Base %RX64\n", pVMCB->guest.CS.u64Base));
1469 Log(("guest.DS.u16Sel %04X\n", pVMCB->guest.DS.u16Sel));
1470 Log(("guest.DS.u16Attr %04X\n", pVMCB->guest.DS.u16Attr));
1471 Log(("guest.DS.u32Limit %X\n", pVMCB->guest.DS.u32Limit));
1472 Log(("guest.DS.u64Base %RX64\n", pVMCB->guest.DS.u64Base));
1473 Log(("guest.ES.u16Sel %04X\n", pVMCB->guest.ES.u16Sel));
1474 Log(("guest.ES.u16Attr %04X\n", pVMCB->guest.ES.u16Attr));
1475 Log(("guest.ES.u32Limit %X\n", pVMCB->guest.ES.u32Limit));
1476 Log(("guest.ES.u64Base %RX64\n", pVMCB->guest.ES.u64Base));
1477 Log(("guest.FS.u16Sel %04X\n", pVMCB->guest.FS.u16Sel));
1478 Log(("guest.FS.u16Attr %04X\n", pVMCB->guest.FS.u16Attr));
1479 Log(("guest.FS.u32Limit %X\n", pVMCB->guest.FS.u32Limit));
1480 Log(("guest.FS.u64Base %RX64\n", pVMCB->guest.FS.u64Base));
1481 Log(("guest.GS.u16Sel %04X\n", pVMCB->guest.GS.u16Sel));
1482 Log(("guest.GS.u16Attr %04X\n", pVMCB->guest.GS.u16Attr));
1483 Log(("guest.GS.u32Limit %X\n", pVMCB->guest.GS.u32Limit));
1484 Log(("guest.GS.u64Base %RX64\n", pVMCB->guest.GS.u64Base));
1485
1486 Log(("guest.GDTR.u32Limit %X\n", pVMCB->guest.GDTR.u32Limit));
1487 Log(("guest.GDTR.u64Base %RX64\n", pVMCB->guest.GDTR.u64Base));
1488
1489 Log(("guest.LDTR.u16Sel %04X\n", pVMCB->guest.LDTR.u16Sel));
1490 Log(("guest.LDTR.u16Attr %04X\n", pVMCB->guest.LDTR.u16Attr));
1491 Log(("guest.LDTR.u32Limit %X\n", pVMCB->guest.LDTR.u32Limit));
1492 Log(("guest.LDTR.u64Base %RX64\n", pVMCB->guest.LDTR.u64Base));
1493
1494 Log(("guest.IDTR.u32Limit %X\n", pVMCB->guest.IDTR.u32Limit));
1495 Log(("guest.IDTR.u64Base %RX64\n", pVMCB->guest.IDTR.u64Base));
1496
1497 Log(("guest.TR.u16Sel %04X\n", pVMCB->guest.TR.u16Sel));
1498 Log(("guest.TR.u16Attr %04X\n", pVMCB->guest.TR.u16Attr));
1499 Log(("guest.TR.u32Limit %X\n", pVMCB->guest.TR.u32Limit));
1500 Log(("guest.TR.u64Base %RX64\n", pVMCB->guest.TR.u64Base));
1501
1502 Log(("guest.u8CPL %X\n", pVMCB->guest.u8CPL));
1503 Log(("guest.u64CR0 %RX64\n", pVMCB->guest.u64CR0));
1504 Log(("guest.u64CR2 %RX64\n", pVMCB->guest.u64CR2));
1505 Log(("guest.u64CR3 %RX64\n", pVMCB->guest.u64CR3));
1506 Log(("guest.u64CR4 %RX64\n", pVMCB->guest.u64CR4));
1507 Log(("guest.u64DR6 %RX64\n", pVMCB->guest.u64DR6));
1508 Log(("guest.u64DR7 %RX64\n", pVMCB->guest.u64DR7));
1509
1510 Log(("guest.u64RIP %RX64\n", pVMCB->guest.u64RIP));
1511 Log(("guest.u64RSP %RX64\n", pVMCB->guest.u64RSP));
1512 Log(("guest.u64RAX %RX64\n", pVMCB->guest.u64RAX));
1513 Log(("guest.u64RFlags %RX64\n", pVMCB->guest.u64RFlags));
1514
1515 Log(("guest.u64SysEnterCS %RX64\n", pVMCB->guest.u64SysEnterCS));
1516 Log(("guest.u64SysEnterEIP %RX64\n", pVMCB->guest.u64SysEnterEIP));
1517 Log(("guest.u64SysEnterESP %RX64\n", pVMCB->guest.u64SysEnterESP));
1518
1519 Log(("guest.u64EFER %RX64\n", pVMCB->guest.u64EFER));
1520 Log(("guest.u64STAR %RX64\n", pVMCB->guest.u64STAR));
1521 Log(("guest.u64LSTAR %RX64\n", pVMCB->guest.u64LSTAR));
1522 Log(("guest.u64CSTAR %RX64\n", pVMCB->guest.u64CSTAR));
1523 Log(("guest.u64SFMASK %RX64\n", pVMCB->guest.u64SFMASK));
1524 Log(("guest.u64KernelGSBase %RX64\n", pVMCB->guest.u64KernelGSBase));
1525 Log(("guest.u64GPAT %RX64\n", pVMCB->guest.u64GPAT));
1526 Log(("guest.u64DBGCTL %RX64\n", pVMCB->guest.u64DBGCTL));
1527 Log(("guest.u64BR_FROM %RX64\n", pVMCB->guest.u64BR_FROM));
1528 Log(("guest.u64BR_TO %RX64\n", pVMCB->guest.u64BR_TO));
1529 Log(("guest.u64LASTEXCPFROM %RX64\n", pVMCB->guest.u64LASTEXCPFROM));
1530 Log(("guest.u64LASTEXCPTO %RX64\n", pVMCB->guest.u64LASTEXCPTO));
1531
1532#endif
1533 rc = VERR_SVM_UNABLE_TO_START_VM;
1534 VMMR0LogFlushEnable(pVCpu);
1535 goto end;
1536 }
1537
1538 /* Let's first sync back eip, esp, and eflags. */
1539 pCtx->rip = pVMCB->guest.u64RIP;
1540 pCtx->rsp = pVMCB->guest.u64RSP;
1541 pCtx->eflags.u32 = pVMCB->guest.u64RFlags;
1542 /* eax is saved/restore across the vmrun instruction */
1543 pCtx->rax = pVMCB->guest.u64RAX;
1544
1545 /* Save all the MSRs that can be changed by the guest without causing a world switch. (fs & gs base are saved with SVM_READ_SELREG) */
1546 pCtx->msrSTAR = pVMCB->guest.u64STAR; /* legacy syscall eip, cs & ss */
1547 pCtx->msrLSTAR = pVMCB->guest.u64LSTAR; /* 64 bits mode syscall rip */
1548 pCtx->msrCSTAR = pVMCB->guest.u64CSTAR; /* compatibility mode syscall rip */
1549 pCtx->msrSFMASK = pVMCB->guest.u64SFMASK; /* syscall flag mask */
1550 pCtx->msrKERNELGSBASE = pVMCB->guest.u64KernelGSBase; /* swapgs exchange value */
1551 pCtx->SysEnter.cs = pVMCB->guest.u64SysEnterCS;
1552 pCtx->SysEnter.eip = pVMCB->guest.u64SysEnterEIP;
1553 pCtx->SysEnter.esp = pVMCB->guest.u64SysEnterESP;
1554
1555 /* Can be updated behind our back in the nested paging case. */
1556 pCtx->cr2 = pVMCB->guest.u64CR2;
1557
1558 /* Guest CPU context: ES, CS, SS, DS, FS, GS. */
1559 SVM_READ_SELREG(SS, ss);
1560 SVM_READ_SELREG(CS, cs);
1561 SVM_READ_SELREG(DS, ds);
1562 SVM_READ_SELREG(ES, es);
1563 SVM_READ_SELREG(FS, fs);
1564 SVM_READ_SELREG(GS, gs);
1565
1566 /* Correct the hidden CS granularity flag. Haven't seen it being wrong in
1567 any other register (yet). */
1568 if ( !pCtx->csHid.Attr.n.u1Granularity
1569 && pCtx->csHid.Attr.n.u1Present
1570 && pCtx->csHid.u32Limit > UINT32_C(0xfffff))
1571 {
1572 Assert((pCtx->csHid.u32Limit & 0xfff) == 0xfff);
1573 pCtx->csHid.Attr.n.u1Granularity = 1;
1574 }
1575#define SVM_ASSERT_SEL_GRANULARITY(reg) \
1576 AssertMsg( !pCtx->reg##Hid.Attr.n.u1Present \
1577 || ( pCtx->reg##Hid.Attr.n.u1Granularity \
1578 ? (pCtx->reg##Hid.u32Limit & 0xfff) == 0xfff \
1579 : pCtx->reg##Hid.u32Limit <= 0xfffff), \
1580 ("%#x %#x %#llx\n", pCtx->reg##Hid.u32Limit, pCtx->reg##Hid.Attr.u, pCtx->reg##Hid.u64Base))
1581 SVM_ASSERT_SEL_GRANULARITY(ss);
1582 SVM_ASSERT_SEL_GRANULARITY(cs);
1583 SVM_ASSERT_SEL_GRANULARITY(ds);
1584 SVM_ASSERT_SEL_GRANULARITY(es);
1585 SVM_ASSERT_SEL_GRANULARITY(fs);
1586 SVM_ASSERT_SEL_GRANULARITY(gs);
1587#undef SVM_ASSERT_SEL_GRANULARITY
1588
1589 /*
1590 * Correct the hidden SS DPL field. It can be wrong on certain CPUs
1591 * sometimes (seen it on AMD Fusion APUs with 64bit guests). The CPU
1592 * always uses the CPL field in the VMCB instead of the DPL in the hidden
1593 * SS (chapter 15.5.1 Basic operation).
1594 */
1595 Assert(!(pVMCB->guest.u8CPL & ~0x3));
1596 pCtx->ssHid.Attr.n.u2Dpl = pVMCB->guest.u8CPL & 0x3;
1597
1598 /* Remaining guest CPU context: TR, IDTR, GDTR, LDTR; must sync everything otherwise we can get out of sync when jumping to ring 3. */
1599 SVM_READ_SELREG(LDTR, ldtr);
1600 SVM_READ_SELREG(TR, tr);
1601
1602 pCtx->gdtr.cbGdt = pVMCB->guest.GDTR.u32Limit;
1603 pCtx->gdtr.pGdt = pVMCB->guest.GDTR.u64Base;
1604
1605 pCtx->idtr.cbIdt = pVMCB->guest.IDTR.u32Limit;
1606 pCtx->idtr.pIdt = pVMCB->guest.IDTR.u64Base;
1607
1608 /* Note: no reason to sync back the CRx and DRx registers. They can't be changed by the guest. */
1609 /* Note: only in the nested paging case can CR3 & CR4 be changed by the guest. */
1610 if ( pVM->hwaccm.s.fNestedPaging
1611 && pCtx->cr3 != pVMCB->guest.u64CR3)
1612 {
1613 CPUMSetGuestCR3(pVCpu, pVMCB->guest.u64CR3);
1614 PGMUpdateCR3(pVCpu, pVMCB->guest.u64CR3);
1615 }
1616
1617 /* Note! NOW IT'S SAFE FOR LOGGING! */
1618 VMMR0LogFlushEnable(pVCpu);
1619
1620 /* Take care of instruction fusing (sti, mov ss) (see 15.20.5 Interrupt Shadows) */
1621 if (pVMCB->ctrl.u64IntShadow & SVM_INTERRUPT_SHADOW_ACTIVE)
1622 {
1623 Log(("uInterruptState %x rip=%RGv\n", pVMCB->ctrl.u64IntShadow, (RTGCPTR)pCtx->rip));
1624 EMSetInhibitInterruptsPC(pVCpu, pCtx->rip);
1625 }
1626 else
1627 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
1628
1629 Log2(("exitCode = %x\n", exitCode));
1630
1631 /* Sync back DR6 as it could have been changed by hitting breakpoints. */
1632 pCtx->dr[6] = pVMCB->guest.u64DR6;
1633 /* DR7.GD can be cleared by debug exceptions, so sync it back as well. */
1634 pCtx->dr[7] = pVMCB->guest.u64DR7;
1635
1636 /* Check if an injected event was interrupted prematurely. */
1637 pVCpu->hwaccm.s.Event.intInfo = pVMCB->ctrl.ExitIntInfo.au64[0];
1638 if ( pVMCB->ctrl.ExitIntInfo.n.u1Valid
1639 && pVMCB->ctrl.ExitIntInfo.n.u3Type != SVM_EVENT_SOFTWARE_INT /* we don't care about 'int xx' as the instruction will be restarted. */)
1640 {
1641 Log(("Pending inject %RX64 at %RGv exit=%08x\n", pVCpu->hwaccm.s.Event.intInfo, (RTGCPTR)pCtx->rip, exitCode));
1642
1643#ifdef LOG_ENABLED
1644 SVM_EVENT Event;
1645 Event.au64[0] = pVCpu->hwaccm.s.Event.intInfo;
1646
1647 if ( exitCode == SVM_EXIT_EXCEPTION_E
1648 && Event.n.u8Vector == 0xE)
1649 {
1650 Log(("Double fault!\n"));
1651 }
1652#endif
1653
1654 pVCpu->hwaccm.s.Event.fPending = true;
1655 /* Error code present? (redundant) */
1656 if (pVMCB->ctrl.ExitIntInfo.n.u1ErrorCodeValid)
1657 pVCpu->hwaccm.s.Event.errCode = pVMCB->ctrl.ExitIntInfo.n.u32ErrorCode;
1658 else
1659 pVCpu->hwaccm.s.Event.errCode = 0;
1660 }
1661#ifdef VBOX_WITH_STATISTICS
1662 if (exitCode == SVM_EXIT_NPF)
1663 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitReasonNPF);
1664 else
1665 STAM_COUNTER_INC(&pVCpu->hwaccm.s.paStatExitReasonR0[exitCode & MASK_EXITREASON_STAT]);
1666#endif
1667
1668 /* Sync back the TPR if it was changed. */
1669 if (fSyncTPR)
1670 {
1671 if (pVM->hwaccm.s.fTPRPatchingActive)
1672 {
1673 if ((pCtx->msrLSTAR & 0xff) != u8LastTPR)
1674 {
1675 /* Our patch code uses LSTAR for TPR caching. */
1676 rc2 = PDMApicSetTPR(pVCpu, pCtx->msrLSTAR & 0xff);
1677 AssertRC(rc2);
1678 }
1679 }
1680 else
1681 {
1682 if ((uint8_t)(u8LastTPR >> 4) != pVMCB->ctrl.IntCtrl.n.u8VTPR)
1683 {
1684 rc2 = PDMApicSetTPR(pVCpu, pVMCB->ctrl.IntCtrl.n.u8VTPR << 4); /* cr8 bits 3-0 correspond to bits 7-4 of the task priority mmio register. */
1685 AssertRC(rc2);
1686 }
1687 }
1688 }
1689
1690#ifdef DBGFTRACE_ENABLED /** @todo DTrace */
1691 RTTraceBufAddMsgF(pVM->CTX_SUFF(hTraceBuf), "vmexit %08x at %04:%08RX64 %RX64 %RX64 %RX64",
1692 exitCode, pCtx->cs, pCtx->rip,
1693 pVMCB->ctrl.u64ExitInfo1, pVMCB->ctrl.u64ExitInfo2, pVMCB->ctrl.ExitIntInfo.au64[0]);
1694#endif
1695#if ARCH_BITS == 64 /* for the time being */
1696 VBOXVMM_R0_HMSVM_VMEXIT(pVCpu, pCtx, exitCode, pVMCB->ctrl.u64ExitInfo1, pVMCB->ctrl.u64ExitInfo2, pVMCB->ctrl.ExitIntInfo.au64[0], UINT64_MAX);
1697#endif
1698 STAM_PROFILE_ADV_STOP_START(&pVCpu->hwaccm.s.StatExit1, &pVCpu->hwaccm.s.StatExit2, x);
1699
1700 /* Deal with the reason of the VM-exit. */
1701 switch (exitCode)
1702 {
1703 case SVM_EXIT_EXCEPTION_0: case SVM_EXIT_EXCEPTION_1: case SVM_EXIT_EXCEPTION_2: case SVM_EXIT_EXCEPTION_3:
1704 case SVM_EXIT_EXCEPTION_4: case SVM_EXIT_EXCEPTION_5: case SVM_EXIT_EXCEPTION_6: case SVM_EXIT_EXCEPTION_7:
1705 case SVM_EXIT_EXCEPTION_8: case SVM_EXIT_EXCEPTION_9: case SVM_EXIT_EXCEPTION_A: case SVM_EXIT_EXCEPTION_B:
1706 case SVM_EXIT_EXCEPTION_C: case SVM_EXIT_EXCEPTION_D: case SVM_EXIT_EXCEPTION_E: case SVM_EXIT_EXCEPTION_F:
1707 case SVM_EXIT_EXCEPTION_10: case SVM_EXIT_EXCEPTION_11: case SVM_EXIT_EXCEPTION_12: case SVM_EXIT_EXCEPTION_13:
1708 case SVM_EXIT_EXCEPTION_14: case SVM_EXIT_EXCEPTION_15: case SVM_EXIT_EXCEPTION_16: case SVM_EXIT_EXCEPTION_17:
1709 case SVM_EXIT_EXCEPTION_18: case SVM_EXIT_EXCEPTION_19: case SVM_EXIT_EXCEPTION_1A: case SVM_EXIT_EXCEPTION_1B:
1710 case SVM_EXIT_EXCEPTION_1C: case SVM_EXIT_EXCEPTION_1D: case SVM_EXIT_EXCEPTION_1E: case SVM_EXIT_EXCEPTION_1F:
1711 {
1712 /* Pending trap. */
1713 SVM_EVENT Event;
1714 uint32_t vector = exitCode - SVM_EXIT_EXCEPTION_0;
1715
1716 Log2(("Hardware/software interrupt %d\n", vector));
1717 switch (vector)
1718 {
1719 case X86_XCPT_DB:
1720 {
1721 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDB);
1722
1723 /* Note that we don't support guest and host-initiated debugging at the same time. */
1724 Assert(DBGFIsStepping(pVCpu) || CPUMIsHyperDebugStateActive(pVCpu));
1725
1726 rc = DBGFRZTrap01Handler(pVM, pVCpu, CPUMCTX2CORE(pCtx), pCtx->dr[6]);
1727 if (rc == VINF_EM_RAW_GUEST_TRAP)
1728 {
1729 Log(("Trap %x (debug) at %016RX64\n", vector, pCtx->rip));
1730
1731 /* Reinject the exception. */
1732 Event.au64[0] = 0;
1733 Event.n.u3Type = SVM_EVENT_EXCEPTION; /* trap or fault */
1734 Event.n.u1Valid = 1;
1735 Event.n.u8Vector = X86_XCPT_DB;
1736
1737 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
1738 goto ResumeExecution;
1739 }
1740 /* Return to ring 3 to deal with the debug exit code. */
1741 Log(("Debugger hardware BP at %04x:%RGv (rc=%Rrc)\n", pCtx->cs, pCtx->rip, VBOXSTRICTRC_VAL(rc)));
1742 break;
1743 }
1744
1745 case X86_XCPT_NM:
1746 {
1747 Log(("#NM fault at %RGv\n", (RTGCPTR)pCtx->rip));
1748
1749 /** @todo don't intercept #NM exceptions anymore when we've activated the guest FPU state. */
1750 /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */
1751 rc = CPUMR0LoadGuestFPU(pVM, pVCpu, pCtx);
1752 if (rc == VINF_SUCCESS)
1753 {
1754 Assert(CPUMIsGuestFPUStateActive(pVCpu));
1755 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowNM);
1756
1757 /* Continue execution. */
1758 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
1759
1760 goto ResumeExecution;
1761 }
1762
1763 Log(("Forward #NM fault to the guest\n"));
1764 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNM);
1765
1766 Event.au64[0] = 0;
1767 Event.n.u3Type = SVM_EVENT_EXCEPTION;
1768 Event.n.u1Valid = 1;
1769 Event.n.u8Vector = X86_XCPT_NM;
1770
1771 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
1772 goto ResumeExecution;
1773 }
1774
1775 case X86_XCPT_PF: /* Page fault */
1776 {
1777 uint32_t errCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
1778 RTGCUINTPTR uFaultAddress = pVMCB->ctrl.u64ExitInfo2; /* EXITINFO2 = fault address */
1779
1780#ifdef VBOX_ALWAYS_TRAP_PF
1781 if (pVM->hwaccm.s.fNestedPaging)
1782 { /* A genuine pagefault.
1783 * Forward the trap to the guest by injecting the exception and resuming execution.
1784 */
1785 Log(("Guest page fault at %04X:%RGv cr2=%RGv error code %x rsp=%RGv\n", pCtx->cs, (RTGCPTR)pCtx->rip, uFaultAddress, errCode, (RTGCPTR)pCtx->rsp));
1786 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
1787
1788 /* Now we must update CR2. */
1789 pCtx->cr2 = uFaultAddress;
1790
1791 Event.au64[0] = 0;
1792 Event.n.u3Type = SVM_EVENT_EXCEPTION;
1793 Event.n.u1Valid = 1;
1794 Event.n.u8Vector = X86_XCPT_PF;
1795 Event.n.u1ErrorCodeValid = 1;
1796 Event.n.u32ErrorCode = errCode;
1797
1798 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
1799 goto ResumeExecution;
1800 }
1801#endif
1802 Assert(!pVM->hwaccm.s.fNestedPaging);
1803
1804#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
1805 /* Shortcut for APIC TPR reads and writes; 32 bits guests only */
1806 if ( pVM->hwaccm.s.fTRPPatchingAllowed
1807 && (uFaultAddress & 0xfff) == 0x080
1808 && !(errCode & X86_TRAP_PF_P) /* not present */
1809 && CPUMGetGuestCPL(pVCpu, CPUMCTX2CORE(pCtx)) == 0
1810 && !CPUMIsGuestInLongModeEx(pCtx)
1811 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
1812 {
1813 RTGCPHYS GCPhysApicBase, GCPhys;
1814 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
1815 GCPhysApicBase &= PAGE_BASE_GC_MASK;
1816
1817 rc = PGMGstGetPage(pVCpu, (RTGCPTR)uFaultAddress, NULL, &GCPhys);
1818 if ( rc == VINF_SUCCESS
1819 && GCPhys == GCPhysApicBase)
1820 {
1821 /* Only attempt to patch the instruction once. */
1822 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
1823 if (!pPatch)
1824 {
1825 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
1826 break;
1827 }
1828 }
1829 }
1830#endif
1831
1832 Log2(("Page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode));
1833 /* Exit qualification contains the linear address of the page fault. */
1834 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
1835 TRPMSetErrorCode(pVCpu, errCode);
1836 TRPMSetFaultAddress(pVCpu, uFaultAddress);
1837
1838 /* Forward it to our trap handler first, in case our shadow pages are out of sync. */
1839 rc = PGMTrap0eHandler(pVCpu, errCode, CPUMCTX2CORE(pCtx), (RTGCPTR)uFaultAddress);
1840 Log2(("PGMTrap0eHandler %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
1841 if (rc == VINF_SUCCESS)
1842 { /* We've successfully synced our shadow pages, so let's just continue execution. */
1843 Log2(("Shadow page fault at %RGv cr2=%RGv error code %x\n", (RTGCPTR)pCtx->rip, uFaultAddress, errCode));
1844 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
1845
1846 TRPMResetTrap(pVCpu);
1847 goto ResumeExecution;
1848 }
1849 else
1850 if (rc == VINF_EM_RAW_GUEST_TRAP)
1851 { /* A genuine pagefault.
1852 * Forward the trap to the guest by injecting the exception and resuming execution.
1853 */
1854 Log2(("Forward page fault to the guest\n"));
1855 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestPF);
1856 /* The error code might have been changed. */
1857 errCode = TRPMGetErrorCode(pVCpu);
1858
1859 TRPMResetTrap(pVCpu);
1860
1861 /* Now we must update CR2. */
1862 pCtx->cr2 = uFaultAddress;
1863
1864 Event.au64[0] = 0;
1865 Event.n.u3Type = SVM_EVENT_EXCEPTION;
1866 Event.n.u1Valid = 1;
1867 Event.n.u8Vector = X86_XCPT_PF;
1868 Event.n.u1ErrorCodeValid = 1;
1869 Event.n.u32ErrorCode = errCode;
1870
1871 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
1872 goto ResumeExecution;
1873 }
1874#ifdef VBOX_STRICT
1875 if (rc != VINF_EM_RAW_EMULATE_INSTR && rc != VINF_EM_RAW_EMULATE_IO_BLOCK)
1876 LogFlow(("PGMTrap0eHandler failed with %d\n", VBOXSTRICTRC_VAL(rc)));
1877#endif
1878 /* Need to go back to the recompiler to emulate the instruction. */
1879 TRPMResetTrap(pVCpu);
1880 break;
1881 }
1882
1883 case X86_XCPT_MF: /* Floating point exception. */
1884 {
1885 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestMF);
1886 if (!(pCtx->cr0 & X86_CR0_NE))
1887 {
1888 /* old style FPU error reporting needs some extra work. */
1889 /** @todo don't fall back to the recompiler, but do it manually. */
1890 rc = VINF_EM_RAW_EMULATE_INSTR;
1891 break;
1892 }
1893 Log(("Trap %x at %RGv\n", vector, (RTGCPTR)pCtx->rip));
1894
1895 Event.au64[0] = 0;
1896 Event.n.u3Type = SVM_EVENT_EXCEPTION;
1897 Event.n.u1Valid = 1;
1898 Event.n.u8Vector = X86_XCPT_MF;
1899
1900 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
1901 goto ResumeExecution;
1902 }
1903
1904#ifdef VBOX_STRICT
1905 case X86_XCPT_BP: /* Breakpoint. */
1906 case X86_XCPT_GP: /* General protection failure exception.*/
1907 case X86_XCPT_UD: /* Unknown opcode exception. */
1908 case X86_XCPT_DE: /* Divide error. */
1909 case X86_XCPT_SS: /* Stack segment exception. */
1910 case X86_XCPT_NP: /* Segment not present exception. */
1911 {
1912 Event.au64[0] = 0;
1913 Event.n.u3Type = SVM_EVENT_EXCEPTION;
1914 Event.n.u1Valid = 1;
1915 Event.n.u8Vector = vector;
1916
1917 switch(vector)
1918 {
1919 case X86_XCPT_GP:
1920 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestGP);
1921 Event.n.u1ErrorCodeValid = 1;
1922 Event.n.u32ErrorCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
1923 break;
1924 case X86_XCPT_BP:
1925 /** Saves the wrong EIP on the stack (pointing to the int3 instead of the next instruction). */
1926 break;
1927 case X86_XCPT_DE:
1928 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestDE);
1929 break;
1930 case X86_XCPT_UD:
1931 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestUD);
1932 break;
1933 case X86_XCPT_SS:
1934 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestSS);
1935 Event.n.u1ErrorCodeValid = 1;
1936 Event.n.u32ErrorCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
1937 break;
1938 case X86_XCPT_NP:
1939 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitGuestNP);
1940 Event.n.u1ErrorCodeValid = 1;
1941 Event.n.u32ErrorCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
1942 break;
1943 }
1944 Log(("Trap %x at %04x:%RGv esi=%x\n", vector, pCtx->cs, (RTGCPTR)pCtx->rip, pCtx->esi));
1945 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
1946 goto ResumeExecution;
1947 }
1948#endif
1949 default:
1950 AssertMsgFailed(("Unexpected vm-exit caused by exception %x\n", vector));
1951 rc = VERR_HMSVM_UNEXPECTED_XCPT_EXIT;
1952 break;
1953
1954 } /* switch (vector) */
1955 break;
1956 }
1957
1958 case SVM_EXIT_NPF:
1959 {
1960 /* EXITINFO1 contains fault errorcode; EXITINFO2 contains the guest physical address causing the fault. */
1961 uint32_t errCode = pVMCB->ctrl.u64ExitInfo1; /* EXITINFO1 = error code */
1962 RTGCPHYS GCPhysFault = pVMCB->ctrl.u64ExitInfo2; /* EXITINFO2 = fault address */
1963 PGMMODE enmShwPagingMode;
1964
1965 Assert(pVM->hwaccm.s.fNestedPaging);
1966 LogFlow(("Nested page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, GCPhysFault, errCode));
1967
1968#ifdef VBOX_HWACCM_WITH_GUEST_PATCHING
1969 /* Shortcut for APIC TPR reads and writes; 32 bits guests only */
1970 if ( pVM->hwaccm.s.fTRPPatchingAllowed
1971 && (GCPhysFault & PAGE_OFFSET_MASK) == 0x080
1972 && ( !(errCode & X86_TRAP_PF_P) /* not present */
1973 || (errCode & (X86_TRAP_PF_P | X86_TRAP_PF_RSVD)) == (X86_TRAP_PF_P | X86_TRAP_PF_RSVD) /* mmio optimization */)
1974 && CPUMGetGuestCPL(pVCpu, CPUMCTX2CORE(pCtx)) == 0
1975 && !CPUMIsGuestInLongModeEx(pCtx)
1976 && pVM->hwaccm.s.cPatches < RT_ELEMENTS(pVM->hwaccm.s.aPatches))
1977 {
1978 RTGCPHYS GCPhysApicBase;
1979 PDMApicGetBase(pVM, &GCPhysApicBase); /* @todo cache this */
1980 GCPhysApicBase &= PAGE_BASE_GC_MASK;
1981
1982 if (GCPhysFault == GCPhysApicBase + 0x80)
1983 {
1984 /* Only attempt to patch the instruction once. */
1985 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
1986 if (!pPatch)
1987 {
1988 rc = VINF_EM_HWACCM_PATCH_TPR_INSTR;
1989 break;
1990 }
1991 }
1992 }
1993#endif
1994
1995 /* Handle the pagefault trap for the nested shadow table. */
1996#if HC_ARCH_BITS == 32 /** @todo shadow this in a variable. */
1997 if (CPUMIsGuestInLongModeEx(pCtx))
1998 enmShwPagingMode = PGMMODE_AMD64_NX;
1999 else
2000#endif
2001 enmShwPagingMode = PGMGetHostMode(pVM);
2002
2003 /* MMIO optimization */
2004 Assert((errCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) != X86_TRAP_PF_RSVD);
2005 if ((errCode & (X86_TRAP_PF_RSVD | X86_TRAP_PF_P)) == (X86_TRAP_PF_RSVD | X86_TRAP_PF_P))
2006 {
2007 rc = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, enmShwPagingMode, CPUMCTX2CORE(pCtx), GCPhysFault, errCode);
2008
2009 /*
2010 * If we succeed, resume execution.
2011 * Or, if we fail to interpret the instruction because we couldn't get the guest physical address
2012 * of the page containing the instruction via the guest's page tables (we would invalidate the guest page
2013 * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this
2014 * weird case. See #6043.
2015 */
2016 if ( rc == VINF_SUCCESS
2017 || rc == VERR_PAGE_TABLE_NOT_PRESENT
2018 || rc == VERR_PAGE_NOT_PRESENT)
2019 {
2020 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhysFault, (RTGCPTR)pCtx->rip));
2021 goto ResumeExecution;
2022 }
2023 Log2(("PGMR0Trap0eHandlerNPMisconfig(,,,%RGp) at %RGv -> resume\n", GCPhysFault, (RTGCPTR)pCtx->rip));
2024 break;
2025 }
2026
2027 /* Exit qualification contains the linear address of the page fault. */
2028 TRPMAssertTrap(pVCpu, X86_XCPT_PF, TRPM_TRAP);
2029 TRPMSetErrorCode(pVCpu, errCode);
2030 TRPMSetFaultAddress(pVCpu, GCPhysFault);
2031
2032 rc = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, enmShwPagingMode, errCode, CPUMCTX2CORE(pCtx), GCPhysFault);
2033 Log2(("PGMR0Trap0eHandlerNestedPaging %RGv returned %Rrc\n", (RTGCPTR)pCtx->rip, VBOXSTRICTRC_VAL(rc)));
2034
2035 /*
2036 * Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, #6043.
2037 */
2038 if ( rc == VINF_SUCCESS
2039 || rc == VERR_PAGE_TABLE_NOT_PRESENT
2040 || rc == VERR_PAGE_NOT_PRESENT)
2041 { /* We've successfully synced our shadow pages, so let's just continue execution. */
2042 Log2(("Shadow page fault at %RGv cr2=%RGp error code %x\n", (RTGCPTR)pCtx->rip, GCPhysFault, errCode));
2043 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitShadowPF);
2044
2045 TRPMResetTrap(pVCpu);
2046 goto ResumeExecution;
2047 }
2048
2049#ifdef VBOX_STRICT
2050 if (rc != VINF_EM_RAW_EMULATE_INSTR)
2051 LogFlow(("PGMTrap0eHandlerNestedPaging failed with %d\n", VBOXSTRICTRC_VAL(rc)));
2052#endif
2053 /* Need to go back to the recompiler to emulate the instruction. */
2054 TRPMResetTrap(pVCpu);
2055 break;
2056 }
2057
2058 case SVM_EXIT_VINTR:
2059 /* A virtual interrupt is about to be delivered, which means IF=1. */
2060 Log(("SVM_EXIT_VINTR IF=%d\n", pCtx->eflags.Bits.u1IF));
2061 pVMCB->ctrl.IntCtrl.n.u1VIrqValid = 0;
2062 pVMCB->ctrl.IntCtrl.n.u8VIrqVector = 0;
2063 goto ResumeExecution;
2064
2065 case SVM_EXIT_FERR_FREEZE:
2066 case SVM_EXIT_INTR:
2067 case SVM_EXIT_NMI:
2068 case SVM_EXIT_SMI:
2069 case SVM_EXIT_INIT:
2070 /* External interrupt; leave to allow it to be dispatched again. */
2071 rc = VINF_EM_RAW_INTERRUPT;
2072 break;
2073
2074 case SVM_EXIT_WBINVD:
2075 case SVM_EXIT_INVD: /* Guest software attempted to execute INVD. */
2076 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvd);
2077 /* Skip instruction and continue directly. */
2078 pCtx->rip += 2; /* Note! hardcoded opcode size! */
2079 /* Continue execution.*/
2080 goto ResumeExecution;
2081
2082 case SVM_EXIT_CPUID: /* Guest software attempted to execute CPUID. */
2083 {
2084 Log2(("SVM: Cpuid at %RGv for %x\n", (RTGCPTR)pCtx->rip, pCtx->eax));
2085 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCpuid);
2086 rc = EMInterpretCpuId(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2087 if (rc == VINF_SUCCESS)
2088 {
2089 /* Update EIP and continue execution. */
2090 pCtx->rip += 2; /* Note! hardcoded opcode size! */
2091 goto ResumeExecution;
2092 }
2093 AssertMsgFailed(("EMU: cpuid failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
2094 rc = VINF_EM_RAW_EMULATE_INSTR;
2095 break;
2096 }
2097
2098 case SVM_EXIT_RDTSC: /* Guest software attempted to execute RDTSC. */
2099 {
2100 Log2(("SVM: Rdtsc\n"));
2101 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
2102 rc = EMInterpretRdtsc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2103 if (rc == VINF_SUCCESS)
2104 {
2105 /* Update EIP and continue execution. */
2106 pCtx->rip += 2; /* Note! hardcoded opcode size! */
2107 goto ResumeExecution;
2108 }
2109 rc = VINF_EM_RAW_EMULATE_INSTR;
2110 break;
2111 }
2112
2113 case SVM_EXIT_RDPMC: /* Guest software attempted to execute RDPMC. */
2114 {
2115 Log2(("SVM: Rdpmc %x\n", pCtx->ecx));
2116 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdpmc);
2117 rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2118 if (rc == VINF_SUCCESS)
2119 {
2120 /* Update EIP and continue execution. */
2121 pCtx->rip += 2; /* Note! hardcoded opcode size! */
2122 goto ResumeExecution;
2123 }
2124 rc = VINF_EM_RAW_EMULATE_INSTR;
2125 break;
2126 }
2127
2128 case SVM_EXIT_RDTSCP: /* Guest software attempted to execute RDTSCP. */
2129 {
2130 Log2(("SVM: Rdtscp\n"));
2131 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitRdtsc);
2132 rc = EMInterpretRdtscp(pVM, pVCpu, pCtx);
2133 if (rc == VINF_SUCCESS)
2134 {
2135 /* Update EIP and continue execution. */
2136 pCtx->rip += 3; /* Note! hardcoded opcode size! */
2137 goto ResumeExecution;
2138 }
2139 AssertMsgFailed(("EMU: rdtscp failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
2140 rc = VINF_EM_RAW_EMULATE_INSTR;
2141 break;
2142 }
2143
2144 case SVM_EXIT_INVLPG: /* Guest software attempted to execute INVPG. */
2145 {
2146 Log2(("SVM: invlpg\n"));
2147 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitInvpg);
2148
2149 Assert(!pVM->hwaccm.s.fNestedPaging);
2150
2151 /* Truly a pita. Why can't SVM give the same information as VT-x? */
2152 rc = hmR0SvmInterpretInvpg(pVM, pVCpu, CPUMCTX2CORE(pCtx), pVMCB->ctrl.TLBCtrl.n.u32ASID);
2153 if (rc == VINF_SUCCESS)
2154 {
2155 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushPageInvlpg);
2156 goto ResumeExecution; /* eip already updated */
2157 }
2158 break;
2159 }
2160
2161 case SVM_EXIT_WRITE_CR0: case SVM_EXIT_WRITE_CR1: case SVM_EXIT_WRITE_CR2: case SVM_EXIT_WRITE_CR3:
2162 case SVM_EXIT_WRITE_CR4: case SVM_EXIT_WRITE_CR5: case SVM_EXIT_WRITE_CR6: case SVM_EXIT_WRITE_CR7:
2163 case SVM_EXIT_WRITE_CR8: case SVM_EXIT_WRITE_CR9: case SVM_EXIT_WRITE_CR10: case SVM_EXIT_WRITE_CR11:
2164 case SVM_EXIT_WRITE_CR12: case SVM_EXIT_WRITE_CR13: case SVM_EXIT_WRITE_CR14: case SVM_EXIT_WRITE_CR15:
2165 {
2166 Log2(("SVM: %RGv mov cr%d, \n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_WRITE_CR0));
2167 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxWrite[exitCode - SVM_EXIT_WRITE_CR0]);
2168 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2169
2170 switch (exitCode - SVM_EXIT_WRITE_CR0)
2171 {
2172 case 0:
2173 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR0;
2174 break;
2175 case 2:
2176 break;
2177 case 3:
2178 Assert(!pVM->hwaccm.s.fNestedPaging);
2179 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR3;
2180 break;
2181 case 4:
2182 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_CR4;
2183 break;
2184 case 8:
2185 break;
2186 default:
2187 AssertFailed();
2188 }
2189 if (rc == VINF_SUCCESS)
2190 {
2191 /* EIP has been updated already. */
2192
2193 /* Only resume if successful. */
2194 goto ResumeExecution;
2195 }
2196 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2197 break;
2198 }
2199
2200 case SVM_EXIT_READ_CR0: case SVM_EXIT_READ_CR1: case SVM_EXIT_READ_CR2: case SVM_EXIT_READ_CR3:
2201 case SVM_EXIT_READ_CR4: case SVM_EXIT_READ_CR5: case SVM_EXIT_READ_CR6: case SVM_EXIT_READ_CR7:
2202 case SVM_EXIT_READ_CR8: case SVM_EXIT_READ_CR9: case SVM_EXIT_READ_CR10: case SVM_EXIT_READ_CR11:
2203 case SVM_EXIT_READ_CR12: case SVM_EXIT_READ_CR13: case SVM_EXIT_READ_CR14: case SVM_EXIT_READ_CR15:
2204 {
2205 Log2(("SVM: %RGv mov x, cr%d\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_READ_CR0));
2206 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitCRxRead[exitCode - SVM_EXIT_READ_CR0]);
2207 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2208 if (rc == VINF_SUCCESS)
2209 {
2210 /* EIP has been updated already. */
2211
2212 /* Only resume if successful. */
2213 goto ResumeExecution;
2214 }
2215 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2216 break;
2217 }
2218
2219 case SVM_EXIT_WRITE_DR0: case SVM_EXIT_WRITE_DR1: case SVM_EXIT_WRITE_DR2: case SVM_EXIT_WRITE_DR3:
2220 case SVM_EXIT_WRITE_DR4: case SVM_EXIT_WRITE_DR5: case SVM_EXIT_WRITE_DR6: case SVM_EXIT_WRITE_DR7:
2221 case SVM_EXIT_WRITE_DR8: case SVM_EXIT_WRITE_DR9: case SVM_EXIT_WRITE_DR10: case SVM_EXIT_WRITE_DR11:
2222 case SVM_EXIT_WRITE_DR12: case SVM_EXIT_WRITE_DR13: case SVM_EXIT_WRITE_DR14: case SVM_EXIT_WRITE_DR15:
2223 {
2224 Log2(("SVM: %RGv mov dr%d, x\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_WRITE_DR0));
2225 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxWrite);
2226
2227 if ( !DBGFIsStepping(pVCpu)
2228 && !CPUMIsHyperDebugStateActive(pVCpu))
2229 {
2230 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
2231
2232 /* Disable drx move intercepts. */
2233 pVMCB->ctrl.u16InterceptRdDRx = 0;
2234 pVMCB->ctrl.u16InterceptWrDRx = 0;
2235
2236 /* Save the host and load the guest debug state. */
2237 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */);
2238 AssertRC(rc2);
2239 goto ResumeExecution;
2240 }
2241
2242 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2243 if (rc == VINF_SUCCESS)
2244 {
2245 /* EIP has been updated already. */
2246 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
2247
2248 /* Only resume if successful. */
2249 goto ResumeExecution;
2250 }
2251 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2252 break;
2253 }
2254
2255 case SVM_EXIT_READ_DR0: case SVM_EXIT_READ_DR1: case SVM_EXIT_READ_DR2: case SVM_EXIT_READ_DR3:
2256 case SVM_EXIT_READ_DR4: case SVM_EXIT_READ_DR5: case SVM_EXIT_READ_DR6: case SVM_EXIT_READ_DR7:
2257 case SVM_EXIT_READ_DR8: case SVM_EXIT_READ_DR9: case SVM_EXIT_READ_DR10: case SVM_EXIT_READ_DR11:
2258 case SVM_EXIT_READ_DR12: case SVM_EXIT_READ_DR13: case SVM_EXIT_READ_DR14: case SVM_EXIT_READ_DR15:
2259 {
2260 Log2(("SVM: %RGv mov x, dr%d\n", (RTGCPTR)pCtx->rip, exitCode - SVM_EXIT_READ_DR0));
2261 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitDRxRead);
2262
2263 if (!DBGFIsStepping(pVCpu))
2264 {
2265 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxContextSwitch);
2266
2267 /* Disable drx move intercepts. */
2268 pVMCB->ctrl.u16InterceptRdDRx = 0;
2269 pVMCB->ctrl.u16InterceptWrDRx = 0;
2270
2271 /* Save the host and load the guest debug state. */
2272 rc2 = CPUMR0LoadGuestDebugState(pVM, pVCpu, pCtx, false /* exclude DR6 */);
2273 AssertRC(rc2);
2274 goto ResumeExecution;
2275 }
2276
2277 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2278 if (rc == VINF_SUCCESS)
2279 {
2280 /* EIP has been updated already. */
2281
2282 /* Only resume if successful. */
2283 goto ResumeExecution;
2284 }
2285 Assert(rc == VERR_EM_INTERPRETER || rc == VINF_PGM_CHANGE_MODE || rc == VINF_PGM_SYNC_CR3);
2286 break;
2287 }
2288
2289 /* Note: We'll get a #GP if the IO instruction isn't allowed (IOPL or TSS bitmap); no need to double check. */
2290 case SVM_EXIT_IOIO: /* I/O instruction. */
2291 {
2292 SVM_IOIO_EXIT IoExitInfo;
2293 uint32_t uIOSize, uAndVal;
2294
2295 IoExitInfo.au32[0] = pVMCB->ctrl.u64ExitInfo1;
2296
2297 /** @todo could use a lookup table here */
2298 if (IoExitInfo.n.u1OP8)
2299 {
2300 uIOSize = 1;
2301 uAndVal = 0xff;
2302 }
2303 else
2304 if (IoExitInfo.n.u1OP16)
2305 {
2306 uIOSize = 2;
2307 uAndVal = 0xffff;
2308 }
2309 else
2310 if (IoExitInfo.n.u1OP32)
2311 {
2312 uIOSize = 4;
2313 uAndVal = 0xffffffff;
2314 }
2315 else
2316 {
2317 AssertFailed(); /* should be fatal. */
2318 rc = VINF_EM_RAW_EMULATE_INSTR;
2319 break;
2320 }
2321
2322 if (IoExitInfo.n.u1STR)
2323 {
2324 /* ins/outs */
2325 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
2326
2327 /* Disassemble manually to deal with segment prefixes. */
2328 rc = EMInterpretDisasOne(pVM, pVCpu, CPUMCTX2CORE(pCtx), pDis, NULL);
2329 if (rc == VINF_SUCCESS)
2330 {
2331 if (IoExitInfo.n.u1Type == 0)
2332 {
2333 Log2(("IOMInterpretOUTSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize));
2334 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringWrite);
2335 rc = IOMInterpretOUTSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->prefix, pDis->addrmode, uIOSize);
2336 }
2337 else
2338 {
2339 Log2(("IOMInterpretINSEx %RGv %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize));
2340 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOStringRead);
2341 rc = IOMInterpretINSEx(pVM, CPUMCTX2CORE(pCtx), IoExitInfo.n.u16Port, pDis->prefix, pDis->addrmode, uIOSize);
2342 }
2343 }
2344 else
2345 rc = VINF_EM_RAW_EMULATE_INSTR;
2346 }
2347 else
2348 {
2349 /* normal in/out */
2350 Assert(!IoExitInfo.n.u1REP);
2351
2352 if (IoExitInfo.n.u1Type == 0)
2353 {
2354 Log2(("IOMIOPortWrite %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize));
2355 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIOWrite);
2356 rc = IOMIOPortWrite(pVM, IoExitInfo.n.u16Port, pCtx->eax & uAndVal, uIOSize);
2357 if (rc == VINF_IOM_R3_IOPORT_WRITE)
2358 HWACCMR0SavePendingIOPortWrite(pVCpu, pCtx->rip, pVMCB->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, uAndVal, uIOSize);
2359 }
2360 else
2361 {
2362 uint32_t u32Val = 0;
2363
2364 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitIORead);
2365 rc = IOMIOPortRead(pVM, IoExitInfo.n.u16Port, &u32Val, uIOSize);
2366 if (IOM_SUCCESS(rc))
2367 {
2368 /* Write back to the EAX register. */
2369 pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Val & uAndVal);
2370 Log2(("IOMIOPortRead %RGv %x %x size=%d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, u32Val & uAndVal, uIOSize));
2371 }
2372 else
2373 if (rc == VINF_IOM_R3_IOPORT_READ)
2374 HWACCMR0SavePendingIOPortRead(pVCpu, pCtx->rip, pVMCB->ctrl.u64ExitInfo2, IoExitInfo.n.u16Port, uAndVal, uIOSize);
2375 }
2376 }
2377 /*
2378 * Handle the I/O return codes.
2379 * (The unhandled cases end up with rc == VINF_EM_RAW_EMULATE_INSTR.)
2380 */
2381 if (IOM_SUCCESS(rc))
2382 {
2383 /* Update EIP and continue execution. */
2384 pCtx->rip = pVMCB->ctrl.u64ExitInfo2; /* RIP/EIP of the next instruction is saved in EXITINFO2. */
2385 if (RT_LIKELY(rc == VINF_SUCCESS))
2386 {
2387 /* If any IO breakpoints are armed, then we should check if a debug trap needs to be generated. */
2388 if (pCtx->dr[7] & X86_DR7_ENABLED_MASK)
2389 {
2390 /* IO operation lookup arrays. */
2391 static uint32_t const aIOSize[4] = {1, 2, 0, 4};
2392
2393 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatDRxIOCheck);
2394 for (unsigned i=0;i<4;i++)
2395 {
2396 unsigned uBPLen = aIOSize[X86_DR7_GET_LEN(pCtx->dr[7], i)];
2397
2398 if ( (IoExitInfo.n.u16Port >= pCtx->dr[i] && IoExitInfo.n.u16Port < pCtx->dr[i] + uBPLen)
2399 && (pCtx->dr[7] & (X86_DR7_L(i) | X86_DR7_G(i)))
2400 && (pCtx->dr[7] & X86_DR7_RW(i, X86_DR7_RW_IO)) == X86_DR7_RW(i, X86_DR7_RW_IO))
2401 {
2402 SVM_EVENT Event;
2403
2404 Assert(CPUMIsGuestDebugStateActive(pVCpu));
2405
2406 /* Clear all breakpoint status flags and set the one we just hit. */
2407 pCtx->dr[6] &= ~(X86_DR6_B0|X86_DR6_B1|X86_DR6_B2|X86_DR6_B3);
2408 pCtx->dr[6] |= (uint64_t)RT_BIT(i);
2409
2410 /* Note: AMD64 Architecture Programmer's Manual 13.1:
2411 * Bits 15:13 of the DR6 register are never cleared by the processor and must be cleared by software after
2412 * the contents have been read.
2413 */
2414 pVMCB->guest.u64DR6 = pCtx->dr[6];
2415
2416 /* X86_DR7_GD will be cleared if drx accesses should be trapped inside the guest. */
2417 pCtx->dr[7] &= ~X86_DR7_GD;
2418
2419 /* Paranoia. */
2420 pCtx->dr[7] &= 0xffffffff; /* upper 32 bits reserved */
2421 pCtx->dr[7] &= ~(RT_BIT(11) | RT_BIT(12) | RT_BIT(14) | RT_BIT(15)); /* must be zero */
2422 pCtx->dr[7] |= 0x400; /* must be one */
2423
2424 pVMCB->guest.u64DR7 = pCtx->dr[7];
2425
2426 /* Inject the exception. */
2427 Log(("Inject IO debug trap at %RGv\n", (RTGCPTR)pCtx->rip));
2428
2429 Event.au64[0] = 0;
2430 Event.n.u3Type = SVM_EVENT_EXCEPTION; /* trap or fault */
2431 Event.n.u1Valid = 1;
2432 Event.n.u8Vector = X86_XCPT_DB;
2433
2434 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
2435 goto ResumeExecution;
2436 }
2437 }
2438 }
2439 goto ResumeExecution;
2440 }
2441 Log2(("EM status from IO at %RGv %x size %d: %Rrc\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize, VBOXSTRICTRC_VAL(rc)));
2442 break;
2443 }
2444
2445#ifdef VBOX_STRICT
2446 if (rc == VINF_IOM_R3_IOPORT_READ)
2447 Assert(IoExitInfo.n.u1Type != 0);
2448 else if (rc == VINF_IOM_R3_IOPORT_WRITE)
2449 Assert(IoExitInfo.n.u1Type == 0);
2450 else
2451 AssertMsg(RT_FAILURE(rc) || rc == VINF_EM_RAW_EMULATE_INSTR || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rc)));
2452#endif
2453 Log2(("Failed IO at %RGv %x size %d\n", (RTGCPTR)pCtx->rip, IoExitInfo.n.u16Port, uIOSize));
2454 break;
2455 }
2456
2457 case SVM_EXIT_HLT:
2458 /* Check if external interrupts are pending; if so, don't switch back. */
2459 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitHlt);
2460 pCtx->rip++; /* skip hlt */
2461 if (EMShouldContinueAfterHalt(pVCpu, pCtx))
2462 goto ResumeExecution;
2463
2464 rc = VINF_EM_HALT;
2465 break;
2466
2467 case SVM_EXIT_MWAIT_UNCOND:
2468 Log2(("SVM: mwait\n"));
2469 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMwait);
2470 rc = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2471 if ( rc == VINF_EM_HALT
2472 || rc == VINF_SUCCESS)
2473 {
2474 /* Update EIP and continue execution. */
2475 pCtx->rip += 3; /* Note: hardcoded opcode size assumption! */
2476
2477 /* Check if external interrupts are pending; if so, don't switch back. */
2478 if ( rc == VINF_SUCCESS
2479 || ( rc == VINF_EM_HALT
2480 && EMShouldContinueAfterHalt(pVCpu, pCtx))
2481 )
2482 goto ResumeExecution;
2483 }
2484 AssertMsg(rc == VERR_EM_INTERPRETER || rc == VINF_EM_HALT, ("EMU: mwait failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
2485 break;
2486
2487 case SVM_EXIT_MONITOR:
2488 {
2489 Log2(("SVM: monitor\n"));
2490
2491 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatExitMonitor);
2492 rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx));
2493 if (rc == VINF_SUCCESS)
2494 {
2495 /* Update EIP and continue execution. */
2496 pCtx->rip += 3; /* Note: hardcoded opcode size assumption! */
2497 goto ResumeExecution;
2498 }
2499 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: monitor failed with %Rrc\n", VBOXSTRICTRC_VAL(rc)));
2500 break;
2501 }
2502
2503
2504 case SVM_EXIT_VMMCALL:
2505 rc = hmR0SvmEmulateTprVMMCall(pVM, pVCpu, pCtx);
2506 if (rc == VINF_SUCCESS)
2507 {
2508 goto ResumeExecution; /* rip already updated. */
2509 }
2510 /* no break */
2511
2512 case SVM_EXIT_RSM:
2513 case SVM_EXIT_INVLPGA:
2514 case SVM_EXIT_VMRUN:
2515 case SVM_EXIT_VMLOAD:
2516 case SVM_EXIT_VMSAVE:
2517 case SVM_EXIT_STGI:
2518 case SVM_EXIT_CLGI:
2519 case SVM_EXIT_SKINIT:
2520 {
2521 /* Unsupported instructions. */
2522 SVM_EVENT Event;
2523
2524 Event.au64[0] = 0;
2525 Event.n.u3Type = SVM_EVENT_EXCEPTION;
2526 Event.n.u1Valid = 1;
2527 Event.n.u8Vector = X86_XCPT_UD;
2528
2529 Log(("Forced #UD trap at %RGv\n", (RTGCPTR)pCtx->rip));
2530 hmR0SvmInjectEvent(pVCpu, pVMCB, pCtx, &Event);
2531 goto ResumeExecution;
2532 }
2533
2534 /* Emulate in ring 3. */
2535 case SVM_EXIT_MSR:
2536 {
2537 /* When an interrupt is pending, we'll let MSR_K8_LSTAR writes fault in our TPR patch code. */
2538 if ( pVM->hwaccm.s.fTPRPatchingActive
2539 && pCtx->ecx == MSR_K8_LSTAR
2540 && pVMCB->ctrl.u64ExitInfo1 == 1 /* wrmsr */)
2541 {
2542 if ((pCtx->eax & 0xff) != u8LastTPR)
2543 {
2544 Log(("SVM: Faulting MSR_K8_LSTAR write with new TPR value %x\n", pCtx->eax & 0xff));
2545
2546 /* Our patch code uses LSTAR for TPR caching. */
2547 rc2 = PDMApicSetTPR(pVCpu, pCtx->eax & 0xff);
2548 AssertRC(rc2);
2549 }
2550
2551 /* Skip the instruction and continue. */
2552 pCtx->rip += 2; /* wrmsr = [0F 30] */
2553
2554 /* Only resume if successful. */
2555 goto ResumeExecution;
2556 }
2557
2558 /* Note: the intel manual claims there's a REX version of RDMSR that's slightly different, so we play safe by completely disassembling the instruction. */
2559 STAM_COUNTER_INC((pVMCB->ctrl.u64ExitInfo1 == 0) ? &pVCpu->hwaccm.s.StatExitRdmsr : &pVCpu->hwaccm.s.StatExitWrmsr);
2560 Log(("SVM: %s\n", (pVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr"));
2561 rc = EMInterpretInstruction(pVCpu, CPUMCTX2CORE(pCtx), 0);
2562 if (rc == VINF_SUCCESS)
2563 {
2564 /* EIP has been updated already. */
2565
2566 /* Only resume if successful. */
2567 goto ResumeExecution;
2568 }
2569 AssertMsg(rc == VERR_EM_INTERPRETER, ("EMU: %s failed with %Rrc\n", (pVMCB->ctrl.u64ExitInfo1 == 0) ? "rdmsr" : "wrmsr", VBOXSTRICTRC_VAL(rc)));
2570 break;
2571 }
2572
2573 case SVM_EXIT_TASK_SWITCH: /* too complicated to emulate, so fall back to the recompiler*/
2574 Log(("SVM_EXIT_TASK_SWITCH: exit2=%RX64\n", pVMCB->ctrl.u64ExitInfo2));
2575 if ( !(pVMCB->ctrl.u64ExitInfo2 & (SVM_EXIT2_TASK_SWITCH_IRET | SVM_EXIT2_TASK_SWITCH_JMP))
2576 && pVCpu->hwaccm.s.Event.fPending)
2577 {
2578 SVM_EVENT Event;
2579
2580 Event.au64[0] = pVCpu->hwaccm.s.Event.intInfo;
2581
2582 /* Caused by an injected interrupt. */
2583 pVCpu->hwaccm.s.Event.fPending = false;
2584
2585 switch (Event.n.u3Type)
2586 {
2587 case SVM_EVENT_EXTERNAL_IRQ:
2588 case SVM_EVENT_NMI:
2589 Log(("SVM_EXIT_TASK_SWITCH: reassert trap %d\n", Event.n.u8Vector));
2590 Assert(!Event.n.u1ErrorCodeValid);
2591 rc2 = TRPMAssertTrap(pVCpu, Event.n.u8Vector, TRPM_HARDWARE_INT);
2592 AssertRC(rc2);
2593 break;
2594
2595 default:
2596 /* Exceptions and software interrupts can just be restarted. */
2597 break;
2598 }
2599 }
2600 rc = VERR_EM_INTERPRETER;
2601 break;
2602
2603 case SVM_EXIT_PAUSE:
2604 case SVM_EXIT_MWAIT_ARMED:
2605 rc = VERR_EM_INTERPRETER;
2606 break;
2607
2608 case SVM_EXIT_SHUTDOWN:
2609 rc = VINF_EM_RESET; /* Triple fault equals a reset. */
2610 break;
2611
2612 case SVM_EXIT_IDTR_READ:
2613 case SVM_EXIT_GDTR_READ:
2614 case SVM_EXIT_LDTR_READ:
2615 case SVM_EXIT_TR_READ:
2616 case SVM_EXIT_IDTR_WRITE:
2617 case SVM_EXIT_GDTR_WRITE:
2618 case SVM_EXIT_LDTR_WRITE:
2619 case SVM_EXIT_TR_WRITE:
2620 case SVM_EXIT_CR0_SEL_WRITE:
2621 default:
2622 /* Unexpected exit codes. */
2623 rc = VERR_HMSVM_UNEXPECTED_EXIT;
2624 AssertMsgFailed(("Unexpected exit code %x\n", exitCode)); /* Can't happen. */
2625 break;
2626 }
2627
2628end:
2629
2630 /* We are now going back to ring-3, so clear the action flag. */
2631 VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3);
2632
2633 /* Signal changes for the recompiler. */
2634 CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS);
2635
2636 /* If we executed vmrun and an external irq was pending, then we don't have to do a full sync the next time. */
2637 if (exitCode == SVM_EXIT_INTR)
2638 {
2639 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatPendingHostIrq);
2640 /* On the next entry we'll only sync the host context. */
2641 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_HOST_CONTEXT;
2642 }
2643 else
2644 {
2645 /* On the next entry we'll sync everything. */
2646 /** @todo we can do better than this */
2647 /* Not in the VINF_PGM_CHANGE_MODE though! */
2648 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_ALL;
2649 }
2650
2651 /* translate into a less severe return code */
2652 if (rc == VERR_EM_INTERPRETER)
2653 rc = VINF_EM_RAW_EMULATE_INSTR;
2654
2655 /* Just set the correct state here instead of trying to catch every goto above. */
2656 VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC);
2657
2658#ifdef VBOX_WITH_VMMR0_DISABLE_PREEMPTION
2659 /* Restore interrupts if we exited after disabling them. */
2660 if (uOldEFlags != ~(RTCCUINTREG)0)
2661 ASMSetFlags(uOldEFlags);
2662#endif
2663
2664 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit2, x);
2665 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatExit1, x);
2666 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatEntry, x);
2667 return VBOXSTRICTRC_TODO(rc);
2668}
2669
2670/**
2671 * Emulate simple mov tpr instruction
2672 *
2673 * @returns VBox status code.
2674 * @param pVM The VM to operate on.
2675 * @param pVCpu The VM CPU to operate on.
2676 * @param pCtx CPU context
2677 */
2678static int hmR0SvmEmulateTprVMMCall(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2679{
2680 int rc;
2681
2682 LogFlow(("Emulated VMMCall TPR access replacement at %RGv\n", pCtx->rip));
2683
2684 while (true)
2685 {
2686 bool fPending;
2687 uint8_t u8Tpr;
2688
2689 PHWACCMTPRPATCH pPatch = (PHWACCMTPRPATCH)RTAvloU32Get(&pVM->hwaccm.s.PatchTree, (AVLOU32KEY)pCtx->eip);
2690 if (!pPatch)
2691 break;
2692
2693 switch(pPatch->enmType)
2694 {
2695 case HWACCMTPRINSTR_READ:
2696 /* TPR caching in CR8 */
2697 rc = PDMApicGetTPR(pVCpu, &u8Tpr, &fPending);
2698 AssertRC(rc);
2699
2700 rc = DISWriteReg32(CPUMCTX2CORE(pCtx), pPatch->uDstOperand, u8Tpr);
2701 AssertRC(rc);
2702
2703 LogFlow(("Emulated read successfully\n"));
2704 pCtx->rip += pPatch->cbOp;
2705 break;
2706
2707 case HWACCMTPRINSTR_WRITE_REG:
2708 case HWACCMTPRINSTR_WRITE_IMM:
2709 /* Fetch the new TPR value */
2710 if (pPatch->enmType == HWACCMTPRINSTR_WRITE_REG)
2711 {
2712 uint32_t val;
2713
2714 rc = DISFetchReg32(CPUMCTX2CORE(pCtx), pPatch->uSrcOperand, &val);
2715 AssertRC(rc);
2716 u8Tpr = val;
2717 }
2718 else
2719 u8Tpr = (uint8_t)pPatch->uSrcOperand;
2720
2721 rc = PDMApicSetTPR(pVCpu, u8Tpr);
2722 AssertRC(rc);
2723 LogFlow(("Emulated write successfully\n"));
2724 pCtx->rip += pPatch->cbOp;
2725 break;
2726 default:
2727 AssertMsgFailedReturn(("Unexpected type %d\n", pPatch->enmType), VERR_HMSVM_UNEXPECTED_PATCH_TYPE);
2728 }
2729 }
2730 return VINF_SUCCESS;
2731}
2732
2733
2734/**
2735 * Enters the AMD-V session
2736 *
2737 * @returns VBox status code.
2738 * @param pVM The VM to operate on.
2739 * @param pVCpu The VM CPU to operate on.
2740 * @param pCpu CPU info struct
2741 */
2742VMMR0DECL(int) SVMR0Enter(PVM pVM, PVMCPU pVCpu, PHMGLOBLCPUINFO pCpu)
2743{
2744 Assert(pVM->hwaccm.s.svm.fSupported);
2745
2746 LogFlow(("SVMR0Enter cpu%d last=%d asid=%d\n", pCpu->idCpu, pVCpu->hwaccm.s.idLastCpu, pVCpu->hwaccm.s.uCurrentASID));
2747 pVCpu->hwaccm.s.fResumeVM = false;
2748
2749 /* Force to reload LDTR, so we'll execute VMLoad to load additional guest state. */
2750 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_LDTR;
2751
2752 return VINF_SUCCESS;
2753}
2754
2755
2756/**
2757 * Leaves the AMD-V session
2758 *
2759 * @returns VBox status code.
2760 * @param pVM The VM to operate on.
2761 * @param pVCpu The VM CPU to operate on.
2762 * @param pCtx CPU context
2763 */
2764VMMR0DECL(int) SVMR0Leave(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx)
2765{
2766 SVM_VMCB *pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB;
2767
2768 Assert(pVM->hwaccm.s.svm.fSupported);
2769
2770#ifdef DEBUG
2771 if (CPUMIsHyperDebugStateActive(pVCpu))
2772 {
2773 CPUMR0LoadHostDebugState(pVM, pVCpu);
2774 }
2775 else
2776#endif
2777 /* Save the guest debug state if necessary. */
2778 if (CPUMIsGuestDebugStateActive(pVCpu))
2779 {
2780 CPUMR0SaveGuestDebugState(pVM, pVCpu, pCtx, false /* skip DR6 */);
2781
2782 /* Intercept all DRx reads and writes again. Changed later on. */
2783 pVMCB->ctrl.u16InterceptRdDRx = 0xFFFF;
2784 pVMCB->ctrl.u16InterceptWrDRx = 0xFFFF;
2785
2786 /* Resync the debug registers the next time. */
2787 pVCpu->hwaccm.s.fContextUseFlags |= HWACCM_CHANGED_GUEST_DEBUG;
2788 }
2789 else
2790 Assert(pVMCB->ctrl.u16InterceptRdDRx == 0xFFFF && pVMCB->ctrl.u16InterceptWrDRx == 0xFFFF);
2791
2792 return VINF_SUCCESS;
2793}
2794
2795
2796static int hmR0svmInterpretInvlPg(PVMCPU pVCpu, PDISCPUSTATE pCpu, PCPUMCTXCORE pRegFrame, uint32_t uASID)
2797{
2798 OP_PARAMVAL param1;
2799 RTGCPTR addr;
2800 NOREF(uASID);
2801
2802 int rc = DISQueryParamVal(pRegFrame, pCpu, &pCpu->param1, &param1, PARAM_SOURCE);
2803 if(RT_FAILURE(rc))
2804 return VERR_EM_INTERPRETER;
2805
2806 switch(param1.type)
2807 {
2808 case PARMTYPE_IMMEDIATE:
2809 case PARMTYPE_ADDRESS:
2810 if(!(param1.flags & (PARAM_VAL32|PARAM_VAL64)))
2811 return VERR_EM_INTERPRETER;
2812 addr = param1.val.val64;
2813 break;
2814
2815 default:
2816 return VERR_EM_INTERPRETER;
2817 }
2818
2819 /** @todo is addr always a flat linear address or ds based
2820 * (in absence of segment override prefixes)????
2821 */
2822 rc = PGMInvalidatePage(pVCpu, addr);
2823 if (RT_SUCCESS(rc))
2824 return VINF_SUCCESS;
2825
2826 AssertRC(rc);
2827 return rc;
2828}
2829
2830/**
2831 * Interprets INVLPG
2832 *
2833 * @returns VBox status code.
2834 * @retval VINF_* Scheduling instructions.
2835 * @retval VERR_EM_INTERPRETER Something we can't cope with.
2836 * @retval VERR_* Fatal errors.
2837 *
2838 * @param pVM The VM handle.
2839 * @param pRegFrame The register frame.
2840 * @param ASID Tagged TLB id for the guest
2841 *
2842 * Updates the EIP if an instruction was executed successfully.
2843 */
2844static int hmR0SvmInterpretInvpg(PVM pVM, PVMCPU pVCpu, PCPUMCTXCORE pRegFrame, uint32_t uASID)
2845{
2846 /*
2847 * Only allow 32 & 64 bits code.
2848 */
2849 DISCPUMODE enmMode = SELMGetCpuModeFromSelector(pVCpu, pRegFrame->eflags, pRegFrame->cs, &pRegFrame->csHid);
2850 if (enmMode != CPUMODE_16BIT)
2851 {
2852 RTGCPTR pbCode;
2853 int rc = SELMValidateAndConvertCSAddr(pVCpu, pRegFrame->eflags, pRegFrame->ss, pRegFrame->cs,
2854 &pRegFrame->csHid, (RTGCPTR)pRegFrame->rip, &pbCode);
2855 if (RT_SUCCESS(rc))
2856 {
2857 uint32_t cbOp;
2858 PDISCPUSTATE pDis = &pVCpu->hwaccm.s.DisState;
2859
2860 pDis->mode = enmMode;
2861 rc = EMInterpretDisasOneEx(pVM, pVCpu, pbCode, pRegFrame, pDis, &cbOp);
2862 Assert(RT_FAILURE(rc) || pDis->pCurInstr->opcode == OP_INVLPG);
2863 if (RT_SUCCESS(rc) && pDis->pCurInstr->opcode == OP_INVLPG)
2864 {
2865 Assert(cbOp == pDis->opsize);
2866 rc = hmR0svmInterpretInvlPg(pVCpu, pDis, pRegFrame, uASID);
2867 if (RT_SUCCESS(rc))
2868 pRegFrame->rip += cbOp; /* Move on to the next instruction. */
2869
2870 return rc;
2871 }
2872 }
2873 }
2874 return VERR_EM_INTERPRETER;
2875}
2876
2877
2878/**
2879 * Invalidates a guest page
2880 *
2881 * @returns VBox status code.
2882 * @param pVM The VM to operate on.
2883 * @param pVCpu The VM CPU to operate on.
2884 * @param GCVirt Page to invalidate
2885 */
2886VMMR0DECL(int) SVMR0InvalidatePage(PVM pVM, PVMCPU pVCpu, RTGCPTR GCVirt)
2887{
2888 bool fFlushPending = pVM->hwaccm.s.svm.fAlwaysFlushTLB | VMCPU_FF_ISSET(pVCpu, VMCPU_FF_TLB_FLUSH);
2889
2890 /* Skip it if a TLB flush is already pending. */
2891 if (!fFlushPending)
2892 {
2893 SVM_VMCB *pVMCB;
2894
2895 Log2(("SVMR0InvalidatePage %RGv\n", GCVirt));
2896 AssertReturn(pVM, VERR_INVALID_PARAMETER);
2897 Assert(pVM->hwaccm.s.svm.fSupported);
2898
2899 pVMCB = (SVM_VMCB *)pVCpu->hwaccm.s.svm.pVMCB;
2900 AssertMsgReturn(pVMCB, ("Invalid pVMCB\n"), VERR_HMSVM_INVALID_PVMCB);
2901
2902#if HC_ARCH_BITS == 32
2903 /* If we get a flush in 64 bits guest mode, then force a full TLB flush. Invlpga takes only 32 bits addresses. */
2904 if (CPUMIsGuestInLongMode(pVCpu))
2905 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2906 else
2907#endif
2908 SVMR0InvlpgA(GCVirt, pVMCB->ctrl.TLBCtrl.n.u32ASID);
2909 }
2910 return VINF_SUCCESS;
2911}
2912
2913
2914#if 0 /* obsolete, but left here for clarification. */
2915/**
2916 * Invalidates a guest page by physical address
2917 *
2918 * @returns VBox status code.
2919 * @param pVM The VM to operate on.
2920 * @param pVCpu The VM CPU to operate on.
2921 * @param GCPhys Page to invalidate
2922 */
2923VMMR0DECL(int) SVMR0InvalidatePhysPage(PVM pVM, PVMCPU pVCpu, RTGCPHYS GCPhys)
2924{
2925 Assert(pVM->hwaccm.s.fNestedPaging);
2926 /* invlpga only invalidates TLB entries for guest virtual addresses; we have no choice but to force a TLB flush here. */
2927 VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH);
2928 STAM_COUNTER_INC(&pVCpu->hwaccm.s.StatFlushTLBInvlpga);
2929 return VINF_SUCCESS;
2930}
2931#endif
2932
2933#if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL)
2934/**
2935 * Prepares for and executes VMRUN (64 bits guests from a 32 bits hosts).
2936 *
2937 * @returns VBox status code.
2938 * @param pVMCBHostPhys Physical address of host VMCB.
2939 * @param pVMCBPhys Physical address of the VMCB.
2940 * @param pCtx Guest context.
2941 * @param pVM The VM to operate on.
2942 * @param pVCpu The VMCPU to operate on.
2943 */
2944DECLASM(int) SVMR0VMSwitcherRun64(RTHCPHYS pVMCBHostPhys, RTHCPHYS pVMCBPhys, PCPUMCTX pCtx, PVM pVM, PVMCPU pVCpu)
2945{
2946 uint32_t aParam[4];
2947
2948 aParam[0] = (uint32_t)(pVMCBHostPhys); /* Param 1: pVMCBHostPhys - Lo. */
2949 aParam[1] = (uint32_t)(pVMCBHostPhys >> 32); /* Param 1: pVMCBHostPhys - Hi. */
2950 aParam[2] = (uint32_t)(pVMCBPhys); /* Param 2: pVMCBPhys - Lo. */
2951 aParam[3] = (uint32_t)(pVMCBPhys >> 32); /* Param 2: pVMCBPhys - Hi. */
2952
2953 return SVMR0Execute64BitsHandler(pVM, pVCpu, pCtx, pVM->hwaccm.s.pfnSVMGCVMRun64, 4, &aParam[0]);
2954}
2955
2956/**
2957 * Executes the specified handler in 64 mode
2958 *
2959 * @returns VBox status code.
2960 * @param pVM The VM to operate on.
2961 * @param pVCpu The VMCPU to operate on.
2962 * @param pCtx Guest context
2963 * @param pfnHandler RC handler
2964 * @param cbParam Number of parameters
2965 * @param paParam Array of 32 bits parameters
2966 */
2967VMMR0DECL(int) SVMR0Execute64BitsHandler(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx, RTRCPTR pfnHandler, uint32_t cbParam, uint32_t *paParam)
2968{
2969 int rc;
2970 RTHCUINTREG uOldEFlags;
2971
2972 Assert(pfnHandler);
2973
2974 /* Disable interrupts. */
2975 uOldEFlags = ASMIntDisableFlags();
2976
2977 CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu));
2978 CPUMSetHyperEIP(pVCpu, pfnHandler);
2979 for (int i=(int)cbParam-1;i>=0;i--)
2980 CPUMPushHyper(pVCpu, paParam[i]);
2981
2982 STAM_PROFILE_ADV_START(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
2983 /* Call switcher. */
2984 rc = pVM->hwaccm.s.pfnHost32ToGuest64R0(pVM, RT_OFFSETOF(VM, aCpus[pVCpu->idCpu].cpum) - RT_OFFSETOF(VM, cpum));
2985 STAM_PROFILE_ADV_STOP(&pVCpu->hwaccm.s.StatWorldSwitch3264, z);
2986
2987 ASMSetFlags(uOldEFlags);
2988 return rc;
2989}
2990
2991#endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */
2992
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette