/* $Id: HMVMXR0.cpp 76751 2019-01-10 08:03:57Z vboxsync $ */ /** @file * HM VMX (Intel VT-x) - Host Context Ring-0. */ /* * Copyright (C) 2012-2019 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; * you can redistribute it and/or modify it under the terms of the GNU * General Public License (GPL) as published by the Free Software * Foundation, in version 2 as it comes in the "COPYING" file of the * VirtualBox OSE distribution. VirtualBox OSE is distributed in the * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. */ /********************************************************************************************************************************* * Header Files * *********************************************************************************************************************************/ #define LOG_GROUP LOG_GROUP_HM #define VMCPU_INCL_CPUM_GST_CTX #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VBOX_WITH_REM # include #endif #include "HMInternal.h" #include #include #include "HMVMXR0.h" #include "dtrace/VBoxVMM.h" #ifdef DEBUG_ramshankar # define HMVMX_ALWAYS_SAVE_GUEST_RFLAGS # define HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE # define HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE # define HMVMX_ALWAYS_CHECK_GUEST_STATE # define HMVMX_ALWAYS_TRAP_ALL_XCPTS # define HMVMX_ALWAYS_TRAP_PF # define HMVMX_ALWAYS_FLUSH_TLB # define HMVMX_ALWAYS_SWAP_EFER #endif /********************************************************************************************************************************* * Defined Constants And Macros * *********************************************************************************************************************************/ /** Use the function table. */ #define HMVMX_USE_FUNCTION_TABLE /** Determine which tagged-TLB flush handler to use. */ #define HMVMX_FLUSH_TAGGED_TLB_EPT_VPID 0 #define HMVMX_FLUSH_TAGGED_TLB_EPT 1 #define HMVMX_FLUSH_TAGGED_TLB_VPID 2 #define HMVMX_FLUSH_TAGGED_TLB_NONE 3 /** @name HMVMX_READ_XXX * Flags to skip redundant reads of some common VMCS fields that are not part of * the guest-CPU or VCPU state but are needed while handling VM-exits. */ #define HMVMX_READ_IDT_VECTORING_INFO RT_BIT_32(0) #define HMVMX_READ_IDT_VECTORING_ERROR_CODE RT_BIT_32(1) #define HMVMX_READ_EXIT_QUALIFICATION RT_BIT_32(2) #define HMVMX_READ_EXIT_INSTR_LEN RT_BIT_32(3) #define HMVMX_READ_EXIT_INTERRUPTION_INFO RT_BIT_32(4) #define HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE RT_BIT_32(5) #define HMVMX_READ_EXIT_INSTR_INFO RT_BIT_32(6) #define HMVMX_READ_GUEST_LINEAR_ADDR RT_BIT_32(7) /** @} */ /** * States of the VMCS. * * This does not reflect all possible VMCS states but currently only those * needed for maintaining the VMCS consistently even when thread-context hooks * are used. Maybe later this can be extended (i.e. Nested Virtualization). */ #define HMVMX_VMCS_STATE_CLEAR RT_BIT(0) #define HMVMX_VMCS_STATE_ACTIVE RT_BIT(1) #define HMVMX_VMCS_STATE_LAUNCHED RT_BIT(2) /** * Subset of the guest-CPU state that is kept by VMX R0 code while executing the * guest using hardware-assisted VMX. * * This excludes state like GPRs (other than RSP) which are always are * swapped and restored across the world-switch and also registers like EFER, * MSR which cannot be modified by the guest without causing a VM-exit. */ #define HMVMX_CPUMCTX_EXTRN_ALL ( CPUMCTX_EXTRN_RIP \ | CPUMCTX_EXTRN_RFLAGS \ | CPUMCTX_EXTRN_RSP \ | CPUMCTX_EXTRN_SREG_MASK \ | CPUMCTX_EXTRN_TABLE_MASK \ | CPUMCTX_EXTRN_KERNEL_GS_BASE \ | CPUMCTX_EXTRN_SYSCALL_MSRS \ | CPUMCTX_EXTRN_SYSENTER_MSRS \ | CPUMCTX_EXTRN_TSC_AUX \ | CPUMCTX_EXTRN_OTHER_MSRS \ | CPUMCTX_EXTRN_CR0 \ | CPUMCTX_EXTRN_CR3 \ | CPUMCTX_EXTRN_CR4 \ | CPUMCTX_EXTRN_DR7 \ | CPUMCTX_EXTRN_HM_VMX_MASK) /** * Exception bitmap mask for real-mode guests (real-on-v86). * * We need to intercept all exceptions manually except: * - \#AC and \#DB are always intercepted to prevent the CPU from deadlocking * due to bugs in Intel CPUs. * - \#PF need not be intercepted even in real-mode if we have Nested Paging * support. */ #define HMVMX_REAL_MODE_XCPT_MASK ( RT_BIT(X86_XCPT_DE) /* always: | RT_BIT(X86_XCPT_DB) */ | RT_BIT(X86_XCPT_NMI) \ | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_OF) | RT_BIT(X86_XCPT_BR) \ | RT_BIT(X86_XCPT_UD) | RT_BIT(X86_XCPT_NM) | RT_BIT(X86_XCPT_DF) \ | RT_BIT(X86_XCPT_CO_SEG_OVERRUN) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_NP) \ | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) /* RT_BIT(X86_XCPT_PF) */ \ | RT_BIT(X86_XCPT_MF) /* always: | RT_BIT(X86_XCPT_AC) */ | RT_BIT(X86_XCPT_MC) \ | RT_BIT(X86_XCPT_XF)) /** Maximum VM-instruction error number. */ #define HMVMX_INSTR_ERROR_MAX 28 /** Profiling macro. */ #ifdef HM_PROFILE_EXIT_DISPATCH # define HMVMX_START_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitDispatch, ed) # define HMVMX_STOP_EXIT_DISPATCH_PROF() STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitDispatch, ed) #else # define HMVMX_START_EXIT_DISPATCH_PROF() do { } while (0) # define HMVMX_STOP_EXIT_DISPATCH_PROF() do { } while (0) #endif /** Assert that preemption is disabled or covered by thread-context hooks. */ #define HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu) Assert( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \ || !RTThreadPreemptIsEnabled(NIL_RTTHREAD)) /** Assert that we haven't migrated CPUs when thread-context hooks are not * used. */ #define HMVMX_ASSERT_CPU_SAFE(a_pVCpu) AssertMsg( VMMR0ThreadCtxHookIsEnabled((a_pVCpu)) \ || (a_pVCpu)->hm.s.idEnteredCpu == RTMpCpuId(), \ ("Illegal migration! Entered on CPU %u Current %u\n", \ (a_pVCpu)->hm.s.idEnteredCpu, RTMpCpuId())) /** Asserts that the given CPUMCTX_EXTRN_XXX bits are present in the guest-CPU * context. */ #define HMVMX_CPUMCTX_ASSERT(a_pVCpu, a_fExtrnMbz) AssertMsg(!((a_pVCpu)->cpum.GstCtx.fExtrn & (a_fExtrnMbz)), \ ("fExtrn=%#RX64 fExtrnMbz=%#RX64\n", \ (a_pVCpu)->cpum.GstCtx.fExtrn, (a_fExtrnMbz))) /** Macro for importing guest state from the VMCS back into CPUMCTX (intended to be * used only from VM-exit handlers). */ #define HMVMX_CPUMCTX_IMPORT_STATE(a_pVCpu, a_fWhat) (hmR0VmxImportGuestState((a_pVCpu), (a_fWhat))) /** Helper macro for VM-exit handlers called unexpectedly. */ #define HMVMX_UNEXPECTED_EXIT_RET(a_pVCpu, a_pVmxTransient) \ do { \ (a_pVCpu)->hm.s.u32HMError = (a_pVmxTransient)->uExitReason; \ return VERR_VMX_UNEXPECTED_EXIT; \ } while (0) /** Macro for importing segment registers to the VMCS from the guest-CPU context. */ #ifdef VMX_USE_CACHED_VMCS_ACCESSES # define HMVMX_IMPORT_SREG(Sel, a_pCtxSelReg) \ hmR0VmxImportGuestSegmentReg(pVCpu, VMX_VMCS16_GUEST_##Sel##_SEL, VMX_VMCS32_GUEST_##Sel##_LIMIT, \ VMX_VMCS_GUEST_##Sel##_BASE_CACHE_IDX, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, (a_pCtxSelReg)) #else # define HMVMX_IMPORT_SREG(Sel, a_pCtxSelReg) \ hmR0VmxImportGuestSegmentReg(pVCpu, VMX_VMCS16_GUEST_##Sel##_SEL, VMX_VMCS32_GUEST_##Sel##_LIMIT, \ VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, (a_pCtxSelReg)) #endif /** Macro for exporting segment registers to the VMCS from the guest-CPU context. */ #define HMVMX_EXPORT_SREG(Sel, a_pCtxSelReg) \ hmR0VmxExportGuestSegmentReg(pVCpu, VMX_VMCS16_GUEST_##Sel##_SEL, VMX_VMCS32_GUEST_##Sel##_LIMIT, \ VMX_VMCS_GUEST_##Sel##_BASE, VMX_VMCS32_GUEST_##Sel##_ACCESS_RIGHTS, (a_pCtxSelReg)) #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** Macro that does the necessary privilege checks and intercepted VM-exits for * guests that attempted to execute a VMX instruction. */ # define HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(a_pVCpu, a_uExitReason) \ do \ { \ VBOXSTRICTRC rcStrictTmp = hmR0VmxCheckExitDueToVmxInstr((a_pVCpu), (a_uExitReason)); \ if (rcStrictTmp == VINF_SUCCESS) \ { /* likely */ } \ else if (rcStrictTmp == VINF_HM_PENDING_XCPT) \ { \ Assert((a_pVCpu)->hm.s.Event.fPending); \ Log4Func(("Privilege checks failed -> %#x\n", VMX_ENTRY_INT_INFO_VECTOR((a_pVCpu)->hm.s.Event.u64IntInfo))); \ return VINF_SUCCESS; \ } \ else \ { \ int rcTmp = VBOXSTRICTRC_VAL(rcStrictTmp); \ AssertMsgFailedReturn(("Unexpected failure. rc=%Rrc", rcTmp), rcTmp); \ } \ } while (0) /** Macro that decodes a memory operand for an instruction VM-exit. */ # define HMVMX_DECODE_MEM_OPERAND(a_pVCpu, a_uExitInstrInfo, a_uExitQual, a_enmMemAccess, a_pGCPtrEffAddr) \ do \ { \ VBOXSTRICTRC rcStrictTmp = hmR0VmxDecodeMemOperand((a_pVCpu), (a_uExitInstrInfo), (a_uExitQual), (a_enmMemAccess), \ (a_pGCPtrEffAddr)); \ if (rcStrictTmp == VINF_SUCCESS) \ { /* likely */ } \ else if (rcStrictTmp == VINF_HM_PENDING_XCPT) \ { \ uint8_t const uXcptTmp = VMX_ENTRY_INT_INFO_VECTOR((a_pVCpu)->hm.s.Event.u64IntInfo); \ Log4Func(("Memory operand decoding failed, raising xcpt %#x\n", uXcptTmp)); \ NOREF(uXcptTmp); \ return VINF_SUCCESS; \ } \ else \ { \ Log4Func(("hmR0VmxCheckExitDueToVmxInstr failed. rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrictTmp))); \ return rcStrictTmp; \ } \ } while (0) #endif /* VBOX_WITH_NESTED_HWVIRT_VMX */ /********************************************************************************************************************************* * Structures and Typedefs * *********************************************************************************************************************************/ /** * VMX transient state. * * A state structure for holding miscellaneous information across * VMX non-root operation and restored after the transition. */ typedef struct VMXTRANSIENT { /** The host's rflags/eflags. */ RTCCUINTREG fEFlags; #if HC_ARCH_BITS == 32 uint32_t u32Alignment0; #endif /** The guest's TPR value used for TPR shadowing. */ uint8_t u8GuestTpr; /** Alignment. */ uint8_t abAlignment0[7]; /** The basic VM-exit reason. */ uint16_t uExitReason; /** Alignment. */ uint16_t u16Alignment0; /** The VM-exit interruption error code. */ uint32_t uExitIntErrorCode; /** The VM-exit exit code qualification. */ uint64_t uExitQual; /** The Guest-linear address. */ uint64_t uGuestLinearAddr; /** The VM-exit interruption-information field. */ uint32_t uExitIntInfo; /** The VM-exit instruction-length field. */ uint32_t cbInstr; /** The VM-exit instruction-information field. */ VMXEXITINSTRINFO ExitInstrInfo; /** Whether the VM-entry failed or not. */ bool fVMEntryFailed; /** Alignment. */ uint8_t abAlignment1[3]; /** The VM-entry interruption-information field. */ uint32_t uEntryIntInfo; /** The VM-entry exception error code field. */ uint32_t uEntryXcptErrorCode; /** The VM-entry instruction length field. */ uint32_t cbEntryInstr; /** IDT-vectoring information field. */ uint32_t uIdtVectoringInfo; /** IDT-vectoring error code. */ uint32_t uIdtVectoringErrorCode; /** Mask of currently read VMCS fields; HMVMX_READ_XXX. */ uint32_t fVmcsFieldsRead; /** Whether the guest debug state was active at the time of VM-exit. */ bool fWasGuestDebugStateActive; /** Whether the hyper debug state was active at the time of VM-exit. */ bool fWasHyperDebugStateActive; /** Whether TSC-offsetting should be setup before VM-entry. */ bool fUpdateTscOffsettingAndPreemptTimer; /** Whether the VM-exit was caused by a page-fault during delivery of a * contributory exception or a page-fault. */ bool fVectoringDoublePF; /** Whether the VM-exit was caused by a page-fault during delivery of an * external interrupt or NMI. */ bool fVectoringPF; } VMXTRANSIENT; AssertCompileMemberAlignment(VMXTRANSIENT, uExitReason, sizeof(uint64_t)); AssertCompileMemberAlignment(VMXTRANSIENT, uExitIntInfo, sizeof(uint64_t)); AssertCompileMemberAlignment(VMXTRANSIENT, uEntryIntInfo, sizeof(uint64_t)); AssertCompileMemberAlignment(VMXTRANSIENT, fWasGuestDebugStateActive, sizeof(uint64_t)); AssertCompileMemberSize(VMXTRANSIENT, ExitInstrInfo, sizeof(uint32_t)); /** Pointer to VMX transient state. */ typedef VMXTRANSIENT *PVMXTRANSIENT; /** * Memory operand read or write access. */ typedef enum VMXMEMACCESS { VMXMEMACCESS_READ = 0, VMXMEMACCESS_WRITE = 1 } VMXMEMACCESS; /** * VMX VM-exit handler. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient Pointer to the VMX-transient structure. */ #ifndef HMVMX_USE_FUNCTION_TABLE typedef VBOXSTRICTRC FNVMXEXITHANDLER(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); #else typedef DECLCALLBACK(VBOXSTRICTRC) FNVMXEXITHANDLER(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); /** Pointer to VM-exit handler. */ typedef FNVMXEXITHANDLER *PFNVMXEXITHANDLER; #endif /** * VMX VM-exit handler, non-strict status code. * * This is generally the same as FNVMXEXITHANDLER, the NSRC bit is just FYI. * * @returns VBox status code, no informational status code returned. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient Pointer to the VMX-transient structure. * * @remarks This is not used on anything returning VERR_EM_INTERPRETER as the * use of that status code will be replaced with VINF_EM_SOMETHING * later when switching over to IEM. */ #ifndef HMVMX_USE_FUNCTION_TABLE typedef int FNVMXEXITHANDLERNSRC(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); #else typedef FNVMXEXITHANDLER FNVMXEXITHANDLERNSRC; #endif /********************************************************************************************************************************* * Internal Functions * *********************************************************************************************************************************/ static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMXTLBFLUSHEPT enmTlbFlush); static void hmR0VmxFlushVpid(PVMCPU pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr); static void hmR0VmxClearIntNmiWindowsVmcs(PVMCPU pVCpu); static int hmR0VmxImportGuestState(PVMCPU pVCpu, uint64_t fWhat); static VBOXSTRICTRC hmR0VmxInjectEventVmcs(PVMCPU pVCpu, uint64_t u64IntInfo, uint32_t cbInstr, uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, bool fStepping, uint32_t *pfIntrState); #if HC_ARCH_BITS == 32 static int hmR0VmxInitVmcsReadCache(PVMCPU pVCpu); #endif #ifndef HMVMX_USE_FUNCTION_TABLE DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExit(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t rcReason); # define HMVMX_EXIT_DECL DECLINLINE(VBOXSTRICTRC) # define HMVMX_EXIT_NSRC_DECL DECLINLINE(int) #else # define HMVMX_EXIT_DECL static DECLCALLBACK(VBOXSTRICTRC) # define HMVMX_EXIT_NSRC_DECL HMVMX_EXIT_DECL #endif /** @name VM-exit handlers. * @{ */ static FNVMXEXITHANDLER hmR0VmxExitXcptOrNmi; static FNVMXEXITHANDLER hmR0VmxExitExtInt; static FNVMXEXITHANDLER hmR0VmxExitTripleFault; static FNVMXEXITHANDLERNSRC hmR0VmxExitInitSignal; static FNVMXEXITHANDLERNSRC hmR0VmxExitSipi; static FNVMXEXITHANDLERNSRC hmR0VmxExitIoSmi; static FNVMXEXITHANDLERNSRC hmR0VmxExitSmi; static FNVMXEXITHANDLERNSRC hmR0VmxExitIntWindow; static FNVMXEXITHANDLERNSRC hmR0VmxExitNmiWindow; static FNVMXEXITHANDLER hmR0VmxExitTaskSwitch; static FNVMXEXITHANDLER hmR0VmxExitCpuid; static FNVMXEXITHANDLER hmR0VmxExitGetsec; static FNVMXEXITHANDLER hmR0VmxExitHlt; static FNVMXEXITHANDLERNSRC hmR0VmxExitInvd; static FNVMXEXITHANDLER hmR0VmxExitInvlpg; static FNVMXEXITHANDLER hmR0VmxExitRdpmc; static FNVMXEXITHANDLER hmR0VmxExitVmcall; #ifdef VBOX_WITH_NESTED_HWVIRT_VMX static FNVMXEXITHANDLER hmR0VmxExitVmclear; static FNVMXEXITHANDLER hmR0VmxExitVmlaunch; static FNVMXEXITHANDLER hmR0VmxExitVmptrld; static FNVMXEXITHANDLER hmR0VmxExitVmptrst; static FNVMXEXITHANDLER hmR0VmxExitVmread; static FNVMXEXITHANDLER hmR0VmxExitVmresume; static FNVMXEXITHANDLER hmR0VmxExitVmwrite; static FNVMXEXITHANDLER hmR0VmxExitVmxoff; static FNVMXEXITHANDLER hmR0VmxExitVmxon; #endif static FNVMXEXITHANDLER hmR0VmxExitRdtsc; static FNVMXEXITHANDLERNSRC hmR0VmxExitRsm; static FNVMXEXITHANDLERNSRC hmR0VmxExitSetPendingXcptUD; static FNVMXEXITHANDLER hmR0VmxExitMovCRx; static FNVMXEXITHANDLER hmR0VmxExitMovDRx; static FNVMXEXITHANDLER hmR0VmxExitIoInstr; static FNVMXEXITHANDLER hmR0VmxExitRdmsr; static FNVMXEXITHANDLER hmR0VmxExitWrmsr; static FNVMXEXITHANDLERNSRC hmR0VmxExitErrInvalidGuestState; static FNVMXEXITHANDLERNSRC hmR0VmxExitErrMsrLoad; static FNVMXEXITHANDLERNSRC hmR0VmxExitErrUndefined; static FNVMXEXITHANDLER hmR0VmxExitMwait; static FNVMXEXITHANDLER hmR0VmxExitMtf; static FNVMXEXITHANDLER hmR0VmxExitMonitor; static FNVMXEXITHANDLER hmR0VmxExitPause; static FNVMXEXITHANDLERNSRC hmR0VmxExitErrMachineCheck; static FNVMXEXITHANDLERNSRC hmR0VmxExitTprBelowThreshold; static FNVMXEXITHANDLER hmR0VmxExitApicAccess; static FNVMXEXITHANDLER hmR0VmxExitXdtrAccess; static FNVMXEXITHANDLER hmR0VmxExitEptViolation; static FNVMXEXITHANDLER hmR0VmxExitEptMisconfig; static FNVMXEXITHANDLER hmR0VmxExitRdtscp; static FNVMXEXITHANDLER hmR0VmxExitPreemptTimer; static FNVMXEXITHANDLERNSRC hmR0VmxExitWbinvd; static FNVMXEXITHANDLER hmR0VmxExitXsetbv; static FNVMXEXITHANDLER hmR0VmxExitRdrand; static FNVMXEXITHANDLER hmR0VmxExitInvpcid; /** @} */ static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); static int hmR0VmxExitXcptAC(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient); static uint32_t hmR0VmxCheckGuestState(PVMCPU pVCpu); /********************************************************************************************************************************* * Global Variables * *********************************************************************************************************************************/ #ifdef HMVMX_USE_FUNCTION_TABLE /** * VMX_EXIT dispatch table. */ static const PFNVMXEXITHANDLER g_apfnVMExitHandlers[VMX_EXIT_MAX + 1] = { /* 00 VMX_EXIT_XCPT_OR_NMI */ hmR0VmxExitXcptOrNmi, /* 01 VMX_EXIT_EXT_INT */ hmR0VmxExitExtInt, /* 02 VMX_EXIT_TRIPLE_FAULT */ hmR0VmxExitTripleFault, /* 03 VMX_EXIT_INIT_SIGNAL */ hmR0VmxExitInitSignal, /* 04 VMX_EXIT_SIPI */ hmR0VmxExitSipi, /* 05 VMX_EXIT_IO_SMI */ hmR0VmxExitIoSmi, /* 06 VMX_EXIT_SMI */ hmR0VmxExitSmi, /* 07 VMX_EXIT_INT_WINDOW */ hmR0VmxExitIntWindow, /* 08 VMX_EXIT_NMI_WINDOW */ hmR0VmxExitNmiWindow, /* 09 VMX_EXIT_TASK_SWITCH */ hmR0VmxExitTaskSwitch, /* 10 VMX_EXIT_CPUID */ hmR0VmxExitCpuid, /* 11 VMX_EXIT_GETSEC */ hmR0VmxExitGetsec, /* 12 VMX_EXIT_HLT */ hmR0VmxExitHlt, /* 13 VMX_EXIT_INVD */ hmR0VmxExitInvd, /* 14 VMX_EXIT_INVLPG */ hmR0VmxExitInvlpg, /* 15 VMX_EXIT_RDPMC */ hmR0VmxExitRdpmc, /* 16 VMX_EXIT_RDTSC */ hmR0VmxExitRdtsc, /* 17 VMX_EXIT_RSM */ hmR0VmxExitRsm, /* 18 VMX_EXIT_VMCALL */ hmR0VmxExitVmcall, #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitVmclear, /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitVmlaunch, /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitVmptrld, /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitVmptrst, /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitVmread, /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitVmresume, /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitVmwrite, /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitVmxoff, /* 27 VMX_EXIT_VMXON */ hmR0VmxExitVmxon, #else /* 19 VMX_EXIT_VMCLEAR */ hmR0VmxExitSetPendingXcptUD, /* 20 VMX_EXIT_VMLAUNCH */ hmR0VmxExitSetPendingXcptUD, /* 21 VMX_EXIT_VMPTRLD */ hmR0VmxExitSetPendingXcptUD, /* 22 VMX_EXIT_VMPTRST */ hmR0VmxExitSetPendingXcptUD, /* 23 VMX_EXIT_VMREAD */ hmR0VmxExitSetPendingXcptUD, /* 24 VMX_EXIT_VMRESUME */ hmR0VmxExitSetPendingXcptUD, /* 25 VMX_EXIT_VMWRITE */ hmR0VmxExitSetPendingXcptUD, /* 26 VMX_EXIT_VMXOFF */ hmR0VmxExitSetPendingXcptUD, /* 27 VMX_EXIT_VMXON */ hmR0VmxExitSetPendingXcptUD, #endif /* 28 VMX_EXIT_MOV_CRX */ hmR0VmxExitMovCRx, /* 29 VMX_EXIT_MOV_DRX */ hmR0VmxExitMovDRx, /* 30 VMX_EXIT_IO_INSTR */ hmR0VmxExitIoInstr, /* 31 VMX_EXIT_RDMSR */ hmR0VmxExitRdmsr, /* 32 VMX_EXIT_WRMSR */ hmR0VmxExitWrmsr, /* 33 VMX_EXIT_ERR_INVALID_GUEST_STATE */ hmR0VmxExitErrInvalidGuestState, /* 34 VMX_EXIT_ERR_MSR_LOAD */ hmR0VmxExitErrMsrLoad, /* 35 UNDEFINED */ hmR0VmxExitErrUndefined, /* 36 VMX_EXIT_MWAIT */ hmR0VmxExitMwait, /* 37 VMX_EXIT_MTF */ hmR0VmxExitMtf, /* 38 UNDEFINED */ hmR0VmxExitErrUndefined, /* 39 VMX_EXIT_MONITOR */ hmR0VmxExitMonitor, /* 40 UNDEFINED */ hmR0VmxExitPause, /* 41 VMX_EXIT_PAUSE */ hmR0VmxExitErrMachineCheck, /* 42 VMX_EXIT_ERR_MACHINE_CHECK */ hmR0VmxExitErrUndefined, /* 43 VMX_EXIT_TPR_BELOW_THRESHOLD */ hmR0VmxExitTprBelowThreshold, /* 44 VMX_EXIT_APIC_ACCESS */ hmR0VmxExitApicAccess, /* 45 UNDEFINED */ hmR0VmxExitErrUndefined, /* 46 VMX_EXIT_GDTR_IDTR_ACCESS */ hmR0VmxExitXdtrAccess, /* 47 VMX_EXIT_LDTR_TR_ACCESS */ hmR0VmxExitXdtrAccess, /* 48 VMX_EXIT_EPT_VIOLATION */ hmR0VmxExitEptViolation, /* 49 VMX_EXIT_EPT_MISCONFIG */ hmR0VmxExitEptMisconfig, /* 50 VMX_EXIT_INVEPT */ hmR0VmxExitSetPendingXcptUD, /* 51 VMX_EXIT_RDTSCP */ hmR0VmxExitRdtscp, /* 52 VMX_EXIT_PREEMPT_TIMER */ hmR0VmxExitPreemptTimer, /* 53 VMX_EXIT_INVVPID */ hmR0VmxExitSetPendingXcptUD, /* 54 VMX_EXIT_WBINVD */ hmR0VmxExitWbinvd, /* 55 VMX_EXIT_XSETBV */ hmR0VmxExitXsetbv, /* 56 VMX_EXIT_APIC_WRITE */ hmR0VmxExitErrUndefined, /* 57 VMX_EXIT_RDRAND */ hmR0VmxExitRdrand, /* 58 VMX_EXIT_INVPCID */ hmR0VmxExitInvpcid, /* 59 VMX_EXIT_VMFUNC */ hmR0VmxExitSetPendingXcptUD, /* 60 VMX_EXIT_ENCLS */ hmR0VmxExitErrUndefined, /* 61 VMX_EXIT_RDSEED */ hmR0VmxExitErrUndefined, /* only spurious exits, so undefined */ /* 62 VMX_EXIT_PML_FULL */ hmR0VmxExitErrUndefined, /* 63 VMX_EXIT_XSAVES */ hmR0VmxExitSetPendingXcptUD, /* 64 VMX_EXIT_XRSTORS */ hmR0VmxExitSetPendingXcptUD, }; #endif /* HMVMX_USE_FUNCTION_TABLE */ #if defined(VBOX_STRICT) && defined(LOG_ENABLED) static const char * const g_apszVmxInstrErrors[HMVMX_INSTR_ERROR_MAX + 1] = { /* 0 */ "(Not Used)", /* 1 */ "VMCALL executed in VMX root operation.", /* 2 */ "VMCLEAR with invalid physical address.", /* 3 */ "VMCLEAR with VMXON pointer.", /* 4 */ "VMLAUNCH with non-clear VMCS.", /* 5 */ "VMRESUME with non-launched VMCS.", /* 6 */ "VMRESUME after VMXOFF", /* 7 */ "VM-entry with invalid control fields.", /* 8 */ "VM-entry with invalid host state fields.", /* 9 */ "VMPTRLD with invalid physical address.", /* 10 */ "VMPTRLD with VMXON pointer.", /* 11 */ "VMPTRLD with incorrect revision identifier.", /* 12 */ "VMREAD/VMWRITE from/to unsupported VMCS component.", /* 13 */ "VMWRITE to read-only VMCS component.", /* 14 */ "(Not Used)", /* 15 */ "VMXON executed in VMX root operation.", /* 16 */ "VM-entry with invalid executive-VMCS pointer.", /* 17 */ "VM-entry with non-launched executing VMCS.", /* 18 */ "VM-entry with executive-VMCS pointer not VMXON pointer.", /* 19 */ "VMCALL with non-clear VMCS.", /* 20 */ "VMCALL with invalid VM-exit control fields.", /* 21 */ "(Not Used)", /* 22 */ "VMCALL with incorrect MSEG revision identifier.", /* 23 */ "VMXOFF under dual monitor treatment of SMIs and SMM.", /* 24 */ "VMCALL with invalid SMM-monitor features.", /* 25 */ "VM-entry with invalid VM-execution control fields in executive VMCS.", /* 26 */ "VM-entry with events blocked by MOV SS.", /* 27 */ "(Not Used)", /* 28 */ "Invalid operand to INVEPT/INVVPID." }; #endif /* VBOX_STRICT */ /** * Updates the VM's last error record. * * If there was a VMX instruction error, reads the error data from the VMCS and * updates VCPU's last error record as well. * * @param pVCpu The cross context virtual CPU structure of the calling EMT. * Can be NULL if @a rc is not VERR_VMX_UNABLE_TO_START_VM or * VERR_VMX_INVALID_VMCS_FIELD. * @param rc The error code. */ static void hmR0VmxUpdateErrorRecord(PVMCPU pVCpu, int rc) { if ( rc == VERR_VMX_INVALID_VMCS_FIELD || rc == VERR_VMX_UNABLE_TO_START_VM) { AssertPtrReturnVoid(pVCpu); VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError); } pVCpu->CTX_SUFF(pVM)->hm.s.rcInit = rc; } /** * Reads the VM-entry interruption-information field from the VMCS into the VMX * transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. * * @remarks No-long-jump zone!!! */ DECLINLINE(int) hmR0VmxReadEntryIntInfoVmcs(PVMXTRANSIENT pVmxTransient) { int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &pVmxTransient->uEntryIntInfo); AssertRCReturn(rc, rc); return VINF_SUCCESS; } #ifdef VBOX_STRICT /** * Reads the VM-entry exception error code field from the VMCS into * the VMX transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. * * @remarks No-long-jump zone!!! */ DECLINLINE(int) hmR0VmxReadEntryXcptErrorCodeVmcs(PVMXTRANSIENT pVmxTransient) { int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &pVmxTransient->uEntryXcptErrorCode); AssertRCReturn(rc, rc); return VINF_SUCCESS; } /** * Reads the VM-entry exception error code field from the VMCS into * the VMX transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. * * @remarks No-long-jump zone!!! */ DECLINLINE(int) hmR0VmxReadEntryInstrLenVmcs(PVMXTRANSIENT pVmxTransient) { int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &pVmxTransient->cbEntryInstr); AssertRCReturn(rc, rc); return VINF_SUCCESS; } #endif /* VBOX_STRICT */ /** * Reads the VM-exit interruption-information field from the VMCS into the VMX * transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. */ DECLINLINE(int) hmR0VmxReadExitIntInfoVmcs(PVMXTRANSIENT pVmxTransient) { if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INTERRUPTION_INFO)) { int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_INFO, &pVmxTransient->uExitIntInfo); AssertRCReturn(rc,rc); pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INTERRUPTION_INFO; } return VINF_SUCCESS; } /** * Reads the VM-exit interruption error code from the VMCS into the VMX * transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. */ DECLINLINE(int) hmR0VmxReadExitIntErrorCodeVmcs(PVMXTRANSIENT pVmxTransient) { if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE)) { int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INTERRUPTION_ERROR_CODE, &pVmxTransient->uExitIntErrorCode); AssertRCReturn(rc, rc); pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INTERRUPTION_ERROR_CODE; } return VINF_SUCCESS; } /** * Reads the VM-exit instruction length field from the VMCS into the VMX * transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. */ DECLINLINE(int) hmR0VmxReadExitInstrLenVmcs(PVMXTRANSIENT pVmxTransient) { if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_LEN)) { int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_LENGTH, &pVmxTransient->cbInstr); AssertRCReturn(rc, rc); pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_LEN; } return VINF_SUCCESS; } /** * Reads the VM-exit instruction-information field from the VMCS into * the VMX transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. */ DECLINLINE(int) hmR0VmxReadExitInstrInfoVmcs(PVMXTRANSIENT pVmxTransient) { if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_INSTR_INFO)) { int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_INSTR_INFO, &pVmxTransient->ExitInstrInfo.u); AssertRCReturn(rc, rc); pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_INSTR_INFO; } return VINF_SUCCESS; } /** * Reads the VM-exit Qualification from the VMCS into the VMX transient structure. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure of the * calling EMT. (Required for the VMCS cache case.) * @param pVmxTransient Pointer to the VMX transient structure. */ DECLINLINE(int) hmR0VmxReadExitQualVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_EXIT_QUALIFICATION)) { int rc = VMXReadVmcsGstN(VMX_VMCS_RO_EXIT_QUALIFICATION, &pVmxTransient->uExitQual); NOREF(pVCpu); AssertRCReturn(rc, rc); pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_EXIT_QUALIFICATION; } return VINF_SUCCESS; } /** * Reads the Guest-linear address from the VMCS into the VMX transient structure. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure of the * calling EMT. (Required for the VMCS cache case.) * @param pVmxTransient Pointer to the VMX transient structure. */ DECLINLINE(int) hmR0VmxReadGuestLinearAddrVmcs(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_GUEST_LINEAR_ADDR)) { int rc = VMXReadVmcsGstN(VMX_VMCS_RO_GUEST_LINEAR_ADDR, &pVmxTransient->uGuestLinearAddr); NOREF(pVCpu); AssertRCReturn(rc, rc); pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_GUEST_LINEAR_ADDR; } return VINF_SUCCESS; } /** * Reads the IDT-vectoring information field from the VMCS into the VMX * transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. * * @remarks No-long-jump zone!!! */ DECLINLINE(int) hmR0VmxReadIdtVectoringInfoVmcs(PVMXTRANSIENT pVmxTransient) { if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_IDT_VECTORING_INFO)) { int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_INFO, &pVmxTransient->uIdtVectoringInfo); AssertRCReturn(rc, rc); pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_IDT_VECTORING_INFO; } return VINF_SUCCESS; } /** * Reads the IDT-vectoring error code from the VMCS into the VMX * transient structure. * * @returns VBox status code. * @param pVmxTransient Pointer to the VMX transient structure. */ DECLINLINE(int) hmR0VmxReadIdtVectoringErrorCodeVmcs(PVMXTRANSIENT pVmxTransient) { if (!(pVmxTransient->fVmcsFieldsRead & HMVMX_READ_IDT_VECTORING_ERROR_CODE)) { int rc = VMXReadVmcs32(VMX_VMCS32_RO_IDT_VECTORING_ERROR_CODE, &pVmxTransient->uIdtVectoringErrorCode); AssertRCReturn(rc, rc); pVmxTransient->fVmcsFieldsRead |= HMVMX_READ_IDT_VECTORING_ERROR_CODE; } return VINF_SUCCESS; } /** * Enters VMX root mode operation on the current CPU. * * @returns VBox status code. * @param pVM The cross context VM structure. Can be * NULL, after a resume. * @param HCPhysCpuPage Physical address of the VMXON region. * @param pvCpuPage Pointer to the VMXON region. */ static int hmR0VmxEnterRootMode(PVM pVM, RTHCPHYS HCPhysCpuPage, void *pvCpuPage) { Assert(HCPhysCpuPage && HCPhysCpuPage != NIL_RTHCPHYS); Assert(RT_ALIGN_T(HCPhysCpuPage, _4K, RTHCPHYS) == HCPhysCpuPage); Assert(pvCpuPage); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); if (pVM) { /* Write the VMCS revision dword to the VMXON region. */ *(uint32_t *)pvCpuPage = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_ID); } /* Paranoid: Disable interrupts as, in theory, interrupt handlers might mess with CR4. */ RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* Enable the VMX bit in CR4 if necessary. */ RTCCUINTREG uOldCr4 = SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX); /* Enter VMX root mode. */ int rc = VMXEnable(HCPhysCpuPage); if (RT_FAILURE(rc)) { if (!(uOldCr4 & X86_CR4_VMXE)) SUPR0ChangeCR4(0, ~X86_CR4_VMXE); if (pVM) pVM->hm.s.vmx.HCPhysVmxEnableError = HCPhysCpuPage; } /* Restore interrupts. */ ASMSetFlags(fEFlags); return rc; } /** * Exits VMX root mode operation on the current CPU. * * @returns VBox status code. */ static int hmR0VmxLeaveRootMode(void) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* Paranoid: Disable interrupts as, in theory, interrupts handlers might mess with CR4. */ RTCCUINTREG fEFlags = ASMIntDisableFlags(); /* If we're for some reason not in VMX root mode, then don't leave it. */ RTCCUINTREG uHostCR4 = ASMGetCR4(); int rc; if (uHostCR4 & X86_CR4_VMXE) { /* Exit VMX root mode and clear the VMX bit in CR4. */ VMXDisable(); SUPR0ChangeCR4(0, ~X86_CR4_VMXE); rc = VINF_SUCCESS; } else rc = VERR_VMX_NOT_IN_VMX_ROOT_MODE; /* Restore interrupts. */ ASMSetFlags(fEFlags); return rc; } /** * Allocates and maps one physically contiguous page. The allocated page is * zero'd out. (Used by various VT-x structures). * * @returns IPRT status code. * @param pMemObj Pointer to the ring-0 memory object. * @param ppVirt Where to store the virtual address of the * allocation. * @param pHCPhys Where to store the physical address of the * allocation. */ static int hmR0VmxPageAllocZ(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys) { AssertPtrReturn(pMemObj, VERR_INVALID_PARAMETER); AssertPtrReturn(ppVirt, VERR_INVALID_PARAMETER); AssertPtrReturn(pHCPhys, VERR_INVALID_PARAMETER); int rc = RTR0MemObjAllocCont(pMemObj, PAGE_SIZE, false /* fExecutable */); if (RT_FAILURE(rc)) return rc; *ppVirt = RTR0MemObjAddress(*pMemObj); *pHCPhys = RTR0MemObjGetPagePhysAddr(*pMemObj, 0 /* iPage */); ASMMemZero32(*ppVirt, PAGE_SIZE); return VINF_SUCCESS; } /** * Frees and unmaps an allocated physical page. * * @param pMemObj Pointer to the ring-0 memory object. * @param ppVirt Where to re-initialize the virtual address of * allocation as 0. * @param pHCPhys Where to re-initialize the physical address of the * allocation as 0. */ static void hmR0VmxPageFree(PRTR0MEMOBJ pMemObj, PRTR0PTR ppVirt, PRTHCPHYS pHCPhys) { AssertPtr(pMemObj); AssertPtr(ppVirt); AssertPtr(pHCPhys); if (*pMemObj != NIL_RTR0MEMOBJ) { int rc = RTR0MemObjFree(*pMemObj, true /* fFreeMappings */); AssertRC(rc); *pMemObj = NIL_RTR0MEMOBJ; *ppVirt = 0; *pHCPhys = 0; } } /** * Worker function to free VT-x related structures. * * @returns IPRT status code. * @param pVM The cross context VM structure. */ static void hmR0VmxStructsFree(PVM pVM) { for (VMCPUID i = 0; i < pVM->cCpus; i++) { PVMCPU pVCpu = &pVM->aCpus[i]; AssertPtr(pVCpu); hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr); hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr); if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS) hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, &pVCpu->hm.s.vmx.HCPhysMsrBitmap); hmR0VmxPageFree(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs); } hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, &pVM->hm.s.vmx.HCPhysApicAccess); #ifdef VBOX_WITH_CRASHDUMP_MAGIC hmR0VmxPageFree(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch); #endif } /** * Worker function to allocate VT-x related VM structures. * * @returns IPRT status code. * @param pVM The cross context VM structure. */ static int hmR0VmxStructsAlloc(PVM pVM) { /* * Initialize members up-front so we can cleanup properly on allocation failure. */ #define VMXLOCAL_INIT_VM_MEMOBJ(a_Name, a_VirtPrefix) \ pVM->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \ pVM->hm.s.vmx.a_VirtPrefix##a_Name = 0; \ pVM->hm.s.vmx.HCPhys##a_Name = 0; #define VMXLOCAL_INIT_VMCPU_MEMOBJ(a_Name, a_VirtPrefix) \ pVCpu->hm.s.vmx.hMemObj##a_Name = NIL_RTR0MEMOBJ; \ pVCpu->hm.s.vmx.a_VirtPrefix##a_Name = 0; \ pVCpu->hm.s.vmx.HCPhys##a_Name = 0; #ifdef VBOX_WITH_CRASHDUMP_MAGIC VMXLOCAL_INIT_VM_MEMOBJ(Scratch, pv); #endif VMXLOCAL_INIT_VM_MEMOBJ(ApicAccess, pb); AssertCompile(sizeof(VMCPUID) == sizeof(pVM->cCpus)); for (VMCPUID i = 0; i < pVM->cCpus; i++) { PVMCPU pVCpu = &pVM->aCpus[i]; VMXLOCAL_INIT_VMCPU_MEMOBJ(Vmcs, pv); VMXLOCAL_INIT_VMCPU_MEMOBJ(MsrBitmap, pv); VMXLOCAL_INIT_VMCPU_MEMOBJ(GuestMsr, pv); VMXLOCAL_INIT_VMCPU_MEMOBJ(HostMsr, pv); } #undef VMXLOCAL_INIT_VMCPU_MEMOBJ #undef VMXLOCAL_INIT_VM_MEMOBJ /* The VMCS size cannot be more than 4096 bytes. See Intel spec. Appendix A.1 "Basic VMX Information". */ AssertReturnStmt(RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_SIZE) <= PAGE_SIZE, (&pVM->aCpus[0])->hm.s.u32HMError = VMX_UFC_INVALID_VMCS_SIZE, VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO); /* * Allocate all the VT-x structures. */ int rc = VINF_SUCCESS; #ifdef VBOX_WITH_CRASHDUMP_MAGIC rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjScratch, &pVM->hm.s.vmx.pbScratch, &pVM->hm.s.vmx.HCPhysScratch); if (RT_FAILURE(rc)) goto cleanup; strcpy((char *)pVM->hm.s.vmx.pbScratch, "SCRATCH Magic"); *(uint64_t *)(pVM->hm.s.vmx.pbScratch + 16) = UINT64_C(0xdeadbeefdeadbeef); #endif /* Allocate the APIC-access page for trapping APIC accesses from the guest. */ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS) { rc = hmR0VmxPageAllocZ(&pVM->hm.s.vmx.hMemObjApicAccess, (PRTR0PTR)&pVM->hm.s.vmx.pbApicAccess, &pVM->hm.s.vmx.HCPhysApicAccess); if (RT_FAILURE(rc)) goto cleanup; } /* * Initialize per-VCPU VT-x structures. */ for (VMCPUID i = 0; i < pVM->cCpus; i++) { PVMCPU pVCpu = &pVM->aCpus[i]; AssertPtr(pVCpu); /* Allocate the VM control structure (VMCS). */ rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjVmcs, &pVCpu->hm.s.vmx.pvVmcs, &pVCpu->hm.s.vmx.HCPhysVmcs); if (RT_FAILURE(rc)) goto cleanup; /* Get the allocated virtual-APIC page from the APIC device for transparent TPR accesses. */ if ( PDMHasApic(pVM) && (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW)) { rc = APICGetApicPageForCpu(pVCpu, &pVCpu->hm.s.vmx.HCPhysVirtApic, (PRTR0PTR)&pVCpu->hm.s.vmx.pbVirtApic, NULL /* pR3Ptr */, NULL /* pRCPtr */); if (RT_FAILURE(rc)) goto cleanup; } /* * Allocate the MSR-bitmap if supported by the CPU. The MSR-bitmap is for * transparent accesses of specific MSRs. * * If the condition for enabling MSR bitmaps changes here, don't forget to * update HMAreMsrBitmapsAvailable(). */ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS) { rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjMsrBitmap, &pVCpu->hm.s.vmx.pvMsrBitmap, &pVCpu->hm.s.vmx.HCPhysMsrBitmap); if (RT_FAILURE(rc)) goto cleanup; ASMMemFill32(pVCpu->hm.s.vmx.pvMsrBitmap, PAGE_SIZE, UINT32_C(0xffffffff)); } /* Allocate the VM-entry MSR-load and VM-exit MSR-store page for the guest MSRs. */ rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjGuestMsr, &pVCpu->hm.s.vmx.pvGuestMsr, &pVCpu->hm.s.vmx.HCPhysGuestMsr); if (RT_FAILURE(rc)) goto cleanup; /* Allocate the VM-exit MSR-load page for the host MSRs. */ rc = hmR0VmxPageAllocZ(&pVCpu->hm.s.vmx.hMemObjHostMsr, &pVCpu->hm.s.vmx.pvHostMsr, &pVCpu->hm.s.vmx.HCPhysHostMsr); if (RT_FAILURE(rc)) goto cleanup; } return VINF_SUCCESS; cleanup: hmR0VmxStructsFree(pVM); return rc; } /** * Does global VT-x initialization (called during module initialization). * * @returns VBox status code. */ VMMR0DECL(int) VMXR0GlobalInit(void) { #ifdef HMVMX_USE_FUNCTION_TABLE AssertCompile(VMX_EXIT_MAX + 1 == RT_ELEMENTS(g_apfnVMExitHandlers)); # ifdef VBOX_STRICT for (unsigned i = 0; i < RT_ELEMENTS(g_apfnVMExitHandlers); i++) Assert(g_apfnVMExitHandlers[i]); # endif #endif return VINF_SUCCESS; } /** * Does global VT-x termination (called during module termination). */ VMMR0DECL(void) VMXR0GlobalTerm() { /* Nothing to do currently. */ } /** * Sets up and activates VT-x on the current CPU. * * @returns VBox status code. * @param pHostCpu The HM physical-CPU structure. * @param pVM The cross context VM structure. Can be * NULL after a host resume operation. * @param pvCpuPage Pointer to the VMXON region (can be NULL if @a * fEnabledByHost is @c true). * @param HCPhysCpuPage Physical address of the VMXON region (can be 0 if * @a fEnabledByHost is @c true). * @param fEnabledByHost Set if SUPR0EnableVTx() or similar was used to * enable VT-x on the host. * @param pHwvirtMsrs Pointer to the hardware-virtualization MSRs. */ VMMR0DECL(int) VMXR0EnableCpu(PHMPHYSCPU pHostCpu, PVM pVM, void *pvCpuPage, RTHCPHYS HCPhysCpuPage, bool fEnabledByHost, PCSUPHWVIRTMSRS pHwvirtMsrs) { Assert(pHostCpu); Assert(pHwvirtMsrs); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* Enable VT-x if it's not already enabled by the host. */ if (!fEnabledByHost) { int rc = hmR0VmxEnterRootMode(pVM, HCPhysCpuPage, pvCpuPage); if (RT_FAILURE(rc)) return rc; } /* * Flush all EPT tagged-TLB entries (in case VirtualBox or any other hypervisor have been * using EPTPs) so we don't retain any stale guest-physical mappings which won't get * invalidated when flushing by VPID. */ if (pHwvirtMsrs->u.vmx.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS) { hmR0VmxFlushEpt(NULL /* pVCpu */, VMXTLBFLUSHEPT_ALL_CONTEXTS); pHostCpu->fFlushAsidBeforeUse = false; } else pHostCpu->fFlushAsidBeforeUse = true; /* Ensure each VCPU scheduled on this CPU gets a new VPID on resume. See @bugref{6255}. */ ++pHostCpu->cTlbFlushes; return VINF_SUCCESS; } /** * Deactivates VT-x on the current CPU. * * @returns VBox status code. * @param pvCpuPage Pointer to the VMXON region. * @param HCPhysCpuPage Physical address of the VMXON region. * * @remarks This function should never be called when SUPR0EnableVTx() or * similar was used to enable VT-x on the host. */ VMMR0DECL(int) VMXR0DisableCpu(void *pvCpuPage, RTHCPHYS HCPhysCpuPage) { RT_NOREF2(pvCpuPage, HCPhysCpuPage); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); return hmR0VmxLeaveRootMode(); } /** * Sets the permission bits for the specified MSR in the MSR bitmap. * * @param pVCpu The cross context virtual CPU structure. * @param uMsr The MSR value. * @param enmRead Whether reading this MSR causes a VM-exit. * @param enmWrite Whether writing this MSR causes a VM-exit. */ static void hmR0VmxSetMsrPermission(PVMCPU pVCpu, uint32_t uMsr, VMXMSREXITREAD enmRead, VMXMSREXITWRITE enmWrite) { int32_t iBit; uint8_t *pbMsrBitmap = (uint8_t *)pVCpu->hm.s.vmx.pvMsrBitmap; /* * MSR Layout: * Byte index MSR range Interpreted as * 0x000 - 0x3ff 0x00000000 - 0x00001fff Low MSR read bits. * 0x400 - 0x7ff 0xc0000000 - 0xc0001fff High MSR read bits. * 0x800 - 0xbff 0x00000000 - 0x00001fff Low MSR write bits. * 0xc00 - 0xfff 0xc0000000 - 0xc0001fff High MSR write bits. * * A bit corresponding to an MSR within the above range causes a VM-exit * if the bit is 1 on executions of RDMSR/WRMSR. * * If an MSR falls out of the MSR range, it always cause a VM-exit. * * See Intel spec. 24.6.9 "MSR-Bitmap Address". */ if (uMsr <= 0x00001fff) iBit = uMsr; else if (uMsr - UINT32_C(0xc0000000) <= UINT32_C(0x00001fff)) { iBit = uMsr - UINT32_C(0xc0000000); pbMsrBitmap += 0x400; } else AssertMsgFailedReturnVoid(("hmR0VmxSetMsrPermission: Invalid MSR %#RX32\n", uMsr)); Assert(iBit <= 0x1fff); if (enmRead == VMXMSREXIT_INTERCEPT_READ) ASMBitSet(pbMsrBitmap, iBit); else ASMBitClear(pbMsrBitmap, iBit); if (enmWrite == VMXMSREXIT_INTERCEPT_WRITE) ASMBitSet(pbMsrBitmap + 0x800, iBit); else ASMBitClear(pbMsrBitmap + 0x800, iBit); } /** * Updates the VMCS with the number of effective MSRs in the auto-load/store MSR * area. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param cMsrs The number of MSRs. */ static int hmR0VmxSetAutoLoadStoreMsrCount(PVMCPU pVCpu, uint32_t cMsrs) { /* Shouldn't ever happen but there -is- a number. We're well within the recommended 512. */ uint64_t const uVmxMiscMsr = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.u64Misc; uint32_t const cMaxSupportedMsrs = VMX_MISC_MAX_MSRS(uVmxMiscMsr); if (RT_UNLIKELY(cMsrs > cMaxSupportedMsrs)) { LogRel(("CPU auto-load/store MSR count in VMCS exceeded cMsrs=%u Supported=%u.\n", cMsrs, cMaxSupportedMsrs)); pVCpu->hm.s.u32HMError = VMX_UFC_INSUFFICIENT_GUEST_MSR_STORAGE; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Update number of guest MSRs to load/store across the world-switch. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, cMsrs); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, cMsrs); /* Update number of host MSRs to load after the world-switch. Identical to guest-MSR count as it's always paired. */ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, cMsrs); AssertRCReturn(rc, rc); /* Update the VCPU's copy of the MSR count. */ pVCpu->hm.s.vmx.cMsrs = cMsrs; return VINF_SUCCESS; } /** * Adds a new (or updates the value of an existing) guest/host MSR * pair to be swapped during the world-switch as part of the * auto-load/store MSR area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param uMsr The MSR. * @param uGuestMsrValue Value of the guest MSR. * @param fUpdateHostMsr Whether to update the value of the host MSR if * necessary. * @param pfAddedAndUpdated Where to store whether the MSR was added -and- * its value was updated. Optional, can be NULL. */ static int hmR0VmxAddAutoLoadStoreMsr(PVMCPU pVCpu, uint32_t uMsr, uint64_t uGuestMsrValue, bool fUpdateHostMsr, bool *pfAddedAndUpdated) { PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs; uint32_t i; for (i = 0; i < cMsrs; i++) { if (pGuestMsr->u32Msr == uMsr) break; pGuestMsr++; } bool fAdded = false; if (i == cMsrs) { ++cMsrs; int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, cMsrs); AssertMsgRCReturn(rc, ("hmR0VmxAddAutoLoadStoreMsr: Insufficient space to add MSR %u\n", uMsr), rc); /* Now that we're swapping MSRs during the world-switch, allow the guest to read/write them without causing VM-exits. */ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) hmR0VmxSetMsrPermission(pVCpu, uMsr, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); fAdded = true; } /* Update the MSR values in the auto-load/store MSR area. */ pGuestMsr->u32Msr = uMsr; pGuestMsr->u64Value = uGuestMsrValue; /* Create/update the MSR slot in the host MSR area. */ PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr; pHostMsr += i; pHostMsr->u32Msr = uMsr; /* * Update the host MSR only when requested by the caller AND when we're * adding it to the auto-load/store area. Otherwise, it would have been * updated by hmR0VmxExportHostMsrs(). We do this for performance reasons. */ bool fUpdatedMsrValue = false; if ( fAdded && fUpdateHostMsr) { Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); pHostMsr->u64Value = ASMRdMsr(pHostMsr->u32Msr); fUpdatedMsrValue = true; } if (pfAddedAndUpdated) *pfAddedAndUpdated = fUpdatedMsrValue; return VINF_SUCCESS; } /** * Removes a guest/host MSR pair to be swapped during the world-switch from the * auto-load/store MSR area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param uMsr The MSR. */ static int hmR0VmxRemoveAutoLoadStoreMsr(PVMCPU pVCpu, uint32_t uMsr) { PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; uint32_t cMsrs = pVCpu->hm.s.vmx.cMsrs; for (uint32_t i = 0; i < cMsrs; i++) { /* Find the MSR. */ if (pGuestMsr->u32Msr == uMsr) { /* If it's the last MSR, simply reduce the count. */ if (i == cMsrs - 1) { --cMsrs; break; } /* Remove it by swapping the last MSR in place of it, and reducing the count. */ PVMXAUTOMSR pLastGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; pLastGuestMsr += cMsrs - 1; pGuestMsr->u32Msr = pLastGuestMsr->u32Msr; pGuestMsr->u64Value = pLastGuestMsr->u64Value; PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr; PVMXAUTOMSR pLastHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr; pLastHostMsr += cMsrs - 1; pHostMsr->u32Msr = pLastHostMsr->u32Msr; pHostMsr->u64Value = pLastHostMsr->u64Value; --cMsrs; break; } pGuestMsr++; } /* Update the VMCS if the count changed (meaning the MSR was found). */ if (cMsrs != pVCpu->hm.s.vmx.cMsrs) { int rc = hmR0VmxSetAutoLoadStoreMsrCount(pVCpu, cMsrs); AssertRCReturn(rc, rc); /* We're no longer swapping MSRs during the world-switch, intercept guest read/writes to them. */ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) hmR0VmxSetMsrPermission(pVCpu, uMsr, VMXMSREXIT_INTERCEPT_READ, VMXMSREXIT_INTERCEPT_WRITE); Log4Func(("Removed MSR %#RX32 new cMsrs=%u\n", uMsr, pVCpu->hm.s.vmx.cMsrs)); return VINF_SUCCESS; } return VERR_NOT_FOUND; } /** * Checks if the specified guest MSR is part of the auto-load/store area in * the VMCS. * * @returns true if found, false otherwise. * @param pVCpu The cross context virtual CPU structure. * @param uMsr The MSR to find. */ static bool hmR0VmxIsAutoLoadStoreGuestMsr(PVMCPU pVCpu, uint32_t uMsr) { PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; uint32_t const cMsrs = pVCpu->hm.s.vmx.cMsrs; for (uint32_t i = 0; i < cMsrs; i++, pGuestMsr++) { if (pGuestMsr->u32Msr == uMsr) return true; } return false; } /** * Updates the value of all host MSRs in the auto-load/store area in the VMCS. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxUpdateAutoLoadStoreHostMsrs(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); PVMXAUTOMSR pHostMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr; PVMXAUTOMSR pGuestMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; uint32_t const cMsrs = pVCpu->hm.s.vmx.cMsrs; for (uint32_t i = 0; i < cMsrs; i++, pHostMsr++, pGuestMsr++) { AssertReturnVoid(pHostMsr->u32Msr == pGuestMsr->u32Msr); /* * Performance hack for the host EFER MSR. We use the cached value rather than re-read it. * Strict builds will catch mismatches in hmR0VmxCheckAutoLoadStoreMsrs(). See @bugref{7368}. */ if (pHostMsr->u32Msr == MSR_K6_EFER) pHostMsr->u64Value = pVCpu->CTX_SUFF(pVM)->hm.s.vmx.u64HostEfer; else pHostMsr->u64Value = ASMRdMsr(pHostMsr->u32Msr); } pVCpu->hm.s.vmx.fUpdatedHostMsrs = true; } /** * Saves a set of host MSRs to allow read/write passthru access to the guest and * perform lazy restoration of the host MSRs while leaving VT-x. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxLazySaveHostMsrs(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* * Note: If you're adding MSRs here, make sure to update the MSR-bitmap permissions in hmR0VmxSetupProcCtls(). */ if (!(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST)) { Assert(!(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)); /* Guest MSRs better not be loaded now. */ #if HC_ARCH_BITS == 64 if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests) { pVCpu->hm.s.vmx.u64HostLStarMsr = ASMRdMsr(MSR_K8_LSTAR); pVCpu->hm.s.vmx.u64HostStarMsr = ASMRdMsr(MSR_K6_STAR); pVCpu->hm.s.vmx.u64HostSFMaskMsr = ASMRdMsr(MSR_K8_SF_MASK); pVCpu->hm.s.vmx.u64HostKernelGSBaseMsr = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); } #endif pVCpu->hm.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_SAVED_HOST; } } /** * Checks whether the MSR belongs to the set of guest MSRs that we restore * lazily while leaving VT-x. * * @returns true if it does, false otherwise. * @param pVCpu The cross context virtual CPU structure. * @param uMsr The MSR to check. */ static bool hmR0VmxIsLazyGuestMsr(PVMCPU pVCpu, uint32_t uMsr) { NOREF(pVCpu); #if HC_ARCH_BITS == 64 if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests) { switch (uMsr) { case MSR_K8_LSTAR: case MSR_K6_STAR: case MSR_K8_SF_MASK: case MSR_K8_KERNEL_GS_BASE: return true; } } #else RT_NOREF(pVCpu, uMsr); #endif return false; } /** * Loads a set of guests MSRs to allow read/passthru to the guest. * * The name of this function is slightly confusing. This function does NOT * postpone loading, but loads the MSR right now. "hmR0VmxLazy" is simply a * common prefix for functions dealing with "lazy restoration" of the shared * MSRs. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxLazyLoadGuestMsrs(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST); #if HC_ARCH_BITS == 64 if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests) { /* * If the guest MSRs are not loaded -and- if all the guest MSRs are identical * to the MSRs on the CPU (which are the saved host MSRs, see assertion above) then * we can skip a few MSR writes. * * Otherwise, it implies either 1. they're not loaded, or 2. they're loaded but the * guest MSR values in the guest-CPU context might be different to what's currently * loaded in the CPU. In either case, we need to write the new guest MSR values to the * CPU, see @bugref{8728}. */ PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if ( !(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST) && pCtx->msrKERNELGSBASE == pVCpu->hm.s.vmx.u64HostKernelGSBaseMsr && pCtx->msrLSTAR == pVCpu->hm.s.vmx.u64HostLStarMsr && pCtx->msrSTAR == pVCpu->hm.s.vmx.u64HostStarMsr && pCtx->msrSFMASK == pVCpu->hm.s.vmx.u64HostSFMaskMsr) { #ifdef VBOX_STRICT Assert(ASMRdMsr(MSR_K8_KERNEL_GS_BASE) == pCtx->msrKERNELGSBASE); Assert(ASMRdMsr(MSR_K8_LSTAR) == pCtx->msrLSTAR); Assert(ASMRdMsr(MSR_K6_STAR) == pCtx->msrSTAR); Assert(ASMRdMsr(MSR_K8_SF_MASK) == pCtx->msrSFMASK); #endif } else { ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE); ASMWrMsr(MSR_K8_LSTAR, pCtx->msrLSTAR); ASMWrMsr(MSR_K6_STAR, pCtx->msrSTAR); ASMWrMsr(MSR_K8_SF_MASK, pCtx->msrSFMASK); } } #endif pVCpu->hm.s.vmx.fLazyMsrs |= VMX_LAZY_MSRS_LOADED_GUEST; } /** * Performs lazy restoration of the set of host MSRs if they were previously * loaded with guest MSR values. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! * @remarks The guest MSRs should have been saved back into the guest-CPU * context by hmR0VmxImportGuestState()!!! */ static void hmR0VmxLazyRestoreHostMsrs(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST) { Assert(pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_SAVED_HOST); #if HC_ARCH_BITS == 64 if (pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests) { ASMWrMsr(MSR_K8_LSTAR, pVCpu->hm.s.vmx.u64HostLStarMsr); ASMWrMsr(MSR_K6_STAR, pVCpu->hm.s.vmx.u64HostStarMsr); ASMWrMsr(MSR_K8_SF_MASK, pVCpu->hm.s.vmx.u64HostSFMaskMsr); ASMWrMsr(MSR_K8_KERNEL_GS_BASE, pVCpu->hm.s.vmx.u64HostKernelGSBaseMsr); } #endif } pVCpu->hm.s.vmx.fLazyMsrs &= ~(VMX_LAZY_MSRS_LOADED_GUEST | VMX_LAZY_MSRS_SAVED_HOST); } /** * Verifies that our cached values of the VMCS fields are all consistent with * what's actually present in the VMCS. * * @returns VBox status code. * @retval VINF_SUCCESS if all our caches match their respective VMCS fields. * @retval VERR_VMX_VMCS_FIELD_CACHE_INVALID if a cache field doesn't match the * VMCS content. HMCPU error-field is * updated, see VMX_VCI_XXX. * @param pVCpu The cross context virtual CPU structure. */ static int hmR0VmxCheckVmcsCtls(PVMCPU pVCpu) { uint32_t u32Val; int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRCReturn(rc, rc); AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32EntryCtls == u32Val, ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32EntryCtls, u32Val), pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_ENTRY, VERR_VMX_VMCS_FIELD_CACHE_INVALID); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val); AssertRCReturn(rc, rc); AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32ExitCtls == u32Val, ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32ExitCtls, u32Val), pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_EXIT, VERR_VMX_VMCS_FIELD_CACHE_INVALID); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val); AssertRCReturn(rc, rc); AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32PinCtls == u32Val, ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32PinCtls, u32Val), pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PIN_EXEC, VERR_VMX_VMCS_FIELD_CACHE_INVALID); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val); AssertRCReturn(rc, rc); AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32ProcCtls == u32Val, ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32ProcCtls, u32Val), pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC, VERR_VMX_VMCS_FIELD_CACHE_INVALID); if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS) { rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val); AssertRCReturn(rc, rc); AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32ProcCtls2 == u32Val, ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32ProcCtls2, u32Val), pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_PROC_EXEC2, VERR_VMX_VMCS_FIELD_CACHE_INVALID); } rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val); AssertRCReturn(rc, rc); AssertMsgReturnStmt(pVCpu->hm.s.vmx.u32XcptBitmap == u32Val, ("Cache=%#RX32 VMCS=%#RX32\n", pVCpu->hm.s.vmx.u32XcptBitmap, u32Val), pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_XCPT_BITMAP, VERR_VMX_VMCS_FIELD_CACHE_INVALID); uint64_t u64Val; rc = VMXReadVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, &u64Val); AssertRCReturn(rc, rc); AssertMsgReturnStmt(pVCpu->hm.s.vmx.u64TscOffset == u64Val, ("Cache=%#RX64 VMCS=%#RX64\n", pVCpu->hm.s.vmx.u64TscOffset, u64Val), pVCpu->hm.s.u32HMError = VMX_VCI_CTRL_TSC_OFFSET, VERR_VMX_VMCS_FIELD_CACHE_INVALID); return VINF_SUCCESS; } #ifdef VBOX_STRICT /** * Verifies that our cached host EFER value has not changed * since we cached it. * * @param pVCpu The cross context virtual CPU structure. */ static void hmR0VmxCheckHostEferMsr(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); if (pVCpu->hm.s.vmx.u32ExitCtls & VMX_EXIT_CTLS_LOAD_EFER_MSR) { uint64_t u64Val; int rc = VMXReadVmcs64(VMX_VMCS64_HOST_EFER_FULL, &u64Val); AssertRC(rc); uint64_t u64HostEferMsr = ASMRdMsr(MSR_K6_EFER); AssertMsgReturnVoid(u64HostEferMsr == u64Val, ("u64HostEferMsr=%#RX64 u64Val=%#RX64\n", u64HostEferMsr, u64Val)); } } /** * Verifies whether the guest/host MSR pairs in the auto-load/store area in the * VMCS are correct. * * @param pVCpu The cross context virtual CPU structure. */ static void hmR0VmxCheckAutoLoadStoreMsrs(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* Verify MSR counts in the VMCS are what we think it should be. */ uint32_t cMsrs; int rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &cMsrs); AssertRC(rc); Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &cMsrs); AssertRC(rc); Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &cMsrs); AssertRC(rc); Assert(cMsrs == pVCpu->hm.s.vmx.cMsrs); PCVMXAUTOMSR pHostMsr = (PCVMXAUTOMSR)pVCpu->hm.s.vmx.pvHostMsr; PCVMXAUTOMSR pGuestMsr = (PCVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; for (uint32_t i = 0; i < cMsrs; i++, pHostMsr++, pGuestMsr++) { /* Verify that the MSRs are paired properly and that the host MSR has the correct value. */ AssertMsgReturnVoid(pHostMsr->u32Msr == pGuestMsr->u32Msr, ("HostMsr=%#RX32 GuestMsr=%#RX32 cMsrs=%u\n", pHostMsr->u32Msr, pGuestMsr->u32Msr, cMsrs)); uint64_t u64Msr = ASMRdMsr(pHostMsr->u32Msr); AssertMsgReturnVoid(pHostMsr->u64Value == u64Msr, ("u32Msr=%#RX32 VMCS Value=%#RX64 ASMRdMsr=%#RX64 cMsrs=%u\n", pHostMsr->u32Msr, pHostMsr->u64Value, u64Msr, cMsrs)); /* Verify that the permissions are as expected in the MSR bitmap. */ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) { VMXMSREXITREAD enmRead; VMXMSREXITWRITE enmWrite; rc = HMVmxGetMsrPermission(pVCpu->hm.s.vmx.pvMsrBitmap, pGuestMsr->u32Msr, &enmRead, &enmWrite); AssertMsgReturnVoid(rc == VINF_SUCCESS, ("HMVmxGetMsrPermission! failed. rc=%Rrc\n", rc)); if (pGuestMsr->u32Msr == MSR_K6_EFER) { AssertMsgReturnVoid(enmRead == VMXMSREXIT_INTERCEPT_READ, ("Passthru read for EFER!?\n")); AssertMsgReturnVoid(enmWrite == VMXMSREXIT_INTERCEPT_WRITE, ("Passthru write for EFER!?\n")); } else { AssertMsgReturnVoid(enmRead == VMXMSREXIT_PASSTHRU_READ, ("u32Msr=%#RX32 cMsrs=%u No passthru read!\n", pGuestMsr->u32Msr, cMsrs)); AssertMsgReturnVoid(enmWrite == VMXMSREXIT_PASSTHRU_WRITE, ("u32Msr=%#RX32 cMsrs=%u No passthru write!\n", pGuestMsr->u32Msr, cMsrs)); } } } } #endif /* VBOX_STRICT */ /** * Flushes the TLB using EPT. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure of the calling * EMT. Can be NULL depending on @a enmTlbFlush. * @param enmTlbFlush Type of flush. * * @remarks Caller is responsible for making sure this function is called only * when NestedPaging is supported and providing @a enmTlbFlush that is * supported by the CPU. * @remarks Can be called with interrupts disabled. */ static void hmR0VmxFlushEpt(PVMCPU pVCpu, VMXTLBFLUSHEPT enmTlbFlush) { uint64_t au64Descriptor[2]; if (enmTlbFlush == VMXTLBFLUSHEPT_ALL_CONTEXTS) au64Descriptor[0] = 0; else { Assert(pVCpu); au64Descriptor[0] = pVCpu->hm.s.vmx.HCPhysEPTP; } au64Descriptor[1] = 0; /* MBZ. Intel spec. 33.3 "VMX Instructions" */ int rc = VMXR0InvEPT(enmTlbFlush, &au64Descriptor[0]); AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvEPT %#x %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hm.s.vmx.HCPhysEPTP : 0, rc)); if ( RT_SUCCESS(rc) && pVCpu) STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushNestedPaging); } /** * Flushes the TLB using VPID. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure of the calling * EMT. Can be NULL depending on @a enmTlbFlush. * @param enmTlbFlush Type of flush. * @param GCPtr Virtual address of the page to flush (can be 0 depending * on @a enmTlbFlush). * * @remarks Can be called with interrupts disabled. */ static void hmR0VmxFlushVpid(PVMCPU pVCpu, VMXTLBFLUSHVPID enmTlbFlush, RTGCPTR GCPtr) { Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid); uint64_t au64Descriptor[2]; if (enmTlbFlush == VMXTLBFLUSHVPID_ALL_CONTEXTS) { au64Descriptor[0] = 0; au64Descriptor[1] = 0; } else { AssertPtr(pVCpu); AssertMsg(pVCpu->hm.s.uCurrentAsid != 0, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid)); AssertMsg(pVCpu->hm.s.uCurrentAsid <= UINT16_MAX, ("VMXR0InvVPID: invalid ASID %lu\n", pVCpu->hm.s.uCurrentAsid)); au64Descriptor[0] = pVCpu->hm.s.uCurrentAsid; au64Descriptor[1] = GCPtr; } int rc = VMXR0InvVPID(enmTlbFlush, &au64Descriptor[0]); AssertMsg(rc == VINF_SUCCESS, ("VMXR0InvVPID %#x %u %RGv failed with %Rrc\n", enmTlbFlush, pVCpu ? pVCpu->hm.s.uCurrentAsid : 0, GCPtr, rc)); if ( RT_SUCCESS(rc) && pVCpu) STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushAsid); NOREF(rc); } /** * Invalidates a guest page by guest virtual address. Only relevant for * EPT/VPID, otherwise there is nothing really to invalidate. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param GCVirt Guest virtual address of the page to invalidate. */ VMMR0DECL(int) VMXR0InvalidatePage(PVMCPU pVCpu, RTGCPTR GCVirt) { AssertPtr(pVCpu); LogFlowFunc(("pVCpu=%p GCVirt=%RGv\n", pVCpu, GCVirt)); bool fFlushPending = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_TLB_FLUSH); if (!fFlushPending) { /* * We must invalidate the guest TLB entry in either case, we cannot ignore it even for * the EPT case. See @bugref{6043} and @bugref{6177}. * * Set the VMCPU_FF_TLB_FLUSH force flag and flush before VM-entry in hmR0VmxFlushTLB*() * as this function maybe called in a loop with individual addresses. */ PVM pVM = pVCpu->CTX_SUFF(pVM); if (pVM->hm.s.vmx.fVpid) { bool fVpidFlush = RT_BOOL(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR); #if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) /* * Workaround Erratum BV75, AAJ159 and others that affect several Intel CPUs * where executing INVVPID outside 64-bit mode does not flush translations of * 64-bit linear addresses, see @bugref{6208#c72}. */ if (RT_HI_U32(GCVirt)) fVpidFlush = false; #endif if (fVpidFlush) { hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_INDIV_ADDR, GCVirt); STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbInvlpgVirt); } else VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); } else if (pVM->hm.s.fNestedPaging) VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); } return VINF_SUCCESS; } /** * Dummy placeholder for tagged-TLB flush handling before VM-entry. Used in the * case where neither EPT nor VPID is supported by the CPU. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. * * @remarks Called with interrupts disabled. */ static void hmR0VmxFlushTaggedTlbNone(PHMPHYSCPU pHostCpu, PVMCPU pVCpu) { AssertPtr(pVCpu); AssertPtr(pHostCpu); VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); Assert(pHostCpu->idCpu != NIL_RTCPUID); pVCpu->hm.s.idLastCpu = pHostCpu->idCpu; pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes; pVCpu->hm.s.fForceTLBFlush = false; return; } /** * Flushes the tagged-TLB entries for EPT+VPID CPUs as necessary. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. * * @remarks All references to "ASID" in this function pertains to "VPID" in Intel's * nomenclature. The reason is, to avoid confusion in compare statements * since the host-CPU copies are named "ASID". * * @remarks Called with interrupts disabled. */ static void hmR0VmxFlushTaggedTlbBoth(PHMPHYSCPU pHostCpu, PVMCPU pVCpu) { #ifdef VBOX_WITH_STATISTICS bool fTlbFlushed = false; # define HMVMX_SET_TAGGED_TLB_FLUSHED() do { fTlbFlushed = true; } while (0) # define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { \ if (!fTlbFlushed) \ STAM_COUNTER_INC(&pVCpu->hm.s.StatNoFlushTlbWorldSwitch); \ } while (0) #else # define HMVMX_SET_TAGGED_TLB_FLUSHED() do { } while (0) # define HMVMX_UPDATE_FLUSH_SKIPPED_STAT() do { } while (0) #endif AssertPtr(pVCpu); AssertPtr(pHostCpu); Assert(pHostCpu->idCpu != NIL_RTCPUID); PVM pVM = pVCpu->CTX_SUFF(pVM); AssertMsg(pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbBoth cannot be invoked unless NestedPaging & VPID are enabled." "fNestedPaging=%RTbool fVpid=%RTbool", pVM->hm.s.fNestedPaging, pVM->hm.s.vmx.fVpid)); /* * Force a TLB flush for the first world-switch if the current CPU differs from the one we * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we * cannot reuse the current ASID anymore. */ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes) { ++pHostCpu->uCurrentAsid; if (pHostCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid) { pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0. */ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */ } pVCpu->hm.s.uCurrentAsid = pHostCpu->uCurrentAsid; pVCpu->hm.s.idLastCpu = pHostCpu->idCpu; pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes; /* * Flush by EPT when we get rescheduled to a new host CPU to ensure EPT-only tagged mappings are also * invalidated. We don't need to flush-by-VPID here as flushing by EPT covers it. See @bugref{6568}. */ hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmTlbFlushEpt); STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); HMVMX_SET_TAGGED_TLB_FLUSHED(); VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH); } else if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) /* Check for explicit TLB flushes. */ { /* * Changes to the EPT paging structure by VMM requires flushing-by-EPT as the CPU * creates guest-physical (ie. only EPT-tagged) mappings while traversing the EPT * tables when EPT is in use. Flushing-by-VPID will only flush linear (only * VPID-tagged) and combined (EPT+VPID tagged) mappings but not guest-physical * mappings, see @bugref{6568}. * * See Intel spec. 28.3.2 "Creating and Using Cached Translation Information". */ hmR0VmxFlushEpt(pVCpu, pVM->hm.s.vmx.enmTlbFlushEpt); STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); HMVMX_SET_TAGGED_TLB_FLUSHED(); } pVCpu->hm.s.fForceTLBFlush = false; HMVMX_UPDATE_FLUSH_SKIPPED_STAT(); Assert(pVCpu->hm.s.idLastCpu == pHostCpu->idCpu); Assert(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes); AssertMsg(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes, ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pHostCpu->cTlbFlushes)); AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < pVM->hm.s.uMaxAsid, ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu, pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.cTlbFlushes)); AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid, ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hm.s.uCurrentAsid)); /* Update VMCS with the VPID. */ int rc = VMXWriteVmcs32(VMX_VMCS16_VPID, pVCpu->hm.s.uCurrentAsid); AssertRC(rc); #undef HMVMX_SET_TAGGED_TLB_FLUSHED } /** * Flushes the tagged-TLB entries for EPT CPUs as necessary. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. * * @remarks Called with interrupts disabled. */ static void hmR0VmxFlushTaggedTlbEpt(PHMPHYSCPU pHostCpu, PVMCPU pVCpu) { AssertPtr(pVCpu); AssertPtr(pHostCpu); Assert(pHostCpu->idCpu != NIL_RTCPUID); AssertMsg(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked without NestedPaging.")); AssertMsg(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid, ("hmR0VmxFlushTaggedTlbEpt cannot be invoked with VPID.")); /* * Force a TLB flush for the first world-switch if the current CPU differs from the one we ran on last. * A change in the TLB flush count implies the host CPU is online after a suspend/resume. */ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes) { pVCpu->hm.s.fForceTLBFlush = true; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); } /* Check for explicit TLB flushes. */ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) { pVCpu->hm.s.fForceTLBFlush = true; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); } pVCpu->hm.s.idLastCpu = pHostCpu->idCpu; pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes; if (pVCpu->hm.s.fForceTLBFlush) { hmR0VmxFlushEpt(pVCpu, pVCpu->CTX_SUFF(pVM)->hm.s.vmx.enmTlbFlushEpt); pVCpu->hm.s.fForceTLBFlush = false; } } /** * Flushes the tagged-TLB entries for VPID CPUs as necessary. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. * * @remarks Called with interrupts disabled. */ static void hmR0VmxFlushTaggedTlbVpid(PHMPHYSCPU pHostCpu, PVMCPU pVCpu) { AssertPtr(pVCpu); AssertPtr(pHostCpu); Assert(pHostCpu->idCpu != NIL_RTCPUID); AssertMsg(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid, ("hmR0VmxFlushTlbVpid cannot be invoked without VPID.")); AssertMsg(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging, ("hmR0VmxFlushTlbVpid cannot be invoked with NestedPaging")); /* * Force a TLB flush for the first world switch if the current CPU differs from the one we * ran on last. If the TLB flush count changed, another VM (VCPU rather) has hit the ASID * limit while flushing the TLB or the host CPU is online after a suspend/resume, so we * cannot reuse the current ASID anymore. */ if ( pVCpu->hm.s.idLastCpu != pHostCpu->idCpu || pVCpu->hm.s.cTlbFlushes != pHostCpu->cTlbFlushes) { pVCpu->hm.s.fForceTLBFlush = true; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlbWorldSwitch); } /* Check for explicit TLB flushes. */ if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_TLB_FLUSH)) { /* * If we ever support VPID flush combinations other than ALL or SINGLE-context (see * hmR0VmxSetupTaggedTlb()) we would need to explicitly flush in this case (add an * fExplicitFlush = true here and change the pHostCpu->fFlushAsidBeforeUse check below to * include fExplicitFlush's too) - an obscure corner case. */ pVCpu->hm.s.fForceTLBFlush = true; STAM_COUNTER_INC(&pVCpu->hm.s.StatFlushTlb); } PVM pVM = pVCpu->CTX_SUFF(pVM); pVCpu->hm.s.idLastCpu = pHostCpu->idCpu; if (pVCpu->hm.s.fForceTLBFlush) { ++pHostCpu->uCurrentAsid; if (pHostCpu->uCurrentAsid >= pVM->hm.s.uMaxAsid) { pHostCpu->uCurrentAsid = 1; /* Wraparound to 1; host uses 0 */ pHostCpu->cTlbFlushes++; /* All VCPUs that run on this host CPU must use a new VPID. */ pHostCpu->fFlushAsidBeforeUse = true; /* All VCPUs that run on this host CPU must flush their new VPID before use. */ } pVCpu->hm.s.fForceTLBFlush = false; pVCpu->hm.s.cTlbFlushes = pHostCpu->cTlbFlushes; pVCpu->hm.s.uCurrentAsid = pHostCpu->uCurrentAsid; if (pHostCpu->fFlushAsidBeforeUse) { if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_SINGLE_CONTEXT) hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_SINGLE_CONTEXT, 0 /* GCPtr */); else if (pVM->hm.s.vmx.enmTlbFlushVpid == VMXTLBFLUSHVPID_ALL_CONTEXTS) { hmR0VmxFlushVpid(pVCpu, VMXTLBFLUSHVPID_ALL_CONTEXTS, 0 /* GCPtr */); pHostCpu->fFlushAsidBeforeUse = false; } else { /* hmR0VmxSetupTaggedTlb() ensures we never get here. Paranoia. */ AssertMsgFailed(("Unsupported VPID-flush context type.\n")); } } } AssertMsg(pVCpu->hm.s.cTlbFlushes == pHostCpu->cTlbFlushes, ("Flush count mismatch for cpu %d (%u vs %u)\n", pHostCpu->idCpu, pVCpu->hm.s.cTlbFlushes, pHostCpu->cTlbFlushes)); AssertMsg(pHostCpu->uCurrentAsid >= 1 && pHostCpu->uCurrentAsid < pVM->hm.s.uMaxAsid, ("Cpu[%u] uCurrentAsid=%u cTlbFlushes=%u pVCpu->idLastCpu=%u pVCpu->cTlbFlushes=%u\n", pHostCpu->idCpu, pHostCpu->uCurrentAsid, pHostCpu->cTlbFlushes, pVCpu->hm.s.idLastCpu, pVCpu->hm.s.cTlbFlushes)); AssertMsg(pVCpu->hm.s.uCurrentAsid >= 1 && pVCpu->hm.s.uCurrentAsid < pVM->hm.s.uMaxAsid, ("Cpu[%u] pVCpu->uCurrentAsid=%u\n", pHostCpu->idCpu, pVCpu->hm.s.uCurrentAsid)); int rc = VMXWriteVmcs32(VMX_VMCS16_VPID, pVCpu->hm.s.uCurrentAsid); AssertRC(rc); } /** * Flushes the guest TLB entry based on CPU capabilities. * * @param pHostCpu The HM physical-CPU structure. * @param pVCpu The cross context virtual CPU structure. * * @remarks Called with interrupts disabled. */ DECLINLINE(void) hmR0VmxFlushTaggedTlb(PHMPHYSCPU pHostCpu, PVMCPU pVCpu) { #ifdef HMVMX_ALWAYS_FLUSH_TLB VMCPU_FF_SET(pVCpu, VMCPU_FF_TLB_FLUSH); #endif PVM pVM = pVCpu->CTX_SUFF(pVM); switch (pVM->hm.s.vmx.enmTlbFlushType) { case VMXTLBFLUSHTYPE_EPT_VPID: hmR0VmxFlushTaggedTlbBoth(pHostCpu, pVCpu); break; case VMXTLBFLUSHTYPE_EPT: hmR0VmxFlushTaggedTlbEpt(pHostCpu, pVCpu); break; case VMXTLBFLUSHTYPE_VPID: hmR0VmxFlushTaggedTlbVpid(pHostCpu, pVCpu); break; case VMXTLBFLUSHTYPE_NONE: hmR0VmxFlushTaggedTlbNone(pHostCpu, pVCpu); break; default: AssertMsgFailed(("Invalid flush-tag function identifier\n")); break; } /* Don't assert that VMCPU_FF_TLB_FLUSH should no longer be pending. It can be set by other EMTs. */ } /** * Sets up the appropriate tagged TLB-flush level and handler for flushing guest * TLB entries from the host TLB before VM-entry. * * @returns VBox status code. * @param pVM The cross context VM structure. */ static int hmR0VmxSetupTaggedTlb(PVM pVM) { /* * Determine optimal flush type for Nested Paging. * We cannot ignore EPT if no suitable flush-types is supported by the CPU as we've already setup unrestricted * guest execution (see hmR3InitFinalizeR0()). */ if (pVM->hm.s.fNestedPaging) { if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT) { if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_SINGLE_CONTEXT) pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_SINGLE_CONTEXT; else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVEPT_ALL_CONTEXTS) pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_ALL_CONTEXTS; else { /* Shouldn't happen. EPT is supported but no suitable flush-types supported. */ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED; pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_FLUSH_TYPE_UNSUPPORTED; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Make sure the write-back cacheable memory type for EPT is supported. */ if (RT_UNLIKELY(!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EMT_WB))) { pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED; pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_MEM_TYPE_NOT_WB; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* EPT requires a page-walk length of 4. */ if (RT_UNLIKELY(!(pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_PAGE_WALK_LENGTH_4))) { pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED; pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_PAGE_WALK_LENGTH_UNSUPPORTED; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } } else { /* Shouldn't happen. EPT is supported but INVEPT instruction is not supported. */ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NOT_SUPPORTED; pVM->aCpus[0].hm.s.u32HMError = VMX_UFC_EPT_INVEPT_UNAVAILABLE; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } } /* * Determine optimal flush type for VPID. */ if (pVM->hm.s.vmx.fVpid) { if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID) { if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT) pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_SINGLE_CONTEXT; else if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_ALL_CONTEXTS) pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_ALL_CONTEXTS; else { /* Neither SINGLE nor ALL-context flush types for VPID is supported by the CPU. Ignore VPID capability. */ if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_INDIV_ADDR) LogRelFunc(("Only INDIV_ADDR supported. Ignoring VPID.\n")); if (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_INVVPID_SINGLE_CONTEXT_RETAIN_GLOBALS) LogRelFunc(("Only SINGLE_CONTEXT_RETAIN_GLOBALS supported. Ignoring VPID.\n")); pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED; pVM->hm.s.vmx.fVpid = false; } } else { /* Shouldn't happen. VPID is supported but INVVPID is not supported by the CPU. Ignore VPID capability. */ Log4Func(("VPID supported without INVEPT support. Ignoring VPID.\n")); pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NOT_SUPPORTED; pVM->hm.s.vmx.fVpid = false; } } /* * Setup the handler for flushing tagged-TLBs. */ if (pVM->hm.s.fNestedPaging && pVM->hm.s.vmx.fVpid) pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT_VPID; else if (pVM->hm.s.fNestedPaging) pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_EPT; else if (pVM->hm.s.vmx.fVpid) pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_VPID; else pVM->hm.s.vmx.enmTlbFlushType = VMXTLBFLUSHTYPE_NONE; return VINF_SUCCESS; } /** * Sets up pin-based VM-execution controls in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks We don't really care about optimizing vmwrites here as it's done only * once per VM and hence we don't care about VMCS-field cache comparisons. */ static int hmR0VmxSetupPinCtls(PVMCPU pVCpu) { PVM pVM = pVCpu->CTX_SUFF(pVM); uint32_t fVal = pVM->hm.s.vmx.Msrs.PinCtls.n.allowed0; /* Bits set here must always be set. */ uint32_t const fZap = pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1; /* Bits cleared here must always be cleared. */ fVal |= VMX_PIN_CTLS_EXT_INT_EXIT /* External interrupts cause a VM-exit. */ | VMX_PIN_CTLS_NMI_EXIT; /* Non-maskable interrupts (NMIs) cause a VM-exit. */ if (pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_VIRT_NMI) fVal |= VMX_PIN_CTLS_VIRT_NMI; /* Use virtual NMIs and virtual-NMI blocking features. */ /* Enable the VMX preemption timer. */ if (pVM->hm.s.vmx.fUsePreemptTimer) { Assert(pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_PREEMPT_TIMER); fVal |= VMX_PIN_CTLS_PREEMPT_TIMER; } #if 0 /* Enable posted-interrupt processing. */ if (pVM->hm.s.fPostedIntrs) { Assert(pVM->hm.s.vmx.Msrs.PinCtls.n.allowed1 & VMX_PIN_CTLS_POSTED_INT); Assert(pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_ACK_EXT_INT); fVal |= VMX_PIN_CTL_POSTED_INT; } #endif if ((fVal & fZap) != fVal) { LogRelFunc(("Invalid pin-based VM-execution controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n", pVM->hm.s.vmx.Msrs.PinCtls.n.allowed0, fVal, fZap)); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PIN_EXEC; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Commit it to the VMCS and update our cache. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, fVal); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.u32PinCtls = fVal; return VINF_SUCCESS; } /** * Sets up secondary processor-based VM-execution controls in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks We don't really care about optimizing vmwrites here as it's done only * once per VM and hence we don't care about VMCS-field cache comparisons. */ static int hmR0VmxSetupProcCtls2(PVMCPU pVCpu) { PVM pVM = pVCpu->CTX_SUFF(pVM); uint32_t fVal = pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed0; /* Bits set here must be set in the VMCS. */ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ /* WBINVD causes a VM-exit. */ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_WBINVD_EXIT) fVal |= VMX_PROC_CTLS2_WBINVD_EXIT; /* Enable EPT (aka nested-paging). */ if (pVM->hm.s.fNestedPaging) fVal |= VMX_PROC_CTLS2_EPT; /* * Enable the INVPCID instruction if supported by the hardware and we expose * it to the guest. Without this, guest executing INVPCID would cause a #UD. */ if ( (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_INVPCID) && pVM->cpum.ro.GuestFeatures.fInvpcid) fVal |= VMX_PROC_CTLS2_INVPCID; /* Enable VPID. */ if (pVM->hm.s.vmx.fVpid) fVal |= VMX_PROC_CTLS2_VPID; /* Enable Unrestricted guest execution. */ if (pVM->hm.s.vmx.fUnrestrictedGuest) fVal |= VMX_PROC_CTLS2_UNRESTRICTED_GUEST; #if 0 if (pVM->hm.s.fVirtApicRegs) { /* Enable APIC-register virtualization. */ Assert(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_APIC_REG_VIRT); fVal |= VMX_PROC_CTLS2_APIC_REG_VIRT; /* Enable virtual-interrupt delivery. */ Assert(pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_INTR_DELIVERY); fVal |= VMX_PROC_CTLS2_VIRT_INTR_DELIVERY; } #endif /* Virtualize-APIC accesses if supported by the CPU. The virtual-APIC page is where the TPR shadow resides. */ /** @todo VIRT_X2APIC support, it's mutually exclusive with this. So must be * done dynamically. */ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS) { Assert(pVM->hm.s.vmx.HCPhysApicAccess); Assert(!(pVM->hm.s.vmx.HCPhysApicAccess & 0xfff)); /* Bits 11:0 MBZ. */ fVal |= VMX_PROC_CTLS2_VIRT_APIC_ACCESS; /* Virtualize APIC accesses. */ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL, pVM->hm.s.vmx.HCPhysApicAccess); AssertRCReturn(rc, rc); } /* Enable RDTSCP. */ if (pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_RDTSCP) fVal |= VMX_PROC_CTLS2_RDTSCP; /* Enable Pause-Loop exiting. */ if ( pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1 & VMX_PROC_CTLS2_PAUSE_LOOP_EXIT && pVM->hm.s.vmx.cPleGapTicks && pVM->hm.s.vmx.cPleWindowTicks) { fVal |= VMX_PROC_CTLS2_PAUSE_LOOP_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_GAP, pVM->hm.s.vmx.cPleGapTicks); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PLE_WINDOW, pVM->hm.s.vmx.cPleWindowTicks); AssertRCReturn(rc, rc); } if ((fVal & fZap) != fVal) { LogRelFunc(("Invalid secondary processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n", pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed0, fVal, fZap)); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC2; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Commit it to the VMCS and update our cache. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, fVal); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.u32ProcCtls2 = fVal; return VINF_SUCCESS; } /** * Sets up processor-based VM-execution controls in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks We don't really care about optimizing vmwrites here as it's done only * once per VM and hence we don't care about VMCS-field cache comparisons. */ static int hmR0VmxSetupProcCtls(PVMCPU pVCpu) { PVM pVM = pVCpu->CTX_SUFF(pVM); uint32_t fVal = pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0; /* Bits set here must be set in the VMCS. */ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ fVal |= VMX_PROC_CTLS_HLT_EXIT /* HLT causes a VM-exit. */ | VMX_PROC_CTLS_USE_TSC_OFFSETTING /* Use TSC-offsetting. */ | VMX_PROC_CTLS_MOV_DR_EXIT /* MOV DRx causes a VM-exit. */ | VMX_PROC_CTLS_UNCOND_IO_EXIT /* All IO instructions cause a VM-exit. */ | VMX_PROC_CTLS_RDPMC_EXIT /* RDPMC causes a VM-exit. */ | VMX_PROC_CTLS_MONITOR_EXIT /* MONITOR causes a VM-exit. */ | VMX_PROC_CTLS_MWAIT_EXIT; /* MWAIT causes a VM-exit. */ /* We toggle VMX_PROC_CTLS_MOV_DR_EXIT later, check if it's not -always- needed to be set or clear. */ if ( !(pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MOV_DR_EXIT) || (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0 & VMX_PROC_CTLS_MOV_DR_EXIT)) { LogRelFunc(("Unsupported VMX_PROC_CTLS_MOV_DR_EXIT combo!")); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_MOV_DRX_EXIT; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Without Nested Paging, INVLPG (also affects INVPCID) and MOV CR3 instructions should cause VM-exits. */ if (!pVM->hm.s.fNestedPaging) { Assert(!pVM->hm.s.vmx.fUnrestrictedGuest); /* Paranoia. */ fVal |= VMX_PROC_CTLS_INVLPG_EXIT | VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR3_STORE_EXIT; } /* Use TPR shadowing if supported by the CPU. */ if ( PDMHasApic(pVM) && pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_TPR_SHADOW) { Assert(pVCpu->hm.s.vmx.HCPhysVirtApic); Assert(!(pVCpu->hm.s.vmx.HCPhysVirtApic & 0xfff)); /* Bits 11:0 MBZ. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, 0); rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL, pVCpu->hm.s.vmx.HCPhysVirtApic); AssertRCReturn(rc, rc); fVal |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* CR8 reads from the Virtual-APIC page. */ /* CR8 writes cause a VM-exit based on TPR threshold. */ Assert(!(fVal & VMX_PROC_CTLS_CR8_STORE_EXIT)); Assert(!(fVal & VMX_PROC_CTLS_CR8_LOAD_EXIT)); } else { /* * Some 32-bit CPUs do not support CR8 load/store exiting as MOV CR8 is invalid on 32-bit Intel CPUs. * Set this control only for 64-bit guests. */ if (pVM->hm.s.fAllow64BitGuests) { fVal |= VMX_PROC_CTLS_CR8_STORE_EXIT /* CR8 reads cause a VM-exit. */ | VMX_PROC_CTLS_CR8_LOAD_EXIT; /* CR8 writes cause a VM-exit. */ } } /* Use MSR-bitmaps if supported by the CPU. */ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS) { fVal |= VMX_PROC_CTLS_USE_MSR_BITMAPS; Assert(pVCpu->hm.s.vmx.HCPhysMsrBitmap); Assert(!(pVCpu->hm.s.vmx.HCPhysMsrBitmap & 0xfff)); /* Bits 11:0 MBZ. */ int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_MSR_BITMAP_FULL, pVCpu->hm.s.vmx.HCPhysMsrBitmap); AssertRCReturn(rc, rc); /* * The guest can access the following MSRs (read, write) without causing VM-exits; they are loaded/stored * automatically using dedicated fields in the VMCS. */ hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_CS, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_ESP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_SYSENTER_EIP, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); hmR0VmxSetMsrPermission(pVCpu, MSR_K8_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); hmR0VmxSetMsrPermission(pVCpu, MSR_K8_FS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); #if HC_ARCH_BITS == 64 /* * Set passthru permissions for the following MSRs (mandatory for VT-x) required for 64-bit guests. */ if (pVM->hm.s.fAllow64BitGuests) { hmR0VmxSetMsrPermission(pVCpu, MSR_K8_LSTAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); hmR0VmxSetMsrPermission(pVCpu, MSR_K6_STAR, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); hmR0VmxSetMsrPermission(pVCpu, MSR_K8_SF_MASK, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); hmR0VmxSetMsrPermission(pVCpu, MSR_K8_KERNEL_GS_BASE, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); } #endif /* * The IA32_PRED_CMD and IA32_FLUSH_CMD MSRs are write-only and has no state * associated with then. We never need to intercept access (writes need to * be executed without exiting, reads will #GP-fault anyway). */ if (pVM->cpum.ro.GuestFeatures.fIbpb) hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_PRED_CMD, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); if (pVM->cpum.ro.GuestFeatures.fFlushCmd) hmR0VmxSetMsrPermission(pVCpu, MSR_IA32_FLUSH_CMD, VMXMSREXIT_PASSTHRU_READ, VMXMSREXIT_PASSTHRU_WRITE); /* Though MSR_IA32_PERF_GLOBAL_CTRL is saved/restored lazily, we want intercept reads/write to it for now. */ } /* Use the secondary processor-based VM-execution controls if supported by the CPU. */ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_SECONDARY_CTLS) fVal |= VMX_PROC_CTLS_USE_SECONDARY_CTLS; if ((fVal & fZap) != fVal) { LogRelFunc(("Invalid processor-based VM-execution controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n", pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0, fVal, fZap)); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_PROC_EXEC; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Commit it to the VMCS and update our cache. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, fVal); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.u32ProcCtls = fVal; /* Set up secondary processor-based VM-execution controls if the CPU supports it. */ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS) return hmR0VmxSetupProcCtls2(pVCpu); /* Sanity check, should not really happen. */ if (RT_UNLIKELY(pVM->hm.s.vmx.fUnrestrictedGuest)) { LogRelFunc(("Unrestricted Guest enabled when secondary processor-based VM-execution controls not available\n")); pVCpu->hm.s.u32HMError = VMX_UFC_INVALID_UX_COMBO; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Old CPUs without secondary processor-based VM-execution controls would end up here. */ return VINF_SUCCESS; } /** * Sets up miscellaneous (everything other than Pin & Processor-based * VM-execution) control fields in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. */ static int hmR0VmxSetupMiscCtls(PVMCPU pVCpu) { AssertPtr(pVCpu); int rc = VERR_GENERAL_FAILURE; /* All fields are zero-initialized during allocation; but don't remove the commented block below. */ #if 0 /* All CR3 accesses cause VM-exits. Later we optimize CR3 accesses (see hmR0VmxExportGuestCR3AndCR4())*/ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, 0); rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, 0); /* * Set MASK & MATCH to 0. VMX checks if GuestPFErrCode & MASK == MATCH. If equal (in our case it always is) * and if the X86_XCPT_PF bit in the exception bitmap is set it causes a VM-exit, if clear doesn't cause an exit. * We thus use the exception bitmap to control it rather than use both. */ rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, 0); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, 0); /* All IO & IOIO instructions cause VM-exits. */ rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_A_FULL, 0); rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_IO_BITMAP_B_FULL, 0); /* Initialize the MSR-bitmap area. */ rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, 0); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, 0); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, 0); AssertRCReturn(rc, rc); #endif /* Setup MSR auto-load/store area. */ Assert(pVCpu->hm.s.vmx.HCPhysGuestMsr); Assert(!(pVCpu->hm.s.vmx.HCPhysGuestMsr & 0xf)); /* Lower 4 bits MBZ. */ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr); rc |= VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL, pVCpu->hm.s.vmx.HCPhysGuestMsr); AssertRCReturn(rc, rc); Assert(pVCpu->hm.s.vmx.HCPhysHostMsr); Assert(!(pVCpu->hm.s.vmx.HCPhysHostMsr & 0xf)); /* Lower 4 bits MBZ. */ rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL, pVCpu->hm.s.vmx.HCPhysHostMsr); AssertRCReturn(rc, rc); /* Set VMCS link pointer. Reserved for future use, must be -1. Intel spec. 24.4 "Guest-State Area". */ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, UINT64_C(0xffffffffffffffff)); AssertRCReturn(rc, rc); /* All fields are zero-initialized during allocation; but don't remove the commented block below. */ #if 0 /* Setup debug controls */ rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, 0); rc |= VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, 0); AssertRCReturn(rc, rc); #endif return rc; } /** * Sets up the initial exception bitmap in the VMCS based on static conditions. * * We shall setup those exception intercepts that don't change during the * lifetime of the VM here. The rest are done dynamically while loading the * guest state. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. */ static int hmR0VmxInitXcptBitmap(PVMCPU pVCpu) { AssertPtr(pVCpu); uint32_t uXcptBitmap; /* Must always intercept #AC to prevent the guest from hanging the CPU. */ uXcptBitmap = RT_BIT_32(X86_XCPT_AC); /* Because we need to maintain the DR6 state even when intercepting DRx reads and writes, and because recursive #DBs can cause the CPU hang, we must always intercept #DB. */ uXcptBitmap |= RT_BIT_32(X86_XCPT_DB); /* Without Nested Paging, #PF must cause a VM-exit so we can sync our shadow page tables. */ if (!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging) uXcptBitmap |= RT_BIT(X86_XCPT_PF); /* Commit it to the VMCS. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap); AssertRCReturn(rc, rc); /* Update our cache of the exception bitmap. */ pVCpu->hm.s.vmx.u32XcptBitmap = uXcptBitmap; return VINF_SUCCESS; } /** * Does per-VM VT-x initialization. * * @returns VBox status code. * @param pVM The cross context VM structure. */ VMMR0DECL(int) VMXR0InitVM(PVM pVM) { LogFlowFunc(("pVM=%p\n", pVM)); int rc = hmR0VmxStructsAlloc(pVM); if (RT_FAILURE(rc)) { LogRelFunc(("hmR0VmxStructsAlloc failed! rc=%Rrc\n", rc)); return rc; } return VINF_SUCCESS; } /** * Does per-VM VT-x termination. * * @returns VBox status code. * @param pVM The cross context VM structure. */ VMMR0DECL(int) VMXR0TermVM(PVM pVM) { LogFlowFunc(("pVM=%p\n", pVM)); #ifdef VBOX_WITH_CRASHDUMP_MAGIC if (pVM->hm.s.vmx.hMemObjScratch != NIL_RTR0MEMOBJ) ASMMemZero32(pVM->hm.s.vmx.pvScratch, PAGE_SIZE); #endif hmR0VmxStructsFree(pVM); return VINF_SUCCESS; } /** * Sets up the VM for execution under VT-x. * This function is only called once per-VM during initialization. * * @returns VBox status code. * @param pVM The cross context VM structure. */ VMMR0DECL(int) VMXR0SetupVM(PVM pVM) { AssertPtrReturn(pVM, VERR_INVALID_PARAMETER); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); LogFlowFunc(("pVM=%p\n", pVM)); /* * Without UnrestrictedGuest, pRealModeTSS and pNonPagingModeEPTPageTable *must* always be * allocated. We no longer support the highly unlikely case of UnrestrictedGuest without * pRealModeTSS, see hmR3InitFinalizeR0Intel(). */ if ( !pVM->hm.s.vmx.fUnrestrictedGuest && ( !pVM->hm.s.vmx.pNonPagingModeEPTPageTable || !pVM->hm.s.vmx.pRealModeTSS)) { LogRelFunc(("Invalid real-on-v86 state.\n")); return VERR_INTERNAL_ERROR; } /* Initialize these always, see hmR3InitFinalizeR0().*/ pVM->hm.s.vmx.enmTlbFlushEpt = VMXTLBFLUSHEPT_NONE; pVM->hm.s.vmx.enmTlbFlushVpid = VMXTLBFLUSHVPID_NONE; /* Setup the tagged-TLB flush handlers. */ int rc = hmR0VmxSetupTaggedTlb(pVM); if (RT_FAILURE(rc)) { LogRelFunc(("hmR0VmxSetupTaggedTlb failed! rc=%Rrc\n", rc)); return rc; } /* Check if we can use the VMCS controls for swapping the EFER MSR. */ Assert(!pVM->hm.s.vmx.fSupportsVmcsEfer); #if HC_ARCH_BITS == 64 if ( (pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed1 & VMX_ENTRY_CTLS_LOAD_EFER_MSR) && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_LOAD_EFER_MSR) && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_SAVE_EFER_MSR)) { pVM->hm.s.vmx.fSupportsVmcsEfer = true; } #endif /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */ RTCCUINTREG const uHostCR4 = ASMGetCR4(); if (RT_UNLIKELY(!(uHostCR4 & X86_CR4_VMXE))) return VERR_VMX_NOT_IN_VMX_ROOT_MODE; for (VMCPUID i = 0; i < pVM->cCpus; i++) { PVMCPU pVCpu = &pVM->aCpus[i]; AssertPtr(pVCpu); AssertPtr(pVCpu->hm.s.vmx.pvVmcs); /* Log the VCPU pointers, useful for debugging SMP VMs. */ Log4Func(("pVCpu=%p idCpu=%RU32\n", pVCpu, pVCpu->idCpu)); /* Set revision dword at the beginning of the VMCS structure. */ *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_ID); /* Set the VMCS launch state to "clear", see Intel spec. 31.6 "Preparation and launch a virtual machine". */ rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs failed! rc=%Rrc\n", rc), hmR0VmxUpdateErrorRecord(pVCpu, rc), rc); /* Load this VMCS as the current VMCS. */ rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXActivateVmcs failed! rc=%Rrc\n", rc), hmR0VmxUpdateErrorRecord(pVCpu, rc), rc); rc = hmR0VmxSetupPinCtls(pVCpu); AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupPinCtls failed! rc=%Rrc\n", rc), hmR0VmxUpdateErrorRecord(pVCpu, rc), rc); rc = hmR0VmxSetupProcCtls(pVCpu); AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupProcCtls failed! rc=%Rrc\n", rc), hmR0VmxUpdateErrorRecord(pVCpu, rc), rc); rc = hmR0VmxSetupMiscCtls(pVCpu); AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxSetupMiscCtls failed! rc=%Rrc\n", rc), hmR0VmxUpdateErrorRecord(pVCpu, rc), rc); rc = hmR0VmxInitXcptBitmap(pVCpu); AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitXcptBitmap failed! rc=%Rrc\n", rc), hmR0VmxUpdateErrorRecord(pVCpu, rc), rc); #if HC_ARCH_BITS == 32 rc = hmR0VmxInitVmcsReadCache(pVCpu); AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: hmR0VmxInitVmcsReadCache failed! rc=%Rrc\n", rc), hmR0VmxUpdateErrorRecord(pVCpu, rc), rc); #endif /* Sync any CPU internal VMCS data back into our VMCS in memory. */ rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); AssertLogRelMsgRCReturnStmt(rc, ("VMXR0SetupVM: VMXClearVmcs(2) failed! rc=%Rrc\n", rc), hmR0VmxUpdateErrorRecord(pVCpu, rc), rc); pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_CLEAR; hmR0VmxUpdateErrorRecord(pVCpu, rc); } return VINF_SUCCESS; } /** * Saves the host control registers (CR0, CR3, CR4) into the host-state area in * the VMCS. * * @returns VBox status code. */ static int hmR0VmxExportHostControlRegs(void) { RTCCUINTREG uReg = ASMGetCR0(); int rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR0, uReg); AssertRCReturn(rc, rc); uReg = ASMGetCR3(); rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR3, uReg); AssertRCReturn(rc, rc); uReg = ASMGetCR4(); rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_CR4, uReg); AssertRCReturn(rc, rc); return rc; } /** * Saves the host segment registers and GDTR, IDTR, (TR, GS and FS bases) into * the host-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. */ static int hmR0VmxExportHostSegmentRegs(PVMCPU pVCpu) { #if HC_ARCH_BITS == 64 /** * Macro for adjusting host segment selectors to satisfy VT-x's VM-entry * requirements. See hmR0VmxExportHostSegmentRegs(). */ # define VMXLOCAL_ADJUST_HOST_SEG(seg, selValue) \ if ((selValue) & (X86_SEL_RPL | X86_SEL_LDT)) \ { \ bool fValidSelector = true; \ if ((selValue) & X86_SEL_LDT) \ { \ uint32_t uAttr = ASMGetSegAttr((selValue)); \ fValidSelector = RT_BOOL(uAttr != UINT32_MAX && (uAttr & X86_DESC_P)); \ } \ if (fValidSelector) \ { \ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_##seg; \ pVCpu->hm.s.vmx.RestoreHost.uHostSel##seg = (selValue); \ } \ (selValue) = 0; \ } /* * If we've executed guest code using VT-x, the host-state bits will be messed up. We * should -not- save the messed up state without restoring the original host-state, * see @bugref{7240}. * * This apparently can happen (most likely the FPU changes), deal with it rather than * asserting. Was observed booting Solaris 10u10 32-bit guest. */ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED) && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED)) { Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, pVCpu->idCpu)); VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost); } pVCpu->hm.s.vmx.fRestoreHostFlags = 0; #else RT_NOREF(pVCpu); #endif /* * Host DS, ES, FS and GS segment registers. */ #if HC_ARCH_BITS == 64 RTSEL uSelDS = ASMGetDS(); RTSEL uSelES = ASMGetES(); RTSEL uSelFS = ASMGetFS(); RTSEL uSelGS = ASMGetGS(); #else RTSEL uSelDS = 0; RTSEL uSelES = 0; RTSEL uSelFS = 0; RTSEL uSelGS = 0; #endif /* * Host CS and SS segment registers. */ RTSEL uSelCS = ASMGetCS(); RTSEL uSelSS = ASMGetSS(); /* * Host TR segment register. */ RTSEL uSelTR = ASMGetTR(); #if HC_ARCH_BITS == 64 /* * Determine if the host segment registers are suitable for VT-x. Otherwise use zero to * gain VM-entry and restore them before we get preempted. * * See Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers". */ VMXLOCAL_ADJUST_HOST_SEG(DS, uSelDS); VMXLOCAL_ADJUST_HOST_SEG(ES, uSelES); VMXLOCAL_ADJUST_HOST_SEG(FS, uSelFS); VMXLOCAL_ADJUST_HOST_SEG(GS, uSelGS); # undef VMXLOCAL_ADJUST_HOST_SEG #endif /* Verification based on Intel spec. 26.2.3 "Checks on Host Segment and Descriptor-Table Registers" */ Assert(!(uSelCS & X86_SEL_RPL)); Assert(!(uSelCS & X86_SEL_LDT)); Assert(!(uSelSS & X86_SEL_RPL)); Assert(!(uSelSS & X86_SEL_LDT)); Assert(!(uSelDS & X86_SEL_RPL)); Assert(!(uSelDS & X86_SEL_LDT)); Assert(!(uSelES & X86_SEL_RPL)); Assert(!(uSelES & X86_SEL_LDT)); Assert(!(uSelFS & X86_SEL_RPL)); Assert(!(uSelFS & X86_SEL_LDT)); Assert(!(uSelGS & X86_SEL_RPL)); Assert(!(uSelGS & X86_SEL_LDT)); Assert(!(uSelTR & X86_SEL_RPL)); Assert(!(uSelTR & X86_SEL_LDT)); Assert(uSelCS); Assert(uSelTR); /* Assertion is right but we would not have updated u32ExitCtls yet. */ #if 0 if (!(pVCpu->hm.s.vmx.u32ExitCtls & VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE)) Assert(uSelSS != 0); #endif /* Write these host selector fields into the host-state area in the VMCS. */ int rc = VMXWriteVmcs32(VMX_VMCS16_HOST_CS_SEL, uSelCS); rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_SS_SEL, uSelSS); #if HC_ARCH_BITS == 64 rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_DS_SEL, uSelDS); rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_ES_SEL, uSelES); rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_FS_SEL, uSelFS); rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_GS_SEL, uSelGS); #else NOREF(uSelDS); NOREF(uSelES); NOREF(uSelFS); NOREF(uSelGS); #endif rc |= VMXWriteVmcs32(VMX_VMCS16_HOST_TR_SEL, uSelTR); AssertRCReturn(rc, rc); /* * Host GDTR and IDTR. */ RTGDTR Gdtr; RTIDTR Idtr; RT_ZERO(Gdtr); RT_ZERO(Idtr); ASMGetGDTR(&Gdtr); ASMGetIDTR(&Idtr); rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, Gdtr.pGdt); rc |= VMXWriteVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, Idtr.pIdt); AssertRCReturn(rc, rc); #if HC_ARCH_BITS == 64 /* * Determine if we need to manually need to restore the GDTR and IDTR limits as VT-x zaps * them to the maximum limit (0xffff) on every VM-exit. */ if (Gdtr.cbGdt != 0xffff) pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDTR; /* * IDT limit is effectively capped at 0xfff. (See Intel spec. 6.14.1 "64-Bit Mode IDT" and * Intel spec. 6.2 "Exception and Interrupt Vectors".) Therefore if the host has the limit * as 0xfff, VT-x bloating the limit to 0xffff shouldn't cause any different CPU behavior. * However, several hosts either insists on 0xfff being the limit (Windows Patch Guard) or * uses the limit for other purposes (darwin puts the CPU ID in there but botches sidt * alignment in at least one consumer). So, we're only allowing the IDTR.LIMIT to be left * at 0xffff on hosts where we are sure it won't cause trouble. */ # if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) if (Idtr.cbIdt < 0x0fff) # else if (Idtr.cbIdt != 0xffff) # endif { pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_IDTR; AssertCompile(sizeof(Idtr) == sizeof(X86XDTR64)); memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostIdtr, &Idtr, sizeof(X86XDTR64)); } #endif /* * Host TR base. Verify that TR selector doesn't point past the GDT. Masking off the TI * and RPL bits is effectively what the CPU does for "scaling by 8". TI is always 0 and * RPL should be too in most cases. */ AssertMsgReturn((uSelTR | X86_SEL_RPL_LDT) <= Gdtr.cbGdt, ("TR selector exceeds limit. TR=%RTsel cbGdt=%#x\n", uSelTR, Gdtr.cbGdt), VERR_VMX_INVALID_HOST_STATE); PCX86DESCHC pDesc = (PCX86DESCHC)(Gdtr.pGdt + (uSelTR & X86_SEL_MASK)); #if HC_ARCH_BITS == 64 uintptr_t uTRBase = X86DESC64_BASE(pDesc); /* * VT-x unconditionally restores the TR limit to 0x67 and type to 11 (32-bit busy TSS) on * all VM-exits. The type is the same for 64-bit busy TSS[1]. The limit needs manual * restoration if the host has something else. Task switching is not supported in 64-bit * mode[2], but the limit still matters as IOPM is supported in 64-bit mode. Restoring the * limit lazily while returning to ring-3 is safe because IOPM is not applicable in ring-0. * * [1] See Intel spec. 3.5 "System Descriptor Types". * [2] See Intel spec. 7.2.3 "TSS Descriptor in 64-bit mode". */ PVM pVM = pVCpu->CTX_SUFF(pVM); Assert(pDesc->System.u4Type == 11); if ( pDesc->System.u16LimitLow != 0x67 || pDesc->System.u4LimitHigh) { pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_SEL_TR; /* If the host has made GDT read-only, we would need to temporarily toggle CR0.WP before writing the GDT. */ if (pVM->hm.s.fHostKernelFeatures & SUPKERNELFEATURES_GDT_READ_ONLY) pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_READ_ONLY; pVCpu->hm.s.vmx.RestoreHost.uHostSelTR = uSelTR; } /* * Store the GDTR as we need it when restoring the GDT and while restoring the TR. */ if (pVCpu->hm.s.vmx.fRestoreHostFlags & (VMX_RESTORE_HOST_GDTR | VMX_RESTORE_HOST_SEL_TR)) { AssertCompile(sizeof(Gdtr) == sizeof(X86XDTR64)); memcpy(&pVCpu->hm.s.vmx.RestoreHost.HostGdtr, &Gdtr, sizeof(X86XDTR64)); if (pVM->hm.s.fHostKernelFeatures & SUPKERNELFEATURES_GDT_NEED_WRITABLE) { /* The GDT is read-only but the writable GDT is available. */ pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_GDT_NEED_WRITABLE; pVCpu->hm.s.vmx.RestoreHost.HostGdtrRw.cb = Gdtr.cbGdt; rc = SUPR0GetCurrentGdtRw(&pVCpu->hm.s.vmx.RestoreHost.HostGdtrRw.uAddr); AssertRCReturn(rc, rc); } } #else uintptr_t uTRBase = X86DESC_BASE(pDesc); #endif rc = VMXWriteVmcsHstN(VMX_VMCS_HOST_TR_BASE, uTRBase); AssertRCReturn(rc, rc); /* * Host FS base and GS base. */ #if HC_ARCH_BITS == 64 uint64_t u64FSBase = ASMRdMsr(MSR_K8_FS_BASE); uint64_t u64GSBase = ASMRdMsr(MSR_K8_GS_BASE); rc = VMXWriteVmcs64(VMX_VMCS_HOST_FS_BASE, u64FSBase); rc |= VMXWriteVmcs64(VMX_VMCS_HOST_GS_BASE, u64GSBase); AssertRCReturn(rc, rc); /* Store the base if we have to restore FS or GS manually as we need to restore the base as well. */ if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_FS) pVCpu->hm.s.vmx.RestoreHost.uHostFSBase = u64FSBase; if (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_SEL_GS) pVCpu->hm.s.vmx.RestoreHost.uHostGSBase = u64GSBase; #endif return VINF_SUCCESS; } /** * Exports certain host MSRs in the VM-exit MSR-load area and some in the * host-state area of the VMCS. * * Theses MSRs will be automatically restored on the host after every successful * VM-exit. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportHostMsrs(PVMCPU pVCpu) { AssertPtr(pVCpu); AssertPtr(pVCpu->hm.s.vmx.pvHostMsr); /* * Save MSRs that we restore lazily (due to preemption or transition to ring-3) * rather than swapping them on every VM-entry. */ hmR0VmxLazySaveHostMsrs(pVCpu); /* * Host Sysenter MSRs. */ int rc = VMXWriteVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, ASMRdMsr_Low(MSR_IA32_SYSENTER_CS)); #if HC_ARCH_BITS == 32 rc |= VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr_Low(MSR_IA32_SYSENTER_ESP)); rc |= VMXWriteVmcs32(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr_Low(MSR_IA32_SYSENTER_EIP)); #else rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, ASMRdMsr(MSR_IA32_SYSENTER_ESP)); rc |= VMXWriteVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, ASMRdMsr(MSR_IA32_SYSENTER_EIP)); #endif AssertRCReturn(rc, rc); /* * Host EFER MSR. * * If the CPU supports the newer VMCS controls for managing EFER, use it. Otherwise it's * done as part of auto-load/store MSR area in the VMCS, see hmR0VmxExportGuestMsrs(). */ PVM pVM = pVCpu->CTX_SUFF(pVM); if (pVM->hm.s.vmx.fSupportsVmcsEfer) { rc = VMXWriteVmcs64(VMX_VMCS64_HOST_EFER_FULL, pVM->hm.s.vmx.u64HostEfer); AssertRCReturn(rc, rc); } /** @todo IA32_PERF_GLOBALCTRL, IA32_PAT also see hmR0VmxExportGuestExitCtls(). */ return VINF_SUCCESS; } /** * Figures out if we need to swap the EFER MSR which is particularly expensive. * * We check all relevant bits. For now, that's everything besides LMA/LME, as * these two bits are handled by VM-entry, see hmR0VmxExportGuestExitCtls() and * hmR0VMxExportGuestEntryCtls(). * * @returns true if we need to load guest EFER, false otherwise. * @param pVCpu The cross context virtual CPU structure. * * @remarks Requires EFER, CR4. * @remarks No-long-jump zone!!! */ static bool hmR0VmxShouldSwapEferMsr(PVMCPU pVCpu) { #ifdef HMVMX_ALWAYS_SWAP_EFER RT_NOREF(pVCpu); return true; #else PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; #if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) /* For 32-bit hosts running 64-bit guests, we always swap EFER in the world-switcher. Nothing to do here. */ if (CPUMIsGuestInLongModeEx(pCtx)) return false; #endif PVM pVM = pVCpu->CTX_SUFF(pVM); uint64_t const u64HostEfer = pVM->hm.s.vmx.u64HostEfer; uint64_t const u64GuestEfer = pCtx->msrEFER; /* * For 64-bit guests, if EFER.SCE bit differs, we need to swap EFER to ensure that the * guest's SYSCALL behaviour isn't broken, see @bugref{7386}. */ if ( CPUMIsGuestInLongModeEx(pCtx) && (u64GuestEfer & MSR_K6_EFER_SCE) != (u64HostEfer & MSR_K6_EFER_SCE)) { return true; } /* * If the guest uses PAE and EFER.NXE bit differs, we need to swap EFER as it * affects guest paging. 64-bit paging implies CR4.PAE as well. * See Intel spec. 4.5 "IA-32e Paging" and Intel spec. 4.1.1 "Three Paging Modes". */ if ( (pCtx->cr4 & X86_CR4_PAE) && (pCtx->cr0 & X86_CR0_PG) && (u64GuestEfer & MSR_K6_EFER_NXE) != (u64HostEfer & MSR_K6_EFER_NXE)) { /* Assert that host is NX capable. */ Assert(pVCpu->CTX_SUFF(pVM)->cpum.ro.HostFeatures.fNoExecute); return true; } return false; #endif } /** * Exports the guest state with appropriate VM-entry controls in the VMCS. * * These controls can affect things done on VM-exit; e.g. "load debug controls", * see Intel spec. 24.8.1 "VM-entry controls". * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks Requires EFER. * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestEntryCtls(PVMCPU pVCpu) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_ENTRY_CTLS) { PVM pVM = pVCpu->CTX_SUFF(pVM); uint32_t fVal = pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed0; /* Bits set here must be set in the VMCS. */ uint32_t const fZap = pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ /* Load debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x capable CPUs only supports the 1-setting of this bit. */ fVal |= VMX_ENTRY_CTLS_LOAD_DEBUG; /* Set if the guest is in long mode. This will set/clear the EFER.LMA bit on VM-entry. */ if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx)) { fVal |= VMX_ENTRY_CTLS_IA32E_MODE_GUEST; Log4Func(("VMX_ENTRY_CTLS_IA32E_MODE_GUEST\n")); } else Assert(!(fVal & VMX_ENTRY_CTLS_IA32E_MODE_GUEST)); /* If the CPU supports the newer VMCS controls for managing guest/host EFER, use it. */ if ( pVM->hm.s.vmx.fSupportsVmcsEfer && hmR0VmxShouldSwapEferMsr(pVCpu)) { fVal |= VMX_ENTRY_CTLS_LOAD_EFER_MSR; Log4Func(("VMX_ENTRY_CTLS_LOAD_EFER_MSR\n")); } /* * The following should -not- be set (since we're not in SMM mode): * - VMX_ENTRY_CTLS_ENTRY_TO_SMM * - VMX_ENTRY_CTLS_DEACTIVATE_DUAL_MON */ /** @todo VMX_ENTRY_CTLS_LOAD_PERF_MSR, * VMX_ENTRY_CTLS_LOAD_PAT_MSR. */ if ((fVal & fZap) != fVal) { Log4Func(("Invalid VM-entry controls combo! Cpu=%#RX32 fVal=%#RX32 fZap=%#RX32\n", pVM->hm.s.vmx.Msrs.EntryCtls.n.allowed0, fVal, fZap)); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_ENTRY; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Commit it to the VMCS and update our cache. */ if (pVCpu->hm.s.vmx.u32EntryCtls != fVal) { int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY, fVal); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.u32EntryCtls = fVal; } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_ENTRY_CTLS); } return VINF_SUCCESS; } /** * Exports the guest state with appropriate VM-exit controls in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks Requires EFER. */ static int hmR0VmxExportGuestExitCtls(PVMCPU pVCpu) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_EXIT_CTLS) { PVM pVM = pVCpu->CTX_SUFF(pVM); uint32_t fVal = pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed0; /* Bits set here must be set in the VMCS. */ uint32_t const fZap = pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1; /* Bits cleared here must be cleared in the VMCS. */ /* Save debug controls (DR7 & IA32_DEBUGCTL_MSR). The first VT-x CPUs only supported the 1-setting of this bit. */ fVal |= VMX_EXIT_CTLS_SAVE_DEBUG; /* * Set the host long mode active (EFER.LMA) bit (which Intel calls "Host address-space size") if necessary. * On VM-exit, VT-x sets both the host EFER.LMA and EFER.LME bit to this value. See assertion in * hmR0VmxExportHostMsrs(). */ #if HC_ARCH_BITS == 64 fVal |= VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE; Log4Func(("VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE\n")); #else Assert( pVCpu->hm.s.vmx.pfnStartVM == VMXR0SwitcherStartVM64 || pVCpu->hm.s.vmx.pfnStartVM == VMXR0StartVM32); /* Set the host address-space size based on the switcher, not guest state. See @bugref{8432}. */ if (pVCpu->hm.s.vmx.pfnStartVM == VMXR0SwitcherStartVM64) { /* The switcher returns to long mode, EFER is managed by the switcher. */ fVal |= VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE; Log4Func(("VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE\n")); } else Assert(!(fVal & VMX_EXIT_CTLS_HOST_ADDR_SPACE_SIZE)); #endif /* If the newer VMCS fields for managing EFER exists, use it. */ if ( pVM->hm.s.vmx.fSupportsVmcsEfer && hmR0VmxShouldSwapEferMsr(pVCpu)) { fVal |= VMX_EXIT_CTLS_SAVE_EFER_MSR | VMX_EXIT_CTLS_LOAD_EFER_MSR; Log4Func(("VMX_EXIT_CTLS_SAVE_EFER_MSR and VMX_EXIT_CTLS_LOAD_EFER_MSR\n")); } /* Don't acknowledge external interrupts on VM-exit. We want to let the host do that. */ Assert(!(fVal & VMX_EXIT_CTLS_ACK_EXT_INT)); /** @todo VMX_EXIT_CTLS_LOAD_PERF_MSR, * VMX_EXIT_CTLS_SAVE_PAT_MSR, * VMX_EXIT_CTLS_LOAD_PAT_MSR. */ /* Enable saving of the VMX preemption timer value on VM-exit. */ if ( pVM->hm.s.vmx.fUsePreemptTimer && (pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed1 & VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER)) fVal |= VMX_EXIT_CTLS_SAVE_PREEMPT_TIMER; if ((fVal & fZap) != fVal) { LogRelFunc(("Invalid VM-exit controls combo! cpu=%#RX32 fVal=%#RX32 fZap=%R#X32\n", pVM->hm.s.vmx.Msrs.ExitCtls.n.allowed0, fVal, fZap)); pVCpu->hm.s.u32HMError = VMX_UFC_CTRL_EXIT; return VERR_HM_UNSUPPORTED_CPU_FEATURE_COMBO; } /* Commit it to the VMCS and update our cache. */ if (pVCpu->hm.s.vmx.u32ExitCtls != fVal) { int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXIT, fVal); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.u32ExitCtls = fVal; } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_EXIT_CTLS); } return VINF_SUCCESS; } /** * Sets the TPR threshold in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param u32TprThreshold The TPR threshold (task-priority class only). */ DECLINLINE(int) hmR0VmxApicSetTprThreshold(PVMCPU pVCpu, uint32_t u32TprThreshold) { Assert(!(u32TprThreshold & ~VMX_TPR_THRESHOLD_MASK)); /* Bits 31:4 MBZ. */ Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW); RT_NOREF_PV(pVCpu); return VMXWriteVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, u32TprThreshold); } /** * Exports the guest APIC TPR state into the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestApicTpr(PVMCPU pVCpu) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_APIC_TPR) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_APIC_TPR); if ( PDMHasApic(pVCpu->CTX_SUFF(pVM)) && APICIsEnabled(pVCpu)) { /* * Setup TPR shadowing. */ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW) { Assert(pVCpu->hm.s.vmx.HCPhysVirtApic); bool fPendingIntr = false; uint8_t u8Tpr = 0; uint8_t u8PendingIntr = 0; int rc = APICGetTpr(pVCpu, &u8Tpr, &fPendingIntr, &u8PendingIntr); AssertRCReturn(rc, rc); /* * If there are interrupts pending but masked by the TPR, instruct VT-x to * cause a TPR-below-threshold VM-exit when the guest lowers its TPR below the * priority of the pending interrupt so we can deliver the interrupt. If there * are no interrupts pending, set threshold to 0 to not cause any * TPR-below-threshold VM-exits. */ pVCpu->hm.s.vmx.pbVirtApic[XAPIC_OFF_TPR] = u8Tpr; uint32_t u32TprThreshold = 0; if (fPendingIntr) { /* Bits 3:0 of the TPR threshold field correspond to bits 7:4 of the TPR (which is the Task-Priority Class). */ const uint8_t u8PendingPriority = u8PendingIntr >> 4; const uint8_t u8TprPriority = u8Tpr >> 4; if (u8PendingPriority <= u8TprPriority) u32TprThreshold = u8PendingPriority; } rc = hmR0VmxApicSetTprThreshold(pVCpu, u32TprThreshold); AssertRCReturn(rc, rc); } } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_APIC_TPR); } return VINF_SUCCESS; } /** * Gets the guest's interruptibility-state ("interrupt shadow" as AMD calls it). * * @returns Guest's interruptibility-state. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static uint32_t hmR0VmxGetGuestIntrState(PVMCPU pVCpu) { /* * Check if we should inhibit interrupt delivery due to instructions like STI and MOV SS. */ uint32_t fIntrState = 0; if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) { /* If inhibition is active, RIP & RFLAGS should've been accessed (i.e. read previously from the VMCS or from ring-3). */ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; #ifdef VBOX_STRICT uint64_t const fExtrn = ASMAtomicUoReadU64(&pCtx->fExtrn); AssertMsg(!(fExtrn & (CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS)), ("%#x\n", fExtrn)); #endif if (pCtx->rip == EMGetInhibitInterruptsPC(pVCpu)) { if (pCtx->eflags.Bits.u1IF) fIntrState = VMX_VMCS_GUEST_INT_STATE_BLOCK_STI; else fIntrState = VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS; } else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) { /* * We can clear the inhibit force flag as even if we go back to the recompiler * without executing guest code in VT-x, the flag's condition to be cleared is * met and thus the cleared state is correct. */ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); } } /* * NMIs to the guest are blocked after an NMI is injected until the guest executes an IRET. We only * bother with virtual-NMI blocking when we have support for virtual NMIs in the CPU, otherwise * setting this would block host-NMIs and IRET will not clear the blocking. * * See Intel spec. 26.6.1 "Interruptibility state". See @bugref{7445}. */ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS) && (pVCpu->hm.s.vmx.u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)) { fIntrState |= VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI; } return fIntrState; } /** * Exports the exception intercepts required for guest execution in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestXcptIntercepts(PVMCPU pVCpu) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_XCPT_INTERCEPTS) { uint32_t uXcptBitmap = pVCpu->hm.s.vmx.u32XcptBitmap; /* The remaining exception intercepts are handled elsewhere, e.g. in hmR0VmxExportGuestCR0(). */ if (pVCpu->hm.s.fGIMTrapXcptUD) uXcptBitmap |= RT_BIT(X86_XCPT_UD); #ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS else uXcptBitmap &= ~RT_BIT(X86_XCPT_UD); #endif Assert(uXcptBitmap & RT_BIT_32(X86_XCPT_AC)); Assert(uXcptBitmap & RT_BIT_32(X86_XCPT_DB)); if (uXcptBitmap != pVCpu->hm.s.vmx.u32XcptBitmap) { int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.u32XcptBitmap = uXcptBitmap; } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_XCPT_INTERCEPTS); Log4Func(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP=%#RX64\n", uXcptBitmap)); } return VINF_SUCCESS; } /** * Exports the guest's RIP into the guest-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestRip(PVMCPU pVCpu) { int rc = VINF_SUCCESS; if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RIP) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP); rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RIP, pVCpu->cpum.GstCtx.rip); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RIP); Log4Func(("RIP=%#RX64\n", pVCpu->cpum.GstCtx.rip)); } return rc; } /** * Exports the guest's RSP into the guest-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestRsp(PVMCPU pVCpu) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RSP) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RSP); int rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_RSP, pVCpu->cpum.GstCtx.rsp); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RSP); } return VINF_SUCCESS; } /** * Exports the guest's RFLAGS into the guest-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestRflags(PVMCPU pVCpu) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_RFLAGS) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RFLAGS); /* Intel spec. 2.3.1 "System Flags and Fields in IA-32e Mode" claims the upper 32-bits of RFLAGS are reserved (MBZ). Let us assert it as such and use 32-bit VMWRITE. */ Assert(!RT_HI_U32(pVCpu->cpum.GstCtx.rflags.u64)); X86EFLAGS fEFlags = pVCpu->cpum.GstCtx.eflags; Assert(fEFlags.u32 & X86_EFL_RA1_MASK); Assert(!(fEFlags.u32 & ~(X86_EFL_1 | X86_EFL_LIVE_MASK))); /* * If we're emulating real-mode using Virtual 8086 mode, save the real-mode eflags so * we can restore them on VM-exit. Modify the real-mode guest's eflags so that VT-x * can run the real-mode guest code under Virtual 8086 mode. */ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); pVCpu->hm.s.vmx.RealMode.Eflags.u32 = fEFlags.u32; /* Save the original eflags of the real-mode guest. */ fEFlags.Bits.u1VM = 1; /* Set the Virtual 8086 mode bit. */ fEFlags.Bits.u2IOPL = 0; /* Change IOPL to 0, otherwise certain instructions won't fault. */ } int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_RFLAGS, fEFlags.u32); AssertRCReturn(rc, rc); /* * Setup pending debug exceptions if the guest is single-stepping using EFLAGS.TF. * * We must avoid setting any automatic debug exceptions delivery when single-stepping * through the hypervisor debugger using EFLAGS.TF. */ if ( !pVCpu->hm.s.fSingleInstruction && fEFlags.Bits.u1TF) { /** @todo r=ramshankar: Warning! We ASSUME EFLAGS.TF will not cleared on * premature trips to ring-3 esp since IEM does not yet handle it. */ rc = VMXWriteVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, VMX_VMCS_GUEST_PENDING_DEBUG_XCPT_BS); AssertRCReturn(rc, rc); } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_RFLAGS); Log4Func(("EFlags=%#RX32\n", fEFlags.u32)); } return VINF_SUCCESS; } /** * Exports the guest CR0 control register into the guest-state area in the VMCS. * * The guest FPU state is always pre-loaded hence we don't need to bother about * sharing FPU related CR0 bits between the guest and host. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestCR0(PVMCPU pVCpu) { if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR0) { PVM pVM = pVCpu->CTX_SUFF(pVM); HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR0); Assert(!RT_HI_U32(pVCpu->cpum.GstCtx.cr0)); uint32_t const u32ShadowCr0 = pVCpu->cpum.GstCtx.cr0; uint32_t u32GuestCr0 = pVCpu->cpum.GstCtx.cr0; /* * Setup VT-x's view of the guest CR0. * Minimize VM-exits due to CR3 changes when we have NestedPaging. */ uint32_t uProcCtls = pVCpu->hm.s.vmx.u32ProcCtls; if (pVM->hm.s.fNestedPaging) { if (CPUMIsGuestPagingEnabled(pVCpu)) { /* The guest has paging enabled, let it access CR3 without causing a VM-exit if supported. */ uProcCtls &= ~( VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR3_STORE_EXIT); } else { /* The guest doesn't have paging enabled, make CR3 access cause a VM-exit to update our shadow. */ uProcCtls |= VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR3_STORE_EXIT; } /* If we have unrestricted guest execution, we never have to intercept CR3 reads. */ if (pVM->hm.s.vmx.fUnrestrictedGuest) uProcCtls &= ~VMX_PROC_CTLS_CR3_STORE_EXIT; } else { /* Guest CPL 0 writes to its read-only pages should cause a #PF VM-exit. */ u32GuestCr0 |= X86_CR0_WP; } /* * Guest FPU bits. * * Since we pre-load the guest FPU always before VM-entry there is no need to track lazy state * using CR0.TS. * * Intel spec. 23.8 "Restrictions on VMX operation" mentions that CR0.NE bit must always be * set on the first CPUs to support VT-x and no mention of with regards to UX in VM-entry checks. */ u32GuestCr0 |= X86_CR0_NE; /* If CR0.NE isn't set, we need to intercept #MF exceptions and report them to the guest differently. */ bool const fInterceptMF = !(u32ShadowCr0 & X86_CR0_NE); /* * Update exception intercepts. */ uint32_t uXcptBitmap = pVCpu->hm.s.vmx.u32XcptBitmap; if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { Assert(PDMVmmDevHeapIsEnabled(pVM)); Assert(pVM->hm.s.vmx.pRealModeTSS); uXcptBitmap |= HMVMX_REAL_MODE_XCPT_MASK; } else { /* For now, cleared here as mode-switches can happen outside HM/VT-x. See @bugref{7626#c11}. */ uXcptBitmap &= ~HMVMX_REAL_MODE_XCPT_MASK; if (fInterceptMF) uXcptBitmap |= RT_BIT(X86_XCPT_MF); } /* Additional intercepts for debugging, define these yourself explicitly. */ #ifdef HMVMX_ALWAYS_TRAP_ALL_XCPTS uXcptBitmap |= 0 | RT_BIT(X86_XCPT_BP) | RT_BIT(X86_XCPT_DE) | RT_BIT(X86_XCPT_NM) | RT_BIT(X86_XCPT_TS) | RT_BIT(X86_XCPT_UD) | RT_BIT(X86_XCPT_NP) | RT_BIT(X86_XCPT_SS) | RT_BIT(X86_XCPT_GP) | RT_BIT(X86_XCPT_PF) | RT_BIT(X86_XCPT_MF) ; #elif defined(HMVMX_ALWAYS_TRAP_PF) uXcptBitmap |= RT_BIT(X86_XCPT_PF); #endif if (pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv) uXcptBitmap |= RT_BIT(X86_XCPT_GP); Assert(pVM->hm.s.fNestedPaging || (uXcptBitmap & RT_BIT(X86_XCPT_PF))); /* * Set/clear the CR0 specific bits along with their exceptions (PE, PG, CD, NW). */ uint32_t fSetCr0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); uint32_t fZapCr0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); if (pVM->hm.s.vmx.fUnrestrictedGuest) /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). */ fSetCr0 &= ~(X86_CR0_PE | X86_CR0_PG); else Assert((fSetCr0 & (X86_CR0_PE | X86_CR0_PG)) == (X86_CR0_PE | X86_CR0_PG)); u32GuestCr0 |= fSetCr0; u32GuestCr0 &= fZapCr0; u32GuestCr0 &= ~(X86_CR0_CD | X86_CR0_NW); /* Always enable caching. */ /* * CR0 is shared between host and guest along with a CR0 read shadow. Therefore, certain bits must not be changed * by the guest because VT-x ignores saving/restoring them (namely CD, ET, NW) and for certain other bits * we want to be notified immediately of guest CR0 changes (e.g. PG to update our shadow page tables). */ uint32_t u32Cr0Mask = X86_CR0_PE | X86_CR0_NE | (pVM->hm.s.fNestedPaging ? 0 : X86_CR0_WP) | X86_CR0_PG | X86_CR0_ET /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.ET */ | X86_CR0_CD /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.CD */ | X86_CR0_NW; /* Bit ignored on VM-entry and VM-exit. Don't let the guest modify the host CR0.NW */ /** @todo Avoid intercepting CR0.PE with unrestricted guests. Fix PGM * enmGuestMode to be in-sync with the current mode. See @bugref{6398} * and @bugref{6944}. */ #if 0 if (pVM->hm.s.vmx.fUnrestrictedGuest) u32Cr0Mask &= ~X86_CR0_PE; #endif /* * Finally, update VMCS fields with the CR0 values and the exception bitmap. */ int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR0, u32GuestCr0); rc |= VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, u32ShadowCr0); if (u32Cr0Mask != pVCpu->hm.s.vmx.u32Cr0Mask) rc |= VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, u32Cr0Mask); if (uProcCtls != pVCpu->hm.s.vmx.u32ProcCtls) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls); if (uXcptBitmap != pVCpu->hm.s.vmx.u32XcptBitmap) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, uXcptBitmap); AssertRCReturn(rc, rc); /* Update our caches. */ pVCpu->hm.s.vmx.u32Cr0Mask = u32Cr0Mask; pVCpu->hm.s.vmx.u32ProcCtls = uProcCtls; pVCpu->hm.s.vmx.u32XcptBitmap = uXcptBitmap; ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR0); Log4Func(("u32Cr0Mask=%#RX32 u32ShadowCr0=%#RX32 u32GuestCr0=%#RX32 (fSetCr0=%#RX32 fZapCr0=%#RX32\n", u32Cr0Mask, u32ShadowCr0, u32GuestCr0, fSetCr0, fZapCr0)); } return VINF_SUCCESS; } /** * Exports the guest control registers (CR3, CR4) into the guest-state area * in the VMCS. * * @returns VBox strict status code. * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code * without unrestricted guest access and the VMMDev is not presently * mapped (e.g. EFI32). * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static VBOXSTRICTRC hmR0VmxExportGuestCR3AndCR4(PVMCPU pVCpu) { int rc = VINF_SUCCESS; PVM pVM = pVCpu->CTX_SUFF(pVM); /* * Guest CR2. * It's always loaded in the assembler code. Nothing to do here. */ /* * Guest CR3. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR3) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR3); RTGCPHYS GCPhysGuestCR3 = NIL_RTGCPHYS; if (pVM->hm.s.fNestedPaging) { pVCpu->hm.s.vmx.HCPhysEPTP = PGMGetHyperCR3(pVCpu); /* Validate. See Intel spec. 28.2.2 "EPT Translation Mechanism" and 24.6.11 "Extended-Page-Table Pointer (EPTP)" */ Assert(pVCpu->hm.s.vmx.HCPhysEPTP); Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & UINT64_C(0xfff0000000000000))); Assert(!(pVCpu->hm.s.vmx.HCPhysEPTP & 0xfff)); /* VMX_EPT_MEMTYPE_WB support is already checked in hmR0VmxSetupTaggedTlb(). */ pVCpu->hm.s.vmx.HCPhysEPTP |= VMX_EPT_MEMTYPE_WB | (VMX_EPT_PAGE_WALK_LENGTH_DEFAULT << VMX_EPT_PAGE_WALK_LENGTH_SHIFT); /* Validate. See Intel spec. 26.2.1 "Checks on VMX Controls" */ AssertMsg( ((pVCpu->hm.s.vmx.HCPhysEPTP >> 3) & 0x07) == 3 /* Bits 3:5 (EPT page walk length - 1) must be 3. */ && ((pVCpu->hm.s.vmx.HCPhysEPTP >> 7) & 0x1f) == 0, /* Bits 7:11 MBZ. */ ("EPTP %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP)); AssertMsg( !((pVCpu->hm.s.vmx.HCPhysEPTP >> 6) & 0x01) /* Bit 6 (EPT accessed & dirty bit). */ || (pVM->hm.s.vmx.Msrs.u64EptVpidCaps & MSR_IA32_VMX_EPT_VPID_CAP_EPT_ACCESS_DIRTY), ("EPTP accessed/dirty bit not supported by CPU but set %#RX64\n", pVCpu->hm.s.vmx.HCPhysEPTP)); rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, pVCpu->hm.s.vmx.HCPhysEPTP); AssertRCReturn(rc, rc); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if ( pVM->hm.s.vmx.fUnrestrictedGuest || CPUMIsGuestPagingEnabledEx(pCtx)) { /* If the guest is in PAE mode, pass the PDPEs to VT-x using the VMCS fields. */ if (CPUMIsGuestInPAEModeEx(pCtx)) { rc = PGMGstGetPaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); AssertRCReturn(rc, rc); rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, pVCpu->hm.s.aPdpes[0].u); rc |= VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, pVCpu->hm.s.aPdpes[1].u); rc |= VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, pVCpu->hm.s.aPdpes[2].u); rc |= VMXWriteVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, pVCpu->hm.s.aPdpes[3].u); AssertRCReturn(rc, rc); } /* * The guest's view of its CR3 is unblemished with Nested Paging when the * guest is using paging or we have unrestricted guest execution to handle * the guest when it's not using paging. */ GCPhysGuestCR3 = pCtx->cr3; } else { /* * The guest is not using paging, but the CPU (VT-x) has to. While the guest * thinks it accesses physical memory directly, we use our identity-mapped * page table to map guest-linear to guest-physical addresses. EPT takes care * of translating it to host-physical addresses. */ RTGCPHYS GCPhys; Assert(pVM->hm.s.vmx.pNonPagingModeEPTPageTable); /* We obtain it here every time as the guest could have relocated this PCI region. */ rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pNonPagingModeEPTPageTable, &GCPhys); if (RT_SUCCESS(rc)) { /* likely */ } else if (rc == VERR_PDM_DEV_HEAP_R3_TO_GCPHYS) { Log4Func(("VERR_PDM_DEV_HEAP_R3_TO_GCPHYS -> VINF_EM_RESCHEDULE_REM\n")); return VINF_EM_RESCHEDULE_REM; /* We cannot execute now, switch to REM/IEM till the guest maps in VMMDev. */ } else AssertMsgFailedReturn(("%Rrc\n", rc), rc); GCPhysGuestCR3 = GCPhys; } Log4Func(("u32GuestCr3=%#RGp (GstN)\n", GCPhysGuestCR3)); rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_CR3, GCPhysGuestCR3); AssertRCReturn(rc, rc); } else { /* Non-nested paging case, just use the hypervisor's CR3. */ RTHCPHYS HCPhysGuestCR3 = PGMGetHyperCR3(pVCpu); Log4Func(("u32GuestCr3=%#RHv (HstN)\n", HCPhysGuestCR3)); rc = VMXWriteVmcsHstN(VMX_VMCS_GUEST_CR3, HCPhysGuestCR3); AssertRCReturn(rc, rc); } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR3); } /* * Guest CR4. * ASSUMES this is done everytime we get in from ring-3! (XCR0) */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CR4) { PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR4); Assert(!RT_HI_U32(pCtx->cr4)); uint32_t u32GuestCr4 = pCtx->cr4; uint32_t const u32ShadowCr4 = pCtx->cr4; /* * Setup VT-x's view of the guest CR4. * * If we're emulating real-mode using virtual-8086 mode, we want to redirect software * interrupts to the 8086 program interrupt handler. Clear the VME bit (the interrupt * redirection bitmap is already all 0, see hmR3InitFinalizeR0()) * * See Intel spec. 20.2 "Software Interrupt Handling Methods While in Virtual-8086 Mode". */ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { Assert(pVM->hm.s.vmx.pRealModeTSS); Assert(PDMVmmDevHeapIsEnabled(pVM)); u32GuestCr4 &= ~X86_CR4_VME; } if (pVM->hm.s.fNestedPaging) { if ( !CPUMIsGuestPagingEnabledEx(pCtx) && !pVM->hm.s.vmx.fUnrestrictedGuest) { /* We use 4 MB pages in our identity mapping page table when the guest doesn't have paging. */ u32GuestCr4 |= X86_CR4_PSE; /* Our identity mapping is a 32-bit page directory. */ u32GuestCr4 &= ~X86_CR4_PAE; } /* else use guest CR4.*/ } else { /* * The shadow paging modes and guest paging modes are different, the shadow is in accordance with the host * paging mode and thus we need to adjust VT-x's view of CR4 depending on our shadow page tables. */ switch (pVCpu->hm.s.enmShadowMode) { case PGMMODE_REAL: /* Real-mode. */ case PGMMODE_PROTECTED: /* Protected mode without paging. */ case PGMMODE_32_BIT: /* 32-bit paging. */ { u32GuestCr4 &= ~X86_CR4_PAE; break; } case PGMMODE_PAE: /* PAE paging. */ case PGMMODE_PAE_NX: /* PAE paging with NX. */ { u32GuestCr4 |= X86_CR4_PAE; break; } case PGMMODE_AMD64: /* 64-bit AMD paging (long mode). */ case PGMMODE_AMD64_NX: /* 64-bit AMD paging (long mode) with NX enabled. */ #ifdef VBOX_ENABLE_64_BITS_GUESTS break; #endif default: AssertFailed(); return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; } } /* We need to set and clear the CR4 specific bits here (mainly the X86_CR4_VMXE bit). */ uint64_t const fSetCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); uint64_t const fZapCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); u32GuestCr4 |= fSetCr4; u32GuestCr4 &= fZapCr4; /* Setup CR4 mask. CR4 flags owned by the host, if the guest attempts to change them, that would cause a VM-exit. */ uint32_t u32Cr4Mask = X86_CR4_VME | X86_CR4_PAE | X86_CR4_PGE | X86_CR4_PSE | X86_CR4_VMXE; if (pVM->cpum.ro.HostFeatures.fXSaveRstor) u32Cr4Mask |= X86_CR4_OSXSAVE; if (pVM->cpum.ro.GuestFeatures.fPcid) u32Cr4Mask |= X86_CR4_PCIDE; /* Write VT-x's view of the guest CR4, the CR4 modify mask and the read-only CR4 shadow into the VMCS and update our cache. */ rc = VMXWriteVmcs32(VMX_VMCS_GUEST_CR4, u32GuestCr4); rc |= VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, u32ShadowCr4); if (pVCpu->hm.s.vmx.u32Cr4Mask != u32Cr4Mask) rc |= VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, u32Cr4Mask); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.u32Cr4Mask = u32Cr4Mask; /* Whether to save/load/restore XCR0 during world switch depends on CR4.OSXSAVE and host+guest XCR0. */ pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0(); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CR4); Log4Func(("u32GuestCr4=%#RX32 u32ShadowCr4=%#RX32 (fSetCr4=%#RX32 fZapCr4=%#RX32)\n", u32GuestCr4, u32ShadowCr4, fSetCr4, fZapCr4)); } return rc; } /** * Exports the guest debug registers into the guest-state area in the VMCS. * The guest debug bits are partially shared with the host (e.g. DR6, DR0-3). * * This also sets up whether \#DB and MOV DRx accesses cause VM-exits. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportSharedDebugState(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); #ifdef VBOX_STRICT /* Validate. Intel spec. 26.3.1.1 "Checks on Guest Controls Registers, Debug Registers, MSRs" */ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG) { /* Validate. Intel spec. 17.2 "Debug Registers", recompiler paranoia checks. */ Assert((pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_MBZ_MASK | X86_DR7_RAZ_MASK)) == 0); Assert((pVCpu->cpum.GstCtx.dr[7] & X86_DR7_RA1_MASK) == X86_DR7_RA1_MASK); } #endif bool fSteppingDB = false; bool fInterceptMovDRx = false; uint32_t uProcCtls = pVCpu->hm.s.vmx.u32ProcCtls; if (pVCpu->hm.s.fSingleInstruction) { /* If the CPU supports the monitor trap flag, use it for single stepping in DBGF and avoid intercepting #DB. */ PVM pVM = pVCpu->CTX_SUFF(pVM); if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_MONITOR_TRAP_FLAG) { uProcCtls |= VMX_PROC_CTLS_MONITOR_TRAP_FLAG; Assert(fSteppingDB == false); } else { pVCpu->cpum.GstCtx.eflags.u32 |= X86_EFL_TF; pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_RFLAGS; pVCpu->hm.s.fClearTrapFlag = true; fSteppingDB = true; } } uint32_t u32GuestDr7; if ( fSteppingDB || (CPUMGetHyperDR7(pVCpu) & X86_DR7_ENABLED_MASK)) { /* * Use the combined guest and host DRx values found in the hypervisor register set * because the debugger has breakpoints active or someone is single stepping on the * host side without a monitor trap flag. * * Note! DBGF expects a clean DR6 state before executing guest code. */ #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) if ( CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx) && !CPUMIsHyperDebugStateActivePending(pVCpu)) { CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */); Assert(CPUMIsHyperDebugStateActivePending(pVCpu)); Assert(!CPUMIsGuestDebugStateActivePending(pVCpu)); } else #endif if (!CPUMIsHyperDebugStateActive(pVCpu)) { CPUMR0LoadHyperDebugState(pVCpu, true /* include DR6 */); Assert(CPUMIsHyperDebugStateActive(pVCpu)); Assert(!CPUMIsGuestDebugStateActive(pVCpu)); } /* Update DR7 with the hypervisor value (other DRx registers are handled by CPUM one way or another). */ u32GuestDr7 = (uint32_t)CPUMGetHyperDR7(pVCpu); pVCpu->hm.s.fUsingHyperDR7 = true; fInterceptMovDRx = true; } else { /* * If the guest has enabled debug registers, we need to load them prior to * executing guest code so they'll trigger at the right time. */ if (pVCpu->cpum.GstCtx.dr[7] & (X86_DR7_ENABLED_MASK | X86_DR7_GD)) { #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) if ( CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx) && !CPUMIsGuestDebugStateActivePending(pVCpu)) { CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); Assert(CPUMIsGuestDebugStateActivePending(pVCpu)); Assert(!CPUMIsHyperDebugStateActivePending(pVCpu)); STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); } else #endif if (!CPUMIsGuestDebugStateActive(pVCpu)) { CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); Assert(CPUMIsGuestDebugStateActive(pVCpu)); Assert(!CPUMIsHyperDebugStateActive(pVCpu)); STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxArmed); } Assert(!fInterceptMovDRx); } /* * If no debugging enabled, we'll lazy load DR0-3. Unlike on AMD-V, we * must intercept #DB in order to maintain a correct DR6 guest value, and * because we need to intercept it to prevent nested #DBs from hanging the * CPU, we end up always having to intercept it. See hmR0VmxInitXcptBitmap. */ #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) else if ( !CPUMIsGuestDebugStateActivePending(pVCpu) && !CPUMIsGuestDebugStateActive(pVCpu)) #else else if (!CPUMIsGuestDebugStateActive(pVCpu)) #endif { fInterceptMovDRx = true; } /* Update DR7 with the actual guest value. */ u32GuestDr7 = pVCpu->cpum.GstCtx.dr[7]; pVCpu->hm.s.fUsingHyperDR7 = false; } if (fInterceptMovDRx) uProcCtls |= VMX_PROC_CTLS_MOV_DR_EXIT; else uProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT; /* * Update the processor-based VM-execution controls with the MOV-DRx intercepts and the * monitor-trap flag and update our cache. */ if (uProcCtls != pVCpu->hm.s.vmx.u32ProcCtls) { int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls); AssertRCReturn(rc2, rc2); pVCpu->hm.s.vmx.u32ProcCtls = uProcCtls; } /* * Update guest DR7. */ int rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, u32GuestDr7); AssertRCReturn(rc, rc); /* * If we have forced EFLAGS.TF to be set because we're single-stepping in the hypervisor debugger, * we need to clear interrupt inhibition if any as otherwise it causes a VM-entry failure. * * See Intel spec. 26.3.1.5 "Checks on Guest Non-Register State". */ if (fSteppingDB) { Assert(pVCpu->hm.s.fSingleInstruction); Assert(pVCpu->cpum.GstCtx.eflags.Bits.u1TF); uint32_t fIntrState = 0; rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState); AssertRCReturn(rc, rc); if (fIntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS)) { fIntrState &= ~(VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS); rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState); AssertRCReturn(rc, rc); } } return VINF_SUCCESS; } #ifdef VBOX_STRICT /** * Strict function to validate segment registers. * * @param pVCpu The cross context virtual CPU structure. * * @remarks Will import guest CR0 on strict builds during validation of * segments. */ static void hmR0VmxValidateSegmentRegs(PVMCPU pVCpu) { /* * Validate segment registers. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". * * The reason we check for attribute value 0 in this function and not just the unusable bit is * because hmR0VmxExportGuestSegmentReg() only updates the VMCS' copy of the value with the unusable bit * and doesn't change the guest-context value. */ PVM pVM = pVCpu->CTX_SUFF(pVM); PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_CR0); if ( !pVM->hm.s.vmx.fUnrestrictedGuest && ( !CPUMIsGuestInRealModeEx(pCtx) && !CPUMIsGuestInV86ModeEx(pCtx))) { /* Protected mode checks */ /* CS */ Assert(pCtx->cs.Attr.n.u1Present); Assert(!(pCtx->cs.Attr.u & 0xf00)); Assert(!(pCtx->cs.Attr.u & 0xfffe0000)); Assert( (pCtx->cs.u32Limit & 0xfff) == 0xfff || !(pCtx->cs.Attr.n.u1Granularity)); Assert( !(pCtx->cs.u32Limit & 0xfff00000) || (pCtx->cs.Attr.n.u1Granularity)); /* CS cannot be loaded with NULL in protected mode. */ Assert(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE)); /** @todo is this really true even for 64-bit CS? */ if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11) Assert(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl); else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15) Assert(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl); else AssertMsgFailed(("Invalid CS Type %#x\n", pCtx->cs.Attr.n.u2Dpl)); /* SS */ Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL)); Assert(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL)); if ( !(pCtx->cr0 & X86_CR0_PE) || pCtx->cs.Attr.n.u4Type == 3) { Assert(!pCtx->ss.Attr.n.u2Dpl); } if (pCtx->ss.Attr.u && !(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE)) { Assert((pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL)); Assert(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7); Assert(pCtx->ss.Attr.n.u1Present); Assert(!(pCtx->ss.Attr.u & 0xf00)); Assert(!(pCtx->ss.Attr.u & 0xfffe0000)); Assert( (pCtx->ss.u32Limit & 0xfff) == 0xfff || !(pCtx->ss.Attr.n.u1Granularity)); Assert( !(pCtx->ss.u32Limit & 0xfff00000) || (pCtx->ss.Attr.n.u1Granularity)); } /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxExportGuestSegmentReg(). */ if (pCtx->ds.Attr.u && !(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE)) { Assert(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); Assert(pCtx->ds.Attr.n.u1Present); Assert(pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL)); Assert(!(pCtx->ds.Attr.u & 0xf00)); Assert(!(pCtx->ds.Attr.u & 0xfffe0000)); Assert( (pCtx->ds.u32Limit & 0xfff) == 0xfff || !(pCtx->ds.Attr.n.u1Granularity)); Assert( !(pCtx->ds.u32Limit & 0xfff00000) || (pCtx->ds.Attr.n.u1Granularity)); Assert( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE) || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ)); } if (pCtx->es.Attr.u && !(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE)) { Assert(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); Assert(pCtx->es.Attr.n.u1Present); Assert(pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL)); Assert(!(pCtx->es.Attr.u & 0xf00)); Assert(!(pCtx->es.Attr.u & 0xfffe0000)); Assert( (pCtx->es.u32Limit & 0xfff) == 0xfff || !(pCtx->es.Attr.n.u1Granularity)); Assert( !(pCtx->es.u32Limit & 0xfff00000) || (pCtx->es.Attr.n.u1Granularity)); Assert( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE) || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ)); } if (pCtx->fs.Attr.u && !(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE)) { Assert(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); Assert(pCtx->fs.Attr.n.u1Present); Assert(pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL)); Assert(!(pCtx->fs.Attr.u & 0xf00)); Assert(!(pCtx->fs.Attr.u & 0xfffe0000)); Assert( (pCtx->fs.u32Limit & 0xfff) == 0xfff || !(pCtx->fs.Attr.n.u1Granularity)); Assert( !(pCtx->fs.u32Limit & 0xfff00000) || (pCtx->fs.Attr.n.u1Granularity)); Assert( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE) || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ)); } if (pCtx->gs.Attr.u && !(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE)) { Assert(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED); Assert(pCtx->gs.Attr.n.u1Present); Assert(pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL)); Assert(!(pCtx->gs.Attr.u & 0xf00)); Assert(!(pCtx->gs.Attr.u & 0xfffe0000)); Assert( (pCtx->gs.u32Limit & 0xfff) == 0xfff || !(pCtx->gs.Attr.n.u1Granularity)); Assert( !(pCtx->gs.u32Limit & 0xfff00000) || (pCtx->gs.Attr.n.u1Granularity)); Assert( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE) || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ)); } /* 64-bit capable CPUs. */ # if HC_ARCH_BITS == 64 Assert(!RT_HI_U32(pCtx->cs.u64Base)); Assert(!pCtx->ss.Attr.u || !RT_HI_U32(pCtx->ss.u64Base)); Assert(!pCtx->ds.Attr.u || !RT_HI_U32(pCtx->ds.u64Base)); Assert(!pCtx->es.Attr.u || !RT_HI_U32(pCtx->es.u64Base)); # endif } else if ( CPUMIsGuestInV86ModeEx(pCtx) || ( CPUMIsGuestInRealModeEx(pCtx) && !pVM->hm.s.vmx.fUnrestrictedGuest)) { /* Real and v86 mode checks. */ /* hmR0VmxExportGuestSegmentReg() writes the modified in VMCS. We want what we're feeding to VT-x. */ uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr; if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3; u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3; } else { u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u; } /* CS */ AssertMsg((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), ("CS base %#x %#x\n", pCtx->cs.u64Base, pCtx->cs.Sel)); Assert(pCtx->cs.u32Limit == 0xffff); Assert(u32CSAttr == 0xf3); /* SS */ Assert(pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4); Assert(pCtx->ss.u32Limit == 0xffff); Assert(u32SSAttr == 0xf3); /* DS */ Assert(pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4); Assert(pCtx->ds.u32Limit == 0xffff); Assert(u32DSAttr == 0xf3); /* ES */ Assert(pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4); Assert(pCtx->es.u32Limit == 0xffff); Assert(u32ESAttr == 0xf3); /* FS */ Assert(pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4); Assert(pCtx->fs.u32Limit == 0xffff); Assert(u32FSAttr == 0xf3); /* GS */ Assert(pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4); Assert(pCtx->gs.u32Limit == 0xffff); Assert(u32GSAttr == 0xf3); /* 64-bit capable CPUs. */ # if HC_ARCH_BITS == 64 Assert(!RT_HI_U32(pCtx->cs.u64Base)); Assert(!u32SSAttr || !RT_HI_U32(pCtx->ss.u64Base)); Assert(!u32DSAttr || !RT_HI_U32(pCtx->ds.u64Base)); Assert(!u32ESAttr || !RT_HI_U32(pCtx->es.u64Base)); # endif } } #endif /* VBOX_STRICT */ /** * Exports a guest segment register into the guest-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param idxSel Index of the selector in the VMCS. * @param idxLimit Index of the segment limit in the VMCS. * @param idxBase Index of the segment base in the VMCS. * @param idxAccess Index of the access rights of the segment in the VMCS. * @param pSelReg Pointer to the segment selector. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess, PCCPUMSELREG pSelReg) { int rc = VMXWriteVmcs32(idxSel, pSelReg->Sel); /* 16-bit guest selector field. */ rc |= VMXWriteVmcs32(idxLimit, pSelReg->u32Limit); /* 32-bit guest segment limit field. */ rc |= VMXWriteVmcsGstN(idxBase, pSelReg->u64Base); /* Natural width guest segment base field.*/ AssertRCReturn(rc, rc); uint32_t u32Access = pSelReg->Attr.u; if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { /* VT-x requires our real-using-v86 mode hack to override the segment access-right bits. */ u32Access = 0xf3; Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); } else { /* * The way to differentiate between whether this is really a null selector or was just * a selector loaded with 0 in real-mode is using the segment attributes. A selector * loaded in real-mode with the value 0 is valid and usable in protected-mode and we * should -not- mark it as an unusable segment. Both the recompiler & VT-x ensures * NULL selectors loaded in protected-mode have their attribute as 0. */ if (!u32Access) u32Access = X86DESCATTR_UNUSABLE; } /* Validate segment access rights. Refer to Intel spec. "26.3.1.2 Checks on Guest Segment Registers". */ AssertMsg((u32Access & X86DESCATTR_UNUSABLE) || (u32Access & X86_SEL_TYPE_ACCESSED), ("Access bit not set for usable segment. idx=%#x sel=%#x attr %#x\n", idxBase, pSelReg, pSelReg->Attr.u)); rc = VMXWriteVmcs32(idxAccess, u32Access); /* 32-bit guest segment access-rights field. */ AssertRCReturn(rc, rc); return rc; } /** * Exports the guest segment registers, GDTR, IDTR, LDTR, (TR, FS and GS bases) * into the guest-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks Will import guest CR0 on strict builds during validation of * segments. * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestSegmentRegs(PVMCPU pVCpu) { int rc = VERR_INTERNAL_ERROR_5; PVM pVM = pVCpu->CTX_SUFF(pVM); PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; /* * Guest Segment registers: CS, SS, DS, ES, FS, GS. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SREG_MASK) { #ifdef VBOX_WITH_REM if (!pVM->hm.s.vmx.fUnrestrictedGuest) { Assert(pVM->hm.s.vmx.pRealModeTSS); AssertCompile(PGMMODE_REAL < PGMMODE_PROTECTED); if ( pVCpu->hm.s.vmx.fWasInRealMode && PGMGetGuestMode(pVCpu) >= PGMMODE_PROTECTED) { /* Signal that the recompiler must flush its code-cache as the guest -may- rewrite code it will later execute in real-mode (e.g. OpenBSD 4.0) */ REMFlushTBs(pVM); Log4Func(("Switch to protected mode detected!\n")); pVCpu->hm.s.vmx.fWasInRealMode = false; } } #endif if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_CS) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CS); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pVCpu->hm.s.vmx.RealMode.AttrCS.u = pCtx->cs.Attr.u; rc = HMVMX_EXPORT_SREG(CS, &pCtx->cs); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_CS); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SS) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SS); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pVCpu->hm.s.vmx.RealMode.AttrSS.u = pCtx->ss.Attr.u; rc = HMVMX_EXPORT_SREG(SS, &pCtx->ss); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SS); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_DS) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_DS); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pVCpu->hm.s.vmx.RealMode.AttrDS.u = pCtx->ds.Attr.u; rc = HMVMX_EXPORT_SREG(DS, &pCtx->ds); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_DS); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_ES) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_ES); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pVCpu->hm.s.vmx.RealMode.AttrES.u = pCtx->es.Attr.u; rc = HMVMX_EXPORT_SREG(ES, &pCtx->es); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_ES); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_FS) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_FS); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pVCpu->hm.s.vmx.RealMode.AttrFS.u = pCtx->fs.Attr.u; rc = HMVMX_EXPORT_SREG(FS, &pCtx->fs); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_FS); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_GS) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GS); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pVCpu->hm.s.vmx.RealMode.AttrGS.u = pCtx->gs.Attr.u; rc = HMVMX_EXPORT_SREG(GS, &pCtx->gs); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_GS); } #ifdef VBOX_STRICT hmR0VmxValidateSegmentRegs(pVCpu); #endif Log4Func(("CS=%#RX16 Base=%#RX64 Limit=%#RX32 Attr=%#RX32\n", pCtx->cs.Sel, pCtx->cs.u64Base, pCtx->cs.u32Limit, pCtx->cs.Attr.u)); } /* * Guest TR. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_TR) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_TR); /* * Real-mode emulation using virtual-8086 mode with CR4.VME. Interrupt redirection is * achieved using the interrupt redirection bitmap (all bits cleared to let the guest * handle INT-n's) in the TSS. See hmR3InitFinalizeR0() to see how pRealModeTSS is setup. */ uint16_t u16Sel = 0; uint32_t u32Limit = 0; uint64_t u64Base = 0; uint32_t u32AccessRights = 0; if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { u16Sel = pCtx->tr.Sel; u32Limit = pCtx->tr.u32Limit; u64Base = pCtx->tr.u64Base; u32AccessRights = pCtx->tr.Attr.u; } else { Assert(pVM->hm.s.vmx.pRealModeTSS); Assert(PDMVmmDevHeapIsEnabled(pVM)); /* Guaranteed by HMCanExecuteGuest() -XXX- what about inner loop changes? */ /* We obtain it here every time as PCI regions could be reconfigured in the guest, changing the VMMDev base. */ RTGCPHYS GCPhys; rc = PDMVmmDevHeapR3ToGCPhys(pVM, pVM->hm.s.vmx.pRealModeTSS, &GCPhys); AssertRCReturn(rc, rc); X86DESCATTR DescAttr; DescAttr.u = 0; DescAttr.n.u1Present = 1; DescAttr.n.u4Type = X86_SEL_TYPE_SYS_386_TSS_BUSY; u16Sel = 0; u32Limit = HM_VTX_TSS_SIZE; u64Base = GCPhys; /* in real-mode phys = virt. */ u32AccessRights = DescAttr.u; } /* Validate. */ Assert(!(u16Sel & RT_BIT(2))); AssertMsg( (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_386_TSS_BUSY || (u32AccessRights & 0xf) == X86_SEL_TYPE_SYS_286_TSS_BUSY, ("TSS is not busy!? %#x\n", u32AccessRights)); AssertMsg(!(u32AccessRights & X86DESCATTR_UNUSABLE), ("TR unusable bit is not clear!? %#x\n", u32AccessRights)); Assert(!(u32AccessRights & RT_BIT(4))); /* System MBZ.*/ Assert(u32AccessRights & RT_BIT(7)); /* Present MB1.*/ Assert(!(u32AccessRights & 0xf00)); /* 11:8 MBZ. */ Assert(!(u32AccessRights & 0xfffe0000)); /* 31:17 MBZ. */ Assert( (u32Limit & 0xfff) == 0xfff || !(u32AccessRights & RT_BIT(15))); /* Granularity MBZ. */ Assert( !(pCtx->tr.u32Limit & 0xfff00000) || (u32AccessRights & RT_BIT(15))); /* Granularity MB1. */ rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_TR_SEL, u16Sel); rc |= VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_LIMIT, u32Limit); rc |= VMXWriteVmcs32(VMX_VMCS32_GUEST_TR_ACCESS_RIGHTS, u32AccessRights); rc |= VMXWriteVmcsGstN(VMX_VMCS_GUEST_TR_BASE, u64Base); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_TR); Log4Func(("TR base=%#RX64\n", pCtx->tr.u64Base)); } /* * Guest GDTR. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_GDTR) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_GDTR); rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, pCtx->gdtr.cbGdt); rc |= VMXWriteVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, pCtx->gdtr.pGdt); AssertRCReturn(rc, rc); /* Validate. */ Assert(!(pCtx->gdtr.cbGdt & 0xffff0000)); /* Bits 31:16 MBZ. */ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_GDTR); Log4Func(("GDTR base=%#RX64\n", pCtx->gdtr.pGdt)); } /* * Guest LDTR. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_LDTR) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_LDTR); /* The unusable bit is specific to VT-x, if it's a null selector mark it as an unusable segment. */ uint32_t u32Access = 0; if (!pCtx->ldtr.Attr.u) u32Access = X86DESCATTR_UNUSABLE; else u32Access = pCtx->ldtr.Attr.u; rc = VMXWriteVmcs32(VMX_VMCS16_GUEST_LDTR_SEL, pCtx->ldtr.Sel); rc |= VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_LIMIT, pCtx->ldtr.u32Limit); rc |= VMXWriteVmcs32(VMX_VMCS32_GUEST_LDTR_ACCESS_RIGHTS, u32Access); rc |= VMXWriteVmcsGstN(VMX_VMCS_GUEST_LDTR_BASE, pCtx->ldtr.u64Base); AssertRCReturn(rc, rc); /* Validate. */ if (!(u32Access & X86DESCATTR_UNUSABLE)) { Assert(!(pCtx->ldtr.Sel & RT_BIT(2))); /* TI MBZ. */ Assert(pCtx->ldtr.Attr.n.u4Type == 2); /* Type MB2 (LDT). */ Assert(!pCtx->ldtr.Attr.n.u1DescType); /* System MBZ. */ Assert(pCtx->ldtr.Attr.n.u1Present == 1); /* Present MB1. */ Assert(!pCtx->ldtr.Attr.n.u4LimitHigh); /* 11:8 MBZ. */ Assert(!(pCtx->ldtr.Attr.u & 0xfffe0000)); /* 31:17 MBZ. */ Assert( (pCtx->ldtr.u32Limit & 0xfff) == 0xfff || !pCtx->ldtr.Attr.n.u1Granularity); /* Granularity MBZ. */ Assert( !(pCtx->ldtr.u32Limit & 0xfff00000) || pCtx->ldtr.Attr.n.u1Granularity); /* Granularity MB1. */ } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_LDTR); Log4Func(("LDTR base=%#RX64\n", pCtx->ldtr.u64Base)); } /* * Guest IDTR. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_IDTR) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_IDTR); rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, pCtx->idtr.cbIdt); rc |= VMXWriteVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, pCtx->idtr.pIdt); AssertRCReturn(rc, rc); /* Validate. */ Assert(!(pCtx->idtr.cbIdt & 0xffff0000)); /* Bits 31:16 MBZ. */ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_IDTR); Log4Func(("IDTR base=%#RX64\n", pCtx->idtr.pIdt)); } return VINF_SUCCESS; } /** * Exports certain guest MSRs into the VM-entry MSR-load and VM-exit MSR-store * areas. * * These MSRs will automatically be loaded to the host CPU on every successful * VM-entry and stored from the host CPU on every successful VM-exit. This also * creates/updates MSR slots for the host MSRs. The actual host MSR values are * -not- updated here for performance reasons. See hmR0VmxExportHostMsrs(). * * Also exports the guest sysenter MSRs into the guest-state area in the VMCS. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportGuestMsrs(PVMCPU pVCpu) { AssertPtr(pVCpu); AssertPtr(pVCpu->hm.s.vmx.pvGuestMsr); /* * MSRs that we use the auto-load/store MSR area in the VMCS. * For 64-bit hosts, we load/restore them lazily, see hmR0VmxLazyLoadGuestMsrs(). */ PVM pVM = pVCpu->CTX_SUFF(pVM); PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_VMX_GUEST_AUTO_MSRS) { if (pVM->hm.s.fAllow64BitGuests) { #if HC_ARCH_BITS == 32 HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSCALL_MSRS | CPUMCTX_EXTRN_KERNEL_GS_BASE); int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_LSTAR, pCtx->msrLSTAR, false, NULL); rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K6_STAR, pCtx->msrSTAR, false, NULL); rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_SF_MASK, pCtx->msrSFMASK, false, NULL); rc |= hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_KERNEL_GS_BASE, pCtx->msrKERNELGSBASE, false, NULL); AssertRCReturn(rc, rc); # ifdef LOG_ENABLED PCVMXAUTOMSR pMsr = (PCVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; for (uint32_t i = 0; i < pVCpu->hm.s.vmx.cMsrs; i++, pMsr++) Log4Func(("MSR[%RU32]: u32Msr=%#RX32 u64Value=%#RX64\n", i, pMsr->u32Msr, pMsr->u64Value)); # endif #endif } ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_VMX_GUEST_AUTO_MSRS); } /* * Guest Sysenter MSRs. * These flags are only set when MSR-bitmaps are not supported by the CPU and we cause * VM-exits on WRMSRs for these MSRs. */ if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_MSR_MASK) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_SYSENTER_MSRS); if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_CS_MSR) { int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, pCtx->SysEnter.cs); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_CS_MSR); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_EIP_MSR) { int rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, pCtx->SysEnter.eip); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_EIP_MSR); } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_SYSENTER_ESP_MSR) { int rc = VMXWriteVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, pCtx->SysEnter.esp); AssertRCReturn(rc, rc); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_SYSENTER_ESP_MSR); } } if (ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged) & HM_CHANGED_GUEST_EFER_MSR) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_EFER); if (hmR0VmxShouldSwapEferMsr(pVCpu)) { /* * If the CPU supports VMCS controls for swapping EFER, use it. Otherwise, we have no option * but to use the auto-load store MSR area in the VMCS for swapping EFER. See @bugref{7368}. */ if (pVM->hm.s.vmx.fSupportsVmcsEfer) { int rc = VMXWriteVmcs64(VMX_VMCS64_GUEST_EFER_FULL, pCtx->msrEFER); AssertRCReturn(rc,rc); Log4Func(("EFER=%#RX64\n", pCtx->msrEFER)); } else { int rc = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K6_EFER, pCtx->msrEFER, false /* fUpdateHostMsr */, NULL /* pfAddedAndUpdated */); AssertRCReturn(rc, rc); /* We need to intercept reads too, see @bugref{7386#c16}. */ if (pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_USE_MSR_BITMAPS) hmR0VmxSetMsrPermission(pVCpu, MSR_K6_EFER, VMXMSREXIT_INTERCEPT_READ, VMXMSREXIT_INTERCEPT_WRITE); Log4Func(("MSR[--]: u32Msr=%#RX32 u64Value=%#RX64 cMsrs=%u\n", MSR_K6_EFER, pCtx->msrEFER, pVCpu->hm.s.vmx.cMsrs)); } } else if (!pVM->hm.s.vmx.fSupportsVmcsEfer) hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, MSR_K6_EFER); ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~HM_CHANGED_GUEST_EFER_MSR); } return VINF_SUCCESS; } #if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) /** * Check if guest state allows safe use of 32-bit switcher again. * * Segment bases and protected mode structures must be 32-bit addressable * because the 32-bit switcher will ignore high dword when writing these VMCS * fields. See @bugref{8432} for details. * * @returns true if safe, false if must continue to use the 64-bit switcher. * @param pCtx Pointer to the guest-CPU context. * * @remarks No-long-jump zone!!! */ static bool hmR0VmxIs32BitSwitcherSafe(PCCPUMCTX pCtx) { if (pCtx->gdtr.pGdt & UINT64_C(0xffffffff00000000)) return false; if (pCtx->idtr.pIdt & UINT64_C(0xffffffff00000000)) return false; if (pCtx->ldtr.u64Base & UINT64_C(0xffffffff00000000)) return false; if (pCtx->tr.u64Base & UINT64_C(0xffffffff00000000)) return false; if (pCtx->es.u64Base & UINT64_C(0xffffffff00000000)) return false; if (pCtx->cs.u64Base & UINT64_C(0xffffffff00000000)) return false; if (pCtx->ss.u64Base & UINT64_C(0xffffffff00000000)) return false; if (pCtx->ds.u64Base & UINT64_C(0xffffffff00000000)) return false; if (pCtx->fs.u64Base & UINT64_C(0xffffffff00000000)) return false; if (pCtx->gs.u64Base & UINT64_C(0xffffffff00000000)) return false; /* All good, bases are 32-bit. */ return true; } #endif /** * Selects up the appropriate function to run guest code. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxSelectVMRunHandler(PVMCPU pVCpu) { PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if (CPUMIsGuestInLongModeEx(pCtx)) { #ifndef VBOX_ENABLE_64_BITS_GUESTS return VERR_PGM_UNSUPPORTED_SHADOW_PAGING_MODE; #endif Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); /* Guaranteed by hmR3InitFinalizeR0(). */ #if HC_ARCH_BITS == 32 /* 32-bit host. We need to switch to 64-bit before running the 64-bit guest. */ if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64) { #ifdef VBOX_STRICT if (pVCpu->hm.s.vmx.pfnStartVM != NULL) /* Very first entry would have saved host-state already, ignore it. */ { /* Currently, all mode changes sends us back to ring-3, so these should be set. See @bugref{6944}. */ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged); RT_UNTRUSTED_NONVOLATILE_COPY_FENCE(); AssertMsg(fCtxChanged & ( HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS | HM_CHANGED_GUEST_EFER_MSR), ("fCtxChanged=%#RX64\n", fCtxChanged)); } #endif pVCpu->hm.s.vmx.pfnStartVM = VMXR0SwitcherStartVM64; /* Mark that we've switched to 64-bit handler, we can't safely switch back to 32-bit for the rest of the VM run (until VM reset). See @bugref{8432#c7}. */ pVCpu->hm.s.vmx.fSwitchedTo64on32 = true; Log4Func(("Selected 64-bit switcher\n")); } #else /* 64-bit host. */ pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM64; #endif } else { /* Guest is not in long mode, use the 32-bit handler. */ #if HC_ARCH_BITS == 32 if ( pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32 && !pVCpu->hm.s.vmx.fSwitchedTo64on32 /* If set, guest mode change does not imply switcher change. */ && pVCpu->hm.s.vmx.pfnStartVM != NULL) /* Very first entry would have saved host-state already, ignore it. */ { # ifdef VBOX_STRICT /* Currently, all mode changes sends us back to ring-3, so these should be set. See @bugref{6944}. */ uint64_t const fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged); RT_UNTRUSTED_NONVOLATILE_COPY_FENCE(); AssertMsg(fCtxChanged & ( HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_VMX_ENTRY_CTLS | HM_CHANGED_GUEST_EFER_MSR), ("fCtxChanged=%#RX64\n", fCtxChanged)); # endif } # ifdef VBOX_ENABLE_64_BITS_GUESTS /* * Keep using the 64-bit switcher even though we're in 32-bit because of bad Intel * design, see @bugref{8432#c7}. If real-on-v86 mode is active, clear the 64-bit * switcher flag because now we know the guest is in a sane state where it's safe * to use the 32-bit switcher. Otherwise check the guest state if it's safe to use * the much faster 32-bit switcher again. */ if (!pVCpu->hm.s.vmx.fSwitchedTo64on32) { if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0StartVM32) Log4Func(("Selected 32-bit switcher\n")); pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32; } else { Assert(pVCpu->hm.s.vmx.pfnStartVM == VMXR0SwitcherStartVM64); if ( pVCpu->hm.s.vmx.RealMode.fRealOnV86Active || hmR0VmxIs32BitSwitcherSafe(pCtx)) { pVCpu->hm.s.vmx.fSwitchedTo64on32 = false; pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32; ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_EFER_MSR | HM_CHANGED_VMX_ENTRY_CTLS | HM_CHANGED_VMX_EXIT_CTLS | HM_CHANGED_HOST_CONTEXT); Log4Func(("Selected 32-bit switcher (safe)\n")); } } # else pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32; # endif #else pVCpu->hm.s.vmx.pfnStartVM = VMXR0StartVM32; #endif } Assert(pVCpu->hm.s.vmx.pfnStartVM); return VINF_SUCCESS; } /** * Wrapper for running the guest code in VT-x. * * @returns VBox status code, no informational status codes. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ DECLINLINE(int) hmR0VmxRunGuest(PVMCPU pVCpu) { /* Mark that HM is the keeper of all guest-CPU registers now that we're going to execute guest code. */ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; pCtx->fExtrn |= HMVMX_CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_KEEPER_HM; /* * 64-bit Windows uses XMM registers in the kernel as the Microsoft compiler expresses * floating-point operations using SSE instructions. Some XMM registers (XMM6-XMM15) are * callee-saved and thus the need for this XMM wrapper. * * See MSDN "Configuring Programs for 64-bit/x64 Software Conventions / Register Usage". */ bool const fResumeVM = RT_BOOL(pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_LAUNCHED); /** @todo Add stats for resume vs launch. */ PVM pVM = pVCpu->CTX_SUFF(pVM); #ifdef VBOX_WITH_KERNEL_USING_XMM int rc = hmR0VMXStartVMWrapXMM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu, pVCpu->hm.s.vmx.pfnStartVM); #else int rc = pVCpu->hm.s.vmx.pfnStartVM(fResumeVM, pCtx, &pVCpu->hm.s.vmx.VMCSCache, pVM, pVCpu); #endif AssertMsg(rc <= VINF_SUCCESS, ("%Rrc\n", rc)); return rc; } /** * Reports world-switch error and dumps some useful debug info. * * @param pVCpu The cross context virtual CPU structure. * @param rcVMRun The return code from VMLAUNCH/VMRESUME. * @param pVmxTransient Pointer to the VMX transient structure (only * exitReason updated). */ static void hmR0VmxReportWorldSwitchError(PVMCPU pVCpu, int rcVMRun, PVMXTRANSIENT pVmxTransient) { Assert(pVCpu); Assert(pVmxTransient); HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); Log4Func(("VM-entry failure: %Rrc\n", rcVMRun)); switch (rcVMRun) { case VERR_VMX_INVALID_VMXON_PTR: AssertFailed(); break; case VINF_SUCCESS: /* VMLAUNCH/VMRESUME succeeded but VM-entry failed... yeah, true story. */ case VERR_VMX_UNABLE_TO_START_VM: /* VMLAUNCH/VMRESUME itself failed. */ { int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &pVCpu->hm.s.vmx.LastError.u32ExitReason); rc |= VMXReadVmcs32(VMX_VMCS32_RO_VM_INSTR_ERROR, &pVCpu->hm.s.vmx.LastError.u32InstrError); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRC(rc); pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu; /* LastError.idCurrentCpu was already updated in hmR0VmxPreRunGuestCommitted(). Cannot do it here as we may have been long preempted. */ #ifdef VBOX_STRICT Log4(("uExitReason %#RX32 (VmxTransient %#RX16)\n", pVCpu->hm.s.vmx.LastError.u32ExitReason, pVmxTransient->uExitReason)); Log4(("Exit Qualification %#RX64\n", pVmxTransient->uExitQual)); Log4(("InstrError %#RX32\n", pVCpu->hm.s.vmx.LastError.u32InstrError)); if (pVCpu->hm.s.vmx.LastError.u32InstrError <= HMVMX_INSTR_ERROR_MAX) Log4(("InstrError Desc. \"%s\"\n", g_apszVmxInstrErrors[pVCpu->hm.s.vmx.LastError.u32InstrError])); else Log4(("InstrError Desc. Range exceeded %u\n", HMVMX_INSTR_ERROR_MAX)); Log4(("Entered host CPU %u\n", pVCpu->hm.s.vmx.LastError.idEnteredCpu)); Log4(("Current host CPU %u\n", pVCpu->hm.s.vmx.LastError.idCurrentCpu)); /* VMX control bits. */ uint32_t u32Val; uint64_t u64Val; RTHCUINTREG uHCReg; rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PIN_EXEC, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_PIN_EXEC %#RX32\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_PROC_EXEC %#RX32\n", u32Val)); if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_SECONDARY_CTLS) { rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_PROC_EXEC2 %#RX32\n", u32Val)); } rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_ENTRY %#RX32\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_EXIT %#RX32\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_CR3_TARGET_COUNT, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_CR3_TARGET_COUNT %#RX32\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %u\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_TPR_THRESHOLD, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_TPR_THRESHOLD %u\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_EXIT_MSR_STORE_COUNT %u (guest MSRs)\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_EXIT_MSR_LOAD_COUNT %u (host MSRs)\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_ENTRY_MSR_LOAD_COUNT %u (guest MSRs)\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP %#RX32\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MASK %#RX32\n", u32Val)); rc = VMXReadVmcs32(VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH, &u32Val); AssertRC(rc); Log4(("VMX_VMCS32_CTRL_PAGEFAULT_ERROR_MATCH %#RX32\n", u32Val)); rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc); Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc); Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc); Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc); Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); if (pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging) { rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc); Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val)); } /* Guest bits. */ rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); AssertRC(rc); Log4(("Old Guest Rip %#RX64 New %#RX64\n", pVCpu->cpum.GstCtx.rip, u64Val)); rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); AssertRC(rc); Log4(("Old Guest Rsp %#RX64 New %#RX64\n", pVCpu->cpum.GstCtx.rsp, u64Val)); rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val); AssertRC(rc); Log4(("Old Guest Rflags %#RX32 New %#RX32\n", pVCpu->cpum.GstCtx.eflags.u32, u32Val)); if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fVpid) { rc = VMXReadVmcs32(VMX_VMCS16_VPID, &u32Val); AssertRC(rc); Log4(("VMX_VMCS16_VPID %u\n", u32Val)); } /* Host bits. */ rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR0, &uHCReg); AssertRC(rc); Log4(("Host CR0 %#RHr\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR3, &uHCReg); AssertRC(rc); Log4(("Host CR3 %#RHr\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_HOST_CR4, &uHCReg); AssertRC(rc); Log4(("Host CR4 %#RHr\n", uHCReg)); RTGDTR HostGdtr; PCX86DESCHC pDesc; ASMGetGDTR(&HostGdtr); rc = VMXReadVmcs32(VMX_VMCS16_HOST_CS_SEL, &u32Val); AssertRC(rc); Log4(("Host CS %#08x\n", u32Val)); if (u32Val < HostGdtr.cbGdt) { pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); hmR0DumpDescriptor(pDesc, u32Val, "CS: "); } rc = VMXReadVmcs32(VMX_VMCS16_HOST_DS_SEL, &u32Val); AssertRC(rc); Log4(("Host DS %#08x\n", u32Val)); if (u32Val < HostGdtr.cbGdt) { pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); hmR0DumpDescriptor(pDesc, u32Val, "DS: "); } rc = VMXReadVmcs32(VMX_VMCS16_HOST_ES_SEL, &u32Val); AssertRC(rc); Log4(("Host ES %#08x\n", u32Val)); if (u32Val < HostGdtr.cbGdt) { pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); hmR0DumpDescriptor(pDesc, u32Val, "ES: "); } rc = VMXReadVmcs32(VMX_VMCS16_HOST_FS_SEL, &u32Val); AssertRC(rc); Log4(("Host FS %#08x\n", u32Val)); if (u32Val < HostGdtr.cbGdt) { pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); hmR0DumpDescriptor(pDesc, u32Val, "FS: "); } rc = VMXReadVmcs32(VMX_VMCS16_HOST_GS_SEL, &u32Val); AssertRC(rc); Log4(("Host GS %#08x\n", u32Val)); if (u32Val < HostGdtr.cbGdt) { pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); hmR0DumpDescriptor(pDesc, u32Val, "GS: "); } rc = VMXReadVmcs32(VMX_VMCS16_HOST_SS_SEL, &u32Val); AssertRC(rc); Log4(("Host SS %#08x\n", u32Val)); if (u32Val < HostGdtr.cbGdt) { pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); hmR0DumpDescriptor(pDesc, u32Val, "SS: "); } rc = VMXReadVmcs32(VMX_VMCS16_HOST_TR_SEL, &u32Val); AssertRC(rc); Log4(("Host TR %#08x\n", u32Val)); if (u32Val < HostGdtr.cbGdt) { pDesc = (PCX86DESCHC)(HostGdtr.pGdt + (u32Val & X86_SEL_MASK)); hmR0DumpDescriptor(pDesc, u32Val, "TR: "); } rc = VMXReadVmcsHstN(VMX_VMCS_HOST_TR_BASE, &uHCReg); AssertRC(rc); Log4(("Host TR Base %#RHv\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_HOST_GDTR_BASE, &uHCReg); AssertRC(rc); Log4(("Host GDTR Base %#RHv\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_HOST_IDTR_BASE, &uHCReg); AssertRC(rc); Log4(("Host IDTR Base %#RHv\n", uHCReg)); rc = VMXReadVmcs32(VMX_VMCS32_HOST_SYSENTER_CS, &u32Val); AssertRC(rc); Log4(("Host SYSENTER CS %#08x\n", u32Val)); rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_EIP, &uHCReg); AssertRC(rc); Log4(("Host SYSENTER EIP %#RHv\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_HOST_SYSENTER_ESP, &uHCReg); AssertRC(rc); Log4(("Host SYSENTER ESP %#RHv\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RSP, &uHCReg); AssertRC(rc); Log4(("Host RSP %#RHv\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_HOST_RIP, &uHCReg); AssertRC(rc); Log4(("Host RIP %#RHv\n", uHCReg)); # if HC_ARCH_BITS == 64 Log4(("MSR_K6_EFER = %#RX64\n", ASMRdMsr(MSR_K6_EFER))); Log4(("MSR_K8_CSTAR = %#RX64\n", ASMRdMsr(MSR_K8_CSTAR))); Log4(("MSR_K8_LSTAR = %#RX64\n", ASMRdMsr(MSR_K8_LSTAR))); Log4(("MSR_K6_STAR = %#RX64\n", ASMRdMsr(MSR_K6_STAR))); Log4(("MSR_K8_SF_MASK = %#RX64\n", ASMRdMsr(MSR_K8_SF_MASK))); Log4(("MSR_K8_KERNEL_GS_BASE = %#RX64\n", ASMRdMsr(MSR_K8_KERNEL_GS_BASE))); # endif #endif /* VBOX_STRICT */ break; } default: /* Impossible */ AssertMsgFailed(("hmR0VmxReportWorldSwitchError %Rrc (%#x)\n", rcVMRun, rcVMRun)); break; } } #if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) #ifndef VMX_USE_CACHED_VMCS_ACCESSES # error "VMX_USE_CACHED_VMCS_ACCESSES not defined when it should be!" #endif #ifdef VBOX_STRICT static bool hmR0VmxIsValidWriteField(uint32_t idxField) { switch (idxField) { case VMX_VMCS_GUEST_RIP: case VMX_VMCS_GUEST_RSP: case VMX_VMCS_GUEST_SYSENTER_EIP: case VMX_VMCS_GUEST_SYSENTER_ESP: case VMX_VMCS_GUEST_GDTR_BASE: case VMX_VMCS_GUEST_IDTR_BASE: case VMX_VMCS_GUEST_CS_BASE: case VMX_VMCS_GUEST_DS_BASE: case VMX_VMCS_GUEST_ES_BASE: case VMX_VMCS_GUEST_FS_BASE: case VMX_VMCS_GUEST_GS_BASE: case VMX_VMCS_GUEST_SS_BASE: case VMX_VMCS_GUEST_LDTR_BASE: case VMX_VMCS_GUEST_TR_BASE: case VMX_VMCS_GUEST_CR3: return true; } return false; } static bool hmR0VmxIsValidReadField(uint32_t idxField) { switch (idxField) { /* Read-only fields. */ case VMX_VMCS_RO_EXIT_QUALIFICATION: return true; } /* Remaining readable fields should also be writable. */ return hmR0VmxIsValidWriteField(idxField); } #endif /* VBOX_STRICT */ /** * Executes the specified handler in 64-bit mode. * * @returns VBox status code (no informational status codes). * @param pVCpu The cross context virtual CPU structure. * @param enmOp The operation to perform. * @param cParams Number of parameters. * @param paParam Array of 32-bit parameters. */ VMMR0DECL(int) VMXR0Execute64BitsHandler(PVMCPU pVCpu, HM64ON32OP enmOp, uint32_t cParams, uint32_t *paParam) { PVM pVM = pVCpu->CTX_SUFF(pVM); AssertReturn(pVM->hm.s.pfnHost32ToGuest64R0, VERR_HM_NO_32_TO_64_SWITCHER); Assert(enmOp > HM64ON32OP_INVALID && enmOp < HM64ON32OP_END); Assert(pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Write.aField)); Assert(pVCpu->hm.s.vmx.VMCSCache.Read.cValidEntries <= RT_ELEMENTS(pVCpu->hm.s.vmx.VMCSCache.Read.aField)); #ifdef VBOX_STRICT for (uint32_t i = 0; i < pVCpu->hm.s.vmx.VMCSCache.Write.cValidEntries; i++) Assert(hmR0VmxIsValidWriteField(pVCpu->hm.s.vmx.VMCSCache.Write.aField[i])); for (uint32_t i = 0; i hm.s.vmx.VMCSCache.Read.cValidEntries; i++) Assert(hmR0VmxIsValidReadField(pVCpu->hm.s.vmx.VMCSCache.Read.aField[i])); #endif /* Disable interrupts. */ RTCCUINTREG fOldEFlags = ASMIntDisableFlags(); #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI RTCPUID idHostCpu = RTMpCpuId(); CPUMR0SetLApic(pVCpu, idHostCpu); #endif PCHMPHYSCPU pHostCpu = hmR0GetCurrentCpu(); RTHCPHYS HCPhysCpuPage = pHostCpu->HCPhysMemObj; /* Clear VMCS. Marking it inactive, clearing implementation-specific data and writing VMCS data back to memory. */ VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_CLEAR; /* Leave VMX Root Mode. */ VMXDisable(); SUPR0ChangeCR4(0, ~X86_CR4_VMXE); CPUMSetHyperESP(pVCpu, VMMGetStackRC(pVCpu)); CPUMSetHyperEIP(pVCpu, enmOp); for (int i = (int)cParams - 1; i >= 0; i--) CPUMPushHyper(pVCpu, paParam[i]); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatWorldSwitch3264, z); /* Call the switcher. */ int rc = pVM->hm.s.pfnHost32ToGuest64R0(pVM, RT_UOFFSETOF_DYN(VM, aCpus[pVCpu->idCpu].cpum) - RT_UOFFSETOF(VM, cpum)); STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatWorldSwitch3264, z); /** @todo replace with hmR0VmxEnterRootMode() and hmR0VmxLeaveRootMode(). */ /* Make sure the VMX instructions don't cause #UD faults. */ SUPR0ChangeCR4(X86_CR4_VMXE, RTCCUINTREG_MAX); /* Re-enter VMX Root Mode */ int rc2 = VMXEnable(HCPhysCpuPage); if (RT_FAILURE(rc2)) { SUPR0ChangeCR4(0, ~X86_CR4_VMXE); ASMSetFlags(fOldEFlags); pVM->hm.s.vmx.HCPhysVmxEnableError = HCPhysCpuPage; return rc2; } rc2 = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); AssertRC(rc2); pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_ACTIVE; Assert(!(ASMGetFlags() & X86_EFL_IF)); ASMSetFlags(fOldEFlags); return rc; } /** * Prepares for and executes VMLAUNCH (64-bit guests) for 32-bit hosts * supporting 64-bit guests. * * @returns VBox status code. * @param fResume Whether to VMLAUNCH or VMRESUME. * @param pCtx Pointer to the guest-CPU context. * @param pCache Pointer to the VMCS cache. * @param pVM The cross context VM structure. * @param pVCpu The cross context virtual CPU structure. */ DECLASM(int) VMXR0SwitcherStartVM64(RTHCUINT fResume, PCPUMCTX pCtx, PVMCSCACHE pCache, PVM pVM, PVMCPU pVCpu) { NOREF(fResume); PCHMPHYSCPU pHostCpu = hmR0GetCurrentCpu(); RTHCPHYS const HCPhysCpuPage = pHostCpu->HCPhysMemObj; #ifdef VBOX_WITH_CRASHDUMP_MAGIC pCache->uPos = 1; pCache->interPD = PGMGetInterPaeCR3(pVM); pCache->pSwitcher = (uint64_t)pVM->hm.s.pfnHost32ToGuest64R0; #endif #if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES) pCache->TestIn.HCPhysCpuPage = 0; pCache->TestIn.HCPhysVmcs = 0; pCache->TestIn.pCache = 0; pCache->TestOut.HCPhysVmcs = 0; pCache->TestOut.pCache = 0; pCache->TestOut.pCtx = 0; pCache->TestOut.eflags = 0; #else NOREF(pCache); #endif uint32_t aParam[10]; aParam[0] = RT_LO_U32(HCPhysCpuPage); /* Param 1: VMXON physical address - Lo. */ aParam[1] = RT_HI_U32(HCPhysCpuPage); /* Param 1: VMXON physical address - Hi. */ aParam[2] = RT_LO_U32(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Lo. */ aParam[3] = RT_HI_U32(pVCpu->hm.s.vmx.HCPhysVmcs); /* Param 2: VMCS physical address - Hi. */ aParam[4] = VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache); aParam[5] = 0; aParam[6] = VM_RC_ADDR(pVM, pVM); aParam[7] = 0; aParam[8] = VM_RC_ADDR(pVM, pVCpu); aParam[9] = 0; #ifdef VBOX_WITH_CRASHDUMP_MAGIC pCtx->dr[4] = pVM->hm.s.vmx.pScratchPhys + 16 + 8; *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 1; #endif int rc = VMXR0Execute64BitsHandler(pVCpu, HM64ON32OP_VMXRCStartVM64, RT_ELEMENTS(aParam), &aParam[0]); #ifdef VBOX_WITH_CRASHDUMP_MAGIC Assert(*(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) == 5); Assert(pCtx->dr[4] == 10); *(uint32_t *)(pVM->hm.s.vmx.pScratch + 16 + 8) = 0xff; #endif #if defined(DEBUG) && defined(VMX_USE_CACHED_VMCS_ACCESSES) AssertMsg(pCache->TestIn.HCPhysCpuPage == HCPhysCpuPage, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysCpuPage, HCPhysCpuPage)); AssertMsg(pCache->TestIn.HCPhysVmcs == pVCpu->hm.s.vmx.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs, pVCpu->hm.s.vmx.HCPhysVmcs)); AssertMsg(pCache->TestIn.HCPhysVmcs == pCache->TestOut.HCPhysVmcs, ("%RHp vs %RHp\n", pCache->TestIn.HCPhysVmcs, pCache->TestOut.HCPhysVmcs)); AssertMsg(pCache->TestIn.pCache == pCache->TestOut.pCache, ("%RGv vs %RGv\n", pCache->TestIn.pCache, pCache->TestOut.pCache)); AssertMsg(pCache->TestIn.pCache == VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache), ("%RGv vs %RGv\n", pCache->TestIn.pCache, VM_RC_ADDR(pVM, &pVM->aCpus[pVCpu->idCpu].hm.s.vmx.VMCSCache))); AssertMsg(pCache->TestIn.pCtx == pCache->TestOut.pCtx, ("%RGv vs %RGv\n", pCache->TestIn.pCtx, pCache->TestOut.pCtx)); Assert(!(pCache->TestOut.eflags & X86_EFL_IF)); #endif NOREF(pCtx); return rc; } /** * Initialize the VMCS-Read cache. * * The VMCS cache is used for 32-bit hosts running 64-bit guests (except 32-bit * Darwin which runs with 64-bit paging in 32-bit mode) for 64-bit fields that * cannot be accessed in 32-bit mode. Some 64-bit fields -can- be accessed * (those that have a 32-bit FULL & HIGH part). * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. */ static int hmR0VmxInitVmcsReadCache(PVMCPU pVCpu) { #define VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, idxField) \ do { \ Assert(pCache->Read.aField[idxField##_CACHE_IDX] == 0); \ pCache->Read.aField[idxField##_CACHE_IDX] = idxField; \ pCache->Read.aFieldVal[idxField##_CACHE_IDX] = 0; \ ++cReadFields; \ } while (0) PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache; uint32_t cReadFields = 0; /* * Don't remove the #if 0'd fields in this code. They're listed here for consistency * and serve to indicate exceptions to the rules. */ /* Guest-natural selector base fields. */ #if 0 /* These are 32-bit in practice. See Intel spec. 2.5 "Control Registers". */ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR0); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR4); #endif VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_ES_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CS_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SS_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_DS_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_FS_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GS_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_LDTR_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_TR_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_GDTR_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_IDTR_BASE); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RSP); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_RIP); #if 0 /* Unused natural width guest-state fields. */ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); /* Handled in Nested Paging case */ #endif VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_ESP); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_SYSENTER_EIP); /* 64-bit guest-state fields; unused as we use two 32-bit VMREADs for these 64-bit fields (using "FULL" and "HIGH" fields). */ #if 0 VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_DEBUGCTL_FULL); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PAT_FULL); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_EFER_FULL); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE0_FULL); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE1_FULL); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE2_FULL); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS64_GUEST_PDPTE3_FULL); #endif /* Natural width guest-state fields. */ VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_EXIT_QUALIFICATION); VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_RO_GUEST_LINEAR_ADDR); if (pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging) { VMXLOCAL_INIT_READ_CACHE_FIELD(pCache, VMX_VMCS_GUEST_CR3); AssertMsg(cReadFields == VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX)); pCache->Read.cValidEntries = VMX_VMCS_MAX_NESTED_PAGING_CACHE_IDX; } else { AssertMsg(cReadFields == VMX_VMCS_MAX_CACHE_IDX, ("cReadFields=%u expected %u\n", cReadFields, VMX_VMCS_MAX_CACHE_IDX)); pCache->Read.cValidEntries = VMX_VMCS_MAX_CACHE_IDX; } #undef VMXLOCAL_INIT_READ_CACHE_FIELD return VINF_SUCCESS; } /** * Writes a field into the VMCS. This can either directly invoke a VMWRITE or * queue up the VMWRITE by using the VMCS write cache (on 32-bit hosts, except * darwin, running 64-bit guests). * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param idxField The VMCS field encoding. * @param u64Val 16, 32 or 64-bit value. */ VMMR0DECL(int) VMXWriteVmcs64Ex(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val) { int rc; switch (idxField) { /* * These fields consists of a "FULL" and a "HIGH" part which can be written to individually. */ /* 64-bit Control fields. */ case VMX_VMCS64_CTRL_IO_BITMAP_A_FULL: case VMX_VMCS64_CTRL_IO_BITMAP_B_FULL: case VMX_VMCS64_CTRL_MSR_BITMAP_FULL: case VMX_VMCS64_CTRL_EXIT_MSR_STORE_FULL: case VMX_VMCS64_CTRL_EXIT_MSR_LOAD_FULL: case VMX_VMCS64_CTRL_ENTRY_MSR_LOAD_FULL: case VMX_VMCS64_CTRL_EXEC_VMCS_PTR_FULL: case VMX_VMCS64_CTRL_TSC_OFFSET_FULL: case VMX_VMCS64_CTRL_VIRT_APIC_PAGEADDR_FULL: case VMX_VMCS64_CTRL_APIC_ACCESSADDR_FULL: case VMX_VMCS64_CTRL_VMFUNC_CTRLS_FULL: case VMX_VMCS64_CTRL_EPTP_FULL: case VMX_VMCS64_CTRL_EPTP_LIST_FULL: /* 64-bit Guest-state fields. */ case VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL: case VMX_VMCS64_GUEST_DEBUGCTL_FULL: case VMX_VMCS64_GUEST_PAT_FULL: case VMX_VMCS64_GUEST_EFER_FULL: case VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL: case VMX_VMCS64_GUEST_PDPTE0_FULL: case VMX_VMCS64_GUEST_PDPTE1_FULL: case VMX_VMCS64_GUEST_PDPTE2_FULL: case VMX_VMCS64_GUEST_PDPTE3_FULL: /* 64-bit Host-state fields. */ case VMX_VMCS64_HOST_PAT_FULL: case VMX_VMCS64_HOST_EFER_FULL: case VMX_VMCS64_HOST_PERF_GLOBAL_CTRL_FULL: { rc = VMXWriteVmcs32(idxField, RT_LO_U32(u64Val)); rc |= VMXWriteVmcs32(idxField + 1, RT_HI_U32(u64Val)); break; } /* * These fields do not have high and low parts. Queue up the VMWRITE by using the VMCS write-cache (for 64-bit * values). When we switch the host to 64-bit mode for running 64-bit guests, these VMWRITEs get executed then. */ /* Natural-width Guest-state fields. */ case VMX_VMCS_GUEST_CR3: case VMX_VMCS_GUEST_ES_BASE: case VMX_VMCS_GUEST_CS_BASE: case VMX_VMCS_GUEST_SS_BASE: case VMX_VMCS_GUEST_DS_BASE: case VMX_VMCS_GUEST_FS_BASE: case VMX_VMCS_GUEST_GS_BASE: case VMX_VMCS_GUEST_LDTR_BASE: case VMX_VMCS_GUEST_TR_BASE: case VMX_VMCS_GUEST_GDTR_BASE: case VMX_VMCS_GUEST_IDTR_BASE: case VMX_VMCS_GUEST_RSP: case VMX_VMCS_GUEST_RIP: case VMX_VMCS_GUEST_SYSENTER_ESP: case VMX_VMCS_GUEST_SYSENTER_EIP: { if (!(RT_HI_U32(u64Val))) { /* If this field is 64-bit, VT-x will zero out the top bits. */ rc = VMXWriteVmcs32(idxField, RT_LO_U32(u64Val)); } else { /* Assert that only the 32->64 switcher case should ever come here. */ Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fAllow64BitGuests); rc = VMXWriteCachedVmcsEx(pVCpu, idxField, u64Val); } break; } default: { AssertMsgFailed(("VMXWriteVmcs64Ex: Invalid field %#RX32 (pVCpu=%p u64Val=%#RX64)\n", idxField, pVCpu, u64Val)); rc = VERR_INVALID_PARAMETER; break; } } AssertRCReturn(rc, rc); return rc; } /** * Queue up a VMWRITE by using the VMCS write cache. * This is only used on 32-bit hosts (except darwin) for 64-bit guests. * * @param pVCpu The cross context virtual CPU structure. * @param idxField The VMCS field encoding. * @param u64Val 16, 32 or 64-bit value. */ VMMR0DECL(int) VMXWriteCachedVmcsEx(PVMCPU pVCpu, uint32_t idxField, uint64_t u64Val) { AssertPtr(pVCpu); PVMCSCACHE pCache = &pVCpu->hm.s.vmx.VMCSCache; AssertMsgReturn(pCache->Write.cValidEntries < VMCSCACHE_MAX_ENTRY - 1, ("entries=%u\n", pCache->Write.cValidEntries), VERR_ACCESS_DENIED); /* Make sure there are no duplicates. */ for (uint32_t i = 0; i < pCache->Write.cValidEntries; i++) { if (pCache->Write.aField[i] == idxField) { pCache->Write.aFieldVal[i] = u64Val; return VINF_SUCCESS; } } pCache->Write.aField[pCache->Write.cValidEntries] = idxField; pCache->Write.aFieldVal[pCache->Write.cValidEntries] = u64Val; pCache->Write.cValidEntries++; return VINF_SUCCESS; } #endif /* HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) */ /** * Sets up the usage of TSC-offsetting and updates the VMCS. * * If offsetting is not possible, cause VM-exits on RDTSC(P)s. Also sets up the * VMX preemption timer. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxUpdateTscOffsettingAndPreemptTimer(PVMCPU pVCpu) { bool fOffsettedTsc; bool fParavirtTsc; PVM pVM = pVCpu->CTX_SUFF(pVM); uint64_t uTscOffset; if (pVM->hm.s.vmx.fUsePreemptTimer) { uint64_t cTicksToDeadline = TMCpuTickGetDeadlineAndTscOffset(pVM, pVCpu, &uTscOffset, &fOffsettedTsc, &fParavirtTsc); /* Make sure the returned values have sane upper and lower boundaries. */ uint64_t u64CpuHz = SUPGetCpuHzFromGipBySetIndex(g_pSUPGlobalInfoPage, pVCpu->iHostCpuSet); cTicksToDeadline = RT_MIN(cTicksToDeadline, u64CpuHz / 64); /* 1/64th of a second */ cTicksToDeadline = RT_MAX(cTicksToDeadline, u64CpuHz / 2048); /* 1/2048th of a second */ cTicksToDeadline >>= pVM->hm.s.vmx.cPreemptTimerShift; uint32_t cPreemptionTickCount = (uint32_t)RT_MIN(cTicksToDeadline, UINT32_MAX - 16); int rc = VMXWriteVmcs32(VMX_VMCS32_PREEMPT_TIMER_VALUE, cPreemptionTickCount); AssertRC(rc); } else fOffsettedTsc = TMCpuTickCanUseRealTSC(pVM, pVCpu, &uTscOffset, &fParavirtTsc); if (fParavirtTsc) { /* Currently neither Hyper-V nor KVM need to update their paravirt. TSC information before every VM-entry, hence disable it for performance sake. */ #if 0 int rc = GIMR0UpdateParavirtTsc(pVM, 0 /* u64Offset */); AssertRC(rc); #endif STAM_COUNTER_INC(&pVCpu->hm.s.StatTscParavirt); } uint32_t uProcCtls = pVCpu->hm.s.vmx.u32ProcCtls; if ( fOffsettedTsc && RT_LIKELY(!pVCpu->hm.s.fDebugWantRdTscExit)) { if (pVCpu->hm.s.vmx.u64TscOffset != uTscOffset) { int rc = VMXWriteVmcs64(VMX_VMCS64_CTRL_TSC_OFFSET_FULL, uTscOffset); AssertRC(rc); pVCpu->hm.s.vmx.u64TscOffset = uTscOffset; } if (uProcCtls & VMX_PROC_CTLS_RDTSC_EXIT) { uProcCtls &= ~VMX_PROC_CTLS_RDTSC_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls); AssertRC(rc); pVCpu->hm.s.vmx.u32ProcCtls = uProcCtls; } STAM_COUNTER_INC(&pVCpu->hm.s.StatTscOffset); } else { /* We can't use TSC-offsetting (non-fixed TSC, warp drive active etc.), VM-exit on RDTSC(P). */ if (!(uProcCtls & VMX_PROC_CTLS_RDTSC_EXIT)) { uProcCtls |= VMX_PROC_CTLS_RDTSC_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, uProcCtls); AssertRC(rc); pVCpu->hm.s.vmx.u32ProcCtls = uProcCtls; } STAM_COUNTER_INC(&pVCpu->hm.s.StatTscIntercept); } } /** * Gets the IEM exception flags for the specified vector and IDT vectoring / * VM-exit interruption info type. * * @returns The IEM exception flags. * @param uVector The event vector. * @param uVmxVectorType The VMX event type. * * @remarks This function currently only constructs flags required for * IEMEvaluateRecursiveXcpt and not the complete flags (e.g, error-code * and CR2 aspects of an exception are not included). */ static uint32_t hmR0VmxGetIemXcptFlags(uint8_t uVector, uint32_t uVmxVectorType) { uint32_t fIemXcptFlags; switch (uVmxVectorType) { case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT: case VMX_IDT_VECTORING_INFO_TYPE_NMI: fIemXcptFlags = IEM_XCPT_FLAGS_T_CPU_XCPT; break; case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT: fIemXcptFlags = IEM_XCPT_FLAGS_T_EXT_INT; break; case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT: fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT | IEM_XCPT_FLAGS_ICEBP_INSTR; break; case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT: { fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT; if (uVector == X86_XCPT_BP) fIemXcptFlags |= IEM_XCPT_FLAGS_BP_INSTR; else if (uVector == X86_XCPT_OF) fIemXcptFlags |= IEM_XCPT_FLAGS_OF_INSTR; else { fIemXcptFlags = 0; AssertMsgFailed(("Unexpected vector for software int. uVector=%#x", uVector)); } break; } case VMX_IDT_VECTORING_INFO_TYPE_SW_INT: fIemXcptFlags = IEM_XCPT_FLAGS_T_SOFT_INT; break; default: fIemXcptFlags = 0; AssertMsgFailed(("Unexpected vector type! uVmxVectorType=%#x uVector=%#x", uVmxVectorType, uVector)); break; } return fIemXcptFlags; } /** * Sets an event as a pending event to be injected into the guest. * * @param pVCpu The cross context virtual CPU structure. * @param u32IntInfo The VM-entry interruption-information field. * @param cbInstr The VM-entry instruction length in bytes (for software * interrupts, exceptions and privileged software * exceptions). * @param u32ErrCode The VM-entry exception error code. * @param GCPtrFaultAddress The fault-address (CR2) in case it's a * page-fault. * * @remarks Statistics counter assumes this is a guest event being injected or * re-injected into the guest, i.e. 'StatInjectPendingReflect' is * always incremented. */ DECLINLINE(void) hmR0VmxSetPendingEvent(PVMCPU pVCpu, uint32_t u32IntInfo, uint32_t cbInstr, uint32_t u32ErrCode, RTGCUINTPTR GCPtrFaultAddress) { Assert(!pVCpu->hm.s.Event.fPending); pVCpu->hm.s.Event.fPending = true; pVCpu->hm.s.Event.u64IntInfo = u32IntInfo; pVCpu->hm.s.Event.u32ErrCode = u32ErrCode; pVCpu->hm.s.Event.cbInstr = cbInstr; pVCpu->hm.s.Event.GCPtrFaultAddress = GCPtrFaultAddress; } /** * Sets a double-fault (\#DF) exception as pending-for-injection into the VM. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxSetPendingXcptDF(PVMCPU pVCpu) { uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DF) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1); hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); } /** * Sets an invalid-opcode (\#UD) exception as pending-for-injection into the VM. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxSetPendingXcptUD(PVMCPU pVCpu) { uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_UD) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1); hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); } /** * Sets a debug (\#DB) exception as pending-for-injection into the VM. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxSetPendingXcptDB(PVMCPU pVCpu) { uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_DB) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 0) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1); hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); } #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** * Sets a general-protection (\#GP) exception as pending-for-injection into the VM. * * @param pVCpu The cross context virtual CPU structure. * @param u32ErrCode The error code for the general-protection exception. */ DECLINLINE(void) hmR0VmxSetPendingXcptGP(PVMCPU pVCpu, uint32_t u32ErrCode) { uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_GP) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1); hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */); } /** * Sets a stack (\#SS) exception as pending-for-injection into the VM. * * @param pVCpu The cross context virtual CPU structure. * @param u32ErrCode The error code for the stack exception. */ DECLINLINE(void) hmR0VmxSetPendingXcptSS(PVMCPU pVCpu, uint32_t u32ErrCode) { uint32_t const u32IntInfo = RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VECTOR, X86_XCPT_SS) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_TYPE, VMX_EXIT_INT_INFO_TYPE_HW_XCPT) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_ERR_CODE_VALID, 1) | RT_BF_MAKE(VMX_BF_ENTRY_INT_INFO_VALID, 1); hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrCode, 0 /* GCPtrFaultAddress */); } /** * Decodes the memory operand of an instruction that caused a VM-exit. * * The VM-exit qualification field provides the displacement field for memory * operand instructions, if any. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_SUCCESS if the operand was successfully decoded. * @retval VINF_HM_PENDING_XCPT if an exception was raised while decoding the * operand. * @param pVCpu The cross context virtual CPU structure. * @param uExitInstrInfo The VM-exit instruction information field. * @param enmMemAccess The memory operand's access type (read or write). * @param GCPtrDisp The instruction displacement field, if any. For * RIP-relative addressing pass RIP + displacement here. * @param pGCPtrMem Where to store the effective destination memory address. */ static VBOXSTRICTRC hmR0VmxDecodeMemOperand(PVMCPU pVCpu, uint32_t uExitInstrInfo, RTGCPTR GCPtrDisp, VMXMEMACCESS enmMemAccess, PRTGCPTR pGCPtrMem) { Assert(pGCPtrMem); Assert(!CPUMIsGuestInRealOrV86Mode(pVCpu)); HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_EFER | CPUMCTX_EXTRN_CR0); static uint64_t const s_auAddrSizeMasks[] = { UINT64_C(0xffff), UINT64_C(0xffffffff), UINT64_C(0xffffffffffffffff) }; static uint64_t const s_auAccessSizeMasks[] = { sizeof(uint16_t), sizeof(uint32_t), sizeof(uint64_t) }; AssertCompile(RT_ELEMENTS(s_auAccessSizeMasks) == RT_ELEMENTS(s_auAddrSizeMasks)); VMXEXITINSTRINFO ExitInstrInfo; ExitInstrInfo.u = uExitInstrInfo; uint8_t const uAddrSize = ExitInstrInfo.All.u3AddrSize; uint8_t const iSegReg = ExitInstrInfo.All.iSegReg; bool const fIdxRegValid = !ExitInstrInfo.All.fIdxRegInvalid; uint8_t const iIdxReg = ExitInstrInfo.All.iIdxReg; uint8_t const uScale = ExitInstrInfo.All.u2Scaling; bool const fBaseRegValid = !ExitInstrInfo.All.fBaseRegInvalid; uint8_t const iBaseReg = ExitInstrInfo.All.iBaseReg; bool const fIsMemOperand = !ExitInstrInfo.All.fIsRegOperand; bool const fIsLongMode = CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx); /* * Validate instruction information. * This shouldn't happen on real hardware but useful while testing our nested hardware-virtualization code. */ AssertLogRelMsgReturn(uAddrSize < RT_ELEMENTS(s_auAddrSizeMasks), ("Invalid address size. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_1); AssertLogRelMsgReturn(iSegReg < X86_SREG_COUNT, ("Invalid segment register. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_2); AssertLogRelMsgReturn(fIsMemOperand, ("Expected memory operand. ExitInstrInfo=%#RX32\n", ExitInstrInfo.u), VERR_VMX_IPE_3); /* * Compute the complete effective address. * * See AMD instruction spec. 1.4.2 "SIB Byte Format" * See AMD spec. 4.5.2 "Segment Registers". */ RTGCPTR GCPtrMem = GCPtrDisp; if (fBaseRegValid) GCPtrMem += pVCpu->cpum.GstCtx.aGRegs[iBaseReg].u64; if (fIdxRegValid) GCPtrMem += pVCpu->cpum.GstCtx.aGRegs[iIdxReg].u64 << uScale; RTGCPTR const GCPtrOff = GCPtrMem; if ( !fIsLongMode || iSegReg >= X86_SREG_FS) GCPtrMem += pVCpu->cpum.GstCtx.aSRegs[iSegReg].u64Base; GCPtrMem &= s_auAddrSizeMasks[uAddrSize]; /* * Validate effective address. * See AMD spec. 4.5.3 "Segment Registers in 64-Bit Mode". */ uint8_t const cbAccess = s_auAccessSizeMasks[uAddrSize]; Assert(cbAccess > 0); if (fIsLongMode) { if (X86_IS_CANONICAL(GCPtrMem)) { *pGCPtrMem = GCPtrMem; return VINF_SUCCESS; } /** @todo r=ramshankar: We should probably raise \#SS or \#GP. See AMD spec. 4.12.2 * "Data Limit Checks in 64-bit Mode". */ Log4Func(("Long mode effective address is not canonical GCPtrMem=%#RX64\n", GCPtrMem)); hmR0VmxSetPendingXcptGP(pVCpu, 0); return VINF_HM_PENDING_XCPT; } /* * This is a watered down version of iemMemApplySegment(). * Parts that are not applicable for VMX instructions like real-or-v8086 mode * and segment CPL/DPL checks are skipped. */ RTGCPTR32 const GCPtrFirst32 = (RTGCPTR32)GCPtrOff; RTGCPTR32 const GCPtrLast32 = GCPtrFirst32 + cbAccess - 1; PCCPUMSELREG pSel = &pVCpu->cpum.GstCtx.aSRegs[iSegReg]; /* Check if the segment is present and usable. */ if ( pSel->Attr.n.u1Present && !pSel->Attr.n.u1Unusable) { Assert(pSel->Attr.n.u1DescType); if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_CODE)) { /* Check permissions for the data segment. */ if ( enmMemAccess == VMXMEMACCESS_WRITE && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_WRITE)) { Log4Func(("Data segment access invalid. iSegReg=%#x Attr=%#RX32\n", iSegReg, pSel->Attr.u)); hmR0VmxSetPendingXcptGP(pVCpu, iSegReg); return VINF_HM_PENDING_XCPT; } /* Check limits if it's a normal data segment. */ if (!(pSel->Attr.n.u4Type & X86_SEL_TYPE_DOWN)) { if ( GCPtrFirst32 > pSel->u32Limit || GCPtrLast32 > pSel->u32Limit) { Log4Func(("Data segment limit exceeded." "iSegReg=%#x GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", iSegReg, GCPtrFirst32, GCPtrLast32, pSel->u32Limit)); if (iSegReg == X86_SREG_SS) hmR0VmxSetPendingXcptSS(pVCpu, 0); else hmR0VmxSetPendingXcptGP(pVCpu, 0); return VINF_HM_PENDING_XCPT; } } else { /* Check limits if it's an expand-down data segment. Note! The upper boundary is defined by the B bit, not the G bit! */ if ( GCPtrFirst32 < pSel->u32Limit + UINT32_C(1) || GCPtrLast32 > (pSel->Attr.n.u1DefBig ? UINT32_MAX : UINT32_C(0xffff))) { Log4Func(("Expand-down data segment limit exceeded." "iSegReg=%#x GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", iSegReg, GCPtrFirst32, GCPtrLast32, pSel->u32Limit)); if (iSegReg == X86_SREG_SS) hmR0VmxSetPendingXcptSS(pVCpu, 0); else hmR0VmxSetPendingXcptGP(pVCpu, 0); return VINF_HM_PENDING_XCPT; } } } else { /* Check permissions for the code segment. */ if ( enmMemAccess == VMXMEMACCESS_WRITE || ( enmMemAccess == VMXMEMACCESS_READ && !(pSel->Attr.n.u4Type & X86_SEL_TYPE_READ))) { Log4Func(("Code segment access invalid. Attr=%#RX32\n", pSel->Attr.u)); Assert(!CPUMIsGuestInRealOrV86ModeEx(&pVCpu->cpum.GstCtx)); hmR0VmxSetPendingXcptGP(pVCpu, 0); return VINF_HM_PENDING_XCPT; } /* Check limits for the code segment (normal/expand-down not applicable for code segments). */ if ( GCPtrFirst32 > pSel->u32Limit || GCPtrLast32 > pSel->u32Limit) { Log4Func(("Code segment limit exceeded. GCPtrFirst32=%#RX32 GCPtrLast32=%#RX32 u32Limit=%#RX32\n", GCPtrFirst32, GCPtrLast32, pSel->u32Limit)); if (iSegReg == X86_SREG_SS) hmR0VmxSetPendingXcptSS(pVCpu, 0); else hmR0VmxSetPendingXcptGP(pVCpu, 0); return VINF_HM_PENDING_XCPT; } } } else { Log4Func(("Not present or unusable segment. iSegReg=%#x Attr=%#RX32\n", iSegReg, pSel->Attr.u)); hmR0VmxSetPendingXcptGP(pVCpu, 0); return VINF_HM_PENDING_XCPT; } *pGCPtrMem = GCPtrMem; return VINF_SUCCESS; } /** * Perform the relevant VMX instruction checks for VM-exits that occurred due to the * guest attempting to execute a VMX instruction. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_SUCCESS if we should continue handling the VM-exit. * @retval VINF_HM_PENDING_XCPT if an exception was raised. * * @param pVCpu The cross context virtual CPU structure. * @param uExitReason The VM-exit reason. * * @todo NstVmx: Document other error codes when VM-exit is implemented. * @remarks No-long-jump zone!!! */ static VBOXSTRICTRC hmR0VmxCheckExitDueToVmxInstr(PVMCPU pVCpu, uint32_t uExitReason) { HMVMX_CPUMCTX_ASSERT(pVCpu, CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER); if ( CPUMIsGuestInRealOrV86ModeEx(&pVCpu->cpum.GstCtx) || ( CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx) && !CPUMIsGuestIn64BitCodeEx(&pVCpu->cpum.GstCtx))) { Log4Func(("In real/v86-mode or long-mode outside 64-bit code segment -> #UD\n")); hmR0VmxSetPendingXcptUD(pVCpu); return VINF_HM_PENDING_XCPT; } if (uExitReason == VMX_EXIT_VMXON) { /* * We check CR4.VMXE because it is required to be always set while in VMX operation * by physical CPUs and our CR4 read shadow is only consulted when executing specific * instructions (CLTS, LMSW, MOV CR, and SMSW) and thus doesn't affect CPU operation * otherwise (i.e. physical CPU won't automatically #UD if Cr4Shadow.VMXE is 0). */ if (!CPUMIsGuestVmxEnabled(&pVCpu->cpum.GstCtx)) { Log4Func(("CR4.VMXE is not set -> #UD\n")); hmR0VmxSetPendingXcptUD(pVCpu); return VINF_HM_PENDING_XCPT; } } else if (!CPUMIsGuestInVmxRootMode(&pVCpu->cpum.GstCtx)) { /* * The guest has not entered VMX operation but attempted to execute a VMX instruction * (other than VMXON), we need to raise a #UD. */ Log4Func(("Not in VMX root mode -> #UD\n")); hmR0VmxSetPendingXcptUD(pVCpu); return VINF_HM_PENDING_XCPT; } if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)) { /* * The nested-guest attempted to execute a VMX instruction, cause a VM-exit and let * the guest hypervisor deal with it. */ /** @todo NSTVMX: Trigger a VM-exit */ } /* * VMX instructions require CPL 0 except in VMX non-root mode where the VM-exit intercept * (above) takes preceedence over the CPL check. */ if (CPUMGetGuestCPL(pVCpu) > 0) { Log4Func(("CPL > 0 -> #GP(0)\n")); hmR0VmxSetPendingXcptGP(pVCpu, 0); return VINF_HM_PENDING_XCPT; } return VINF_SUCCESS; } #endif /* VBOX_WITH_NESTED_HWVIRT_VMX */ /** * Handle a condition that occurred while delivering an event through the guest * IDT. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_SUCCESS if we should continue handling the VM-exit. * @retval VINF_HM_DOUBLE_FAULT if a \#DF condition was detected and we ought * to continue execution of the guest which will delivery the \#DF. * @retval VINF_EM_RESET if we detected a triple-fault condition. * @retval VERR_EM_GUEST_CPU_HANG if we detected a guest CPU hang. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient Pointer to the VMX transient structure. * * @remarks No-long-jump zone!!! */ static VBOXSTRICTRC hmR0VmxCheckExitDueToEventDelivery(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { uint32_t const uExitVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo); int rc2 = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient); rc2 |= hmR0VmxReadExitIntInfoVmcs(pVmxTransient); AssertRCReturn(rc2, rc2); VBOXSTRICTRC rcStrict = VINF_SUCCESS; if (VMX_IDT_VECTORING_INFO_IS_VALID(pVmxTransient->uIdtVectoringInfo)) { uint32_t const uIdtVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo); uint32_t const uIdtVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo); /* * If the event was a software interrupt (generated with INT n) or a software exception * (generated by INT3/INTO) or a privileged software exception (generated by INT1), we * can handle the VM-exit and continue guest execution which will re-execute the * instruction rather than re-injecting the exception, as that can cause premature * trips to ring-3 before injection and involve TRPM which currently has no way of * storing that these exceptions were caused by these instructions (ICEBP's #DB poses * the problem). */ IEMXCPTRAISE enmRaise; IEMXCPTRAISEINFO fRaiseInfo; if ( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT) { enmRaise = IEMXCPTRAISE_REEXEC_INSTR; fRaiseInfo = IEMXCPTRAISEINFO_NONE; } else if (VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo)) { uint32_t const uExitVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uExitIntInfo); uint32_t const fIdtVectorFlags = hmR0VmxGetIemXcptFlags(uIdtVector, uIdtVectorType); uint32_t const fExitVectorFlags = hmR0VmxGetIemXcptFlags(uExitVector, uExitVectorType); /** @todo Make AssertMsgReturn as just AssertMsg later. */ AssertMsgReturn(uExitVectorType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT, ("hmR0VmxCheckExitDueToEventDelivery: Unexpected VM-exit interruption info. %#x!\n", uExitVectorType), VERR_VMX_IPE_5); enmRaise = IEMEvaluateRecursiveXcpt(pVCpu, fIdtVectorFlags, uIdtVector, fExitVectorFlags, uExitVector, &fRaiseInfo); /* Determine a vectoring #PF condition, see comment in hmR0VmxExitXcptPF(). */ if (fRaiseInfo & (IEMXCPTRAISEINFO_EXT_INT_PF | IEMXCPTRAISEINFO_NMI_PF)) { pVmxTransient->fVectoringPF = true; enmRaise = IEMXCPTRAISE_PREV_EVENT; } } else { /* * If an exception or hardware interrupt delivery caused an EPT violation/misconfig or APIC access * VM-exit, then the VM-exit interruption-information will not be valid and we end up here. * It is sufficient to reflect the original event to the guest after handling the VM-exit. */ Assert( uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI || uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_EXT_INT); enmRaise = IEMXCPTRAISE_PREV_EVENT; fRaiseInfo = IEMXCPTRAISEINFO_NONE; } /* * On CPUs that support Virtual NMIs, if this VM-exit (be it an exception or EPT violation/misconfig * etc.) occurred while delivering the NMI, we need to clear the block-by-NMI field in the guest * interruptibility-state before re-delivering the NMI after handling the VM-exit. Otherwise the * subsequent VM-entry would fail. * * See Intel spec. 30.7.1.2 "Resuming Guest Software after Handling an Exception". See @bugref{7445}. */ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS) && uIdtVectorType == VMX_IDT_VECTORING_INFO_TYPE_NMI && ( enmRaise == IEMXCPTRAISE_PREV_EVENT || (fRaiseInfo & IEMXCPTRAISEINFO_NMI_PF)) && (pVCpu->hm.s.vmx.u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)) { VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS); } switch (enmRaise) { case IEMXCPTRAISE_CURRENT_XCPT: { Log4Func(("IDT: Pending secondary Xcpt: uIdtVectoringInfo=%#RX64 uExitIntInfo=%#RX64\n", pVmxTransient->uIdtVectoringInfo, pVmxTransient->uExitIntInfo)); Assert(rcStrict == VINF_SUCCESS); break; } case IEMXCPTRAISE_PREV_EVENT: { uint32_t u32ErrCode; if (VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uIdtVectoringInfo)) { rc2 = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient); AssertRCReturn(rc2, rc2); u32ErrCode = pVmxTransient->uIdtVectoringErrorCode; } else u32ErrCode = 0; /* If uExitVector is #PF, CR2 value will be updated from the VMCS if it's a guest #PF, see hmR0VmxExitXcptPF(). */ STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect); hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo), 0 /* cbInstr */, u32ErrCode, pVCpu->cpum.GstCtx.cr2); Log4Func(("IDT: Pending vectoring event %#RX64 Err=%#RX32\n", pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.u32ErrCode)); Assert(rcStrict == VINF_SUCCESS); break; } case IEMXCPTRAISE_REEXEC_INSTR: Assert(rcStrict == VINF_SUCCESS); break; case IEMXCPTRAISE_DOUBLE_FAULT: { /* * Determing a vectoring double #PF condition. Used later, when PGM evaluates the * second #PF as a guest #PF (and not a shadow #PF) and needs to be converted into a #DF. */ if (fRaiseInfo & IEMXCPTRAISEINFO_PF_PF) { pVmxTransient->fVectoringDoublePF = true; Log4Func(("IDT: Vectoring double #PF %#RX64 cr2=%#RX64\n", pVCpu->hm.s.Event.u64IntInfo, pVCpu->cpum.GstCtx.cr2)); rcStrict = VINF_SUCCESS; } else { STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingReflect); hmR0VmxSetPendingXcptDF(pVCpu); Log4Func(("IDT: Pending vectoring #DF %#RX64 uIdtVector=%#x uExitVector=%#x\n", pVCpu->hm.s.Event.u64IntInfo, uIdtVector, uExitVector)); rcStrict = VINF_HM_DOUBLE_FAULT; } break; } case IEMXCPTRAISE_TRIPLE_FAULT: { Log4Func(("IDT: Pending vectoring triple-fault uIdt=%#x uExit=%#x\n", uIdtVector, uExitVector)); rcStrict = VINF_EM_RESET; break; } case IEMXCPTRAISE_CPU_HANG: { Log4Func(("IDT: Bad guest! Entering CPU hang. fRaiseInfo=%#x\n", fRaiseInfo)); rcStrict = VERR_EM_GUEST_CPU_HANG; break; } default: { AssertMsgFailed(("IDT: vcpu[%RU32] Unexpected/invalid value! enmRaise=%#x\n", pVCpu->idCpu, enmRaise)); rcStrict = VERR_VMX_IPE_2; break; } } } else if ( VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo) && VMX_EXIT_INT_INFO_IS_NMI_UNBLOCK_IRET(pVmxTransient->uExitIntInfo) && uExitVector != X86_XCPT_DF && (pVCpu->hm.s.vmx.u32PinCtls & VMX_PIN_CTLS_VIRT_NMI)) { /* * Execution of IRET caused this fault when NMI blocking was in effect (i.e we're in the guest NMI handler). * We need to set the block-by-NMI field so that NMIs remain blocked until the IRET execution is restarted. * See Intel spec. 30.7.1.2 "Resuming guest software after handling an exception". */ if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)) { Log4Func(("Setting VMCPU_FF_BLOCK_NMIS. fValid=%RTbool uExitReason=%u\n", VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo), pVmxTransient->uExitReason)); VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS); } } Assert( rcStrict == VINF_SUCCESS || rcStrict == VINF_HM_DOUBLE_FAULT || rcStrict == VINF_EM_RESET || rcStrict == VERR_EM_GUEST_CPU_HANG); return rcStrict; } /** * Imports a guest segment register from the current VMCS into * the guest-CPU context. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param idxSel Index of the selector in the VMCS. * @param idxLimit Index of the segment limit in the VMCS. * @param idxBase Index of the segment base in the VMCS. * @param idxAccess Index of the access rights of the segment in the VMCS. * @param pSelReg Pointer to the segment selector. * * @remarks Called with interrupts and/or preemption disabled, try not to assert and * do not log! * * @remarks Never call this function directly!!! Use the * HMVMX_IMPORT_SREG() macro as that takes care * of whether to read from the VMCS cache or not. */ static int hmR0VmxImportGuestSegmentReg(PVMCPU pVCpu, uint32_t idxSel, uint32_t idxLimit, uint32_t idxBase, uint32_t idxAccess, PCPUMSELREG pSelReg) { NOREF(pVCpu); uint32_t u32Sel; uint32_t u32Limit; uint32_t u32Attr; uint64_t u64Base; int rc = VMXReadVmcs32(idxSel, &u32Sel); rc |= VMXReadVmcs32(idxLimit, &u32Limit); rc |= VMXReadVmcs32(idxAccess, &u32Attr); rc |= VMXReadVmcsGstNByIdxVal(idxBase, &u64Base); AssertRCReturn(rc, rc); pSelReg->Sel = (uint16_t)u32Sel; pSelReg->ValidSel = (uint16_t)u32Sel; pSelReg->fFlags = CPUMSELREG_FLAGS_VALID; pSelReg->u32Limit = u32Limit; pSelReg->u64Base = u64Base; pSelReg->Attr.u = u32Attr; /* * If VT-x marks the segment as unusable, most other bits remain undefined: * - For CS the L, D and G bits have meaning. * - For SS the DPL has meaning (it -is- the CPL for Intel and VBox). * - For the remaining data segments no bits are defined. * * The present bit and the unusable bit has been observed to be set at the * same time (the selector was supposed to be invalid as we started executing * a V8086 interrupt in ring-0). * * What should be important for the rest of the VBox code, is that the P bit is * cleared. Some of the other VBox code recognizes the unusable bit, but * AMD-V certainly don't, and REM doesn't really either. So, to be on the * safe side here, we'll strip off P and other bits we don't care about. If * any code breaks because Attr.u != 0 when Sel < 4, it should be fixed. * * See Intel spec. 27.3.2 "Saving Segment Registers and Descriptor-Table Registers". */ if (pSelReg->Attr.u & X86DESCATTR_UNUSABLE) { Assert(idxSel != VMX_VMCS16_GUEST_TR_SEL); /* TR is the only selector that can never be unusable. */ /* Masking off: X86DESCATTR_P, X86DESCATTR_LIMIT_HIGH, and X86DESCATTR_AVL. The latter two are really irrelevant. */ pSelReg->Attr.u &= X86DESCATTR_UNUSABLE | X86DESCATTR_L | X86DESCATTR_D | X86DESCATTR_G | X86DESCATTR_DPL | X86DESCATTR_TYPE | X86DESCATTR_DT; #ifdef VBOX_STRICT VMMRZCallRing3Disable(pVCpu); Log4Func(("Unusable idxSel=%#x attr=%#x -> %#x\n", idxSel, u32Sel, pSelReg->Attr.u)); # ifdef DEBUG_bird AssertMsg((u32Attr & ~X86DESCATTR_P) == pSelReg->Attr.u, ("%#x: %#x != %#x (sel=%#x base=%#llx limit=%#x)\n", idxSel, u32Sel, pSelReg->Attr.u, pSelReg->Sel, pSelReg->u64Base, pSelReg->u32Limit)); # endif VMMRZCallRing3Enable(pVCpu); #endif } return VINF_SUCCESS; } /** * Imports the guest RIP from the VMCS back into the guest-CPU context. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks Called with interrupts and/or preemption disabled, should not assert! * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState() * instead!!! */ DECLINLINE(int) hmR0VmxImportGuestRip(PVMCPU pVCpu) { uint64_t u64Val; PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if (pCtx->fExtrn & CPUMCTX_EXTRN_RIP) { int rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RIP, &u64Val); if (RT_SUCCESS(rc)) { pCtx->rip = u64Val; EMR0HistoryUpdatePC(pVCpu, pCtx->rip, false); pCtx->fExtrn &= ~CPUMCTX_EXTRN_RIP; } return rc; } return VINF_SUCCESS; } /** * Imports the guest RFLAGS from the VMCS back into the guest-CPU context. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks Called with interrupts and/or preemption disabled, should not assert! * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState() * instead!!! */ DECLINLINE(int) hmR0VmxImportGuestRFlags(PVMCPU pVCpu) { uint32_t u32Val; PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if (pCtx->fExtrn & CPUMCTX_EXTRN_RFLAGS) { int rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Val); if (RT_SUCCESS(rc)) { pCtx->eflags.u32 = u32Val; /* Restore eflags for real-on-v86-mode hack. */ if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { pCtx->eflags.Bits.u1VM = 0; pCtx->eflags.Bits.u2IOPL = pVCpu->hm.s.vmx.RealMode.Eflags.Bits.u2IOPL; } } pCtx->fExtrn &= ~CPUMCTX_EXTRN_RFLAGS; return rc; } return VINF_SUCCESS; } /** * Imports the guest interruptibility-state from the VMCS back into the guest-CPU * context. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks Called with interrupts and/or preemption disabled, try not to assert and * do not log! * @remarks Do -not- call this function directly, use hmR0VmxImportGuestState() * instead!!! */ DECLINLINE(int) hmR0VmxImportGuestIntrState(PVMCPU pVCpu) { uint32_t u32Val; PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &u32Val); AssertRCReturn(rc, rc); /* * We additionally have a requirement to import RIP, RFLAGS depending on whether we * might need them in hmR0VmxEvaluatePendingEvent(). */ if (!u32Val) { if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) { rc = hmR0VmxImportGuestRip(pVCpu); rc |= hmR0VmxImportGuestRFlags(pVCpu); AssertRCReturn(rc, rc); VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); } if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)) VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS); } else { rc = hmR0VmxImportGuestRip(pVCpu); rc |= hmR0VmxImportGuestRFlags(pVCpu); AssertRCReturn(rc, rc); if (u32Val & ( VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS | VMX_VMCS_GUEST_INT_STATE_BLOCK_STI)) { EMSetInhibitInterruptsPC(pVCpu, pCtx->rip); } else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); if (u32Val & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI) { if (!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)) VMCPU_FF_SET(pVCpu, VMCPU_FF_BLOCK_NMIS); } else if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)) VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_BLOCK_NMIS); } return VINF_SUCCESS; } /** * Worker for VMXR0ImportStateOnDemand. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param fWhat What to import, CPUMCTX_EXTRN_XXX. */ static int hmR0VmxImportGuestState(PVMCPU pVCpu, uint64_t fWhat) { #define VMXLOCAL_BREAK_RC(a_rc) \ if (RT_FAILURE(a_rc)) \ break int rc = VINF_SUCCESS; PVM pVM = pVCpu->CTX_SUFF(pVM); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; uint64_t u64Val; uint32_t u32Val; Log4Func(("fExtrn=%#RX64 fWhat=%#RX64\n", pCtx->fExtrn, fWhat)); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatImportGuestState, x); /* * We disable interrupts to make the updating of the state and in particular * the fExtrn modification atomic wrt to preemption hooks. */ RTCCUINTREG const fEFlags = ASMIntDisableFlags(); fWhat &= pCtx->fExtrn; if (fWhat) { do { if (fWhat & CPUMCTX_EXTRN_RIP) { rc = hmR0VmxImportGuestRip(pVCpu); VMXLOCAL_BREAK_RC(rc); } if (fWhat & CPUMCTX_EXTRN_RFLAGS) { rc = hmR0VmxImportGuestRFlags(pVCpu); VMXLOCAL_BREAK_RC(rc); } if (fWhat & CPUMCTX_EXTRN_HM_VMX_INT_STATE) { rc = hmR0VmxImportGuestIntrState(pVCpu); VMXLOCAL_BREAK_RC(rc); } if (fWhat & CPUMCTX_EXTRN_RSP) { rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_RSP, &u64Val); VMXLOCAL_BREAK_RC(rc); pCtx->rsp = u64Val; } if (fWhat & CPUMCTX_EXTRN_SREG_MASK) { if (fWhat & CPUMCTX_EXTRN_CS) { rc = HMVMX_IMPORT_SREG(CS, &pCtx->cs); rc |= hmR0VmxImportGuestRip(pVCpu); VMXLOCAL_BREAK_RC(rc); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pCtx->cs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrCS.u; EMR0HistoryUpdatePC(pVCpu, pCtx->cs.u64Base + pCtx->rip, true); } if (fWhat & CPUMCTX_EXTRN_SS) { rc = HMVMX_IMPORT_SREG(SS, &pCtx->ss); VMXLOCAL_BREAK_RC(rc); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pCtx->ss.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrSS.u; } if (fWhat & CPUMCTX_EXTRN_DS) { rc = HMVMX_IMPORT_SREG(DS, &pCtx->ds); VMXLOCAL_BREAK_RC(rc); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pCtx->ds.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrDS.u; } if (fWhat & CPUMCTX_EXTRN_ES) { rc = HMVMX_IMPORT_SREG(ES, &pCtx->es); VMXLOCAL_BREAK_RC(rc); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pCtx->es.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrES.u; } if (fWhat & CPUMCTX_EXTRN_FS) { rc = HMVMX_IMPORT_SREG(FS, &pCtx->fs); VMXLOCAL_BREAK_RC(rc); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pCtx->fs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrFS.u; } if (fWhat & CPUMCTX_EXTRN_GS) { rc = HMVMX_IMPORT_SREG(GS, &pCtx->gs); VMXLOCAL_BREAK_RC(rc); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) pCtx->gs.Attr.u = pVCpu->hm.s.vmx.RealMode.AttrGS.u; } } if (fWhat & CPUMCTX_EXTRN_TABLE_MASK) { if (fWhat & CPUMCTX_EXTRN_LDTR) { rc = HMVMX_IMPORT_SREG(LDTR, &pCtx->ldtr); VMXLOCAL_BREAK_RC(rc); } if (fWhat & CPUMCTX_EXTRN_GDTR) { rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); VMXLOCAL_BREAK_RC(rc); pCtx->gdtr.pGdt = u64Val; pCtx->gdtr.cbGdt = u32Val; } /* Guest IDTR. */ if (fWhat & CPUMCTX_EXTRN_IDTR) { rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); VMXLOCAL_BREAK_RC(rc); pCtx->idtr.pIdt = u64Val; pCtx->idtr.cbIdt = u32Val; } /* Guest TR. */ if (fWhat & CPUMCTX_EXTRN_TR) { /* Real-mode emulation using virtual-8086 mode has the fake TSS (pRealModeTSS) in TR, don't save that one. */ if (!pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { rc = HMVMX_IMPORT_SREG(TR, &pCtx->tr); VMXLOCAL_BREAK_RC(rc); } } } if (fWhat & CPUMCTX_EXTRN_SYSENTER_MSRS) { rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_EIP, &pCtx->SysEnter.eip); rc |= VMXReadVmcsGstN(VMX_VMCS_GUEST_SYSENTER_ESP, &pCtx->SysEnter.esp); rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_SYSENTER_CS, &u32Val); pCtx->SysEnter.cs = u32Val; VMXLOCAL_BREAK_RC(rc); } #if HC_ARCH_BITS == 64 if (fWhat & CPUMCTX_EXTRN_KERNEL_GS_BASE) { if ( pVM->hm.s.fAllow64BitGuests && (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)) pCtx->msrKERNELGSBASE = ASMRdMsr(MSR_K8_KERNEL_GS_BASE); } if (fWhat & CPUMCTX_EXTRN_SYSCALL_MSRS) { if ( pVM->hm.s.fAllow64BitGuests && (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST)) { pCtx->msrLSTAR = ASMRdMsr(MSR_K8_LSTAR); pCtx->msrSTAR = ASMRdMsr(MSR_K6_STAR); pCtx->msrSFMASK = ASMRdMsr(MSR_K8_SF_MASK); } } #endif if ( (fWhat & (CPUMCTX_EXTRN_TSC_AUX | CPUMCTX_EXTRN_OTHER_MSRS)) #if HC_ARCH_BITS == 32 || (fWhat & (CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS)) #endif ) { PCVMXAUTOMSR pMsr = (PVMXAUTOMSR)pVCpu->hm.s.vmx.pvGuestMsr; uint32_t const cMsrs = pVCpu->hm.s.vmx.cMsrs; for (uint32_t i = 0; i < cMsrs; i++, pMsr++) { switch (pMsr->u32Msr) { #if HC_ARCH_BITS == 32 case MSR_K8_LSTAR: pCtx->msrLSTAR = pMsr->u64Value; break; case MSR_K6_STAR: pCtx->msrSTAR = pMsr->u64Value; break; case MSR_K8_SF_MASK: pCtx->msrSFMASK = pMsr->u64Value; break; case MSR_K8_KERNEL_GS_BASE: pCtx->msrKERNELGSBASE = pMsr->u64Value; break; #endif case MSR_IA32_SPEC_CTRL: CPUMSetGuestSpecCtrl(pVCpu, pMsr->u64Value); break; case MSR_K8_TSC_AUX: CPUMSetGuestTscAux(pVCpu, pMsr->u64Value); break; case MSR_K6_EFER: /* EFER can't be changed without causing a VM-exit */ break; default: { pVCpu->hm.s.u32HMError = pMsr->u32Msr; ASMSetFlags(fEFlags); AssertMsgFailed(("Unexpected MSR in auto-load/store area. uMsr=%#RX32 cMsrs=%u\n", pMsr->u32Msr, cMsrs)); return VERR_HM_UNEXPECTED_LD_ST_MSR; } } } } if (fWhat & CPUMCTX_EXTRN_DR7) { if (!pVCpu->hm.s.fUsingHyperDR7) { /* Upper 32-bits are always zero. See Intel spec. 2.7.3 "Loading and Storing Debug Registers". */ rc = VMXReadVmcs32(VMX_VMCS_GUEST_DR7, &u32Val); VMXLOCAL_BREAK_RC(rc); pCtx->dr[7] = u32Val; } } if (fWhat & CPUMCTX_EXTRN_CR_MASK) { uint32_t u32Shadow; if (fWhat & CPUMCTX_EXTRN_CR0) { rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val); rc |= VMXReadVmcs32(VMX_VMCS_CTRL_CR0_READ_SHADOW, &u32Shadow); VMXLOCAL_BREAK_RC(rc); u32Val = (u32Val & ~pVCpu->hm.s.vmx.u32Cr0Mask) | (u32Shadow & pVCpu->hm.s.vmx.u32Cr0Mask); VMMRZCallRing3Disable(pVCpu); /* Calls into PGM which has Log statements. */ CPUMSetGuestCR0(pVCpu, u32Val); VMMRZCallRing3Enable(pVCpu); } if (fWhat & CPUMCTX_EXTRN_CR4) { rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32Val); rc |= VMXReadVmcs32(VMX_VMCS_CTRL_CR4_READ_SHADOW, &u32Shadow); VMXLOCAL_BREAK_RC(rc); u32Val = (u32Val & ~pVCpu->hm.s.vmx.u32Cr4Mask) | (u32Shadow & pVCpu->hm.s.vmx.u32Cr4Mask); CPUMSetGuestCR4(pVCpu, u32Val); } if (fWhat & CPUMCTX_EXTRN_CR3) { /* CR0.PG bit changes are always intercepted, so it's up to date. */ if ( pVM->hm.s.vmx.fUnrestrictedGuest || ( pVM->hm.s.fNestedPaging && CPUMIsGuestPagingEnabledEx(pCtx))) { rc = VMXReadVmcsGstN(VMX_VMCS_GUEST_CR3, &u64Val); if (pCtx->cr3 != u64Val) { CPUMSetGuestCR3(pVCpu, u64Val); VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3); } /* If the guest is in PAE mode, sync back the PDPE's into the guest state. Note: CR4.PAE, CR0.PG, EFER bit changes are always intercepted, so they're up to date. */ if (CPUMIsGuestInPAEModeEx(pCtx)) { rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &pVCpu->hm.s.aPdpes[0].u); rc |= VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &pVCpu->hm.s.aPdpes[1].u); rc |= VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &pVCpu->hm.s.aPdpes[2].u); rc |= VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &pVCpu->hm.s.aPdpes[3].u); VMXLOCAL_BREAK_RC(rc); VMCPU_FF_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES); } } } } } while (0); if (RT_SUCCESS(rc)) { /* Update fExtrn. */ pCtx->fExtrn &= ~fWhat; /* If everything has been imported, clear the HM keeper bit. */ if (!(pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL)) { pCtx->fExtrn &= ~CPUMCTX_EXTRN_KEEPER_HM; Assert(!pCtx->fExtrn); } } } else AssertMsg(!pCtx->fExtrn || (pCtx->fExtrn & HMVMX_CPUMCTX_EXTRN_ALL), ("%#RX64\n", pCtx->fExtrn)); ASMSetFlags(fEFlags); STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatImportGuestState, x); /* * Honor any pending CR3 updates. * * Consider this scenario: VM-exit -> VMMRZCallRing3Enable() -> do stuff that causes a longjmp -> hmR0VmxCallRing3Callback() * -> VMMRZCallRing3Disable() -> hmR0VmxImportGuestState() -> Sets VMCPU_FF_HM_UPDATE_CR3 pending -> return from the longjmp * -> continue with VM-exit handling -> hmR0VmxImportGuestState() and here we are. * * The reason for such complicated handling is because VM-exits that call into PGM expect CR3 to be up-to-date and thus * if any CR3-saves -before- the VM-exit (longjmp) postponed the CR3 update via the force-flag, any VM-exit handler that * calls into PGM when it re-saves CR3 will end up here and we call PGMUpdateCR3(). This is why the code below should * -NOT- check if CPUMCTX_EXTRN_CR3 is set! * * The longjmp exit path can't check these CR3 force-flags and call code that takes a lock again. We cover for it here. */ if (VMMRZCallRing3IsEnabled(pVCpu)) { if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)) { Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_CR3)); PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu)); } if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)) PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); } return VINF_SUCCESS; #undef VMXLOCAL_BREAK_RC } /** * Saves the guest state from the VMCS into the guest-CPU context. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param fWhat What to import, CPUMCTX_EXTRN_XXX. */ VMMR0DECL(int) VMXR0ImportStateOnDemand(PVMCPU pVCpu, uint64_t fWhat) { return hmR0VmxImportGuestState(pVCpu, fWhat); } /** * Check per-VM and per-VCPU force flag actions that require us to go back to * ring-3 for one reason or another. * * @returns Strict VBox status code (i.e. informational status codes too) * @retval VINF_SUCCESS if we don't have any actions that require going back to * ring-3. * @retval VINF_PGM_SYNC_CR3 if we have pending PGM CR3 sync. * @retval VINF_EM_PENDING_REQUEST if we have pending requests (like hardware * interrupts) * @retval VINF_PGM_POOL_FLUSH_PENDING if PGM is doing a pool flush and requires * all EMTs to be in ring-3. * @retval VINF_EM_RAW_TO_R3 if there is pending DMA requests. * @retval VINF_EM_NO_MEMORY PGM is out of memory, we need to return * to the EM loop. * * @param pVCpu The cross context virtual CPU structure. * @param fStepping Running in hmR0VmxRunGuestCodeStep(). */ static VBOXSTRICTRC hmR0VmxCheckForceFlags(PVMCPU pVCpu, bool fStepping) { Assert(VMMRZCallRing3IsEnabled(pVCpu)); /* * Anything pending? Should be more likely than not if we're doing a good job. */ PVM pVM = pVCpu->CTX_SUFF(pVM); if ( !fStepping ? !VM_FF_IS_ANY_SET(pVM, VM_FF_HP_R0_PRE_HM_MASK) && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HP_R0_PRE_HM_MASK) : !VM_FF_IS_ANY_SET(pVM, VM_FF_HP_R0_PRE_HM_STEP_MASK) && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) ) return VINF_SUCCESS; /* Pending PGM C3 sync. */ if (VMCPU_FF_IS_ANY_SET(pVCpu,VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL)) { PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; Assert(!(ASMAtomicUoReadU64(&pCtx->fExtrn) & (CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR3 | CPUMCTX_EXTRN_CR4))); VBOXSTRICTRC rcStrict2 = PGMSyncCR3(pVCpu, pCtx->cr0, pCtx->cr3, pCtx->cr4, VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3)); if (rcStrict2 != VINF_SUCCESS) { AssertRC(VBOXSTRICTRC_VAL(rcStrict2)); Log4Func(("PGMSyncCR3 forcing us back to ring-3. rc2=%d\n", VBOXSTRICTRC_VAL(rcStrict2))); return rcStrict2; } } /* Pending HM-to-R3 operations (critsects, timers, EMT rendezvous etc.) */ if ( VM_FF_IS_ANY_SET(pVM, VM_FF_HM_TO_R3_MASK) || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) { STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); int rc2 = RT_LIKELY(!VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY)) ? VINF_EM_RAW_TO_R3 : VINF_EM_NO_MEMORY; Log4Func(("HM_TO_R3 forcing us back to ring-3. rc=%d\n", rc2)); return rc2; } /* Pending VM request packets, such as hardware interrupts. */ if ( VM_FF_IS_SET(pVM, VM_FF_REQUEST) || VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_REQUEST)) { Log4Func(("Pending VM request forcing us back to ring-3\n")); return VINF_EM_PENDING_REQUEST; } /* Pending PGM pool flushes. */ if (VM_FF_IS_SET(pVM, VM_FF_PGM_POOL_FLUSH_PENDING)) { Log4Func(("PGM pool flush pending forcing us back to ring-3\n")); return VINF_PGM_POOL_FLUSH_PENDING; } /* Pending DMA requests. */ if (VM_FF_IS_SET(pVM, VM_FF_PDM_DMA)) { Log4Func(("Pending DMA request forcing us back to ring-3\n")); return VINF_EM_RAW_TO_R3; } return VINF_SUCCESS; } /** * Converts any TRPM trap into a pending HM event. This is typically used when * entering from ring-3 (not longjmp returns). * * @param pVCpu The cross context virtual CPU structure. */ static void hmR0VmxTrpmTrapToPendingEvent(PVMCPU pVCpu) { Assert(TRPMHasTrap(pVCpu)); Assert(!pVCpu->hm.s.Event.fPending); uint8_t uVector; TRPMEVENT enmTrpmEvent; RTGCUINT uErrCode; RTGCUINTPTR GCPtrFaultAddress; uint8_t cbInstr; int rc = TRPMQueryTrapAll(pVCpu, &uVector, &enmTrpmEvent, &uErrCode, &GCPtrFaultAddress, &cbInstr); AssertRC(rc); /* Refer Intel spec. 24.8.3 "VM-entry Controls for Event Injection" for the format of u32IntInfo. */ uint32_t u32IntInfo = uVector | VMX_EXIT_INT_INFO_VALID; if (enmTrpmEvent == TRPM_TRAP) { switch (uVector) { case X86_XCPT_NMI: u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_NMI << VMX_EXIT_INT_INFO_TYPE_SHIFT); break; case X86_XCPT_BP: case X86_XCPT_OF: u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_SW_XCPT << VMX_EXIT_INT_INFO_TYPE_SHIFT); break; case X86_XCPT_PF: case X86_XCPT_DF: case X86_XCPT_TS: case X86_XCPT_NP: case X86_XCPT_SS: case X86_XCPT_GP: case X86_XCPT_AC: u32IntInfo |= VMX_EXIT_INT_INFO_ERROR_CODE_VALID; RT_FALL_THRU(); default: u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_HW_XCPT << VMX_EXIT_INT_INFO_TYPE_SHIFT); break; } } else if (enmTrpmEvent == TRPM_HARDWARE_INT) u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_EXT_INT << VMX_EXIT_INT_INFO_TYPE_SHIFT); else if (enmTrpmEvent == TRPM_SOFTWARE_INT) u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_SW_INT << VMX_EXIT_INT_INFO_TYPE_SHIFT); else AssertMsgFailed(("Invalid TRPM event type %d\n", enmTrpmEvent)); rc = TRPMResetTrap(pVCpu); AssertRC(rc); Log4(("TRPM->HM event: u32IntInfo=%#RX32 enmTrpmEvent=%d cbInstr=%u uErrCode=%#RX32 GCPtrFaultAddress=%#RGv\n", u32IntInfo, enmTrpmEvent, cbInstr, uErrCode, GCPtrFaultAddress)); hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, cbInstr, uErrCode, GCPtrFaultAddress); } /** * Converts the pending HM event into a TRPM trap. * * @param pVCpu The cross context virtual CPU structure. */ static void hmR0VmxPendingEventToTrpmTrap(PVMCPU pVCpu) { Assert(pVCpu->hm.s.Event.fPending); uint32_t uVectorType = VMX_IDT_VECTORING_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo); uint32_t uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVCpu->hm.s.Event.u64IntInfo); bool fErrorCodeValid = VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(pVCpu->hm.s.Event.u64IntInfo); uint32_t uErrorCode = pVCpu->hm.s.Event.u32ErrCode; /* If a trap was already pending, we did something wrong! */ Assert(TRPMQueryTrap(pVCpu, NULL /* pu8TrapNo */, NULL /* pEnmType */) == VERR_TRPM_NO_ACTIVE_TRAP); TRPMEVENT enmTrapType; switch (uVectorType) { case VMX_IDT_VECTORING_INFO_TYPE_EXT_INT: enmTrapType = TRPM_HARDWARE_INT; break; case VMX_IDT_VECTORING_INFO_TYPE_SW_INT: enmTrapType = TRPM_SOFTWARE_INT; break; case VMX_IDT_VECTORING_INFO_TYPE_NMI: case VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT: case VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT: /* #BP and #OF */ case VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT: enmTrapType = TRPM_TRAP; break; default: AssertMsgFailed(("Invalid trap type %#x\n", uVectorType)); enmTrapType = TRPM_32BIT_HACK; break; } Log4(("HM event->TRPM: uVector=%#x enmTrapType=%d\n", uVector, enmTrapType)); int rc = TRPMAssertTrap(pVCpu, uVector, enmTrapType); AssertRC(rc); if (fErrorCodeValid) TRPMSetErrorCode(pVCpu, uErrorCode); if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT && uVector == X86_XCPT_PF) { TRPMSetFaultAddress(pVCpu, pVCpu->hm.s.Event.GCPtrFaultAddress); } else if ( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_XCPT || uVectorType == VMX_IDT_VECTORING_INFO_TYPE_PRIV_SW_XCPT) { AssertMsg( uVectorType == VMX_IDT_VECTORING_INFO_TYPE_SW_INT || (uVector == X86_XCPT_BP || uVector == X86_XCPT_OF), ("Invalid vector: uVector=%#x uVectorType=%#x\n", uVector, uVectorType)); TRPMSetInstrLength(pVCpu, pVCpu->hm.s.Event.cbInstr); } /* Clear the events from the VMCS. */ VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, 0); /* We're now done converting the pending event. */ pVCpu->hm.s.Event.fPending = false; } /** * Does the necessary state syncing before returning to ring-3 for any reason * (longjmp, preemption, voluntary exits to ring-3) from VT-x. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param fImportState Whether to import the guest state from the VMCS back * to the guest-CPU context. * * @remarks No-long-jmp zone!!! */ static int hmR0VmxLeave(PVMCPU pVCpu, bool fImportState) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); RTCPUID idCpu = RTMpCpuId(); Log4Func(("HostCpuId=%u\n", idCpu)); /* * !!! IMPORTANT !!! * If you modify code here, check whether hmR0VmxCallRing3Callback() needs to be updated too. */ /* Save the guest state if necessary. */ if (fImportState) { int rc = hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc, rc); } /* Restore host FPU state if necessary. We will resync on next R0 reentry. */ CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu); Assert(!CPUMIsGuestFPUStateActive(pVCpu)); /* Restore host debug registers if necessary. We will resync on next R0 reentry. */ #ifdef VBOX_STRICT if (CPUMIsHyperDebugStateActive(pVCpu)) Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_MOV_DR_EXIT); #endif CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */); Assert(!CPUMIsGuestDebugStateActive(pVCpu) && !CPUMIsGuestDebugStateActivePending(pVCpu)); Assert(!CPUMIsHyperDebugStateActive(pVCpu) && !CPUMIsHyperDebugStateActivePending(pVCpu)); #if HC_ARCH_BITS == 64 /* Restore host-state bits that VT-x only restores partially. */ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED) && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED)) { Log4Func(("Restoring Host State: fRestoreHostFlags=%#RX32 HostCpuId=%u\n", pVCpu->hm.s.vmx.fRestoreHostFlags, idCpu)); VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost); } pVCpu->hm.s.vmx.fRestoreHostFlags = 0; #endif /* Restore the lazy host MSRs as we're leaving VT-x context. */ if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST) { /* We shouldn't restore the host MSRs without saving the guest MSRs first. */ if (!fImportState) { int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_KERNEL_GS_BASE | CPUMCTX_EXTRN_SYSCALL_MSRS); AssertRCReturn(rc, rc); } hmR0VmxLazyRestoreHostMsrs(pVCpu); Assert(!pVCpu->hm.s.vmx.fLazyMsrs); } else pVCpu->hm.s.vmx.fLazyMsrs = 0; /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */ pVCpu->hm.s.vmx.fUpdatedHostMsrs = false; STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatEntry); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatImportGuestState); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExportGuestState); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatPreExit); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitHandling); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitIO); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitMovCRx); STAM_PROFILE_ADV_SET_STOPPED(&pVCpu->hm.s.StatExitXcptNmi); STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchLongJmpToR3); VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); /** @todo This partially defeats the purpose of having preemption hooks. * The problem is, deregistering the hooks should be moved to a place that * lasts until the EMT is about to be destroyed not everytime while leaving HM * context. */ if (pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_ACTIVE) { int rc = VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); AssertRCReturn(rc, rc); pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_CLEAR; Log4Func(("Cleared Vmcs. HostCpuId=%u\n", idCpu)); } Assert(!(pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_LAUNCHED)); NOREF(idCpu); return VINF_SUCCESS; } /** * Leaves the VT-x session. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jmp zone!!! */ static int hmR0VmxLeaveSession(PVMCPU pVCpu) { HM_DISABLE_PREEMPT(pVCpu); HMVMX_ASSERT_CPU_SAFE(pVCpu); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* When thread-context hooks are used, we can avoid doing the leave again if we had been preempted before and done this from the VMXR0ThreadCtxCallback(). */ if (!pVCpu->hm.s.fLeaveDone) { int rc2 = hmR0VmxLeave(pVCpu, true /* fImportState */); AssertRCReturnStmt(rc2, HM_RESTORE_PREEMPT(), rc2); pVCpu->hm.s.fLeaveDone = true; } Assert(!pVCpu->cpum.GstCtx.fExtrn); /* * !!! IMPORTANT !!! * If you modify code here, make sure to check whether hmR0VmxCallRing3Callback() needs to be updated too. */ /* Deregister hook now that we've left HM context before re-enabling preemption. */ /** @todo Deregistering here means we need to VMCLEAR always * (longjmp/exit-to-r3) in VT-x which is not efficient, eliminate need * for calling VMMR0ThreadCtxHookDisable here! */ VMMR0ThreadCtxHookDisable(pVCpu); /* Leave HM context. This takes care of local init (term). */ int rc = HMR0LeaveCpu(pVCpu); HM_RESTORE_PREEMPT(); return rc; } /** * Does the necessary state syncing before doing a longjmp to ring-3. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jmp zone!!! */ DECLINLINE(int) hmR0VmxLongJmpToRing3(PVMCPU pVCpu) { return hmR0VmxLeaveSession(pVCpu); } /** * Take necessary actions before going back to ring-3. * * An action requires us to go back to ring-3. This function does the necessary * steps before we can safely return to ring-3. This is not the same as longjmps * to ring-3, this is voluntary and prepares the guest so it may continue * executing outside HM (recompiler/IEM). * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param rcExit The reason for exiting to ring-3. Can be * VINF_VMM_UNKNOWN_RING3_CALL. */ static int hmR0VmxExitToRing3(PVMCPU pVCpu, VBOXSTRICTRC rcExit) { Assert(pVCpu); HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); if (RT_UNLIKELY(rcExit == VERR_VMX_INVALID_VMCS_PTR)) { VMXGetActivatedVmcs(&pVCpu->hm.s.vmx.LastError.u64VmcsPhys); pVCpu->hm.s.vmx.LastError.u32VmcsRev = *(uint32_t *)pVCpu->hm.s.vmx.pvVmcs; pVCpu->hm.s.vmx.LastError.idEnteredCpu = pVCpu->hm.s.idEnteredCpu; /* LastError.idCurrentCpu was updated in hmR0VmxPreRunGuestCommitted(). */ } /* Please, no longjumps here (any logging shouldn't flush jump back to ring-3). NO LOGGING BEFORE THIS POINT! */ VMMRZCallRing3Disable(pVCpu); Log4Func(("rcExit=%d\n", VBOXSTRICTRC_VAL(rcExit))); /* We need to do this only while truly exiting the "inner loop" back to ring-3 and -not- for any longjmp to ring3. */ if (pVCpu->hm.s.Event.fPending) { hmR0VmxPendingEventToTrpmTrap(pVCpu); Assert(!pVCpu->hm.s.Event.fPending); } /* Clear interrupt-window and NMI-window controls as we re-evaluate it when we return from ring-3. */ hmR0VmxClearIntNmiWindowsVmcs(pVCpu); /* If we're emulating an instruction, we shouldn't have any TRPM traps pending and if we're injecting an event we should have a TRPM trap pending. */ AssertMsg(rcExit != VINF_EM_RAW_INJECT_TRPM_EVENT || TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit))); #ifndef DEBUG_bird /* Triggered after firing an NMI against NT4SP1, possibly a triple fault in progress. */ AssertMsg(rcExit != VINF_EM_RAW_EMULATE_INSTR || !TRPMHasTrap(pVCpu), ("%Rrc\n", VBOXSTRICTRC_VAL(rcExit))); #endif /* Save guest state and restore host state bits. */ int rc = hmR0VmxLeaveSession(pVCpu); AssertRCReturn(rc, rc); STAM_COUNTER_DEC(&pVCpu->hm.s.StatSwitchLongJmpToR3); /* Thread-context hooks are unregistered at this point!!! */ /* Sync recompiler state. */ VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_TO_R3); CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_SYSENTER_MSR | CPUM_CHANGED_LDTR | CPUM_CHANGED_GDTR | CPUM_CHANGED_IDTR | CPUM_CHANGED_TR | CPUM_CHANGED_HIDDEN_SEL_REGS); if ( pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging && CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx)) { CPUMSetChangedFlags(pVCpu, CPUM_CHANGED_GLOBAL_TLB_FLUSH); } Assert(!pVCpu->hm.s.fClearTrapFlag); /* Update the exit-to-ring 3 reason. */ pVCpu->hm.s.rcLastExitToR3 = VBOXSTRICTRC_VAL(rcExit); /* On our way back from ring-3 reload the guest state if there is a possibility of it being changed. */ if (rcExit != VINF_EM_RAW_INTERRUPT) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchExitToR3); /* We do -not- want any longjmp notifications after this! We must return to ring-3 ASAP. */ VMMRZCallRing3RemoveNotification(pVCpu); VMMRZCallRing3Enable(pVCpu); return rc; } /** * VMMRZCallRing3() callback wrapper which saves the guest state before we * longjump to ring-3 and possibly get preempted. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * @param enmOperation The operation causing the ring-3 longjump. * @param pvUser User argument, currently unused, NULL. */ static DECLCALLBACK(int) hmR0VmxCallRing3Callback(PVMCPU pVCpu, VMMCALLRING3 enmOperation, void *pvUser) { RT_NOREF(pvUser); if (enmOperation == VMMCALLRING3_VM_R0_ASSERTION) { /* * !!! IMPORTANT !!! * If you modify code here, check whether hmR0VmxLeave() and hmR0VmxLeaveSession() needs to be updated too. * This is a stripped down version which gets out ASAP, trying to not trigger any further assertions. */ VMMRZCallRing3RemoveNotification(pVCpu); VMMRZCallRing3Disable(pVCpu); RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER; RTThreadPreemptDisable(&PreemptState); hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); CPUMR0FpuStateMaybeSaveGuestAndRestoreHost(pVCpu); CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(pVCpu, true /* save DR6 */); #if HC_ARCH_BITS == 64 /* Restore host-state bits that VT-x only restores partially. */ if ( (pVCpu->hm.s.vmx.fRestoreHostFlags & VMX_RESTORE_HOST_REQUIRED) && (pVCpu->hm.s.vmx.fRestoreHostFlags & ~VMX_RESTORE_HOST_REQUIRED)) VMXRestoreHostState(pVCpu->hm.s.vmx.fRestoreHostFlags, &pVCpu->hm.s.vmx.RestoreHost); pVCpu->hm.s.vmx.fRestoreHostFlags = 0; #endif /* Restore the lazy host MSRs as we're leaving VT-x context. */ if (pVCpu->hm.s.vmx.fLazyMsrs & VMX_LAZY_MSRS_LOADED_GUEST) hmR0VmxLazyRestoreHostMsrs(pVCpu); /* Update auto-load/store host MSRs values when we re-enter VT-x (as we could be on a different CPU). */ pVCpu->hm.s.vmx.fUpdatedHostMsrs = false; VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_HM, VMCPUSTATE_STARTED_EXEC); if (pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_ACTIVE) { VMXClearVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_CLEAR; } /** @todo eliminate the need for calling VMMR0ThreadCtxHookDisable here! */ VMMR0ThreadCtxHookDisable(pVCpu); HMR0LeaveCpu(pVCpu); RTThreadPreemptRestore(&PreemptState); return VINF_SUCCESS; } Assert(pVCpu); Assert(pvUser); Assert(VMMRZCallRing3IsEnabled(pVCpu)); HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); VMMRZCallRing3Disable(pVCpu); Assert(VMMR0IsLogFlushDisabled(pVCpu)); Log4Func((" -> hmR0VmxLongJmpToRing3 enmOperation=%d\n", enmOperation)); int rc = hmR0VmxLongJmpToRing3(pVCpu); AssertRCReturn(rc, rc); VMMRZCallRing3Enable(pVCpu); return VINF_SUCCESS; } /** * Sets the interrupt-window exiting control in the VMCS which instructs VT-x to * cause a VM-exit as soon as the guest is in a state to receive interrupts. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxSetIntWindowExitVmcs(PVMCPU pVCpu) { if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_INT_WINDOW_EXIT)) { if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT)) { pVCpu->hm.s.vmx.u32ProcCtls |= VMX_PROC_CTLS_INT_WINDOW_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); Log4Func(("Setup interrupt-window exiting\n")); } } /* else we will deliver interrupts whenever the guest exits next and is in a state to receive events. */ } /** * Clears the interrupt-window exiting control in the VMCS. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxClearIntWindowExitVmcs(PVMCPU pVCpu) { Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT); pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_PROC_CTLS_INT_WINDOW_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); Log4Func(("Cleared interrupt-window exiting\n")); } /** * Sets the NMI-window exiting control in the VMCS which instructs VT-x to * cause a VM-exit as soon as the guest is in a state to receive NMIs. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxSetNmiWindowExitVmcs(PVMCPU pVCpu) { if (RT_LIKELY(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.Msrs.ProcCtls.n.allowed1 & VMX_PROC_CTLS_NMI_WINDOW_EXIT)) { if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT)) { pVCpu->hm.s.vmx.u32ProcCtls |= VMX_PROC_CTLS_NMI_WINDOW_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); Log4Func(("Setup NMI-window exiting\n")); } } /* else we will deliver NMIs whenever we VM-exit next, even possibly nesting NMIs. Can't be helped on ancient CPUs. */ } /** * Clears the NMI-window exiting control in the VMCS. * * @param pVCpu The cross context virtual CPU structure. */ DECLINLINE(void) hmR0VmxClearNmiWindowExitVmcs(PVMCPU pVCpu) { Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT); pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_PROC_CTLS_NMI_WINDOW_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRC(rc); Log4Func(("Cleared NMI-window exiting\n")); } /** * Evaluates the event to be delivered to the guest and sets it as the pending * event. * * @returns The VT-x guest-interruptibility state. * @param pVCpu The cross context virtual CPU structure. */ static uint32_t hmR0VmxEvaluatePendingEvent(PVMCPU pVCpu) { /* Get the current interruptibility-state of the guest and then figure out what can be injected. */ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; uint32_t const fIntrState = hmR0VmxGetGuestIntrState(pVCpu); bool const fBlockMovSS = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS); bool const fBlockSti = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI); bool const fBlockNmi = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI); Assert(!fBlockSti || !(ASMAtomicUoReadU64(&pCtx->fExtrn) & CPUMCTX_EXTRN_RFLAGS)); Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI)); /* We don't support block-by-SMI yet.*/ Assert(!fBlockSti || pCtx->eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */ Assert(!TRPMHasTrap(pVCpu)); if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC)) APICUpdatePendingInterrupts(pVCpu); /* * Toggling of interrupt force-flags here is safe since we update TRPM on premature exits * to ring-3 before executing guest code, see hmR0VmxExitToRing3(). We must NOT restore these force-flags. */ /** @todo SMI. SMIs take priority over NMIs. */ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI)) /* NMI. NMIs take priority over regular interrupts. */ { /* On some CPUs block-by-STI also blocks NMIs. See Intel spec. 26.3.1.5 "Checks On Guest Non-Register State". */ if ( !pVCpu->hm.s.Event.fPending && !fBlockNmi && !fBlockSti && !fBlockMovSS) { Log4Func(("Pending NMI\n")); uint32_t u32IntInfo = X86_XCPT_NMI | VMX_EXIT_INT_INFO_VALID; u32IntInfo |= (VMX_EXIT_INT_INFO_TYPE_NMI << VMX_EXIT_INT_INFO_TYPE_SHIFT); hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */); VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI); } else hmR0VmxSetNmiWindowExitVmcs(pVCpu); } /* * Check if the guest can receive external interrupts (PIC/APIC). Once PDMGetInterrupt() returns * a valid interrupt we must- deliver the interrupt. We can no longer re-request it from the APIC. */ else if ( VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC) && !pVCpu->hm.s.fSingleInstruction) { Assert(!DBGFIsStepping(pVCpu)); int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_RFLAGS); AssertRCReturn(rc, 0); bool const fBlockInt = !(pCtx->eflags.u32 & X86_EFL_IF); if ( !pVCpu->hm.s.Event.fPending && !fBlockInt && !fBlockSti && !fBlockMovSS) { uint8_t u8Interrupt; rc = PDMGetInterrupt(pVCpu, &u8Interrupt); if (RT_SUCCESS(rc)) { Log4Func(("Pending external interrupt u8Interrupt=%#x\n", u8Interrupt)); uint32_t u32IntInfo = u8Interrupt | VMX_EXIT_INT_INFO_VALID | (VMX_EXIT_INT_INFO_TYPE_EXT_INT << VMX_EXIT_INT_INFO_TYPE_SHIFT); hmR0VmxSetPendingEvent(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrfaultAddress */); } else if (rc == VERR_APIC_INTR_MASKED_BY_TPR) { if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW) hmR0VmxApicSetTprThreshold(pVCpu, u8Interrupt >> 4); STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchTprMaskedIrq); /* * If the CPU doesn't have TPR shadowing, we will always get a VM-exit on TPR changes and * APICSetTpr() will end up setting the VMCPU_FF_INTERRUPT_APIC if required, so there is no * need to re-set this force-flag here. */ } else STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchGuestIrq); } else hmR0VmxSetIntWindowExitVmcs(pVCpu); } return fIntrState; } /** * Injects any pending events into the guest if the guest is in a state to * receive them. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. * @param fIntrState The VT-x guest-interruptibility state. * @param fStepping Running in hmR0VmxRunGuestCodeStep() and we should * return VINF_EM_DBG_STEPPED if the event was * dispatched directly. */ static VBOXSTRICTRC hmR0VmxInjectPendingEvent(PVMCPU pVCpu, uint32_t fIntrState, bool fStepping) { HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); Assert(VMMRZCallRing3IsEnabled(pVCpu)); bool const fBlockMovSS = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS); bool const fBlockSti = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI); Assert(!fBlockSti || !(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_RFLAGS)); Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI)); /* We don't support block-by-SMI yet.*/ Assert(!fBlockSti || pVCpu->cpum.GstCtx.eflags.Bits.u1IF); /* Cannot set block-by-STI when interrupts are disabled. */ Assert(!TRPMHasTrap(pVCpu)); VBOXSTRICTRC rcStrict = VINF_SUCCESS; if (pVCpu->hm.s.Event.fPending) { /* * Do -not- clear any interrupt-window exiting control here. We might have an interrupt * pending even while injecting an event and in this case, we want a VM-exit as soon as * the guest is ready for the next interrupt, see @bugref{6208#c45}. * * See Intel spec. 26.6.5 "Interrupt-Window Exiting and Virtual-Interrupt Delivery". */ uint32_t const uIntType = VMX_ENTRY_INT_INFO_TYPE(pVCpu->hm.s.Event.u64IntInfo); #ifdef VBOX_STRICT if (uIntType == VMX_ENTRY_INT_INFO_TYPE_EXT_INT) { bool const fBlockInt = !(pVCpu->cpum.GstCtx.eflags.u32 & X86_EFL_IF); Assert(!fBlockInt); Assert(!fBlockSti); Assert(!fBlockMovSS); } else if (uIntType == VMX_ENTRY_INT_INFO_TYPE_NMI) { bool const fBlockNmi = RT_BOOL(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI); Assert(!fBlockSti); Assert(!fBlockMovSS); Assert(!fBlockNmi); } #endif Log4(("Injecting pending event vcpu[%RU32] u64IntInfo=%#RX64 Type=%#RX32\n", pVCpu->idCpu, pVCpu->hm.s.Event.u64IntInfo, uIntType)); /* * Inject the event and get any changes to the guest-interruptibility state. * * The guest-interruptibility state may need to be updated if we inject the event * into the guest IDT ourselves (for real-on-v86 guest injecting software interrupts). */ rcStrict = hmR0VmxInjectEventVmcs(pVCpu, pVCpu->hm.s.Event.u64IntInfo, pVCpu->hm.s.Event.cbInstr, pVCpu->hm.s.Event.u32ErrCode, pVCpu->hm.s.Event.GCPtrFaultAddress, fStepping, &fIntrState); AssertRCReturn(VBOXSTRICTRC_VAL(rcStrict), rcStrict); if (uIntType == VMX_ENTRY_INT_INFO_TYPE_EXT_INT) STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectInterrupt); else STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectXcpt); } /* * Update the guest-interruptibility state. * * This is required for the real-on-v86 software interrupt injection case above, as well as * updates to the guest state from ring-3 or IEM/REM. */ int rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState); AssertRCReturn(rc, rc); /* * There's no need to clear the VM-entry interruption-information field here if we're not * injecting anything. VT-x clears the valid bit on every VM-exit. * * See Intel spec. 24.8.3 "VM-Entry Controls for Event Injection". */ Assert(rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping)); NOREF(fBlockMovSS); NOREF(fBlockSti); return rcStrict; } /** * Injects a double-fault (\#DF) exception into the VM. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. * @param fStepping Whether we're running in hmR0VmxRunGuestCodeStep() * and should return VINF_EM_DBG_STEPPED if the event * is injected directly (register modified by us, not * by hardware on VM-entry). * @param pfIntrState Pointer to the current guest interruptibility-state. * This interruptibility-state will be updated if * necessary. This cannot not be NULL. */ DECLINLINE(VBOXSTRICTRC) hmR0VmxInjectXcptDF(PVMCPU pVCpu, bool fStepping, uint32_t *pfIntrState) { uint32_t const u32IntInfo = X86_XCPT_DF | VMX_EXIT_INT_INFO_VALID | (VMX_EXIT_INT_INFO_TYPE_HW_XCPT << VMX_EXIT_INT_INFO_TYPE_SHIFT) | VMX_EXIT_INT_INFO_ERROR_CODE_VALID; return hmR0VmxInjectEventVmcs(pVCpu, u32IntInfo, 0 /* cbInstr */, 0 /* u32ErrCode */, 0 /* GCPtrFaultAddress */, fStepping, pfIntrState); } /** * Injects a general-protection (\#GP) fault into the VM. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. * @param fErrorCodeValid Whether the error code is valid (depends on the CPU * mode, i.e. in real-mode it's not valid). * @param u32ErrorCode The error code associated with the \#GP. * @param fStepping Whether we're running in * hmR0VmxRunGuestCodeStep() and should return * VINF_EM_DBG_STEPPED if the event is injected * directly (register modified by us, not by * hardware on VM-entry). * @param pfIntrState Pointer to the current guest interruptibility-state. * This interruptibility-state will be updated if * necessary. This cannot not be NULL. */ DECLINLINE(VBOXSTRICTRC) hmR0VmxInjectXcptGP(PVMCPU pVCpu, bool fErrorCodeValid, uint32_t u32ErrorCode, bool fStepping, uint32_t *pfIntrState) { uint32_t const u32IntInfo = X86_XCPT_GP | VMX_EXIT_INT_INFO_VALID | (VMX_EXIT_INT_INFO_TYPE_HW_XCPT << VMX_EXIT_INT_INFO_TYPE_SHIFT) | (fErrorCodeValid ? VMX_EXIT_INT_INFO_ERROR_CODE_VALID : 0); return hmR0VmxInjectEventVmcs(pVCpu, u32IntInfo, 0 /* cbInstr */, u32ErrorCode, 0 /* GCPtrFaultAddress */, fStepping, pfIntrState); } /** * Pushes a 2-byte value onto the real-mode (in virtual-8086 mode) guest's * stack. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_EM_RESET if pushing a value to the stack caused a triple-fault. * @param pVCpu The cross context virtual CPU structure. * @param uValue The value to push to the guest stack. */ static VBOXSTRICTRC hmR0VmxRealModeGuestStackPush(PVMCPU pVCpu, uint16_t uValue) { /* * The stack limit is 0xffff in real-on-virtual 8086 mode. Real-mode with weird stack limits cannot be run in * virtual 8086 mode in VT-x. See Intel spec. 26.3.1.2 "Checks on Guest Segment Registers". * See Intel Instruction reference for PUSH and Intel spec. 22.33.1 "Segment Wraparound". */ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if (pCtx->sp == 1) return VINF_EM_RESET; pCtx->sp -= sizeof(uint16_t); /* May wrap around which is expected behaviour. */ int rc = PGMPhysSimpleWriteGCPhys(pVCpu->CTX_SUFF(pVM), pCtx->ss.u64Base + pCtx->sp, &uValue, sizeof(uint16_t)); AssertRC(rc); return rc; } /** * Injects an event into the guest upon VM-entry by updating the relevant fields * in the VM-entry area in the VMCS. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_SUCCESS if the event is successfully injected into the VMCS. * @retval VINF_EM_RESET if event injection resulted in a triple-fault. * * @param pVCpu The cross context virtual CPU structure. * @param u64IntInfo The VM-entry interruption-information field. * @param cbInstr The VM-entry instruction length in bytes (for * software interrupts, exceptions and privileged * software exceptions). * @param u32ErrCode The VM-entry exception error code. * @param GCPtrFaultAddress The page-fault address for \#PF exceptions. * @param pfIntrState Pointer to the current guest interruptibility-state. * This interruptibility-state will be updated if * necessary. This cannot not be NULL. * @param fStepping Whether we're running in * hmR0VmxRunGuestCodeStep() and should return * VINF_EM_DBG_STEPPED if the event is injected * directly (register modified by us, not by * hardware on VM-entry). */ static VBOXSTRICTRC hmR0VmxInjectEventVmcs(PVMCPU pVCpu, uint64_t u64IntInfo, uint32_t cbInstr, uint32_t u32ErrCode, RTGCUINTREG GCPtrFaultAddress, bool fStepping, uint32_t *pfIntrState) { /* Intel spec. 24.8.3 "VM-Entry Controls for Event Injection" specifies the interruption-information field to be 32-bits. */ AssertMsg(!RT_HI_U32(u64IntInfo), ("%#RX64\n", u64IntInfo)); Assert(pfIntrState); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; uint32_t u32IntInfo = (uint32_t)u64IntInfo; uint32_t const uVector = VMX_ENTRY_INT_INFO_VECTOR(u32IntInfo); uint32_t const uIntType = VMX_ENTRY_INT_INFO_TYPE(u32IntInfo); #ifdef VBOX_STRICT /* * Validate the error-code-valid bit for hardware exceptions. * No error codes for exceptions in real-mode. * * See Intel spec. 20.1.4 "Interrupt and Exception Handling" */ if ( uIntType == VMX_EXIT_INT_INFO_TYPE_HW_XCPT && !CPUMIsGuestInRealModeEx(pCtx)) { switch (uVector) { case X86_XCPT_PF: case X86_XCPT_DF: case X86_XCPT_TS: case X86_XCPT_NP: case X86_XCPT_SS: case X86_XCPT_GP: case X86_XCPT_AC: AssertMsg(VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(u32IntInfo), ("Error-code-valid bit not set for exception that has an error code uVector=%#x\n", uVector)); RT_FALL_THRU(); default: break; } } #endif /* Cannot inject an NMI when block-by-MOV SS is in effect. */ Assert( uIntType != VMX_EXIT_INT_INFO_TYPE_NMI || !(*pfIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS)); STAM_COUNTER_INC(&pVCpu->hm.s.paStatInjectedIrqsR0[uVector & MASK_INJECT_IRQ_STAT]); /* * Hardware interrupts & exceptions cannot be delivered through the software interrupt * redirection bitmap to the real mode task in virtual-8086 mode. We must jump to the * interrupt handler in the (real-mode) guest. * * See Intel spec. 20.3 "Interrupt and Exception handling in Virtual-8086 Mode". * See Intel spec. 20.1.4 "Interrupt and Exception Handling" for real-mode interrupt handling. */ if (CPUMIsGuestInRealModeEx(pCtx)) /* CR0.PE bit changes are always intercepted, so it's up to date. */ { if (pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest) { /* * For unrestricted execution enabled CPUs running real-mode guests, we must not * set the deliver-error-code bit. * * See Intel spec. 26.2.1.3 "VM-Entry Control Fields". */ u32IntInfo &= ~VMX_ENTRY_INT_INFO_ERROR_CODE_VALID; } else { PVM pVM = pVCpu->CTX_SUFF(pVM); Assert(PDMVmmDevHeapIsEnabled(pVM)); Assert(pVM->hm.s.vmx.pRealModeTSS); /* We require RIP, RSP, RFLAGS, CS, IDTR, import them. */ int rc2 = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_TABLE_MASK | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_RFLAGS); AssertRCReturn(rc2, rc2); /* Check if the interrupt handler is present in the IVT (real-mode IDT). IDT limit is (4N - 1). */ size_t const cbIdtEntry = sizeof(X86IDTR16); if (uVector * cbIdtEntry + (cbIdtEntry - 1) > pCtx->idtr.cbIdt) { /* If we are trying to inject a #DF with no valid IDT entry, return a triple-fault. */ if (uVector == X86_XCPT_DF) return VINF_EM_RESET; /* If we're injecting a #GP with no valid IDT entry, inject a double-fault. */ if (uVector == X86_XCPT_GP) return hmR0VmxInjectXcptDF(pVCpu, fStepping, pfIntrState); /* * If we're injecting an event with no valid IDT entry, inject a #GP. * No error codes for exceptions in real-mode. * * See Intel spec. 20.1.4 "Interrupt and Exception Handling" */ return hmR0VmxInjectXcptGP(pVCpu, false /* fErrCodeValid */, 0 /* u32ErrCode */, fStepping, pfIntrState); } /* Software exceptions (#BP and #OF exceptions thrown as a result of INT3 or INTO) */ uint16_t uGuestIp = pCtx->ip; if (uIntType == VMX_ENTRY_INT_INFO_TYPE_SW_XCPT) { Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF); /* #BP and #OF are both benign traps, we need to resume the next instruction. */ uGuestIp = pCtx->ip + (uint16_t)cbInstr; } else if (uIntType == VMX_ENTRY_INT_INFO_TYPE_SW_INT) uGuestIp = pCtx->ip + (uint16_t)cbInstr; /* Get the code segment selector and offset from the IDT entry for the interrupt handler. */ X86IDTR16 IdtEntry; RTGCPHYS GCPhysIdtEntry = (RTGCPHYS)pCtx->idtr.pIdt + uVector * cbIdtEntry; rc2 = PGMPhysSimpleReadGCPhys(pVM, &IdtEntry, GCPhysIdtEntry, cbIdtEntry); AssertRCReturn(rc2, rc2); /* Construct the stack frame for the interrupt/exception handler. */ VBOXSTRICTRC rcStrict; rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, pCtx->eflags.u32); if (rcStrict == VINF_SUCCESS) rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, pCtx->cs.Sel); if (rcStrict == VINF_SUCCESS) rcStrict = hmR0VmxRealModeGuestStackPush(pVCpu, uGuestIp); /* Clear the required eflag bits and jump to the interrupt/exception handler. */ if (rcStrict == VINF_SUCCESS) { pCtx->eflags.u32 &= ~(X86_EFL_IF | X86_EFL_TF | X86_EFL_RF | X86_EFL_AC); pCtx->rip = IdtEntry.offSel; pCtx->cs.Sel = IdtEntry.uSel; pCtx->cs.ValidSel = IdtEntry.uSel; pCtx->cs.u64Base = IdtEntry.uSel << cbIdtEntry; if ( uIntType == VMX_ENTRY_INT_INFO_TYPE_HW_XCPT && uVector == X86_XCPT_PF) pCtx->cr2 = GCPtrFaultAddress; /* If any other guest-state bits are changed here, make sure to update hmR0VmxPreRunGuestCommitted() when thread-context hooks are used. */ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CS | HM_CHANGED_GUEST_CR2 | HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_RSP); /* We're clearing interrupts, which means no block-by-STI interrupt-inhibition. */ if (*pfIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI) { Assert( uIntType != VMX_ENTRY_INT_INFO_TYPE_NMI && uIntType != VMX_ENTRY_INT_INFO_TYPE_EXT_INT); Log4Func(("Clearing inhibition due to STI\n")); *pfIntrState &= ~VMX_VMCS_GUEST_INT_STATE_BLOCK_STI; } Log4(("Injecting real-mode: u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x Eflags=%#x CS:EIP=%04x:%04x\n", u32IntInfo, u32ErrCode, cbInstr, pCtx->eflags.u, pCtx->cs.Sel, pCtx->eip)); /* The event has been truly dispatched. Mark it as no longer pending so we don't attempt to 'undo' it, if we are returning to ring-3 before executing guest code. */ pVCpu->hm.s.Event.fPending = false; /* Make hmR0VmxPreRunGuest() return if we're stepping since we've changed cs:rip. */ if (fStepping) rcStrict = VINF_EM_DBG_STEPPED; } AssertMsg(rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping), ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); return rcStrict; } } /* Validate. */ Assert(VMX_ENTRY_INT_INFO_IS_VALID(u32IntInfo)); /* Bit 31 (Valid bit) must be set by caller. */ Assert(!(u32IntInfo & VMX_BF_ENTRY_INT_INFO_RSVD_12_30_MASK)); /* Bits 30:12 MBZ. */ /* Inject. */ int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, u32IntInfo); if (VMX_ENTRY_INT_INFO_IS_ERROR_CODE_VALID(u32IntInfo)) rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE, u32ErrCode); rc |= VMXWriteVmcs32(VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH, cbInstr); AssertRCReturn(rc, rc); /* Update CR2. */ if ( VMX_ENTRY_INT_INFO_TYPE(u32IntInfo) == VMX_EXIT_INT_INFO_TYPE_HW_XCPT && uVector == X86_XCPT_PF) pCtx->cr2 = GCPtrFaultAddress; Log4(("Injecting u32IntInfo=%#x u32ErrCode=%#x cbInstr=%#x CR2=%#RX64\n", u32IntInfo, u32ErrCode, cbInstr, pCtx->cr2)); return VINF_SUCCESS; } /** * Clears the interrupt-window exiting control in the VMCS and if necessary * clears the current event in the VMCS as well. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks Use this function only to clear events that have not yet been * delivered to the guest but are injected in the VMCS! * @remarks No-long-jump zone!!! */ static void hmR0VmxClearIntNmiWindowsVmcs(PVMCPU pVCpu) { if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_INT_WINDOW_EXIT) { hmR0VmxClearIntWindowExitVmcs(pVCpu); Log4Func(("Cleared interrupt window\n")); } if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT) { hmR0VmxClearNmiWindowExitVmcs(pVCpu); Log4Func(("Cleared NMI window\n")); } } /** * Enters the VT-x session. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. */ VMMR0DECL(int) VMXR0Enter(PVMCPU pVCpu) { AssertPtr(pVCpu); Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fSupported); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); LogFlowFunc(("pVCpu=%p\n", pVCpu)); Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)) == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)); #ifdef VBOX_STRICT /* At least verify VMX is enabled, since we can't check if we're in VMX root mode without #GP'ing. */ RTCCUINTREG uHostCR4 = ASMGetCR4(); if (!(uHostCR4 & X86_CR4_VMXE)) { LogRelFunc(("X86_CR4_VMXE bit in CR4 is not set!\n")); return VERR_VMX_X86_CR4_VMXE_CLEARED; } #endif /* * Load the VCPU's VMCS as the current (and active) one. */ Assert(pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_CLEAR); int rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); if (RT_SUCCESS(rc)) { pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_ACTIVE; pVCpu->hm.s.fLeaveDone = false; Log4Func(("Activated Vmcs. HostCpuId=%u\n", RTMpCpuId())); /* * Do the EMT scheduled L1D flush here if needed. */ if (pVCpu->CTX_SUFF(pVM)->hm.s.fL1dFlushOnSched) ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D); } return rc; } /** * The thread-context callback (only on platforms which support it). * * @param enmEvent The thread-context event. * @param pVCpu The cross context virtual CPU structure. * @param fGlobalInit Whether global VT-x/AMD-V init. was used. * @thread EMT(pVCpu) */ VMMR0DECL(void) VMXR0ThreadCtxCallback(RTTHREADCTXEVENT enmEvent, PVMCPU pVCpu, bool fGlobalInit) { NOREF(fGlobalInit); switch (enmEvent) { case RTTHREADCTXEVENT_OUT: { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu)); VMCPU_ASSERT_EMT(pVCpu); /* No longjmps (logger flushes, locks) in this fragile context. */ VMMRZCallRing3Disable(pVCpu); Log4Func(("Preempting: HostCpuId=%u\n", RTMpCpuId())); /* * Restore host-state (FPU, debug etc.) */ if (!pVCpu->hm.s.fLeaveDone) { /* * Do -not- import the guest-state here as we might already be in the middle of importing * it, esp. bad if we're holding the PGM lock, see comment in hmR0VmxImportGuestState(). */ hmR0VmxLeave(pVCpu, false /* fImportState */); pVCpu->hm.s.fLeaveDone = true; } /* Leave HM context, takes care of local init (term). */ int rc = HMR0LeaveCpu(pVCpu); AssertRC(rc); NOREF(rc); /* Restore longjmp state. */ VMMRZCallRing3Enable(pVCpu); STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreempt); break; } case RTTHREADCTXEVENT_IN: { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(VMMR0ThreadCtxHookIsEnabled(pVCpu)); VMCPU_ASSERT_EMT(pVCpu); /* No longjmps here, as we don't want to trigger preemption (& its hook) while resuming. */ VMMRZCallRing3Disable(pVCpu); Log4Func(("Resumed: HostCpuId=%u\n", RTMpCpuId())); /* Initialize the bare minimum state required for HM. This takes care of initializing VT-x if necessary (onlined CPUs, local init etc.) */ int rc = hmR0EnterCpu(pVCpu); AssertRC(rc); Assert((pVCpu->hm.s.fCtxChanged & (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)) == (HM_CHANGED_HOST_CONTEXT | HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)); /* Load the active VMCS as the current one. */ if (pVCpu->hm.s.vmx.fVmcsState & HMVMX_VMCS_STATE_CLEAR) { rc = VMXActivateVmcs(pVCpu->hm.s.vmx.HCPhysVmcs); AssertRC(rc); NOREF(rc); pVCpu->hm.s.vmx.fVmcsState = HMVMX_VMCS_STATE_ACTIVE; Log4Func(("Resumed: Activated Vmcs. HostCpuId=%u\n", RTMpCpuId())); } pVCpu->hm.s.fLeaveDone = false; /* Do the EMT scheduled L1D flush if needed. */ if (pVCpu->CTX_SUFF(pVM)->hm.s.fL1dFlushOnSched) ASMWrMsr(MSR_IA32_FLUSH_CMD, MSR_IA32_FLUSH_CMD_F_L1D); /* Restore longjmp state. */ VMMRZCallRing3Enable(pVCpu); break; } default: break; } } /** * Exports the host state into the VMCS host-state area. * Sets up the VM-exit MSR-load area. * * The CPU state will be loaded from these fields on every successful VM-exit. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxExportHostState(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); int rc = VINF_SUCCESS; if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT) { rc = hmR0VmxExportHostControlRegs(); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); rc = hmR0VmxExportHostSegmentRegs(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); rc = hmR0VmxExportHostMsrs(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_HOST_CONTEXT; } return rc; } /** * Saves the host state in the VMCS host-state. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ VMMR0DECL(int) VMXR0ExportHostState(PVMCPU pVCpu) { AssertPtr(pVCpu); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* * Export the host state here while entering HM context. * When thread-context hooks are used, we might get preempted and have to re-save the host * state but most of the time we won't be, so do it here before we disable interrupts. */ return hmR0VmxExportHostState(pVCpu); } /** * Exports the guest state into the VMCS guest-state area. * * The will typically be done before VM-entry when the guest-CPU state and the * VMCS state may potentially be out of sync. * * Sets up the VM-entry MSR-load and VM-exit MSR-store areas. Sets up the * VM-entry controls. * Sets up the appropriate VMX non-root function to execute guest code based on * the guest CPU mode. * * @returns VBox strict status code. * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code * without unrestricted guest access and the VMMDev is not presently * mapped (e.g. EFI32). * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static VBOXSTRICTRC hmR0VmxExportGuestState(PVMCPU pVCpu) { AssertPtr(pVCpu); HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); LogFlowFunc(("pVCpu=%p\n", pVCpu)); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExportGuestState, x); /* Determine real-on-v86 mode. */ pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false; if ( !pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest && CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx)) pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = true; /* * Any ordering dependency among the sub-functions below must be explicitly stated using comments. * Ideally, assert that the cross-dependent bits are up-to-date at the point of using it. */ int rc = hmR0VmxSelectVMRunHandler(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); /* This needs to be done after hmR0VmxSelectVMRunHandler() as changing pfnStartVM may require VM-entry control updates. */ rc = hmR0VmxExportGuestEntryCtls(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); /* This needs to be done after hmR0VmxSelectVMRunHandler() as changing pfnStartVM may require VM-exit control updates. */ rc = hmR0VmxExportGuestExitCtls(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); rc = hmR0VmxExportGuestCR0(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); VBOXSTRICTRC rcStrict = hmR0VmxExportGuestCR3AndCR4(pVCpu); if (rcStrict == VINF_SUCCESS) { /* likely */ } else { Assert(rcStrict == VINF_EM_RESCHEDULE_REM || RT_FAILURE_NP(rcStrict)); return rcStrict; } rc = hmR0VmxExportGuestSegmentRegs(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); /* This needs to be done after hmR0VmxExportGuestEntryCtls() and hmR0VmxExportGuestExitCtls() as it may alter controls if we determine we don't have to swap EFER after all. */ rc = hmR0VmxExportGuestMsrs(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); rc = hmR0VmxExportGuestApicTpr(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); rc = hmR0VmxExportGuestXcptIntercepts(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); rc = hmR0VmxExportGuestRip(pVCpu); rc |= hmR0VmxExportGuestRsp(pVCpu); rc |= hmR0VmxExportGuestRflags(pVCpu); AssertLogRelMsgRCReturn(rc, ("rc=%Rrc\n", rc), rc); /* Clear any bits that may be set but exported unconditionally or unused/reserved bits. */ ASMAtomicUoAndU64(&pVCpu->hm.s.fCtxChanged, ~( (HM_CHANGED_GUEST_GPRS_MASK & ~HM_CHANGED_GUEST_RSP) | HM_CHANGED_GUEST_CR2 | (HM_CHANGED_GUEST_DR_MASK & ~HM_CHANGED_GUEST_DR7) | HM_CHANGED_GUEST_X87 | HM_CHANGED_GUEST_SSE_AVX | HM_CHANGED_GUEST_OTHER_XSAVE | HM_CHANGED_GUEST_XCRx | HM_CHANGED_GUEST_KERNEL_GS_BASE /* Part of lazy or auto load-store MSRs. */ | HM_CHANGED_GUEST_SYSCALL_MSRS /* Part of lazy or auto load-store MSRs. */ | HM_CHANGED_GUEST_TSC_AUX | HM_CHANGED_GUEST_OTHER_MSRS | HM_CHANGED_GUEST_HWVIRT | (HM_CHANGED_KEEPER_STATE_MASK & ~HM_CHANGED_VMX_MASK))); STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExportGuestState, x); return rc; } /** * Exports the state shared between the host and guest into the VMCS. * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static void hmR0VmxExportSharedState(PVMCPU pVCpu) { Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_DR_MASK) { int rc = hmR0VmxExportSharedDebugState(pVCpu); AssertRC(rc); pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_GUEST_DR_MASK; /* Loading shared debug bits might have changed eflags.TF bit for debugging purposes. */ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_GUEST_RFLAGS) { rc = hmR0VmxExportGuestRflags(pVCpu); AssertRC(rc); } } if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_GUEST_LAZY_MSRS) { hmR0VmxLazyLoadGuestMsrs(pVCpu); pVCpu->hm.s.fCtxChanged &= ~HM_CHANGED_VMX_GUEST_LAZY_MSRS; } AssertMsg(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE), ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged)); } /** * Worker for loading the guest-state bits in the inner VT-x execution loop. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_EM_RESCHEDULE_REM if we try to emulate non-paged guest code * without unrestricted guest access and the VMMDev is not presently * mapped (e.g. EFI32). * * @param pVCpu The cross context virtual CPU structure. * * @remarks No-long-jump zone!!! */ static VBOXSTRICTRC hmR0VmxExportGuestStateOptimal(PVMCPU pVCpu) { HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(VMMR0IsLogFlushDisabled(pVCpu)); #ifdef HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); #endif /* * For many exits it's only RIP that changes and hence try to export it first * without going through a lot of change flag checks. */ VBOXSTRICTRC rcStrict; uint64_t fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged); RT_UNTRUSTED_NONVOLATILE_COPY_FENCE(); if ((fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)) == HM_CHANGED_GUEST_RIP) { rcStrict = hmR0VmxExportGuestRip(pVCpu); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* likely */} else AssertMsgFailedReturn(("hmR0VmxExportGuestRip failed! rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict)), rcStrict); STAM_COUNTER_INC(&pVCpu->hm.s.StatExportMinimal); } else if (fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)) { rcStrict = hmR0VmxExportGuestState(pVCpu); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* likely */} else { AssertMsg(rcStrict == VINF_EM_RESCHEDULE_REM, ("hmR0VmxExportGuestState failed! rc=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); return rcStrict; } STAM_COUNTER_INC(&pVCpu->hm.s.StatExportFull); } else rcStrict = VINF_SUCCESS; #ifdef VBOX_STRICT /* All the guest state bits should be loaded except maybe the host context and/or the shared host/guest bits. */ fCtxChanged = ASMAtomicUoReadU64(&pVCpu->hm.s.fCtxChanged); RT_UNTRUSTED_NONVOLATILE_COPY_FENCE(); AssertMsg(!(fCtxChanged & (HM_CHANGED_ALL_GUEST & ~HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE)), ("fCtxChanged=%#RX64\n", fCtxChanged)); #endif return rcStrict; } /** * Does the preparations before executing guest code in VT-x. * * This may cause longjmps to ring-3 and may even result in rescheduling to the * recompiler/IEM. We must be cautious what we do here regarding committing * guest-state information into the VMCS assuming we assuredly execute the * guest in VT-x mode. * * If we fall back to the recompiler/IEM after updating the VMCS and clearing * the common-state (TRPM/forceflags), we must undo those changes so that the * recompiler/IEM can (and should) use them when it resumes guest execution. * Otherwise such operations must be done when we can no longer exit to ring-3. * * @returns Strict VBox status code (i.e. informational status codes too). * @retval VINF_SUCCESS if we can proceed with running the guest, interrupts * have been disabled. * @retval VINF_EM_RESET if a triple-fault occurs while injecting a * double-fault into the guest. * @retval VINF_EM_DBG_STEPPED if @a fStepping is true and an event was * dispatched directly. * @retval VINF_* scheduling changes, we have to go back to ring-3. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient Pointer to the VMX transient structure. * @param fStepping Set if called from hmR0VmxRunGuestCodeStep(). Makes * us ignore some of the reasons for returning to * ring-3, and return VINF_EM_DBG_STEPPED if event * dispatching took place. */ static VBOXSTRICTRC hmR0VmxPreRunGuest(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, bool fStepping) { Assert(VMMRZCallRing3IsEnabled(pVCpu)); #ifdef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM if (CPUMIsGuestInVmxNonRootMode(&pVCpu->cpum.GstCtx)) { Log2(("hmR0VmxPreRunGuest: Rescheduling to IEM due to nested-hwvirt or forced IEM exec -> VINF_EM_RESCHEDULE_REM\n")); RT_NOREF3(pVCpu, pVmxTransient, fStepping); return VINF_EM_RESCHEDULE_REM; } #endif #ifdef VBOX_WITH_2X_4GB_ADDR_SPACE_IN_R0 PGMRZDynMapFlushAutoSet(pVCpu); #endif /* Check force flag actions that might require us to go back to ring-3. */ VBOXSTRICTRC rcStrict = hmR0VmxCheckForceFlags(pVCpu, fStepping); if (rcStrict == VINF_SUCCESS) { /* FFs doesn't get set all the time. */ } else return rcStrict; /* * Setup the virtualized-APIC accesses. * * Note! This can cause a longjumps to R3 due to the acquisition of the PGM lock * in both PGMHandlerPhysicalReset() and IOMMMIOMapMMIOHCPage(), see @bugref{8721}. * * This is the reason we do it here and not in hmR0VmxExportGuestState(). */ PVM pVM = pVCpu->CTX_SUFF(pVM); if ( !pVCpu->hm.s.vmx.u64MsrApicBase && (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_PROC_CTLS2_VIRT_APIC_ACCESS) && PDMHasApic(pVM)) { uint64_t const u64MsrApicBase = APICGetBaseMsrNoCheck(pVCpu); Assert(u64MsrApicBase); Assert(pVM->hm.s.vmx.HCPhysApicAccess); RTGCPHYS const GCPhysApicBase = u64MsrApicBase & PAGE_BASE_GC_MASK; /* Unalias any existing mapping. */ int rc = PGMHandlerPhysicalReset(pVM, GCPhysApicBase); AssertRCReturn(rc, rc); /* Map the HC APIC-access page in place of the MMIO page, also updates the shadow page tables if necessary. */ Log4Func(("Mapped HC APIC-access page at %#RGp\n", GCPhysApicBase)); rc = IOMMMIOMapMMIOHCPage(pVM, pVCpu, GCPhysApicBase, pVM->hm.s.vmx.HCPhysApicAccess, X86_PTE_RW | X86_PTE_P); AssertRCReturn(rc, rc); /* Update the per-VCPU cache of the APIC base MSR. */ pVCpu->hm.s.vmx.u64MsrApicBase = u64MsrApicBase; } if (TRPMHasTrap(pVCpu)) hmR0VmxTrpmTrapToPendingEvent(pVCpu); uint32_t fIntrState = hmR0VmxEvaluatePendingEvent(pVCpu); /* * Event injection may take locks (currently the PGM lock for real-on-v86 case) and thus * needs to be done with longjmps or interrupts + preemption enabled. Event injection might * also result in triple-faulting the VM. */ rcStrict = hmR0VmxInjectPendingEvent(pVCpu, fIntrState, fStepping); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* likely */ } else { AssertMsg(rcStrict == VINF_EM_RESET || (rcStrict == VINF_EM_DBG_STEPPED && fStepping), ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); return rcStrict; } /* * A longjump might result in importing CR3 even for VM-exits that don't necessarily * import CR3 themselves. We will need to update them here, as even as late as the above * hmR0VmxInjectPendingEvent() call may lazily import guest-CPU state on demand causing * the below force flags to be set. */ if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)) { Assert(!(ASMAtomicUoReadU64(&pVCpu->cpum.GstCtx.fExtrn) & CPUMCTX_EXTRN_CR3)); int rc2 = PGMUpdateCR3(pVCpu, CPUMGetGuestCR3(pVCpu)); AssertMsgReturn(rc2 == VINF_SUCCESS || rc2 == VINF_PGM_SYNC_CR3, ("%Rrc\n", rc2), RT_FAILURE_NP(rc2) ? rc2 : VERR_IPE_UNEXPECTED_INFO_STATUS); Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); } if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)) { PGMGstUpdatePaePdpes(pVCpu, &pVCpu->hm.s.aPdpes[0]); Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); } /* * No longjmps to ring-3 from this point on!!! * Asserts() will still longjmp to ring-3 (but won't return), which is intentional, better than a kernel panic. * This also disables flushing of the R0-logger instance (if any). */ VMMRZCallRing3Disable(pVCpu); /* * Export the guest state bits. * * We cannot perform longjmps while loading the guest state because we do not preserve the * host/guest state (although the VMCS will be preserved) across longjmps which can cause * CPU migration. * * If we are injecting events to a real-on-v86 mode guest, we will have to update * RIP and some segment registers, i.e. hmR0VmxInjectPendingEvent()->hmR0VmxInjectEventVmcs(). * Hence, loading of the guest state needs to be done -after- injection of events. */ rcStrict = hmR0VmxExportGuestStateOptimal(pVCpu); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* likely */ } else { VMMRZCallRing3Enable(pVCpu); return rcStrict; } /* * We disable interrupts so that we don't miss any interrupts that would flag preemption * (IPI/timers etc.) when thread-context hooks aren't used and we've been running with * preemption disabled for a while. Since this is purly to aid the * RTThreadPreemptIsPending() code, it doesn't matter that it may temporarily reenable and * disable interrupt on NT. * * We need to check for force-flags that could've possible been altered since we last * checked them (e.g. by PDMGetInterrupt() leaving the PDM critical section, * see @bugref{6398}). * * We also check a couple of other force-flags as a last opportunity to get the EMT back * to ring-3 before executing guest code. */ pVmxTransient->fEFlags = ASMIntDisableFlags(); if ( ( !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC) && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK)) || ( fStepping /* Optimized for the non-stepping case, so a bit of unnecessary work when stepping. */ && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK & ~(VMCPU_FF_TIMER | VMCPU_FF_PDM_CRITSECT))) ) { if (!RTThreadPreemptIsPending(NIL_RTTHREAD)) { pVCpu->hm.s.Event.fPending = false; /* * We've injected any pending events. This is really the point of no return (to ring-3). * * Note! The caller expects to continue with interrupts & longjmps disabled on successful * returns from this function, so don't enable them here. */ return VINF_SUCCESS; } STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPendingHostIrq); rcStrict = VINF_EM_RAW_INTERRUPT; } else { STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHmToR3FF); rcStrict = VINF_EM_RAW_TO_R3; } ASMSetFlags(pVmxTransient->fEFlags); VMMRZCallRing3Enable(pVCpu); return rcStrict; } /** * Prepares to run guest code in VT-x and we've committed to doing so. This * means there is no backing out to ring-3 or anywhere else at this * point. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient Pointer to the VMX transient structure. * * @remarks Called with preemption disabled. * @remarks No-long-jump zone!!! */ static void hmR0VmxPreRunGuestCommitted(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { Assert(!VMMRZCallRing3IsEnabled(pVCpu)); Assert(VMMR0IsLogFlushDisabled(pVCpu)); Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD)); /* * Indicate start of guest execution and where poking EMT out of guest-context is recognized. */ VMCPU_ASSERT_STATE(pVCpu, VMCPUSTATE_STARTED_HM); VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC); PVM pVM = pVCpu->CTX_SUFF(pVM); if (!CPUMIsGuestFPUStateActive(pVCpu)) { STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatLoadGuestFpuState, x); if (CPUMR0LoadGuestFPU(pVM, pVCpu) == VINF_CPUM_HOST_CR0_MODIFIED) pVCpu->hm.s.fCtxChanged |= HM_CHANGED_HOST_CONTEXT; STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatLoadGuestFpuState, x); STAM_COUNTER_INC(&pVCpu->hm.s.StatLoadGuestFpu); } /* * Lazy-update of the host MSRs values in the auto-load/store MSR area. */ if ( !pVCpu->hm.s.vmx.fUpdatedHostMsrs && pVCpu->hm.s.vmx.cMsrs > 0) hmR0VmxUpdateAutoLoadStoreHostMsrs(pVCpu); /* * Re-save the host state bits as we may've been preempted (only happens when * thread-context hooks are used or when hmR0VmxSetupVMRunHandler() changes pfnStartVM). * Note that the 64-on-32 switcher saves the (64-bit) host state into the VMCS and * if we change the switcher back to 32-bit, we *must* save the 32-bit host state here. * See @bugref{8432}. */ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT) { int rc = hmR0VmxExportHostState(pVCpu); AssertRC(rc); STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchPreemptExportHostState); } Assert(!(pVCpu->hm.s.fCtxChanged & HM_CHANGED_HOST_CONTEXT)); /* * Export the state shared between host and guest (FPU, debug, lazy MSRs). */ if (pVCpu->hm.s.fCtxChanged & HM_CHANGED_VMX_HOST_GUEST_SHARED_STATE) hmR0VmxExportSharedState(pVCpu); AssertMsg(!pVCpu->hm.s.fCtxChanged, ("fCtxChanged=%#RX64\n", pVCpu->hm.s.fCtxChanged)); /* Store status of the shared guest-host state at the time of VM-entry. */ #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.GstCtx)) { pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActivePending(pVCpu); pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActivePending(pVCpu); } else #endif { pVmxTransient->fWasGuestDebugStateActive = CPUMIsGuestDebugStateActive(pVCpu); pVmxTransient->fWasHyperDebugStateActive = CPUMIsHyperDebugStateActive(pVCpu); } /* * Cache the TPR-shadow for checking on every VM-exit if it might have changed. */ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW) pVmxTransient->u8GuestTpr = pVCpu->hm.s.vmx.pbVirtApic[XAPIC_OFF_TPR]; PHMPHYSCPU pHostCpu = hmR0GetCurrentCpu(); RTCPUID idCurrentCpu = pHostCpu->idCpu; if ( pVmxTransient->fUpdateTscOffsettingAndPreemptTimer || idCurrentCpu != pVCpu->hm.s.idLastCpu) { hmR0VmxUpdateTscOffsettingAndPreemptTimer(pVCpu); pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = false; } ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, true); /* Used for TLB flushing, set this across the world switch. */ hmR0VmxFlushTaggedTlb(pHostCpu, pVCpu); /* Invalidate the appropriate guest entries from the TLB. */ Assert(idCurrentCpu == pVCpu->hm.s.idLastCpu); pVCpu->hm.s.vmx.LastError.idCurrentCpu = idCurrentCpu; /* Update the error reporting info. with the current host CPU. */ STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatEntry, &pVCpu->hm.s.StatInGC, x); TMNotifyStartOfExecution(pVCpu); /* Finally, notify TM to resume its clocks as we're about to start executing. */ /* * Load the TSC_AUX MSR when we are not intercepting RDTSCP. */ if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_PROC_CTLS2_RDTSCP) { if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT)) { bool fMsrUpdated; hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_TSC_AUX); int rc2 = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_K8_TSC_AUX, CPUMGetGuestTscAux(pVCpu), true /* fUpdateHostMsr */, &fMsrUpdated); AssertRC(rc2); Assert(fMsrUpdated || pVCpu->hm.s.vmx.fUpdatedHostMsrs); /* Finally, mark that all host MSR values are updated so we don't redo it without leaving VT-x. See @bugref{6956}. */ pVCpu->hm.s.vmx.fUpdatedHostMsrs = true; } else { hmR0VmxRemoveAutoLoadStoreMsr(pVCpu, MSR_K8_TSC_AUX); Assert(!pVCpu->hm.s.vmx.cMsrs || pVCpu->hm.s.vmx.fUpdatedHostMsrs); } } if (pVM->cpum.ro.GuestFeatures.fIbrs) { bool fMsrUpdated; hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_OTHER_MSRS); int rc2 = hmR0VmxAddAutoLoadStoreMsr(pVCpu, MSR_IA32_SPEC_CTRL, CPUMGetGuestSpecCtrl(pVCpu), true /* fUpdateHostMsr */, &fMsrUpdated); AssertRC(rc2); Assert(fMsrUpdated || pVCpu->hm.s.vmx.fUpdatedHostMsrs); /* Finally, mark that all host MSR values are updated so we don't redo it without leaving VT-x. See @bugref{6956}. */ pVCpu->hm.s.vmx.fUpdatedHostMsrs = true; } #ifdef VBOX_STRICT hmR0VmxCheckAutoLoadStoreMsrs(pVCpu); hmR0VmxCheckHostEferMsr(pVCpu); AssertRC(hmR0VmxCheckVmcsCtls(pVCpu)); #endif #ifdef HMVMX_ALWAYS_CHECK_GUEST_STATE if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)) { uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu); if (uInvalidReason != VMX_IGS_REASON_NOT_FOUND) Log4(("hmR0VmxCheckGuestState returned %#x\n", uInvalidReason)); } #endif } /** * Performs some essential restoration of state after running guest code in * VT-x. * * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient Pointer to the VMX transient structure. * @param rcVMRun Return code of VMLAUNCH/VMRESUME. * * @remarks Called with interrupts disabled, and returns with interrupts enabled! * * @remarks No-long-jump zone!!! This function will however re-enable longjmps * unconditionally when it is safe to do so. */ static void hmR0VmxPostRunGuest(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, int rcVMRun) { uint64_t const uHostTsc = ASMReadTSC(); Assert(!VMMRZCallRing3IsEnabled(pVCpu)); ASMAtomicWriteBool(&pVCpu->hm.s.fCheckedTLBFlush, false); /* See HMInvalidatePageOnAllVCpus(): used for TLB flushing. */ ASMAtomicIncU32(&pVCpu->hm.s.cWorldSwitchExits); /* Initialized in vmR3CreateUVM(): used for EMT poking. */ pVCpu->hm.s.fCtxChanged = 0; /* Exits/longjmps to ring-3 requires saving the guest state. */ pVmxTransient->fVmcsFieldsRead = 0; /* Transient fields need to be read from the VMCS. */ pVmxTransient->fVectoringPF = false; /* Vectoring page-fault needs to be determined later. */ pVmxTransient->fVectoringDoublePF = false; /* Vectoring double page-fault needs to be determined later. */ if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_RDTSC_EXIT)) TMCpuTickSetLastSeen(pVCpu, uHostTsc + pVCpu->hm.s.vmx.u64TscOffset); STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatInGC, &pVCpu->hm.s.StatPreExit, x); TMNotifyEndOfExecution(pVCpu); /* Notify TM that the guest is no longer running. */ Assert(!ASMIntAreEnabled()); VMCPU_SET_STATE(pVCpu, VMCPUSTATE_STARTED_HM); #if HC_ARCH_BITS == 64 pVCpu->hm.s.vmx.fRestoreHostFlags |= VMX_RESTORE_HOST_REQUIRED; /* Host state messed up by VT-x, we must restore. */ #endif #if HC_ARCH_BITS == 32 && defined(VBOX_ENABLE_64_BITS_GUESTS) /* The 64-on-32 switcher maintains fVmcsState on its own and we need to leave it alone here. */ if (pVCpu->hm.s.vmx.pfnStartVM != VMXR0SwitcherStartVM64) pVCpu->hm.s.vmx.fVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */ #else pVCpu->hm.s.vmx.fVmcsState |= HMVMX_VMCS_STATE_LAUNCHED; /* Use VMRESUME instead of VMLAUNCH in the next run. */ #endif #ifdef VBOX_STRICT hmR0VmxCheckHostEferMsr(pVCpu); /* Verify that VMRUN/VMLAUNCH didn't modify host EFER. */ #endif ASMSetFlags(pVmxTransient->fEFlags); /* Enable interrupts. */ /* Save the basic VM-exit reason. Refer Intel spec. 24.9.1 "Basic VM-exit Information". */ uint32_t uExitReason; int rc = VMXReadVmcs32(VMX_VMCS32_RO_EXIT_REASON, &uExitReason); rc |= hmR0VmxReadEntryIntInfoVmcs(pVmxTransient); AssertRC(rc); pVmxTransient->uExitReason = VMX_EXIT_REASON_BASIC(uExitReason); pVmxTransient->fVMEntryFailed = VMX_EXIT_REASON_HAS_ENTRY_FAILED(uExitReason); if (rcVMRun == VINF_SUCCESS) { /* * Update the VM-exit history array here even if the VM-entry failed due to: * - Invalid guest state. * - MSR loading. * - Machine-check event. * * In any of the above cases we will still have a "valid" VM-exit reason * despite @a fVMEntryFailed being false. * * See Intel spec. 26.7 "VM-Entry failures during or after loading guest state". * * Note! We don't have CS or RIP at this point. Will probably address that later * by amending the history entry added here. */ EMHistoryAddExit(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_VMX, pVmxTransient->uExitReason & EMEXIT_F_TYPE_MASK), UINT64_MAX, uHostTsc); if (!pVmxTransient->fVMEntryFailed) { VMMRZCallRing3Enable(pVCpu); Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_CR3)); Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_HM_UPDATE_PAE_PDPES)); #if defined(HMVMX_ALWAYS_SYNC_FULL_GUEST_STATE) || defined(HMVMX_ALWAYS_SAVE_FULL_GUEST_STATE) rc = hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRC(rc); #elif defined(HMVMX_ALWAYS_SAVE_GUEST_RFLAGS) rc = hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_RFLAGS); AssertRC(rc); #else /* * Import the guest-interruptibility state always as we need it while evaluating * injecting events on re-entry. * * We don't import CR0 (when Unrestricted guest execution is unavailable) despite * checking for real-mode while exporting the state because all bits that cause * mode changes wrt CR0 are intercepted. */ rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_HM_VMX_INT_STATE); AssertRC(rc); #endif /* * Sync the TPR shadow with our APIC state. */ if ( (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW) && pVmxTransient->u8GuestTpr != pVCpu->hm.s.vmx.pbVirtApic[XAPIC_OFF_TPR]) { rc = APICSetTpr(pVCpu, pVCpu->hm.s.vmx.pbVirtApic[XAPIC_OFF_TPR]); AssertRC(rc); ASMAtomicOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR); } Assert(VMMRZCallRing3IsEnabled(pVCpu)); return; } } else Log4Func(("VM-entry failure: rcVMRun=%Rrc fVMEntryFailed=%RTbool\n", rcVMRun, pVmxTransient->fVMEntryFailed)); VMMRZCallRing3Enable(pVCpu); } /** * Runs the guest code using VT-x the normal way. * * @returns VBox status code. * @param pVCpu The cross context virtual CPU structure. * * @note Mostly the same as hmR0VmxRunGuestCodeStep(). */ static VBOXSTRICTRC hmR0VmxRunGuestCodeNormal(PVMCPU pVCpu) { VMXTRANSIENT VmxTransient; VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true; VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5; uint32_t cLoops = 0; for (;; cLoops++) { Assert(!HMR0SuspendPending()); HMVMX_ASSERT_CPU_SAFE(pVCpu); /* Preparatory work for running guest code, this may force us to return to ring-3. This bugger disables interrupts on VINF_SUCCESS! */ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, false /* fStepping */); if (rcStrict != VINF_SUCCESS) break; hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient); int rcRun = hmR0VmxRunGuest(pVCpu); /* Restore any residual host-state and save any bits shared between host and guest into the guest-CPU state. Re-enables interrupts! */ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun); /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */ if (RT_SUCCESS(rcRun)) { /* very likely */ } else { STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x); hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient); return rcRun; } /* Profile the VM-exit. */ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x); HMVMX_START_EXIT_DISPATCH_PROF(); VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason); /* Handle the VM-exit. */ #ifdef HMVMX_USE_FUNCTION_TABLE rcStrict = g_apfnVMExitHandlers[VmxTransient.uExitReason](pVCpu, &VmxTransient); #else rcStrict = hmR0VmxHandleExit(pVCpu, &VmxTransient, VmxTransient.uExitReason); #endif STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x); if (rcStrict == VINF_SUCCESS) { if (cLoops <= pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops) continue; /* likely */ STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops); rcStrict = VINF_EM_RAW_INTERRUPT; } break; } STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); return rcStrict; } /** @name Execution loop for single stepping, DBGF events and expensive Dtrace * probes. * * The following few functions and associated structure contains the bloat * necessary for providing detailed debug events and dtrace probes as well as * reliable host side single stepping. This works on the principle of * "subclassing" the normal execution loop and workers. We replace the loop * method completely and override selected helpers to add necessary adjustments * to their core operation. * * The goal is to keep the "parent" code lean and mean, so as not to sacrifice * any performance for debug and analysis features. * * @{ */ /** * Transient per-VCPU debug state of VMCS and related info. we save/restore in * the debug run loop. */ typedef struct VMXRUNDBGSTATE { /** The RIP we started executing at. This is for detecting that we stepped. */ uint64_t uRipStart; /** The CS we started executing with. */ uint16_t uCsStart; /** Whether we've actually modified the 1st execution control field. */ bool fModifiedProcCtls : 1; /** Whether we've actually modified the 2nd execution control field. */ bool fModifiedProcCtls2 : 1; /** Whether we've actually modified the exception bitmap. */ bool fModifiedXcptBitmap : 1; /** We desire the modified the CR0 mask to be cleared. */ bool fClearCr0Mask : 1; /** We desire the modified the CR4 mask to be cleared. */ bool fClearCr4Mask : 1; /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC. */ uint32_t fCpe1Extra; /** Stuff we do not want in VMX_VMCS32_CTRL_PROC_EXEC. */ uint32_t fCpe1Unwanted; /** Stuff we need in VMX_VMCS32_CTRL_PROC_EXEC2. */ uint32_t fCpe2Extra; /** Extra stuff we need in VMX_VMCS32_CTRL_EXCEPTION_BITMAP. */ uint32_t bmXcptExtra; /** The sequence number of the Dtrace provider settings the state was * configured against. */ uint32_t uDtraceSettingsSeqNo; /** VM-exits to check (one bit per VM-exit). */ uint32_t bmExitsToCheck[3]; /** The initial VMX_VMCS32_CTRL_PROC_EXEC value (helps with restore). */ uint32_t fProcCtlsInitial; /** The initial VMX_VMCS32_CTRL_PROC_EXEC2 value (helps with restore). */ uint32_t fProcCtls2Initial; /** The initial VMX_VMCS32_CTRL_EXCEPTION_BITMAP value (helps with restore). */ uint32_t bmXcptInitial; } VMXRUNDBGSTATE; AssertCompileMemberSize(VMXRUNDBGSTATE, bmExitsToCheck, (VMX_EXIT_MAX + 1 + 31) / 32 * 4); typedef VMXRUNDBGSTATE *PVMXRUNDBGSTATE; /** * Initializes the VMXRUNDBGSTATE structure. * * @param pVCpu The cross context virtual CPU structure of the * calling EMT. * @param pDbgState The structure to initialize. */ static void hmR0VmxRunDebugStateInit(PVMCPU pVCpu, PVMXRUNDBGSTATE pDbgState) { pDbgState->uRipStart = pVCpu->cpum.GstCtx.rip; pDbgState->uCsStart = pVCpu->cpum.GstCtx.cs.Sel; pDbgState->fModifiedProcCtls = false; pDbgState->fModifiedProcCtls2 = false; pDbgState->fModifiedXcptBitmap = false; pDbgState->fClearCr0Mask = false; pDbgState->fClearCr4Mask = false; pDbgState->fCpe1Extra = 0; pDbgState->fCpe1Unwanted = 0; pDbgState->fCpe2Extra = 0; pDbgState->bmXcptExtra = 0; pDbgState->fProcCtlsInitial = pVCpu->hm.s.vmx.u32ProcCtls; pDbgState->fProcCtls2Initial = pVCpu->hm.s.vmx.u32ProcCtls2; pDbgState->bmXcptInitial = pVCpu->hm.s.vmx.u32XcptBitmap; } /** * Updates the VMSC fields with changes requested by @a pDbgState. * * This is performed after hmR0VmxPreRunGuestDebugStateUpdate as well * immediately before executing guest code, i.e. when interrupts are disabled. * We don't check status codes here as we cannot easily assert or return in the * latter case. * * @param pVCpu The cross context virtual CPU structure. * @param pDbgState The debug state. */ static void hmR0VmxPreRunGuestDebugStateApply(PVMCPU pVCpu, PVMXRUNDBGSTATE pDbgState) { /* * Ensure desired flags in VMCS control fields are set. * (Ignoring write failure here, as we're committed and it's just debug extras.) * * Note! We load the shadow CR0 & CR4 bits when we flag the clearing, so * there should be no stale data in pCtx at this point. */ if ( (pVCpu->hm.s.vmx.u32ProcCtls & pDbgState->fCpe1Extra) != pDbgState->fCpe1Extra || (pVCpu->hm.s.vmx.u32ProcCtls & pDbgState->fCpe1Unwanted)) { pVCpu->hm.s.vmx.u32ProcCtls |= pDbgState->fCpe1Extra; pVCpu->hm.s.vmx.u32ProcCtls &= ~pDbgState->fCpe1Unwanted; VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC: %#RX32\n", pVCpu->hm.s.vmx.u32ProcCtls)); pDbgState->fModifiedProcCtls = true; } if ((pVCpu->hm.s.vmx.u32ProcCtls2 & pDbgState->fCpe2Extra) != pDbgState->fCpe2Extra) { pVCpu->hm.s.vmx.u32ProcCtls2 |= pDbgState->fCpe2Extra; VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pVCpu->hm.s.vmx.u32ProcCtls2); Log6Func(("VMX_VMCS32_CTRL_PROC_EXEC2: %#RX32\n", pVCpu->hm.s.vmx.u32ProcCtls2)); pDbgState->fModifiedProcCtls2 = true; } if ((pVCpu->hm.s.vmx.u32XcptBitmap & pDbgState->bmXcptExtra) != pDbgState->bmXcptExtra) { pVCpu->hm.s.vmx.u32XcptBitmap |= pDbgState->bmXcptExtra; VMXWriteVmcs32(VMX_VMCS32_CTRL_EXCEPTION_BITMAP, pVCpu->hm.s.vmx.u32XcptBitmap); Log6Func(("VMX_VMCS32_CTRL_EXCEPTION_BITMAP: %#RX32\n", pVCpu->hm.s.vmx.u32XcptBitmap)); pDbgState->fModifiedXcptBitmap = true; } if (pDbgState->fClearCr0Mask && pVCpu->hm.s.vmx.u32Cr0Mask != 0) { pVCpu->hm.s.vmx.u32Cr0Mask = 0; VMXWriteVmcs32(VMX_VMCS_CTRL_CR0_MASK, 0); Log6Func(("VMX_VMCS_CTRL_CR0_MASK: 0\n")); } if (pDbgState->fClearCr4Mask && pVCpu->hm.s.vmx.u32Cr4Mask != 0) { pVCpu->hm.s.vmx.u32Cr4Mask = 0; VMXWriteVmcs32(VMX_VMCS_CTRL_CR4_MASK, 0); Log6Func(("VMX_VMCS_CTRL_CR4_MASK: 0\n")); } } /** * Restores VMCS fields that were changed by hmR0VmxPreRunGuestDebugStateApply for * re-entry next time around. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. * @param pDbgState The debug state. * @param rcStrict The return code from executing the guest using single * stepping. */ static VBOXSTRICTRC hmR0VmxRunDebugStateRevert(PVMCPU pVCpu, PVMXRUNDBGSTATE pDbgState, VBOXSTRICTRC rcStrict) { /* * Restore VM-exit control settings as we may not reenter this function the * next time around. */ /* We reload the initial value, trigger what we can of recalculations the next time around. From the looks of things, that's all that's required atm. */ if (pDbgState->fModifiedProcCtls) { if (!(pDbgState->fProcCtlsInitial & VMX_PROC_CTLS_MOV_DR_EXIT) && CPUMIsHyperDebugStateActive(pVCpu)) pDbgState->fProcCtlsInitial |= VMX_PROC_CTLS_MOV_DR_EXIT; /* Avoid assertion in hmR0VmxLeave */ int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pDbgState->fProcCtlsInitial); AssertRCReturn(rc2, rc2); pVCpu->hm.s.vmx.u32ProcCtls = pDbgState->fProcCtlsInitial; } /* We're currently the only ones messing with this one, so just restore the cached value and reload the field. */ if ( pDbgState->fModifiedProcCtls2 && pVCpu->hm.s.vmx.u32ProcCtls2 != pDbgState->fProcCtls2Initial) { int rc2 = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC2, pDbgState->fProcCtls2Initial); AssertRCReturn(rc2, rc2); pVCpu->hm.s.vmx.u32ProcCtls2 = pDbgState->fProcCtls2Initial; } /* If we've modified the exception bitmap, we restore it and trigger reloading and partial recalculation the next time around. */ if (pDbgState->fModifiedXcptBitmap) pVCpu->hm.s.vmx.u32XcptBitmap = pDbgState->bmXcptInitial; return rcStrict; } /** * Configures VM-exit controls for current DBGF and DTrace settings. * * This updates @a pDbgState and the VMCS execution control fields to reflect * the necessary VM-exits demanded by DBGF and DTrace. * * @param pVCpu The cross context virtual CPU structure. * @param pDbgState The debug state. * @param pVmxTransient Pointer to the VMX transient structure. May update * fUpdateTscOffsettingAndPreemptTimer. */ static void hmR0VmxPreRunGuestDebugStateUpdate(PVMCPU pVCpu, PVMXRUNDBGSTATE pDbgState, PVMXTRANSIENT pVmxTransient) { /* * Take down the dtrace serial number so we can spot changes. */ pDbgState->uDtraceSettingsSeqNo = VBOXVMM_GET_SETTINGS_SEQ_NO(); ASMCompilerBarrier(); /* * We'll rebuild most of the middle block of data members (holding the * current settings) as we go along here, so start by clearing it all. */ pDbgState->bmXcptExtra = 0; pDbgState->fCpe1Extra = 0; pDbgState->fCpe1Unwanted = 0; pDbgState->fCpe2Extra = 0; for (unsigned i = 0; i < RT_ELEMENTS(pDbgState->bmExitsToCheck); i++) pDbgState->bmExitsToCheck[i] = 0; /* * Software interrupts (INT XXh) - no idea how to trigger these... */ PVM pVM = pVCpu->CTX_SUFF(pVM); if ( DBGF_IS_EVENT_ENABLED(pVM, DBGFEVENT_INTERRUPT_SOFTWARE) || VBOXVMM_INT_SOFTWARE_ENABLED()) { ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI); } /* * INT3 breakpoints - triggered by #BP exceptions. */ if (pVM->dbgf.ro.cEnabledInt3Breakpoints > 0) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP); /* * Exception bitmap and XCPT events+probes. */ for (int iXcpt = 0; iXcpt < (DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST + 1); iXcpt++) if (DBGF_IS_EVENT_ENABLED(pVM, (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + iXcpt))) pDbgState->bmXcptExtra |= RT_BIT_32(iXcpt); if (VBOXVMM_XCPT_DE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DE); if (VBOXVMM_XCPT_DB_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DB); if (VBOXVMM_XCPT_BP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BP); if (VBOXVMM_XCPT_OF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_OF); if (VBOXVMM_XCPT_BR_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_BR); if (VBOXVMM_XCPT_UD_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_UD); if (VBOXVMM_XCPT_NM_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NM); if (VBOXVMM_XCPT_DF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_DF); if (VBOXVMM_XCPT_TS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_TS); if (VBOXVMM_XCPT_NP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_NP); if (VBOXVMM_XCPT_SS_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SS); if (VBOXVMM_XCPT_GP_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_GP); if (VBOXVMM_XCPT_PF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_PF); if (VBOXVMM_XCPT_MF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_MF); if (VBOXVMM_XCPT_AC_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_AC); if (VBOXVMM_XCPT_XF_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_XF); if (VBOXVMM_XCPT_VE_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_VE); if (VBOXVMM_XCPT_SX_ENABLED()) pDbgState->bmXcptExtra |= RT_BIT_32(X86_XCPT_SX); if (pDbgState->bmXcptExtra) ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_XCPT_OR_NMI); /* * Process events and probes for VM-exits, making sure we get the wanted VM-exits. * * Note! This is the reverse of what hmR0VmxHandleExitDtraceEvents does. * So, when adding/changing/removing please don't forget to update it. * * Some of the macros are picking up local variables to save horizontal space, * (being able to see it in a table is the lesser evil here). */ #define IS_EITHER_ENABLED(a_pVM, a_EventSubName) \ ( DBGF_IS_EVENT_ENABLED(a_pVM, RT_CONCAT(DBGFEVENT_, a_EventSubName)) \ || RT_CONCAT3(VBOXVMM_, a_EventSubName, _ENABLED)() ) #define SET_ONLY_XBM_IF_EITHER_EN(a_EventSubName, a_uExit) \ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \ { AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \ } else do { } while (0) #define SET_CPE1_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec) \ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \ { \ (pDbgState)->fCpe1Extra |= (a_fCtrlProcExec); \ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \ } else do { } while (0) #define SET_CPEU_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fUnwantedCtrlProcExec) \ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \ { \ (pDbgState)->fCpe1Unwanted |= (a_fUnwantedCtrlProcExec); \ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \ } else do { } while (0) #define SET_CPE2_XBM_IF_EITHER_EN(a_EventSubName, a_uExit, a_fCtrlProcExec2) \ if (IS_EITHER_ENABLED(pVM, a_EventSubName)) \ { \ (pDbgState)->fCpe2Extra |= (a_fCtrlProcExec2); \ AssertCompile((unsigned)(a_uExit) < sizeof(pDbgState->bmExitsToCheck) * 8); \ ASMBitSet((pDbgState)->bmExitsToCheck, a_uExit); \ } else do { } while (0) SET_ONLY_XBM_IF_EITHER_EN(EXIT_TASK_SWITCH, VMX_EXIT_TASK_SWITCH); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_VIOLATION, VMX_EXIT_EPT_VIOLATION); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_EPT_MISCONFIG, VMX_EXIT_EPT_MISCONFIG); /* unconditional (unless #VE) */ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_ACCESS, VMX_EXIT_APIC_ACCESS); /* feature dependent, nothing to enable here */ SET_ONLY_XBM_IF_EITHER_EN(EXIT_VMX_VAPIC_WRITE, VMX_EXIT_APIC_WRITE); /* feature dependent, nothing to enable here */ SET_ONLY_XBM_IF_EITHER_EN(INSTR_CPUID, VMX_EXIT_CPUID); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_CPUID, VMX_EXIT_CPUID); SET_ONLY_XBM_IF_EITHER_EN(INSTR_GETSEC, VMX_EXIT_GETSEC); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_GETSEC, VMX_EXIT_GETSEC); SET_CPE1_XBM_IF_EITHER_EN(INSTR_HALT, VMX_EXIT_HLT, VMX_PROC_CTLS_HLT_EXIT); /* paranoia */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_HALT, VMX_EXIT_HLT); SET_ONLY_XBM_IF_EITHER_EN(INSTR_INVD, VMX_EXIT_INVD); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVD, VMX_EXIT_INVD); SET_CPE1_XBM_IF_EITHER_EN(INSTR_INVLPG, VMX_EXIT_INVLPG, VMX_PROC_CTLS_INVLPG_EXIT); SET_ONLY_XBM_IF_EITHER_EN( EXIT_INVLPG, VMX_EXIT_INVLPG); SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDPMC, VMX_EXIT_RDPMC, VMX_PROC_CTLS_RDPMC_EXIT); SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDPMC, VMX_EXIT_RDPMC); SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSC, VMX_EXIT_RDTSC, VMX_PROC_CTLS_RDTSC_EXIT); SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSC, VMX_EXIT_RDTSC); SET_ONLY_XBM_IF_EITHER_EN(INSTR_RSM, VMX_EXIT_RSM); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RSM, VMX_EXIT_RSM); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMM_CALL, VMX_EXIT_VMCALL); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMM_CALL, VMX_EXIT_VMCALL); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMCLEAR, VMX_EXIT_VMCLEAR); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMCLEAR, VMX_EXIT_VMCLEAR); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMLAUNCH, VMX_EXIT_VMLAUNCH); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRLD, VMX_EXIT_VMPTRLD); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRLD, VMX_EXIT_VMPTRLD); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMPTRST, VMX_EXIT_VMPTRST); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMPTRST, VMX_EXIT_VMPTRST); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMREAD, VMX_EXIT_VMREAD); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMREAD, VMX_EXIT_VMREAD); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMRESUME, VMX_EXIT_VMRESUME); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMRESUME, VMX_EXIT_VMRESUME); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMWRITE, VMX_EXIT_VMWRITE); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMWRITE, VMX_EXIT_VMWRITE); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXOFF, VMX_EXIT_VMXOFF); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXOFF, VMX_EXIT_VMXOFF); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMXON, VMX_EXIT_VMXON); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMXON, VMX_EXIT_VMXON); if ( IS_EITHER_ENABLED(pVM, INSTR_CRX_READ) || IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE)) { int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_APIC_TPR); AssertRC(rc); #if 0 /** @todo fix me */ pDbgState->fClearCr0Mask = true; pDbgState->fClearCr4Mask = true; #endif if (IS_EITHER_ENABLED(pVM, INSTR_CRX_READ)) pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_STORE_EXIT | VMX_PROC_CTLS_CR8_STORE_EXIT; if (IS_EITHER_ENABLED(pVM, INSTR_CRX_WRITE)) pDbgState->fCpe1Extra |= VMX_PROC_CTLS_CR3_LOAD_EXIT | VMX_PROC_CTLS_CR8_LOAD_EXIT; pDbgState->fCpe1Unwanted |= VMX_PROC_CTLS_USE_TPR_SHADOW; /* risky? */ /* Note! We currently don't use VMX_VMCS32_CTRL_CR3_TARGET_COUNT. It would require clearing here and in the loop if we start using it. */ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_CRX); } else { if (pDbgState->fClearCr0Mask) { pDbgState->fClearCr0Mask = false; ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR0); } if (pDbgState->fClearCr4Mask) { pDbgState->fClearCr4Mask = false; ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_CR4); } } SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_READ, VMX_EXIT_MOV_CRX); SET_ONLY_XBM_IF_EITHER_EN( EXIT_CRX_WRITE, VMX_EXIT_MOV_CRX); if ( IS_EITHER_ENABLED(pVM, INSTR_DRX_READ) || IS_EITHER_ENABLED(pVM, INSTR_DRX_WRITE)) { /** @todo later, need to fix handler as it assumes this won't usually happen. */ ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_MOV_DRX); } SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_READ, VMX_EXIT_MOV_DRX); SET_ONLY_XBM_IF_EITHER_EN( EXIT_DRX_WRITE, VMX_EXIT_MOV_DRX); SET_CPEU_XBM_IF_EITHER_EN(INSTR_RDMSR, VMX_EXIT_RDMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS); /* risky clearing this? */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDMSR, VMX_EXIT_RDMSR); SET_CPEU_XBM_IF_EITHER_EN(INSTR_WRMSR, VMX_EXIT_WRMSR, VMX_PROC_CTLS_USE_MSR_BITMAPS); SET_ONLY_XBM_IF_EITHER_EN( EXIT_WRMSR, VMX_EXIT_WRMSR); SET_CPE1_XBM_IF_EITHER_EN(INSTR_MWAIT, VMX_EXIT_MWAIT, VMX_PROC_CTLS_MWAIT_EXIT); /* paranoia */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_MWAIT, VMX_EXIT_MWAIT); SET_CPE1_XBM_IF_EITHER_EN(INSTR_MONITOR, VMX_EXIT_MONITOR, VMX_PROC_CTLS_MONITOR_EXIT); /* paranoia */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_MONITOR, VMX_EXIT_MONITOR); #if 0 /** @todo too slow, fix handler. */ SET_CPE1_XBM_IF_EITHER_EN(INSTR_PAUSE, VMX_EXIT_PAUSE, VMX_PROC_CTLS_PAUSE_EXIT); #endif SET_ONLY_XBM_IF_EITHER_EN( EXIT_PAUSE, VMX_EXIT_PAUSE); if ( IS_EITHER_ENABLED(pVM, INSTR_SGDT) || IS_EITHER_ENABLED(pVM, INSTR_SIDT) || IS_EITHER_ENABLED(pVM, INSTR_LGDT) || IS_EITHER_ENABLED(pVM, INSTR_LIDT)) { pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT; ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_GDTR_IDTR_ACCESS); } SET_ONLY_XBM_IF_EITHER_EN( EXIT_SGDT, VMX_EXIT_GDTR_IDTR_ACCESS); SET_ONLY_XBM_IF_EITHER_EN( EXIT_SIDT, VMX_EXIT_GDTR_IDTR_ACCESS); SET_ONLY_XBM_IF_EITHER_EN( EXIT_LGDT, VMX_EXIT_GDTR_IDTR_ACCESS); SET_ONLY_XBM_IF_EITHER_EN( EXIT_LIDT, VMX_EXIT_GDTR_IDTR_ACCESS); if ( IS_EITHER_ENABLED(pVM, INSTR_SLDT) || IS_EITHER_ENABLED(pVM, INSTR_STR) || IS_EITHER_ENABLED(pVM, INSTR_LLDT) || IS_EITHER_ENABLED(pVM, INSTR_LTR)) { pDbgState->fCpe2Extra |= VMX_PROC_CTLS2_DESC_TABLE_EXIT; ASMBitSet(pDbgState->bmExitsToCheck, VMX_EXIT_LDTR_TR_ACCESS); } SET_ONLY_XBM_IF_EITHER_EN( EXIT_SLDT, VMX_EXIT_LDTR_TR_ACCESS); SET_ONLY_XBM_IF_EITHER_EN( EXIT_STR, VMX_EXIT_LDTR_TR_ACCESS); SET_ONLY_XBM_IF_EITHER_EN( EXIT_LLDT, VMX_EXIT_LDTR_TR_ACCESS); SET_ONLY_XBM_IF_EITHER_EN( EXIT_LTR, VMX_EXIT_LDTR_TR_ACCESS); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVEPT, VMX_EXIT_INVEPT); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVEPT, VMX_EXIT_INVEPT); SET_CPE1_XBM_IF_EITHER_EN(INSTR_RDTSCP, VMX_EXIT_RDTSCP, VMX_PROC_CTLS_RDTSC_EXIT); SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDTSCP, VMX_EXIT_RDTSCP); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_INVVPID, VMX_EXIT_INVVPID); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVVPID, VMX_EXIT_INVVPID); SET_CPE2_XBM_IF_EITHER_EN(INSTR_WBINVD, VMX_EXIT_WBINVD, VMX_PROC_CTLS2_WBINVD_EXIT); SET_ONLY_XBM_IF_EITHER_EN( EXIT_WBINVD, VMX_EXIT_WBINVD); SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSETBV, VMX_EXIT_XSETBV); /* unconditional */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_XSETBV, VMX_EXIT_XSETBV); SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDRAND, VMX_EXIT_RDRAND, VMX_PROC_CTLS2_RDRAND_EXIT); SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDRAND, VMX_EXIT_RDRAND); SET_CPE1_XBM_IF_EITHER_EN(INSTR_VMX_INVPCID, VMX_EXIT_INVPCID, VMX_PROC_CTLS_INVLPG_EXIT); SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_INVPCID, VMX_EXIT_INVPCID); SET_ONLY_XBM_IF_EITHER_EN(INSTR_VMX_VMFUNC, VMX_EXIT_VMFUNC); /* unconditional for the current setup */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_VMX_VMFUNC, VMX_EXIT_VMFUNC); SET_CPE2_XBM_IF_EITHER_EN(INSTR_RDSEED, VMX_EXIT_RDSEED, VMX_PROC_CTLS2_RDSEED_EXIT); SET_ONLY_XBM_IF_EITHER_EN( EXIT_RDSEED, VMX_EXIT_RDSEED); SET_ONLY_XBM_IF_EITHER_EN(INSTR_XSAVES, VMX_EXIT_XSAVES); /* unconditional (enabled by host, guest cfg) */ SET_ONLY_XBM_IF_EITHER_EN(EXIT_XSAVES, VMX_EXIT_XSAVES); SET_ONLY_XBM_IF_EITHER_EN(INSTR_XRSTORS, VMX_EXIT_XRSTORS); /* unconditional (enabled by host, guest cfg) */ SET_ONLY_XBM_IF_EITHER_EN( EXIT_XRSTORS, VMX_EXIT_XRSTORS); #undef IS_EITHER_ENABLED #undef SET_ONLY_XBM_IF_EITHER_EN #undef SET_CPE1_XBM_IF_EITHER_EN #undef SET_CPEU_XBM_IF_EITHER_EN #undef SET_CPE2_XBM_IF_EITHER_EN /* * Sanitize the control stuff. */ pDbgState->fCpe2Extra &= pVM->hm.s.vmx.Msrs.ProcCtls2.n.allowed1; if (pDbgState->fCpe2Extra) pDbgState->fCpe1Extra |= VMX_PROC_CTLS_USE_SECONDARY_CTLS; pDbgState->fCpe1Extra &= pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed1; pDbgState->fCpe1Unwanted &= ~pVM->hm.s.vmx.Msrs.ProcCtls.n.allowed0; if (pVCpu->hm.s.fDebugWantRdTscExit != RT_BOOL(pDbgState->fCpe1Extra & VMX_PROC_CTLS_RDTSC_EXIT)) { pVCpu->hm.s.fDebugWantRdTscExit ^= true; pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; } Log6(("HM: debug state: cpe1=%#RX32 cpeu=%#RX32 cpe2=%#RX32%s%s\n", pDbgState->fCpe1Extra, pDbgState->fCpe1Unwanted, pDbgState->fCpe2Extra, pDbgState->fClearCr0Mask ? " clr-cr0" : "", pDbgState->fClearCr4Mask ? " clr-cr4" : "")); } /** * Fires off DBGF events and dtrace probes for a VM-exit, when it's * appropriate. * * The caller has checked the VM-exit against the * VMXRUNDBGSTATE::bmExitsToCheck bitmap. The caller has checked for NMIs * already, so we don't have to do that either. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient Pointer to the VMX-transient structure. * @param uExitReason The VM-exit reason. * * @remarks The name of this function is displayed by dtrace, so keep it short * and to the point. No longer than 33 chars long, please. */ static VBOXSTRICTRC hmR0VmxHandleExitDtraceEvents(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t uExitReason) { /* * Translate the event into a DBGF event (enmEvent + uEventArg) and at the * same time check whether any corresponding Dtrace event is enabled (fDtrace). * * Note! This is the reverse operation of what hmR0VmxPreRunGuestDebugStateUpdate * does. Must add/change/remove both places. Same ordering, please. * * Added/removed events must also be reflected in the next section * where we dispatch dtrace events. */ bool fDtrace1 = false; bool fDtrace2 = false; DBGFEVENTTYPE enmEvent1 = DBGFEVENT_END; DBGFEVENTTYPE enmEvent2 = DBGFEVENT_END; uint32_t uEventArg = 0; #define SET_EXIT(a_EventSubName) \ do { \ enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \ fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \ } while (0) #define SET_BOTH(a_EventSubName) \ do { \ enmEvent1 = RT_CONCAT(DBGFEVENT_INSTR_, a_EventSubName); \ enmEvent2 = RT_CONCAT(DBGFEVENT_EXIT_, a_EventSubName); \ fDtrace1 = RT_CONCAT3(VBOXVMM_INSTR_, a_EventSubName, _ENABLED)(); \ fDtrace2 = RT_CONCAT3(VBOXVMM_EXIT_, a_EventSubName, _ENABLED)(); \ } while (0) switch (uExitReason) { case VMX_EXIT_MTF: return hmR0VmxExitMtf(pVCpu, pVmxTransient); case VMX_EXIT_XCPT_OR_NMI: { uint8_t const idxVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo); switch (VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo)) { case VMX_EXIT_INT_INFO_TYPE_HW_XCPT: case VMX_EXIT_INT_INFO_TYPE_SW_XCPT: case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT: if (idxVector <= (unsigned)(DBGFEVENT_XCPT_LAST - DBGFEVENT_XCPT_FIRST)) { if (VMX_EXIT_INT_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uExitIntInfo)) { hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient); uEventArg = pVmxTransient->uExitIntErrorCode; } enmEvent1 = (DBGFEVENTTYPE)(DBGFEVENT_XCPT_FIRST + idxVector); switch (enmEvent1) { case DBGFEVENT_XCPT_DE: fDtrace1 = VBOXVMM_XCPT_DE_ENABLED(); break; case DBGFEVENT_XCPT_DB: fDtrace1 = VBOXVMM_XCPT_DB_ENABLED(); break; case DBGFEVENT_XCPT_BP: fDtrace1 = VBOXVMM_XCPT_BP_ENABLED(); break; case DBGFEVENT_XCPT_OF: fDtrace1 = VBOXVMM_XCPT_OF_ENABLED(); break; case DBGFEVENT_XCPT_BR: fDtrace1 = VBOXVMM_XCPT_BR_ENABLED(); break; case DBGFEVENT_XCPT_UD: fDtrace1 = VBOXVMM_XCPT_UD_ENABLED(); break; case DBGFEVENT_XCPT_NM: fDtrace1 = VBOXVMM_XCPT_NM_ENABLED(); break; case DBGFEVENT_XCPT_DF: fDtrace1 = VBOXVMM_XCPT_DF_ENABLED(); break; case DBGFEVENT_XCPT_TS: fDtrace1 = VBOXVMM_XCPT_TS_ENABLED(); break; case DBGFEVENT_XCPT_NP: fDtrace1 = VBOXVMM_XCPT_NP_ENABLED(); break; case DBGFEVENT_XCPT_SS: fDtrace1 = VBOXVMM_XCPT_SS_ENABLED(); break; case DBGFEVENT_XCPT_GP: fDtrace1 = VBOXVMM_XCPT_GP_ENABLED(); break; case DBGFEVENT_XCPT_PF: fDtrace1 = VBOXVMM_XCPT_PF_ENABLED(); break; case DBGFEVENT_XCPT_MF: fDtrace1 = VBOXVMM_XCPT_MF_ENABLED(); break; case DBGFEVENT_XCPT_AC: fDtrace1 = VBOXVMM_XCPT_AC_ENABLED(); break; case DBGFEVENT_XCPT_XF: fDtrace1 = VBOXVMM_XCPT_XF_ENABLED(); break; case DBGFEVENT_XCPT_VE: fDtrace1 = VBOXVMM_XCPT_VE_ENABLED(); break; case DBGFEVENT_XCPT_SX: fDtrace1 = VBOXVMM_XCPT_SX_ENABLED(); break; default: break; } } else AssertFailed(); break; case VMX_EXIT_INT_INFO_TYPE_SW_INT: uEventArg = idxVector; enmEvent1 = DBGFEVENT_INTERRUPT_SOFTWARE; fDtrace1 = VBOXVMM_INT_SOFTWARE_ENABLED(); break; } break; } case VMX_EXIT_TRIPLE_FAULT: enmEvent1 = DBGFEVENT_TRIPLE_FAULT; //fDtrace1 = VBOXVMM_EXIT_TRIPLE_FAULT_ENABLED(); break; case VMX_EXIT_TASK_SWITCH: SET_EXIT(TASK_SWITCH); break; case VMX_EXIT_EPT_VIOLATION: SET_EXIT(VMX_EPT_VIOLATION); break; case VMX_EXIT_EPT_MISCONFIG: SET_EXIT(VMX_EPT_MISCONFIG); break; case VMX_EXIT_APIC_ACCESS: SET_EXIT(VMX_VAPIC_ACCESS); break; case VMX_EXIT_APIC_WRITE: SET_EXIT(VMX_VAPIC_WRITE); break; /* Instruction specific VM-exits: */ case VMX_EXIT_CPUID: SET_BOTH(CPUID); break; case VMX_EXIT_GETSEC: SET_BOTH(GETSEC); break; case VMX_EXIT_HLT: SET_BOTH(HALT); break; case VMX_EXIT_INVD: SET_BOTH(INVD); break; case VMX_EXIT_INVLPG: SET_BOTH(INVLPG); break; case VMX_EXIT_RDPMC: SET_BOTH(RDPMC); break; case VMX_EXIT_RDTSC: SET_BOTH(RDTSC); break; case VMX_EXIT_RSM: SET_BOTH(RSM); break; case VMX_EXIT_VMCALL: SET_BOTH(VMM_CALL); break; case VMX_EXIT_VMCLEAR: SET_BOTH(VMX_VMCLEAR); break; case VMX_EXIT_VMLAUNCH: SET_BOTH(VMX_VMLAUNCH); break; case VMX_EXIT_VMPTRLD: SET_BOTH(VMX_VMPTRLD); break; case VMX_EXIT_VMPTRST: SET_BOTH(VMX_VMPTRST); break; case VMX_EXIT_VMREAD: SET_BOTH(VMX_VMREAD); break; case VMX_EXIT_VMRESUME: SET_BOTH(VMX_VMRESUME); break; case VMX_EXIT_VMWRITE: SET_BOTH(VMX_VMWRITE); break; case VMX_EXIT_VMXOFF: SET_BOTH(VMX_VMXOFF); break; case VMX_EXIT_VMXON: SET_BOTH(VMX_VMXON); break; case VMX_EXIT_MOV_CRX: hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); if (VMX_EXIT_QUAL_CRX_ACCESS(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_CRX_ACCESS_READ) SET_BOTH(CRX_READ); else SET_BOTH(CRX_WRITE); uEventArg = VMX_EXIT_QUAL_CRX_REGISTER(pVmxTransient->uExitQual); break; case VMX_EXIT_MOV_DRX: hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); if ( VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_READ) SET_BOTH(DRX_READ); else SET_BOTH(DRX_WRITE); uEventArg = VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual); break; case VMX_EXIT_RDMSR: SET_BOTH(RDMSR); break; case VMX_EXIT_WRMSR: SET_BOTH(WRMSR); break; case VMX_EXIT_MWAIT: SET_BOTH(MWAIT); break; case VMX_EXIT_MONITOR: SET_BOTH(MONITOR); break; case VMX_EXIT_PAUSE: SET_BOTH(PAUSE); break; case VMX_EXIT_GDTR_IDTR_ACCESS: hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_XDTR_INSINFO_INSTR_ID)) { case VMX_XDTR_INSINFO_II_SGDT: SET_BOTH(SGDT); break; case VMX_XDTR_INSINFO_II_SIDT: SET_BOTH(SIDT); break; case VMX_XDTR_INSINFO_II_LGDT: SET_BOTH(LGDT); break; case VMX_XDTR_INSINFO_II_LIDT: SET_BOTH(LIDT); break; } break; case VMX_EXIT_LDTR_TR_ACCESS: hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); switch (RT_BF_GET(pVmxTransient->ExitInstrInfo.u, VMX_BF_YYTR_INSINFO_INSTR_ID)) { case VMX_YYTR_INSINFO_II_SLDT: SET_BOTH(SLDT); break; case VMX_YYTR_INSINFO_II_STR: SET_BOTH(STR); break; case VMX_YYTR_INSINFO_II_LLDT: SET_BOTH(LLDT); break; case VMX_YYTR_INSINFO_II_LTR: SET_BOTH(LTR); break; } break; case VMX_EXIT_INVEPT: SET_BOTH(VMX_INVEPT); break; case VMX_EXIT_RDTSCP: SET_BOTH(RDTSCP); break; case VMX_EXIT_INVVPID: SET_BOTH(VMX_INVVPID); break; case VMX_EXIT_WBINVD: SET_BOTH(WBINVD); break; case VMX_EXIT_XSETBV: SET_BOTH(XSETBV); break; case VMX_EXIT_RDRAND: SET_BOTH(RDRAND); break; case VMX_EXIT_INVPCID: SET_BOTH(VMX_INVPCID); break; case VMX_EXIT_VMFUNC: SET_BOTH(VMX_VMFUNC); break; case VMX_EXIT_RDSEED: SET_BOTH(RDSEED); break; case VMX_EXIT_XSAVES: SET_BOTH(XSAVES); break; case VMX_EXIT_XRSTORS: SET_BOTH(XRSTORS); break; /* Events that aren't relevant at this point. */ case VMX_EXIT_EXT_INT: case VMX_EXIT_INT_WINDOW: case VMX_EXIT_NMI_WINDOW: case VMX_EXIT_TPR_BELOW_THRESHOLD: case VMX_EXIT_PREEMPT_TIMER: case VMX_EXIT_IO_INSTR: break; /* Errors and unexpected events. */ case VMX_EXIT_INIT_SIGNAL: case VMX_EXIT_SIPI: case VMX_EXIT_IO_SMI: case VMX_EXIT_SMI: case VMX_EXIT_ERR_INVALID_GUEST_STATE: case VMX_EXIT_ERR_MSR_LOAD: case VMX_EXIT_ERR_MACHINE_CHECK: break; default: AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason)); break; } #undef SET_BOTH #undef SET_EXIT /* * Dtrace tracepoints go first. We do them here at once so we don't * have to copy the guest state saving and stuff a few dozen times. * Down side is that we've got to repeat the switch, though this time * we use enmEvent since the probes are a subset of what DBGF does. */ if (fDtrace1 || fDtrace2) { hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; switch (enmEvent1) { /** @todo consider which extra parameters would be helpful for each probe. */ case DBGFEVENT_END: break; case DBGFEVENT_XCPT_DE: VBOXVMM_XCPT_DE(pVCpu, pCtx); break; case DBGFEVENT_XCPT_DB: VBOXVMM_XCPT_DB(pVCpu, pCtx, pCtx->dr[6]); break; case DBGFEVENT_XCPT_BP: VBOXVMM_XCPT_BP(pVCpu, pCtx); break; case DBGFEVENT_XCPT_OF: VBOXVMM_XCPT_OF(pVCpu, pCtx); break; case DBGFEVENT_XCPT_BR: VBOXVMM_XCPT_BR(pVCpu, pCtx); break; case DBGFEVENT_XCPT_UD: VBOXVMM_XCPT_UD(pVCpu, pCtx); break; case DBGFEVENT_XCPT_NM: VBOXVMM_XCPT_NM(pVCpu, pCtx); break; case DBGFEVENT_XCPT_DF: VBOXVMM_XCPT_DF(pVCpu, pCtx); break; case DBGFEVENT_XCPT_TS: VBOXVMM_XCPT_TS(pVCpu, pCtx, uEventArg); break; case DBGFEVENT_XCPT_NP: VBOXVMM_XCPT_NP(pVCpu, pCtx, uEventArg); break; case DBGFEVENT_XCPT_SS: VBOXVMM_XCPT_SS(pVCpu, pCtx, uEventArg); break; case DBGFEVENT_XCPT_GP: VBOXVMM_XCPT_GP(pVCpu, pCtx, uEventArg); break; case DBGFEVENT_XCPT_PF: VBOXVMM_XCPT_PF(pVCpu, pCtx, uEventArg, pCtx->cr2); break; case DBGFEVENT_XCPT_MF: VBOXVMM_XCPT_MF(pVCpu, pCtx); break; case DBGFEVENT_XCPT_AC: VBOXVMM_XCPT_AC(pVCpu, pCtx); break; case DBGFEVENT_XCPT_XF: VBOXVMM_XCPT_XF(pVCpu, pCtx); break; case DBGFEVENT_XCPT_VE: VBOXVMM_XCPT_VE(pVCpu, pCtx); break; case DBGFEVENT_XCPT_SX: VBOXVMM_XCPT_SX(pVCpu, pCtx, uEventArg); break; case DBGFEVENT_INTERRUPT_SOFTWARE: VBOXVMM_INT_SOFTWARE(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_INSTR_CPUID: VBOXVMM_INSTR_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break; case DBGFEVENT_INSTR_GETSEC: VBOXVMM_INSTR_GETSEC(pVCpu, pCtx); break; case DBGFEVENT_INSTR_HALT: VBOXVMM_INSTR_HALT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_INVD: VBOXVMM_INSTR_INVD(pVCpu, pCtx); break; case DBGFEVENT_INSTR_INVLPG: VBOXVMM_INSTR_INVLPG(pVCpu, pCtx); break; case DBGFEVENT_INSTR_RDPMC: VBOXVMM_INSTR_RDPMC(pVCpu, pCtx); break; case DBGFEVENT_INSTR_RDTSC: VBOXVMM_INSTR_RDTSC(pVCpu, pCtx); break; case DBGFEVENT_INSTR_RSM: VBOXVMM_INSTR_RSM(pVCpu, pCtx); break; case DBGFEVENT_INSTR_CRX_READ: VBOXVMM_INSTR_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_INSTR_CRX_WRITE: VBOXVMM_INSTR_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_INSTR_DRX_READ: VBOXVMM_INSTR_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_INSTR_DRX_WRITE: VBOXVMM_INSTR_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_INSTR_RDMSR: VBOXVMM_INSTR_RDMSR(pVCpu, pCtx, pCtx->ecx); break; case DBGFEVENT_INSTR_WRMSR: VBOXVMM_INSTR_WRMSR(pVCpu, pCtx, pCtx->ecx, RT_MAKE_U64(pCtx->eax, pCtx->edx)); break; case DBGFEVENT_INSTR_MWAIT: VBOXVMM_INSTR_MWAIT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_MONITOR: VBOXVMM_INSTR_MONITOR(pVCpu, pCtx); break; case DBGFEVENT_INSTR_PAUSE: VBOXVMM_INSTR_PAUSE(pVCpu, pCtx); break; case DBGFEVENT_INSTR_SGDT: VBOXVMM_INSTR_SGDT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_SIDT: VBOXVMM_INSTR_SIDT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_LGDT: VBOXVMM_INSTR_LGDT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_LIDT: VBOXVMM_INSTR_LIDT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_SLDT: VBOXVMM_INSTR_SLDT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_STR: VBOXVMM_INSTR_STR(pVCpu, pCtx); break; case DBGFEVENT_INSTR_LLDT: VBOXVMM_INSTR_LLDT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_LTR: VBOXVMM_INSTR_LTR(pVCpu, pCtx); break; case DBGFEVENT_INSTR_RDTSCP: VBOXVMM_INSTR_RDTSCP(pVCpu, pCtx); break; case DBGFEVENT_INSTR_WBINVD: VBOXVMM_INSTR_WBINVD(pVCpu, pCtx); break; case DBGFEVENT_INSTR_XSETBV: VBOXVMM_INSTR_XSETBV(pVCpu, pCtx); break; case DBGFEVENT_INSTR_RDRAND: VBOXVMM_INSTR_RDRAND(pVCpu, pCtx); break; case DBGFEVENT_INSTR_RDSEED: VBOXVMM_INSTR_RDSEED(pVCpu, pCtx); break; case DBGFEVENT_INSTR_XSAVES: VBOXVMM_INSTR_XSAVES(pVCpu, pCtx); break; case DBGFEVENT_INSTR_XRSTORS: VBOXVMM_INSTR_XRSTORS(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMM_CALL: VBOXVMM_INSTR_VMM_CALL(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMCLEAR: VBOXVMM_INSTR_VMX_VMCLEAR(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMLAUNCH: VBOXVMM_INSTR_VMX_VMLAUNCH(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMPTRLD: VBOXVMM_INSTR_VMX_VMPTRLD(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMPTRST: VBOXVMM_INSTR_VMX_VMPTRST(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMREAD: VBOXVMM_INSTR_VMX_VMREAD(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMRESUME: VBOXVMM_INSTR_VMX_VMRESUME(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMWRITE: VBOXVMM_INSTR_VMX_VMWRITE(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMXOFF: VBOXVMM_INSTR_VMX_VMXOFF(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMXON: VBOXVMM_INSTR_VMX_VMXON(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_INVEPT: VBOXVMM_INSTR_VMX_INVEPT(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_INVVPID: VBOXVMM_INSTR_VMX_INVVPID(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_INVPCID: VBOXVMM_INSTR_VMX_INVPCID(pVCpu, pCtx); break; case DBGFEVENT_INSTR_VMX_VMFUNC: VBOXVMM_INSTR_VMX_VMFUNC(pVCpu, pCtx); break; default: AssertMsgFailed(("enmEvent1=%d uExitReason=%d\n", enmEvent1, uExitReason)); break; } switch (enmEvent2) { /** @todo consider which extra parameters would be helpful for each probe. */ case DBGFEVENT_END: break; case DBGFEVENT_EXIT_TASK_SWITCH: VBOXVMM_EXIT_TASK_SWITCH(pVCpu, pCtx); break; case DBGFEVENT_EXIT_CPUID: VBOXVMM_EXIT_CPUID(pVCpu, pCtx, pCtx->eax, pCtx->ecx); break; case DBGFEVENT_EXIT_GETSEC: VBOXVMM_EXIT_GETSEC(pVCpu, pCtx); break; case DBGFEVENT_EXIT_HALT: VBOXVMM_EXIT_HALT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_INVD: VBOXVMM_EXIT_INVD(pVCpu, pCtx); break; case DBGFEVENT_EXIT_INVLPG: VBOXVMM_EXIT_INVLPG(pVCpu, pCtx); break; case DBGFEVENT_EXIT_RDPMC: VBOXVMM_EXIT_RDPMC(pVCpu, pCtx); break; case DBGFEVENT_EXIT_RDTSC: VBOXVMM_EXIT_RDTSC(pVCpu, pCtx); break; case DBGFEVENT_EXIT_RSM: VBOXVMM_EXIT_RSM(pVCpu, pCtx); break; case DBGFEVENT_EXIT_CRX_READ: VBOXVMM_EXIT_CRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_EXIT_CRX_WRITE: VBOXVMM_EXIT_CRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_EXIT_DRX_READ: VBOXVMM_EXIT_DRX_READ(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_EXIT_DRX_WRITE: VBOXVMM_EXIT_DRX_WRITE(pVCpu, pCtx, (uint8_t)uEventArg); break; case DBGFEVENT_EXIT_RDMSR: VBOXVMM_EXIT_RDMSR(pVCpu, pCtx, pCtx->ecx); break; case DBGFEVENT_EXIT_WRMSR: VBOXVMM_EXIT_WRMSR(pVCpu, pCtx, pCtx->ecx, RT_MAKE_U64(pCtx->eax, pCtx->edx)); break; case DBGFEVENT_EXIT_MWAIT: VBOXVMM_EXIT_MWAIT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_MONITOR: VBOXVMM_EXIT_MONITOR(pVCpu, pCtx); break; case DBGFEVENT_EXIT_PAUSE: VBOXVMM_EXIT_PAUSE(pVCpu, pCtx); break; case DBGFEVENT_EXIT_SGDT: VBOXVMM_EXIT_SGDT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_SIDT: VBOXVMM_EXIT_SIDT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_LGDT: VBOXVMM_EXIT_LGDT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_LIDT: VBOXVMM_EXIT_LIDT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_SLDT: VBOXVMM_EXIT_SLDT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_STR: VBOXVMM_EXIT_STR(pVCpu, pCtx); break; case DBGFEVENT_EXIT_LLDT: VBOXVMM_EXIT_LLDT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_LTR: VBOXVMM_EXIT_LTR(pVCpu, pCtx); break; case DBGFEVENT_EXIT_RDTSCP: VBOXVMM_EXIT_RDTSCP(pVCpu, pCtx); break; case DBGFEVENT_EXIT_WBINVD: VBOXVMM_EXIT_WBINVD(pVCpu, pCtx); break; case DBGFEVENT_EXIT_XSETBV: VBOXVMM_EXIT_XSETBV(pVCpu, pCtx); break; case DBGFEVENT_EXIT_RDRAND: VBOXVMM_EXIT_RDRAND(pVCpu, pCtx); break; case DBGFEVENT_EXIT_RDSEED: VBOXVMM_EXIT_RDSEED(pVCpu, pCtx); break; case DBGFEVENT_EXIT_XSAVES: VBOXVMM_EXIT_XSAVES(pVCpu, pCtx); break; case DBGFEVENT_EXIT_XRSTORS: VBOXVMM_EXIT_XRSTORS(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMM_CALL: VBOXVMM_EXIT_VMM_CALL(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMCLEAR: VBOXVMM_EXIT_VMX_VMCLEAR(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMLAUNCH: VBOXVMM_EXIT_VMX_VMLAUNCH(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMPTRLD: VBOXVMM_EXIT_VMX_VMPTRLD(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMPTRST: VBOXVMM_EXIT_VMX_VMPTRST(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMREAD: VBOXVMM_EXIT_VMX_VMREAD(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMRESUME: VBOXVMM_EXIT_VMX_VMRESUME(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMWRITE: VBOXVMM_EXIT_VMX_VMWRITE(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMXOFF: VBOXVMM_EXIT_VMX_VMXOFF(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMXON: VBOXVMM_EXIT_VMX_VMXON(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_INVEPT: VBOXVMM_EXIT_VMX_INVEPT(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_INVVPID: VBOXVMM_EXIT_VMX_INVVPID(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_INVPCID: VBOXVMM_EXIT_VMX_INVPCID(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VMFUNC: VBOXVMM_EXIT_VMX_VMFUNC(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_EPT_MISCONFIG: VBOXVMM_EXIT_VMX_EPT_MISCONFIG(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_EPT_VIOLATION: VBOXVMM_EXIT_VMX_EPT_VIOLATION(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VAPIC_ACCESS: VBOXVMM_EXIT_VMX_VAPIC_ACCESS(pVCpu, pCtx); break; case DBGFEVENT_EXIT_VMX_VAPIC_WRITE: VBOXVMM_EXIT_VMX_VAPIC_WRITE(pVCpu, pCtx); break; default: AssertMsgFailed(("enmEvent2=%d uExitReason=%d\n", enmEvent2, uExitReason)); break; } } /* * Fire of the DBGF event, if enabled (our check here is just a quick one, * the DBGF call will do a full check). * * Note! DBGF sets DBGFEVENT_INTERRUPT_SOFTWARE in the bitmap. * Note! If we have to events, we prioritize the first, i.e. the instruction * one, in order to avoid event nesting. */ PVM pVM = pVCpu->CTX_SUFF(pVM); if ( enmEvent1 != DBGFEVENT_END && DBGF_IS_EVENT_ENABLED(pVM, enmEvent1)) { HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP); VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent1, DBGFEVENTCTX_HM, 1, uEventArg); if (rcStrict != VINF_SUCCESS) return rcStrict; } else if ( enmEvent2 != DBGFEVENT_END && DBGF_IS_EVENT_ENABLED(pVM, enmEvent2)) { HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP); VBOXSTRICTRC rcStrict = DBGFEventGenericWithArgs(pVM, pVCpu, enmEvent2, DBGFEVENTCTX_HM, 1, uEventArg); if (rcStrict != VINF_SUCCESS) return rcStrict; } return VINF_SUCCESS; } /** * Single-stepping VM-exit filtering. * * This is preprocessing the VM-exits and deciding whether we've gotten far * enough to return VINF_EM_DBG_STEPPED already. If not, normal VM-exit * handling is performed. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure of the calling EMT. * @param pVmxTransient Pointer to the VMX-transient structure. * @param pDbgState The debug state. */ DECLINLINE(VBOXSTRICTRC) hmR0VmxRunDebugHandleExit(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, PVMXRUNDBGSTATE pDbgState) { /* * Expensive (saves context) generic dtrace VM-exit probe. */ uint32_t const uExitReason = pVmxTransient->uExitReason; if (!VBOXVMM_R0_HMVMX_VMEXIT_ENABLED()) { /* more likely */ } else { hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); int rc = hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRC(rc); VBOXVMM_R0_HMVMX_VMEXIT(pVCpu, &pVCpu->cpum.GstCtx, pVmxTransient->uExitReason, pVmxTransient->uExitQual); } /* * Check for host NMI, just to get that out of the way. */ if (uExitReason != VMX_EXIT_XCPT_OR_NMI) { /* normally likely */ } else { int rc2 = hmR0VmxReadExitIntInfoVmcs(pVmxTransient); AssertRCReturn(rc2, rc2); uint32_t uIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo); if (uIntType == VMX_EXIT_INT_INFO_TYPE_NMI) return hmR0VmxExitXcptOrNmi(pVCpu, pVmxTransient); } /* * Check for single stepping event if we're stepping. */ if (pVCpu->hm.s.fSingleInstruction) { switch (uExitReason) { case VMX_EXIT_MTF: return hmR0VmxExitMtf(pVCpu, pVmxTransient); /* Various events: */ case VMX_EXIT_XCPT_OR_NMI: case VMX_EXIT_EXT_INT: case VMX_EXIT_TRIPLE_FAULT: case VMX_EXIT_INT_WINDOW: case VMX_EXIT_NMI_WINDOW: case VMX_EXIT_TASK_SWITCH: case VMX_EXIT_TPR_BELOW_THRESHOLD: case VMX_EXIT_APIC_ACCESS: case VMX_EXIT_EPT_VIOLATION: case VMX_EXIT_EPT_MISCONFIG: case VMX_EXIT_PREEMPT_TIMER: /* Instruction specific VM-exits: */ case VMX_EXIT_CPUID: case VMX_EXIT_GETSEC: case VMX_EXIT_HLT: case VMX_EXIT_INVD: case VMX_EXIT_INVLPG: case VMX_EXIT_RDPMC: case VMX_EXIT_RDTSC: case VMX_EXIT_RSM: case VMX_EXIT_VMCALL: case VMX_EXIT_VMCLEAR: case VMX_EXIT_VMLAUNCH: case VMX_EXIT_VMPTRLD: case VMX_EXIT_VMPTRST: case VMX_EXIT_VMREAD: case VMX_EXIT_VMRESUME: case VMX_EXIT_VMWRITE: case VMX_EXIT_VMXOFF: case VMX_EXIT_VMXON: case VMX_EXIT_MOV_CRX: case VMX_EXIT_MOV_DRX: case VMX_EXIT_IO_INSTR: case VMX_EXIT_RDMSR: case VMX_EXIT_WRMSR: case VMX_EXIT_MWAIT: case VMX_EXIT_MONITOR: case VMX_EXIT_PAUSE: case VMX_EXIT_GDTR_IDTR_ACCESS: case VMX_EXIT_LDTR_TR_ACCESS: case VMX_EXIT_INVEPT: case VMX_EXIT_RDTSCP: case VMX_EXIT_INVVPID: case VMX_EXIT_WBINVD: case VMX_EXIT_XSETBV: case VMX_EXIT_RDRAND: case VMX_EXIT_INVPCID: case VMX_EXIT_VMFUNC: case VMX_EXIT_RDSEED: case VMX_EXIT_XSAVES: case VMX_EXIT_XRSTORS: { int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP); AssertRCReturn(rc, rc); if ( pVCpu->cpum.GstCtx.rip != pDbgState->uRipStart || pVCpu->cpum.GstCtx.cs.Sel != pDbgState->uCsStart) return VINF_EM_DBG_STEPPED; break; } /* Errors and unexpected events: */ case VMX_EXIT_INIT_SIGNAL: case VMX_EXIT_SIPI: case VMX_EXIT_IO_SMI: case VMX_EXIT_SMI: case VMX_EXIT_ERR_INVALID_GUEST_STATE: case VMX_EXIT_ERR_MSR_LOAD: case VMX_EXIT_ERR_MACHINE_CHECK: case VMX_EXIT_APIC_WRITE: /* Some talk about this being fault like, so I guess we must process it? */ break; default: AssertMsgFailed(("Unexpected VM-exit=%#x\n", uExitReason)); break; } } /* * Check for debugger event breakpoints and dtrace probes. */ if ( uExitReason < RT_ELEMENTS(pDbgState->bmExitsToCheck) * 32U && ASMBitTest(pDbgState->bmExitsToCheck, uExitReason) ) { VBOXSTRICTRC rcStrict = hmR0VmxHandleExitDtraceEvents(pVCpu, pVmxTransient, uExitReason); if (rcStrict != VINF_SUCCESS) return rcStrict; } /* * Normal processing. */ #ifdef HMVMX_USE_FUNCTION_TABLE return g_apfnVMExitHandlers[uExitReason](pVCpu, pVmxTransient); #else return hmR0VmxHandleExit(pVCpu, pVmxTransient, uExitReason); #endif } /** * Single steps guest code using VT-x. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. * * @note Mostly the same as hmR0VmxRunGuestCodeNormal(). */ static VBOXSTRICTRC hmR0VmxRunGuestCodeDebug(PVMCPU pVCpu) { VMXTRANSIENT VmxTransient; VmxTransient.fUpdateTscOffsettingAndPreemptTimer = true; /* Set HMCPU indicators. */ bool const fSavedSingleInstruction = pVCpu->hm.s.fSingleInstruction; pVCpu->hm.s.fSingleInstruction = pVCpu->hm.s.fSingleInstruction || DBGFIsStepping(pVCpu); pVCpu->hm.s.fDebugWantRdTscExit = false; pVCpu->hm.s.fUsingDebugLoop = true; /* State we keep to help modify and later restore the VMCS fields we alter, and for detecting steps. */ VMXRUNDBGSTATE DbgState; hmR0VmxRunDebugStateInit(pVCpu, &DbgState); hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &DbgState, &VmxTransient); /* * The loop. */ VBOXSTRICTRC rcStrict = VERR_INTERNAL_ERROR_5; for (uint32_t cLoops = 0; ; cLoops++) { Assert(!HMR0SuspendPending()); HMVMX_ASSERT_CPU_SAFE(pVCpu); bool fStepping = pVCpu->hm.s.fSingleInstruction; /* * Preparatory work for running guest code, this may force us to return * to ring-3. This bugger disables interrupts on VINF_SUCCESS! */ STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatEntry, x); hmR0VmxPreRunGuestDebugStateApply(pVCpu, &DbgState); /* Set up execute controls the next to can respond to. */ rcStrict = hmR0VmxPreRunGuest(pVCpu, &VmxTransient, fStepping); if (rcStrict != VINF_SUCCESS) break; hmR0VmxPreRunGuestCommitted(pVCpu, &VmxTransient); hmR0VmxPreRunGuestDebugStateApply(pVCpu, &DbgState); /* Override any obnoxious code in the above two calls. */ /* * Now we can run the guest code. */ int rcRun = hmR0VmxRunGuest(pVCpu); /* * Restore any residual host-state and save any bits shared between host * and guest into the guest-CPU state. Re-enables interrupts! */ hmR0VmxPostRunGuest(pVCpu, &VmxTransient, rcRun); /* Check for errors with running the VM (VMLAUNCH/VMRESUME). */ if (RT_SUCCESS(rcRun)) { /* very likely */ } else { STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatPreExit, x); hmR0VmxReportWorldSwitchError(pVCpu, rcRun, &VmxTransient); return rcRun; } /* Profile the VM-exit. */ AssertMsg(VmxTransient.uExitReason <= VMX_EXIT_MAX, ("%#x\n", VmxTransient.uExitReason)); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitAll); STAM_COUNTER_INC(&pVCpu->hm.s.paStatExitReasonR0[VmxTransient.uExitReason & MASK_EXITREASON_STAT]); STAM_PROFILE_ADV_STOP_START(&pVCpu->hm.s.StatPreExit, &pVCpu->hm.s.StatExitHandling, x); HMVMX_START_EXIT_DISPATCH_PROF(); VBOXVMM_R0_HMVMX_VMEXIT_NOCTX(pVCpu, &pVCpu->cpum.GstCtx, VmxTransient.uExitReason); /* * Handle the VM-exit - we quit earlier on certain VM-exits, see hmR0VmxHandleExitDebug(). */ rcStrict = hmR0VmxRunDebugHandleExit(pVCpu, &VmxTransient, &DbgState); STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitHandling, x); if (rcStrict != VINF_SUCCESS) break; if (cLoops > pVCpu->CTX_SUFF(pVM)->hm.s.cMaxResumeLoops) { STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchMaxResumeLoops); rcStrict = VINF_EM_RAW_INTERRUPT; break; } /* * Stepping: Did the RIP change, if so, consider it a single step. * Otherwise, make sure one of the TFs gets set. */ if (fStepping) { int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP); AssertRC(rc); if ( pVCpu->cpum.GstCtx.rip != DbgState.uRipStart || pVCpu->cpum.GstCtx.cs.Sel != DbgState.uCsStart) { rcStrict = VINF_EM_DBG_STEPPED; break; } ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7); } /* * Update when dtrace settings changes (DBGF kicks us, so no need to check). */ if (VBOXVMM_GET_SETTINGS_SEQ_NO() != DbgState.uDtraceSettingsSeqNo) hmR0VmxPreRunGuestDebugStateUpdate(pVCpu, &DbgState, &VmxTransient); } /* * Clear the X86_EFL_TF if necessary. */ if (pVCpu->hm.s.fClearTrapFlag) { int rc = hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_RFLAGS); AssertRC(rc); pVCpu->hm.s.fClearTrapFlag = false; pVCpu->cpum.GstCtx.eflags.Bits.u1TF = 0; } /** @todo there seems to be issues with the resume flag when the monitor trap * flag is pending without being used. Seen early in bios init when * accessing APIC page in protected mode. */ /* * Restore VM-exit control settings as we may not reenter this function the * next time around. */ rcStrict = hmR0VmxRunDebugStateRevert(pVCpu, &DbgState, rcStrict); /* Restore HMCPU indicators. */ pVCpu->hm.s.fUsingDebugLoop = false; pVCpu->hm.s.fDebugWantRdTscExit = false; pVCpu->hm.s.fSingleInstruction = fSavedSingleInstruction; STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatEntry, x); return rcStrict; } /** @} */ /** * Checks if any expensive dtrace probes are enabled and we should go to the * debug loop. * * @returns true if we should use debug loop, false if not. */ static bool hmR0VmxAnyExpensiveProbesEnabled(void) { /* It's probably faster to OR the raw 32-bit counter variables together. Since the variables are in an array and the probes are next to one another (more or less), we have good locality. So, better read eight-nine cache lines ever time and only have one conditional, than 128+ conditionals, right? */ return ( VBOXVMM_R0_HMVMX_VMEXIT_ENABLED_RAW() /* expensive too due to context */ | VBOXVMM_XCPT_DE_ENABLED_RAW() | VBOXVMM_XCPT_DB_ENABLED_RAW() | VBOXVMM_XCPT_BP_ENABLED_RAW() | VBOXVMM_XCPT_OF_ENABLED_RAW() | VBOXVMM_XCPT_BR_ENABLED_RAW() | VBOXVMM_XCPT_UD_ENABLED_RAW() | VBOXVMM_XCPT_NM_ENABLED_RAW() | VBOXVMM_XCPT_DF_ENABLED_RAW() | VBOXVMM_XCPT_TS_ENABLED_RAW() | VBOXVMM_XCPT_NP_ENABLED_RAW() | VBOXVMM_XCPT_SS_ENABLED_RAW() | VBOXVMM_XCPT_GP_ENABLED_RAW() | VBOXVMM_XCPT_PF_ENABLED_RAW() | VBOXVMM_XCPT_MF_ENABLED_RAW() | VBOXVMM_XCPT_AC_ENABLED_RAW() | VBOXVMM_XCPT_XF_ENABLED_RAW() | VBOXVMM_XCPT_VE_ENABLED_RAW() | VBOXVMM_XCPT_SX_ENABLED_RAW() | VBOXVMM_INT_SOFTWARE_ENABLED_RAW() | VBOXVMM_INT_HARDWARE_ENABLED_RAW() ) != 0 || ( VBOXVMM_INSTR_HALT_ENABLED_RAW() | VBOXVMM_INSTR_MWAIT_ENABLED_RAW() | VBOXVMM_INSTR_MONITOR_ENABLED_RAW() | VBOXVMM_INSTR_CPUID_ENABLED_RAW() | VBOXVMM_INSTR_INVD_ENABLED_RAW() | VBOXVMM_INSTR_WBINVD_ENABLED_RAW() | VBOXVMM_INSTR_INVLPG_ENABLED_RAW() | VBOXVMM_INSTR_RDTSC_ENABLED_RAW() | VBOXVMM_INSTR_RDTSCP_ENABLED_RAW() | VBOXVMM_INSTR_RDPMC_ENABLED_RAW() | VBOXVMM_INSTR_RDMSR_ENABLED_RAW() | VBOXVMM_INSTR_WRMSR_ENABLED_RAW() | VBOXVMM_INSTR_CRX_READ_ENABLED_RAW() | VBOXVMM_INSTR_CRX_WRITE_ENABLED_RAW() | VBOXVMM_INSTR_DRX_READ_ENABLED_RAW() | VBOXVMM_INSTR_DRX_WRITE_ENABLED_RAW() | VBOXVMM_INSTR_PAUSE_ENABLED_RAW() | VBOXVMM_INSTR_XSETBV_ENABLED_RAW() | VBOXVMM_INSTR_SIDT_ENABLED_RAW() | VBOXVMM_INSTR_LIDT_ENABLED_RAW() | VBOXVMM_INSTR_SGDT_ENABLED_RAW() | VBOXVMM_INSTR_LGDT_ENABLED_RAW() | VBOXVMM_INSTR_SLDT_ENABLED_RAW() | VBOXVMM_INSTR_LLDT_ENABLED_RAW() | VBOXVMM_INSTR_STR_ENABLED_RAW() | VBOXVMM_INSTR_LTR_ENABLED_RAW() | VBOXVMM_INSTR_GETSEC_ENABLED_RAW() | VBOXVMM_INSTR_RSM_ENABLED_RAW() | VBOXVMM_INSTR_RDRAND_ENABLED_RAW() | VBOXVMM_INSTR_RDSEED_ENABLED_RAW() | VBOXVMM_INSTR_XSAVES_ENABLED_RAW() | VBOXVMM_INSTR_XRSTORS_ENABLED_RAW() | VBOXVMM_INSTR_VMM_CALL_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMCLEAR_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMLAUNCH_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMPTRLD_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMPTRST_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMREAD_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMRESUME_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMWRITE_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMXOFF_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMXON_ENABLED_RAW() | VBOXVMM_INSTR_VMX_VMFUNC_ENABLED_RAW() | VBOXVMM_INSTR_VMX_INVEPT_ENABLED_RAW() | VBOXVMM_INSTR_VMX_INVVPID_ENABLED_RAW() | VBOXVMM_INSTR_VMX_INVPCID_ENABLED_RAW() ) != 0 || ( VBOXVMM_EXIT_TASK_SWITCH_ENABLED_RAW() | VBOXVMM_EXIT_HALT_ENABLED_RAW() | VBOXVMM_EXIT_MWAIT_ENABLED_RAW() | VBOXVMM_EXIT_MONITOR_ENABLED_RAW() | VBOXVMM_EXIT_CPUID_ENABLED_RAW() | VBOXVMM_EXIT_INVD_ENABLED_RAW() | VBOXVMM_EXIT_WBINVD_ENABLED_RAW() | VBOXVMM_EXIT_INVLPG_ENABLED_RAW() | VBOXVMM_EXIT_RDTSC_ENABLED_RAW() | VBOXVMM_EXIT_RDTSCP_ENABLED_RAW() | VBOXVMM_EXIT_RDPMC_ENABLED_RAW() | VBOXVMM_EXIT_RDMSR_ENABLED_RAW() | VBOXVMM_EXIT_WRMSR_ENABLED_RAW() | VBOXVMM_EXIT_CRX_READ_ENABLED_RAW() | VBOXVMM_EXIT_CRX_WRITE_ENABLED_RAW() | VBOXVMM_EXIT_DRX_READ_ENABLED_RAW() | VBOXVMM_EXIT_DRX_WRITE_ENABLED_RAW() | VBOXVMM_EXIT_PAUSE_ENABLED_RAW() | VBOXVMM_EXIT_XSETBV_ENABLED_RAW() | VBOXVMM_EXIT_SIDT_ENABLED_RAW() | VBOXVMM_EXIT_LIDT_ENABLED_RAW() | VBOXVMM_EXIT_SGDT_ENABLED_RAW() | VBOXVMM_EXIT_LGDT_ENABLED_RAW() | VBOXVMM_EXIT_SLDT_ENABLED_RAW() | VBOXVMM_EXIT_LLDT_ENABLED_RAW() | VBOXVMM_EXIT_STR_ENABLED_RAW() | VBOXVMM_EXIT_LTR_ENABLED_RAW() | VBOXVMM_EXIT_GETSEC_ENABLED_RAW() | VBOXVMM_EXIT_RSM_ENABLED_RAW() | VBOXVMM_EXIT_RDRAND_ENABLED_RAW() | VBOXVMM_EXIT_RDSEED_ENABLED_RAW() | VBOXVMM_EXIT_XSAVES_ENABLED_RAW() | VBOXVMM_EXIT_XRSTORS_ENABLED_RAW() | VBOXVMM_EXIT_VMM_CALL_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMCLEAR_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMLAUNCH_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMPTRLD_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMPTRST_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMREAD_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMRESUME_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMWRITE_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMXOFF_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMXON_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VMFUNC_ENABLED_RAW() | VBOXVMM_EXIT_VMX_INVEPT_ENABLED_RAW() | VBOXVMM_EXIT_VMX_INVVPID_ENABLED_RAW() | VBOXVMM_EXIT_VMX_INVPCID_ENABLED_RAW() | VBOXVMM_EXIT_VMX_EPT_VIOLATION_ENABLED_RAW() | VBOXVMM_EXIT_VMX_EPT_MISCONFIG_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VAPIC_ACCESS_ENABLED_RAW() | VBOXVMM_EXIT_VMX_VAPIC_WRITE_ENABLED_RAW() ) != 0; } /** * Runs the guest code using VT-x. * * @returns Strict VBox status code (i.e. informational status codes too). * @param pVCpu The cross context virtual CPU structure. */ VMMR0DECL(VBOXSTRICTRC) VMXR0RunGuestCode(PVMCPU pVCpu) { PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; Assert(VMMRZCallRing3IsEnabled(pVCpu)); Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn)); HMVMX_ASSERT_PREEMPT_SAFE(pVCpu); VMMRZCallRing3SetNotification(pVCpu, hmR0VmxCallRing3Callback, pCtx); VBOXSTRICTRC rcStrict; if ( !pVCpu->hm.s.fUseDebugLoop && (!VBOXVMM_ANY_PROBES_ENABLED() || !hmR0VmxAnyExpensiveProbesEnabled()) && !DBGFIsStepping(pVCpu) && !pVCpu->CTX_SUFF(pVM)->dbgf.ro.cEnabledInt3Breakpoints) rcStrict = hmR0VmxRunGuestCodeNormal(pVCpu); else rcStrict = hmR0VmxRunGuestCodeDebug(pVCpu); if (rcStrict == VERR_EM_INTERPRETER) rcStrict = VINF_EM_RAW_EMULATE_INSTR; else if (rcStrict == VINF_EM_RESET) rcStrict = VINF_EM_TRIPLE_FAULT; int rc2 = hmR0VmxExitToRing3(pVCpu, rcStrict); if (RT_FAILURE(rc2)) { pVCpu->hm.s.u32HMError = (uint32_t)VBOXSTRICTRC_VAL(rcStrict); rcStrict = rc2; } Assert(!ASMAtomicUoReadU64(&pCtx->fExtrn)); Assert(!VMMRZCallRing3IsNotificationSet(pVCpu)); return rcStrict; } #ifndef HMVMX_USE_FUNCTION_TABLE DECLINLINE(VBOXSTRICTRC) hmR0VmxHandleExit(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, uint32_t rcReason) { #ifdef DEBUG_ramshankar #define VMEXIT_CALL_RET(a_fSave, a_CallExpr) \ do { \ if (a_fSave != 0) \ hmR0VmxImportGuestState(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); \ VBOXSTRICTRC rcStrict = a_CallExpr; \ if (a_fSave != 0) \ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); \ return rcStrict; \ } while (0) #else # define VMEXIT_CALL_RET(a_fSave, a_CallExpr) return a_CallExpr #endif switch (rcReason) { case VMX_EXIT_EPT_MISCONFIG: VMEXIT_CALL_RET(0, hmR0VmxExitEptMisconfig(pVCpu, pVmxTransient)); case VMX_EXIT_EPT_VIOLATION: VMEXIT_CALL_RET(0, hmR0VmxExitEptViolation(pVCpu, pVmxTransient)); case VMX_EXIT_IO_INSTR: VMEXIT_CALL_RET(0, hmR0VmxExitIoInstr(pVCpu, pVmxTransient)); case VMX_EXIT_CPUID: VMEXIT_CALL_RET(0, hmR0VmxExitCpuid(pVCpu, pVmxTransient)); case VMX_EXIT_RDTSC: VMEXIT_CALL_RET(0, hmR0VmxExitRdtsc(pVCpu, pVmxTransient)); case VMX_EXIT_RDTSCP: VMEXIT_CALL_RET(0, hmR0VmxExitRdtscp(pVCpu, pVmxTransient)); case VMX_EXIT_APIC_ACCESS: VMEXIT_CALL_RET(0, hmR0VmxExitApicAccess(pVCpu, pVmxTransient)); case VMX_EXIT_XCPT_OR_NMI: VMEXIT_CALL_RET(0, hmR0VmxExitXcptOrNmi(pVCpu, pVmxTransient)); case VMX_EXIT_MOV_CRX: VMEXIT_CALL_RET(0, hmR0VmxExitMovCRx(pVCpu, pVmxTransient)); case VMX_EXIT_EXT_INT: VMEXIT_CALL_RET(0, hmR0VmxExitExtInt(pVCpu, pVmxTransient)); case VMX_EXIT_INT_WINDOW: VMEXIT_CALL_RET(0, hmR0VmxExitIntWindow(pVCpu, pVmxTransient)); case VMX_EXIT_TPR_BELOW_THRESHOLD: VMEXIT_CALL_RET(0, hmR0VmxExitTprBelowThreshold(pVCpu, pVmxTransient)); case VMX_EXIT_MWAIT: VMEXIT_CALL_RET(0, hmR0VmxExitMwait(pVCpu, pVmxTransient)); case VMX_EXIT_MONITOR: VMEXIT_CALL_RET(0, hmR0VmxExitMonitor(pVCpu, pVmxTransient)); case VMX_EXIT_TASK_SWITCH: VMEXIT_CALL_RET(0, hmR0VmxExitTaskSwitch(pVCpu, pVmxTransient)); case VMX_EXIT_PREEMPT_TIMER: VMEXIT_CALL_RET(0, hmR0VmxExitPreemptTimer(pVCpu, pVmxTransient)); case VMX_EXIT_RDMSR: VMEXIT_CALL_RET(0, hmR0VmxExitRdmsr(pVCpu, pVmxTransient)); case VMX_EXIT_WRMSR: VMEXIT_CALL_RET(0, hmR0VmxExitWrmsr(pVCpu, pVmxTransient)); case VMX_EXIT_VMCALL: VMEXIT_CALL_RET(0, hmR0VmxExitVmcall(pVCpu, pVmxTransient)); case VMX_EXIT_MOV_DRX: VMEXIT_CALL_RET(0, hmR0VmxExitMovDRx(pVCpu, pVmxTransient)); case VMX_EXIT_HLT: VMEXIT_CALL_RET(0, hmR0VmxExitHlt(pVCpu, pVmxTransient)); case VMX_EXIT_INVD: VMEXIT_CALL_RET(0, hmR0VmxExitInvd(pVCpu, pVmxTransient)); case VMX_EXIT_INVLPG: VMEXIT_CALL_RET(0, hmR0VmxExitInvlpg(pVCpu, pVmxTransient)); case VMX_EXIT_RSM: VMEXIT_CALL_RET(0, hmR0VmxExitRsm(pVCpu, pVmxTransient)); case VMX_EXIT_MTF: VMEXIT_CALL_RET(0, hmR0VmxExitMtf(pVCpu, pVmxTransient)); case VMX_EXIT_PAUSE: VMEXIT_CALL_RET(0, hmR0VmxExitPause(pVCpu, pVmxTransient)); case VMX_EXIT_GDTR_IDTR_ACCESS: VMEXIT_CALL_RET(0, hmR0VmxExitXdtrAccess(pVCpu, pVmxTransient)); case VMX_EXIT_LDTR_TR_ACCESS: VMEXIT_CALL_RET(0, hmR0VmxExitXdtrAccess(pVCpu, pVmxTransient)); case VMX_EXIT_WBINVD: VMEXIT_CALL_RET(0, hmR0VmxExitWbinvd(pVCpu, pVmxTransient)); case VMX_EXIT_XSETBV: VMEXIT_CALL_RET(0, hmR0VmxExitXsetbv(pVCpu, pVmxTransient)); case VMX_EXIT_RDRAND: VMEXIT_CALL_RET(0, hmR0VmxExitRdrand(pVCpu, pVmxTransient)); case VMX_EXIT_INVPCID: VMEXIT_CALL_RET(0, hmR0VmxExitInvpcid(pVCpu, pVmxTransient)); case VMX_EXIT_GETSEC: VMEXIT_CALL_RET(0, hmR0VmxExitGetsec(pVCpu, pVmxTransient)); case VMX_EXIT_RDPMC: VMEXIT_CALL_RET(0, hmR0VmxExitRdpmc(pVCpu, pVmxTransient)); #ifdef VBOX_WITH_NESTED_HWVIRT_VMX case VMX_EXIT_VMCLEAR: VMEXIT_CALL_RET(0, hmR0VmxExitVmclear(pVCpu, pVmxTransient)); case VMX_EXIT_VMLAUNCH: VMEXIT_CALL_RET(0, hmR0VmxExitVmlaunch(pVCpu, pVmxTransient)); case VMX_EXIT_VMPTRLD: VMEXIT_CALL_RET(0, hmR0VmxExitVmptrld(pVCpu, pVmxTransient)); case VMX_EXIT_VMPTRST: VMEXIT_CALL_RET(0, hmR0VmxExitVmptrst(pVCpu, pVmxTransient)); case VMX_EXIT_VMREAD: VMEXIT_CALL_RET(0, hmR0VmxExitVmread(pVCpu, pVmxTransient)); case VMX_EXIT_VMRESUME: VMEXIT_CALL_RET(0, hmR0VmxExitVmwrite(pVCpu, pVmxTransient)); case VMX_EXIT_VMWRITE: VMEXIT_CALL_RET(0, hmR0VmxExitVmresume(pVCpu, pVmxTransient)); case VMX_EXIT_VMXOFF: VMEXIT_CALL_RET(0, hmR0VmxExitVmxoff(pVCpu, pVmxTransient)); case VMX_EXIT_VMXON: VMEXIT_CALL_RET(0, hmR0VmxExitVmxon(pVCpu, pVmxTransient)); #else case VMX_EXIT_VMCLEAR: case VMX_EXIT_VMLAUNCH: case VMX_EXIT_VMPTRLD: case VMX_EXIT_VMPTRST: case VMX_EXIT_VMREAD: case VMX_EXIT_VMRESUME: case VMX_EXIT_VMWRITE: case VMX_EXIT_VMXOFF: case VMX_EXIT_VMXON: return hmR0VmxExitSetPendingXcptUD(pVCpu, pVmxTransient); #endif case VMX_EXIT_TRIPLE_FAULT: return hmR0VmxExitTripleFault(pVCpu, pVmxTransient); case VMX_EXIT_NMI_WINDOW: return hmR0VmxExitNmiWindow(pVCpu, pVmxTransient); case VMX_EXIT_INIT_SIGNAL: return hmR0VmxExitInitSignal(pVCpu, pVmxTransient); case VMX_EXIT_SIPI: return hmR0VmxExitSipi(pVCpu, pVmxTransient); case VMX_EXIT_IO_SMI: return hmR0VmxExitIoSmi(pVCpu, pVmxTransient); case VMX_EXIT_SMI: return hmR0VmxExitSmi(pVCpu, pVmxTransient); case VMX_EXIT_ERR_MSR_LOAD: return hmR0VmxExitErrMsrLoad(pVCpu, pVmxTransient); case VMX_EXIT_ERR_INVALID_GUEST_STATE: return hmR0VmxExitErrInvalidGuestState(pVCpu, pVmxTransient); case VMX_EXIT_ERR_MACHINE_CHECK: return hmR0VmxExitErrMachineCheck(pVCpu, pVmxTransient); case VMX_EXIT_INVEPT: case VMX_EXIT_INVVPID: case VMX_EXIT_VMFUNC: case VMX_EXIT_XSAVES: case VMX_EXIT_XRSTORS: return hmR0VmxExitSetPendingXcptUD(pVCpu, pVmxTransient); case VMX_EXIT_ENCLS: case VMX_EXIT_RDSEED: /* only spurious VM-exits, so undefined */ case VMX_EXIT_PML_FULL: default: return hmR0VmxExitErrUndefined(pVCpu, pVmxTransient); } #undef VMEXIT_CALL_RET } #endif /* !HMVMX_USE_FUNCTION_TABLE */ #ifdef VBOX_STRICT /* Is there some generic IPRT define for this that are not in Runtime/internal/\* ?? */ # define HMVMX_ASSERT_PREEMPT_CPUID_VAR() \ RTCPUID const idAssertCpu = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId() # define HMVMX_ASSERT_PREEMPT_CPUID() \ do { \ RTCPUID const idAssertCpuNow = RTThreadPreemptIsEnabled(NIL_RTTHREAD) ? NIL_RTCPUID : RTMpCpuId(); \ AssertMsg(idAssertCpu == idAssertCpuNow, ("VMX %#x, %#x\n", idAssertCpu, idAssertCpuNow)); \ } while (0) # define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \ do { \ AssertPtr((a_pVCpu)); \ AssertPtr((a_pVmxTransient)); \ Assert((a_pVmxTransient)->fVMEntryFailed == false); \ Assert(ASMIntAreEnabled()); \ HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu); \ HMVMX_ASSERT_PREEMPT_CPUID_VAR(); \ Log4Func(("vcpu[%RU32] -v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v-v\n", (a_pVCpu)->idCpu)); \ HMVMX_ASSERT_PREEMPT_SAFE(a_pVCpu); \ if (VMMR0IsLogFlushDisabled((a_pVCpu))) \ HMVMX_ASSERT_PREEMPT_CPUID(); \ HMVMX_STOP_EXIT_DISPATCH_PROF(); \ } while (0) # define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \ do { \ Log4Func(("\n")); \ } while (0) #else # define HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) \ do { \ HMVMX_STOP_EXIT_DISPATCH_PROF(); \ NOREF((a_pVCpu)); NOREF((a_pVmxTransient)); \ } while (0) # define HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(a_pVCpu, a_pVmxTransient) do { } while (0) #endif /** * Advances the guest RIP by the specified number of bytes. * * @param pVCpu The cross context virtual CPU structure. * @param cbInstr Number of bytes to advance the RIP by. * * @remarks No-long-jump zone!!! */ DECLINLINE(void) hmR0VmxAdvanceGuestRipBy(PVMCPU pVCpu, uint32_t cbInstr) { /* Advance the RIP. */ pVCpu->cpum.GstCtx.rip += cbInstr; ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP); /* Update interrupt inhibition. */ if ( VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS) && pVCpu->cpum.GstCtx.rip != EMGetInhibitInterruptsPC(pVCpu)) VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); } /** * Advances the guest RIP after reading it from the VMCS. * * @returns VBox status code, no informational status codes. * @param pVCpu The cross context virtual CPU structure. * @param pVmxTransient Pointer to the VMX transient structure. * * @remarks No-long-jump zone!!! */ static int hmR0VmxAdvanceGuestRip(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= hmR0VmxImportGuestState(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS); AssertRCReturn(rc, rc); hmR0VmxAdvanceGuestRipBy(pVCpu, pVmxTransient->cbInstr); return VINF_SUCCESS; } /** * Tries to determine what part of the guest-state VT-x has deemed as invalid * and update error record fields accordingly. * * @return VMX_IGS_* return codes. * @retval VMX_IGS_REASON_NOT_FOUND if this function could not find anything * wrong with the guest state. * * @param pVCpu The cross context virtual CPU structure. * * @remarks This function assumes our cache of the VMCS controls * are valid, i.e. hmR0VmxCheckVmcsCtls() succeeded. */ static uint32_t hmR0VmxCheckGuestState(PVMCPU pVCpu) { #define HMVMX_ERROR_BREAK(err) { uError = (err); break; } #define HMVMX_CHECK_BREAK(expr, err) if (!(expr)) { \ uError = (err); \ break; \ } else do { } while (0) int rc; PVM pVM = pVCpu->CTX_SUFF(pVM); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; uint32_t uError = VMX_IGS_ERROR; uint32_t u32Val; bool const fUnrestrictedGuest = pVM->hm.s.vmx.fUnrestrictedGuest; do { /* * CR0. */ uint32_t fSetCr0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); uint32_t const fZapCr0 = (uint32_t)(pVM->hm.s.vmx.Msrs.u64Cr0Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr0Fixed1); /* Exceptions for unrestricted-guests for fixed CR0 bits (PE, PG). See Intel spec. 26.3.1 "Checks on Guest Control Registers, Debug Registers and MSRs." */ if (fUnrestrictedGuest) fSetCr0 &= ~(X86_CR0_PE | X86_CR0_PG); uint32_t u32GuestCr0; rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32GuestCr0); AssertRCBreak(rc); HMVMX_CHECK_BREAK((u32GuestCr0 & fSetCr0) == fSetCr0, VMX_IGS_CR0_FIXED1); HMVMX_CHECK_BREAK(!(u32GuestCr0 & ~fZapCr0), VMX_IGS_CR0_FIXED0); if ( !fUnrestrictedGuest && (u32GuestCr0 & X86_CR0_PG) && !(u32GuestCr0 & X86_CR0_PE)) { HMVMX_ERROR_BREAK(VMX_IGS_CR0_PG_PE_COMBO); } /* * CR4. */ uint64_t const fSetCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 & pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); uint64_t const fZapCr4 = (pVM->hm.s.vmx.Msrs.u64Cr4Fixed0 | pVM->hm.s.vmx.Msrs.u64Cr4Fixed1); uint32_t u32GuestCr4; rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR4, &u32GuestCr4); AssertRCBreak(rc); HMVMX_CHECK_BREAK((u32GuestCr4 & fSetCr4) == fSetCr4, VMX_IGS_CR4_FIXED1); HMVMX_CHECK_BREAK(!(u32GuestCr4 & ~fZapCr4), VMX_IGS_CR4_FIXED0); /* * IA32_DEBUGCTL MSR. */ uint64_t u64Val; rc = VMXReadVmcs64(VMX_VMCS64_GUEST_DEBUGCTL_FULL, &u64Val); AssertRCBreak(rc); if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG) && (u64Val & 0xfffffe3c)) /* Bits 31:9, bits 5:2 MBZ. */ { HMVMX_ERROR_BREAK(VMX_IGS_DEBUGCTL_MSR_RESERVED); } uint64_t u64DebugCtlMsr = u64Val; #ifdef VBOX_STRICT rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY, &u32Val); AssertRCBreak(rc); Assert(u32Val == pVCpu->hm.s.vmx.u32EntryCtls); #endif bool const fLongModeGuest = RT_BOOL(pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_IA32E_MODE_GUEST); /* * RIP and RFLAGS. */ uint32_t u32Eflags; #if HC_ARCH_BITS == 64 rc = VMXReadVmcs64(VMX_VMCS_GUEST_RIP, &u64Val); AssertRCBreak(rc); /* pCtx->rip can be different than the one in the VMCS (e.g. run guest code and VM-exits that don't update it). */ if ( !fLongModeGuest || !pCtx->cs.Attr.n.u1Long) { HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffff00000000)), VMX_IGS_LONGMODE_RIP_INVALID); } /** @todo If the processor supports N < 64 linear-address bits, bits 63:N * must be identical if the "IA-32e mode guest" VM-entry * control is 1 and CS.L is 1. No check applies if the * CPU supports 64 linear-address bits. */ /* Flags in pCtx can be different (real-on-v86 for instance). We are only concerned about the VMCS contents here. */ rc = VMXReadVmcs64(VMX_VMCS_GUEST_RFLAGS, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffc08028)), /* Bit 63:22, Bit 15, 5, 3 MBZ. */ VMX_IGS_RFLAGS_RESERVED); HMVMX_CHECK_BREAK((u64Val & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */ u32Eflags = u64Val; #else rc = VMXReadVmcs32(VMX_VMCS_GUEST_RFLAGS, &u32Eflags); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u32Eflags & 0xffc08028), VMX_IGS_RFLAGS_RESERVED); /* Bit 31:22, Bit 15, 5, 3 MBZ. */ HMVMX_CHECK_BREAK((u32Eflags & X86_EFL_RA1_MASK), VMX_IGS_RFLAGS_RESERVED1); /* Bit 1 MB1. */ #endif if ( fLongModeGuest || ( fUnrestrictedGuest && !(u32GuestCr0 & X86_CR0_PE))) { HMVMX_CHECK_BREAK(!(u32Eflags & X86_EFL_VM), VMX_IGS_RFLAGS_VM_INVALID); } uint32_t u32EntryInfo; rc = VMXReadVmcs32(VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO, &u32EntryInfo); AssertRCBreak(rc); if ( VMX_ENTRY_INT_INFO_IS_VALID(u32EntryInfo) && VMX_ENTRY_INT_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INT_INFO_TYPE_EXT_INT) { HMVMX_CHECK_BREAK(u32Eflags & X86_EFL_IF, VMX_IGS_RFLAGS_IF_INVALID); } /* * 64-bit checks. */ #if HC_ARCH_BITS == 64 if (fLongModeGuest) { HMVMX_CHECK_BREAK(u32GuestCr0 & X86_CR0_PG, VMX_IGS_CR0_PG_LONGMODE); HMVMX_CHECK_BREAK(u32GuestCr4 & X86_CR4_PAE, VMX_IGS_CR4_PAE_LONGMODE); } if ( !fLongModeGuest && (u32GuestCr4 & X86_CR4_PCIDE)) { HMVMX_ERROR_BREAK(VMX_IGS_CR4_PCIDE); } /** @todo CR3 field must be such that bits 63:52 and bits in the range * 51:32 beyond the processor's physical-address width are 0. */ if ( (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_DEBUG) && (pCtx->dr[7] & X86_DR7_MBZ_MASK)) { HMVMX_ERROR_BREAK(VMX_IGS_DR7_RESERVED); } rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_ESP, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_ESP_NOT_CANONICAL); rc = VMXReadVmcs64(VMX_VMCS_HOST_SYSENTER_EIP, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_SYSENTER_EIP_NOT_CANONICAL); #endif /* * PERF_GLOBAL MSR. */ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_PERF_MSR) { rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PERF_GLOBAL_CTRL_FULL, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffff8fffffffc)), VMX_IGS_PERF_GLOBAL_MSR_RESERVED); /* Bits 63:35, bits 31:2 MBZ. */ } /* * PAT MSR. */ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_PAT_MSR) { rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PAT_FULL, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0x707070707070707)), VMX_IGS_PAT_MSR_RESERVED); for (unsigned i = 0; i < 8; i++) { uint8_t u8Val = (u64Val & 0xff); if ( u8Val != 0 /* UC */ && u8Val != 1 /* WC */ && u8Val != 4 /* WT */ && u8Val != 5 /* WP */ && u8Val != 6 /* WB */ && u8Val != 7 /* UC- */) { HMVMX_ERROR_BREAK(VMX_IGS_PAT_MSR_INVALID); } u64Val >>= 8; } } /* * EFER MSR. */ if (pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_LOAD_EFER_MSR) { Assert(pVM->hm.s.vmx.fSupportsVmcsEfer); rc = VMXReadVmcs64(VMX_VMCS64_GUEST_EFER_FULL, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xfffffffffffff2fe)), VMX_IGS_EFER_MSR_RESERVED); /* Bits 63:12, bit 9, bits 7:1 MBZ. */ HMVMX_CHECK_BREAK(RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL( pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_IA32E_MODE_GUEST), VMX_IGS_EFER_LMA_GUEST_MODE_MISMATCH); /** @todo r=ramshankar: Unrestricted check here is probably wrong, see * iemVmxVmentryCheckGuestState(). */ HMVMX_CHECK_BREAK( fUnrestrictedGuest || !(u32GuestCr0 & X86_CR0_PG) || RT_BOOL(u64Val & MSR_K6_EFER_LMA) == RT_BOOL(u64Val & MSR_K6_EFER_LME), VMX_IGS_EFER_LMA_LME_MISMATCH); } /* * Segment registers. */ HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE) || !(pCtx->ldtr.Sel & X86_SEL_LDT), VMX_IGS_LDTR_TI_INVALID); if (!(u32Eflags & X86_EFL_VM)) { /* CS */ HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1Present, VMX_IGS_CS_ATTR_P_INVALID); HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xf00), VMX_IGS_CS_ATTR_RESERVED); HMVMX_CHECK_BREAK(!(pCtx->cs.Attr.u & 0xfffe0000), VMX_IGS_CS_ATTR_RESERVED); HMVMX_CHECK_BREAK( (pCtx->cs.u32Limit & 0xfff) == 0xfff || !(pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->cs.u32Limit & 0xfff00000) || (pCtx->cs.Attr.n.u1Granularity), VMX_IGS_CS_ATTR_G_INVALID); /* CS cannot be loaded with NULL in protected mode. */ HMVMX_CHECK_BREAK(pCtx->cs.Attr.u && !(pCtx->cs.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_CS_ATTR_UNUSABLE); HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u1DescType, VMX_IGS_CS_ATTR_S_INVALID); if (pCtx->cs.Attr.n.u4Type == 9 || pCtx->cs.Attr.n.u4Type == 11) HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_UNEQUAL); else if (pCtx->cs.Attr.n.u4Type == 13 || pCtx->cs.Attr.n.u4Type == 15) HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl <= pCtx->ss.Attr.n.u2Dpl, VMX_IGS_CS_SS_ATTR_DPL_MISMATCH); else if (pVM->hm.s.vmx.fUnrestrictedGuest && pCtx->cs.Attr.n.u4Type == 3) HMVMX_CHECK_BREAK(pCtx->cs.Attr.n.u2Dpl == 0, VMX_IGS_CS_ATTR_DPL_INVALID); else HMVMX_ERROR_BREAK(VMX_IGS_CS_ATTR_TYPE_INVALID); /* SS */ HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest || (pCtx->ss.Sel & X86_SEL_RPL) == (pCtx->cs.Sel & X86_SEL_RPL), VMX_IGS_SS_CS_RPL_UNEQUAL); HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u2Dpl == (pCtx->ss.Sel & X86_SEL_RPL), VMX_IGS_SS_ATTR_DPL_RPL_UNEQUAL); if ( !(pCtx->cr0 & X86_CR0_PE) || pCtx->cs.Attr.n.u4Type == 3) { HMVMX_CHECK_BREAK(!pCtx->ss.Attr.n.u2Dpl, VMX_IGS_SS_ATTR_DPL_INVALID); } if (!(pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE)) { HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u4Type == 3 || pCtx->ss.Attr.n.u4Type == 7, VMX_IGS_SS_ATTR_TYPE_INVALID); HMVMX_CHECK_BREAK(pCtx->ss.Attr.n.u1Present, VMX_IGS_SS_ATTR_P_INVALID); HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xf00), VMX_IGS_SS_ATTR_RESERVED); HMVMX_CHECK_BREAK(!(pCtx->ss.Attr.u & 0xfffe0000), VMX_IGS_SS_ATTR_RESERVED); HMVMX_CHECK_BREAK( (pCtx->ss.u32Limit & 0xfff) == 0xfff || !(pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->ss.u32Limit & 0xfff00000) || (pCtx->ss.Attr.n.u1Granularity), VMX_IGS_SS_ATTR_G_INVALID); } /* DS, ES, FS, GS - only check for usable selectors, see hmR0VmxExportGuestSegmenReg(). */ if (!(pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE)) { HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_DS_ATTR_A_INVALID); HMVMX_CHECK_BREAK(pCtx->ds.Attr.n.u1Present, VMX_IGS_DS_ATTR_P_INVALID); HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest || pCtx->ds.Attr.n.u4Type > 11 || pCtx->ds.Attr.n.u2Dpl >= (pCtx->ds.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL); HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xf00), VMX_IGS_DS_ATTR_RESERVED); HMVMX_CHECK_BREAK(!(pCtx->ds.Attr.u & 0xfffe0000), VMX_IGS_DS_ATTR_RESERVED); HMVMX_CHECK_BREAK( (pCtx->ds.u32Limit & 0xfff) == 0xfff || !(pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->ds.u32Limit & 0xfff00000) || (pCtx->ds.Attr.n.u1Granularity), VMX_IGS_DS_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_CODE) || (pCtx->ds.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_DS_ATTR_TYPE_INVALID); } if (!(pCtx->es.Attr.u & X86DESCATTR_UNUSABLE)) { HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_ES_ATTR_A_INVALID); HMVMX_CHECK_BREAK(pCtx->es.Attr.n.u1Present, VMX_IGS_ES_ATTR_P_INVALID); HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest || pCtx->es.Attr.n.u4Type > 11 || pCtx->es.Attr.n.u2Dpl >= (pCtx->es.Sel & X86_SEL_RPL), VMX_IGS_DS_ATTR_DPL_RPL_UNEQUAL); HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xf00), VMX_IGS_ES_ATTR_RESERVED); HMVMX_CHECK_BREAK(!(pCtx->es.Attr.u & 0xfffe0000), VMX_IGS_ES_ATTR_RESERVED); HMVMX_CHECK_BREAK( (pCtx->es.u32Limit & 0xfff) == 0xfff || !(pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->es.u32Limit & 0xfff00000) || (pCtx->es.Attr.n.u1Granularity), VMX_IGS_ES_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_CODE) || (pCtx->es.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_ES_ATTR_TYPE_INVALID); } if (!(pCtx->fs.Attr.u & X86DESCATTR_UNUSABLE)) { HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_FS_ATTR_A_INVALID); HMVMX_CHECK_BREAK(pCtx->fs.Attr.n.u1Present, VMX_IGS_FS_ATTR_P_INVALID); HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest || pCtx->fs.Attr.n.u4Type > 11 || pCtx->fs.Attr.n.u2Dpl >= (pCtx->fs.Sel & X86_SEL_RPL), VMX_IGS_FS_ATTR_DPL_RPL_UNEQUAL); HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xf00), VMX_IGS_FS_ATTR_RESERVED); HMVMX_CHECK_BREAK(!(pCtx->fs.Attr.u & 0xfffe0000), VMX_IGS_FS_ATTR_RESERVED); HMVMX_CHECK_BREAK( (pCtx->fs.u32Limit & 0xfff) == 0xfff || !(pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->fs.u32Limit & 0xfff00000) || (pCtx->fs.Attr.n.u1Granularity), VMX_IGS_FS_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_CODE) || (pCtx->fs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_FS_ATTR_TYPE_INVALID); } if (!(pCtx->gs.Attr.u & X86DESCATTR_UNUSABLE)) { HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_ACCESSED, VMX_IGS_GS_ATTR_A_INVALID); HMVMX_CHECK_BREAK(pCtx->gs.Attr.n.u1Present, VMX_IGS_GS_ATTR_P_INVALID); HMVMX_CHECK_BREAK( pVM->hm.s.vmx.fUnrestrictedGuest || pCtx->gs.Attr.n.u4Type > 11 || pCtx->gs.Attr.n.u2Dpl >= (pCtx->gs.Sel & X86_SEL_RPL), VMX_IGS_GS_ATTR_DPL_RPL_UNEQUAL); HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xf00), VMX_IGS_GS_ATTR_RESERVED); HMVMX_CHECK_BREAK(!(pCtx->gs.Attr.u & 0xfffe0000), VMX_IGS_GS_ATTR_RESERVED); HMVMX_CHECK_BREAK( (pCtx->gs.u32Limit & 0xfff) == 0xfff || !(pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->gs.u32Limit & 0xfff00000) || (pCtx->gs.Attr.n.u1Granularity), VMX_IGS_GS_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_CODE) || (pCtx->gs.Attr.n.u4Type & X86_SEL_TYPE_READ), VMX_IGS_GS_ATTR_TYPE_INVALID); } /* 64-bit capable CPUs. */ #if HC_ARCH_BITS == 64 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL); HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL); HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE) || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL); HMVMX_CHECK_BREAK(!RT_HI_U32(pCtx->cs.u64Base), VMX_IGS_LONGMODE_CS_BASE_INVALID); HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ss.u64Base), VMX_IGS_LONGMODE_SS_BASE_INVALID); HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ds.u64Base), VMX_IGS_LONGMODE_DS_BASE_INVALID); HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->es.u64Base), VMX_IGS_LONGMODE_ES_BASE_INVALID); #endif } else { /* V86 mode checks. */ uint32_t u32CSAttr, u32SSAttr, u32DSAttr, u32ESAttr, u32FSAttr, u32GSAttr; if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { u32CSAttr = 0xf3; u32SSAttr = 0xf3; u32DSAttr = 0xf3; u32ESAttr = 0xf3; u32FSAttr = 0xf3; u32GSAttr = 0xf3; } else { u32CSAttr = pCtx->cs.Attr.u; u32SSAttr = pCtx->ss.Attr.u; u32DSAttr = pCtx->ds.Attr.u; u32ESAttr = pCtx->es.Attr.u; u32FSAttr = pCtx->fs.Attr.u; u32GSAttr = pCtx->gs.Attr.u; } /* CS */ HMVMX_CHECK_BREAK((pCtx->cs.u64Base == (uint64_t)pCtx->cs.Sel << 4), VMX_IGS_V86_CS_BASE_INVALID); HMVMX_CHECK_BREAK(pCtx->cs.u32Limit == 0xffff, VMX_IGS_V86_CS_LIMIT_INVALID); HMVMX_CHECK_BREAK(u32CSAttr == 0xf3, VMX_IGS_V86_CS_ATTR_INVALID); /* SS */ HMVMX_CHECK_BREAK((pCtx->ss.u64Base == (uint64_t)pCtx->ss.Sel << 4), VMX_IGS_V86_SS_BASE_INVALID); HMVMX_CHECK_BREAK(pCtx->ss.u32Limit == 0xffff, VMX_IGS_V86_SS_LIMIT_INVALID); HMVMX_CHECK_BREAK(u32SSAttr == 0xf3, VMX_IGS_V86_SS_ATTR_INVALID); /* DS */ HMVMX_CHECK_BREAK((pCtx->ds.u64Base == (uint64_t)pCtx->ds.Sel << 4), VMX_IGS_V86_DS_BASE_INVALID); HMVMX_CHECK_BREAK(pCtx->ds.u32Limit == 0xffff, VMX_IGS_V86_DS_LIMIT_INVALID); HMVMX_CHECK_BREAK(u32DSAttr == 0xf3, VMX_IGS_V86_DS_ATTR_INVALID); /* ES */ HMVMX_CHECK_BREAK((pCtx->es.u64Base == (uint64_t)pCtx->es.Sel << 4), VMX_IGS_V86_ES_BASE_INVALID); HMVMX_CHECK_BREAK(pCtx->es.u32Limit == 0xffff, VMX_IGS_V86_ES_LIMIT_INVALID); HMVMX_CHECK_BREAK(u32ESAttr == 0xf3, VMX_IGS_V86_ES_ATTR_INVALID); /* FS */ HMVMX_CHECK_BREAK((pCtx->fs.u64Base == (uint64_t)pCtx->fs.Sel << 4), VMX_IGS_V86_FS_BASE_INVALID); HMVMX_CHECK_BREAK(pCtx->fs.u32Limit == 0xffff, VMX_IGS_V86_FS_LIMIT_INVALID); HMVMX_CHECK_BREAK(u32FSAttr == 0xf3, VMX_IGS_V86_FS_ATTR_INVALID); /* GS */ HMVMX_CHECK_BREAK((pCtx->gs.u64Base == (uint64_t)pCtx->gs.Sel << 4), VMX_IGS_V86_GS_BASE_INVALID); HMVMX_CHECK_BREAK(pCtx->gs.u32Limit == 0xffff, VMX_IGS_V86_GS_LIMIT_INVALID); HMVMX_CHECK_BREAK(u32GSAttr == 0xf3, VMX_IGS_V86_GS_ATTR_INVALID); /* 64-bit capable CPUs. */ #if HC_ARCH_BITS == 64 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->fs.u64Base), VMX_IGS_FS_BASE_NOT_CANONICAL); HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->gs.u64Base), VMX_IGS_GS_BASE_NOT_CANONICAL); HMVMX_CHECK_BREAK( (pCtx->ldtr.Attr.u & X86DESCATTR_UNUSABLE) || X86_IS_CANONICAL(pCtx->ldtr.u64Base), VMX_IGS_LDTR_BASE_NOT_CANONICAL); HMVMX_CHECK_BREAK(!RT_HI_U32(pCtx->cs.u64Base), VMX_IGS_LONGMODE_CS_BASE_INVALID); HMVMX_CHECK_BREAK((pCtx->ss.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ss.u64Base), VMX_IGS_LONGMODE_SS_BASE_INVALID); HMVMX_CHECK_BREAK((pCtx->ds.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->ds.u64Base), VMX_IGS_LONGMODE_DS_BASE_INVALID); HMVMX_CHECK_BREAK((pCtx->es.Attr.u & X86DESCATTR_UNUSABLE) || !RT_HI_U32(pCtx->es.u64Base), VMX_IGS_LONGMODE_ES_BASE_INVALID); #endif } /* * TR. */ HMVMX_CHECK_BREAK(!(pCtx->tr.Sel & X86_SEL_LDT), VMX_IGS_TR_TI_INVALID); /* 64-bit capable CPUs. */ #if HC_ARCH_BITS == 64 HMVMX_CHECK_BREAK(X86_IS_CANONICAL(pCtx->tr.u64Base), VMX_IGS_TR_BASE_NOT_CANONICAL); #endif if (fLongModeGuest) { HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u4Type == 11, /* 64-bit busy TSS. */ VMX_IGS_LONGMODE_TR_ATTR_TYPE_INVALID); } else { HMVMX_CHECK_BREAK( pCtx->tr.Attr.n.u4Type == 3 /* 16-bit busy TSS. */ || pCtx->tr.Attr.n.u4Type == 11, /* 32-bit busy TSS.*/ VMX_IGS_TR_ATTR_TYPE_INVALID); } HMVMX_CHECK_BREAK(!pCtx->tr.Attr.n.u1DescType, VMX_IGS_TR_ATTR_S_INVALID); HMVMX_CHECK_BREAK(pCtx->tr.Attr.n.u1Present, VMX_IGS_TR_ATTR_P_INVALID); HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & 0xf00), VMX_IGS_TR_ATTR_RESERVED); /* Bits 11:8 MBZ. */ HMVMX_CHECK_BREAK( (pCtx->tr.u32Limit & 0xfff) == 0xfff || !(pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID); HMVMX_CHECK_BREAK( !(pCtx->tr.u32Limit & 0xfff00000) || (pCtx->tr.Attr.n.u1Granularity), VMX_IGS_TR_ATTR_G_INVALID); HMVMX_CHECK_BREAK(!(pCtx->tr.Attr.u & X86DESCATTR_UNUSABLE), VMX_IGS_TR_ATTR_UNUSABLE); /* * GDTR and IDTR. */ #if HC_ARCH_BITS == 64 rc = VMXReadVmcs64(VMX_VMCS_GUEST_GDTR_BASE, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_GDTR_BASE_NOT_CANONICAL); rc = VMXReadVmcs64(VMX_VMCS_GUEST_IDTR_BASE, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(X86_IS_CANONICAL(u64Val), VMX_IGS_IDTR_BASE_NOT_CANONICAL); #endif rc = VMXReadVmcs32(VMX_VMCS32_GUEST_GDTR_LIMIT, &u32Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_GDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */ rc = VMXReadVmcs32(VMX_VMCS32_GUEST_IDTR_LIMIT, &u32Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u32Val & 0xffff0000), VMX_IGS_IDTR_LIMIT_INVALID); /* Bits 31:16 MBZ. */ /* * Guest Non-Register State. */ /* Activity State. */ uint32_t u32ActivityState; rc = VMXReadVmcs32(VMX_VMCS32_GUEST_ACTIVITY_STATE, &u32ActivityState); AssertRCBreak(rc); HMVMX_CHECK_BREAK( !u32ActivityState || (u32ActivityState & RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Misc, VMX_BF_MISC_ACTIVITY_STATES)), VMX_IGS_ACTIVITY_STATE_INVALID); HMVMX_CHECK_BREAK( !(pCtx->ss.Attr.n.u2Dpl) || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_HLT, VMX_IGS_ACTIVITY_STATE_HLT_INVALID); uint32_t u32IntrState; rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &u32IntrState); AssertRCBreak(rc); if ( u32IntrState == VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS || u32IntrState == VMX_VMCS_GUEST_INT_STATE_BLOCK_STI) { HMVMX_CHECK_BREAK(u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_ACTIVE, VMX_IGS_ACTIVITY_STATE_ACTIVE_INVALID); } /** @todo Activity state and injecting interrupts. Left as a todo since we * currently don't use activity states but ACTIVE. */ HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_ENTRY_TO_SMM) || u32ActivityState != VMX_VMCS_GUEST_ACTIVITY_SIPI_WAIT, VMX_IGS_ACTIVITY_STATE_SIPI_WAIT_INVALID); /* Guest interruptibility-state. */ HMVMX_CHECK_BREAK(!(u32IntrState & 0xffffffe0), VMX_IGS_INTERRUPTIBILITY_STATE_RESERVED); HMVMX_CHECK_BREAK((u32IntrState & (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS)) != (VMX_VMCS_GUEST_INT_STATE_BLOCK_STI | VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS), VMX_IGS_INTERRUPTIBILITY_STATE_STI_MOVSS_INVALID); HMVMX_CHECK_BREAK( (u32Eflags & X86_EFL_IF) || !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI), VMX_IGS_INTERRUPTIBILITY_STATE_STI_EFL_INVALID); if (VMX_ENTRY_INT_INFO_IS_VALID(u32EntryInfo)) { if (VMX_ENTRY_INT_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INT_INFO_TYPE_EXT_INT) { HMVMX_CHECK_BREAK( !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI) && !(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS), VMX_IGS_INTERRUPTIBILITY_STATE_EXT_INT_INVALID); } else if (VMX_ENTRY_INT_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INT_INFO_TYPE_NMI) { HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS), VMX_IGS_INTERRUPTIBILITY_STATE_MOVSS_INVALID); HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI), VMX_IGS_INTERRUPTIBILITY_STATE_STI_INVALID); } } /** @todo Assumes the processor is not in SMM. */ HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI), VMX_IGS_INTERRUPTIBILITY_STATE_SMI_INVALID); HMVMX_CHECK_BREAK( !(pVCpu->hm.s.vmx.u32EntryCtls & VMX_ENTRY_CTLS_ENTRY_TO_SMM) || (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_SMI), VMX_IGS_INTERRUPTIBILITY_STATE_SMI_SMM_INVALID); if ( (pVCpu->hm.s.vmx.u32PinCtls & VMX_PIN_CTLS_VIRT_NMI) && VMX_ENTRY_INT_INFO_IS_VALID(u32EntryInfo) && VMX_ENTRY_INT_INFO_TYPE(u32EntryInfo) == VMX_EXIT_INT_INFO_TYPE_NMI) { HMVMX_CHECK_BREAK(!(u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_NMI), VMX_IGS_INTERRUPTIBILITY_STATE_NMI_INVALID); } /* Pending debug exceptions. */ #if HC_ARCH_BITS == 64 rc = VMXReadVmcs64(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, &u64Val); AssertRCBreak(rc); /* Bits 63:15, Bit 13, Bits 11:4 MBZ. */ HMVMX_CHECK_BREAK(!(u64Val & UINT64_C(0xffffffffffffaff0)), VMX_IGS_LONGMODE_PENDING_DEBUG_RESERVED); u32Val = u64Val; /* For pending debug exceptions checks below. */ #else rc = VMXReadVmcs32(VMX_VMCS_GUEST_PENDING_DEBUG_XCPTS, &u32Val); AssertRCBreak(rc); /* Bits 31:15, Bit 13, Bits 11:4 MBZ. */ HMVMX_CHECK_BREAK(!(u32Val & 0xffffaff0), VMX_IGS_PENDING_DEBUG_RESERVED); #endif if ( (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI) || (u32IntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS) || u32ActivityState == VMX_VMCS_GUEST_ACTIVITY_HLT) { if ( (u32Eflags & X86_EFL_TF) && !(u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */ { /* Bit 14 is PendingDebug.BS. */ HMVMX_CHECK_BREAK(u32Val & RT_BIT(14), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_SET); } if ( !(u32Eflags & X86_EFL_TF) || (u64DebugCtlMsr & RT_BIT_64(1))) /* Bit 1 is IA32_DEBUGCTL.BTF. */ { /* Bit 14 is PendingDebug.BS. */ HMVMX_CHECK_BREAK(!(u32Val & RT_BIT(14)), VMX_IGS_PENDING_DEBUG_XCPT_BS_NOT_CLEAR); } } /* VMCS link pointer. */ rc = VMXReadVmcs64(VMX_VMCS64_GUEST_VMCS_LINK_PTR_FULL, &u64Val); AssertRCBreak(rc); if (u64Val != UINT64_C(0xffffffffffffffff)) { HMVMX_CHECK_BREAK(!(u64Val & 0xfff), VMX_IGS_VMCS_LINK_PTR_RESERVED); /** @todo Bits beyond the processor's physical-address width MBZ. */ /** @todo 32-bit located in memory referenced by value of this field (as a * physical address) must contain the processor's VMCS revision ID. */ /** @todo SMM checks. */ } /** @todo Checks on Guest Page-Directory-Pointer-Table Entries when guest is * not using Nested Paging? */ if ( pVM->hm.s.fNestedPaging && !fLongModeGuest && CPUMIsGuestInPAEModeEx(pCtx)) { rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE0_FULL, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED); rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE1_FULL, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED); rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE2_FULL, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED); rc = VMXReadVmcs64(VMX_VMCS64_GUEST_PDPTE3_FULL, &u64Val); AssertRCBreak(rc); HMVMX_CHECK_BREAK(!(u64Val & X86_PDPE_PAE_MBZ_MASK), VMX_IGS_PAE_PDPTE_RESERVED); } /* Shouldn't happen but distinguish it from AssertRCBreak() errors. */ if (uError == VMX_IGS_ERROR) uError = VMX_IGS_REASON_NOT_FOUND; } while (0); pVCpu->hm.s.u32HMError = uError; return uError; #undef HMVMX_ERROR_BREAK #undef HMVMX_CHECK_BREAK } /** @name VM-exit handlers. * @{ */ /* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ /* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- VM-exit handlers -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */ /* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ /** * VM-exit handler for external interrupts (VMX_EXIT_EXT_INT). */ HMVMX_EXIT_DECL hmR0VmxExitExtInt(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitExtInt); /* Windows hosts (32-bit and 64-bit) have DPC latency issues. See @bugref{6853}. */ if (VMMR0ThreadCtxHookIsEnabled(pVCpu)) return VINF_SUCCESS; return VINF_EM_RAW_INTERRUPT; } /** * VM-exit handler for exceptions or NMIs (VMX_EXIT_XCPT_OR_NMI). */ HMVMX_EXIT_DECL hmR0VmxExitXcptOrNmi(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitXcptNmi, y3); int rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient); AssertRCReturn(rc, rc); uint32_t uIntType = VMX_EXIT_INT_INFO_TYPE(pVmxTransient->uExitIntInfo); Assert( !(pVCpu->hm.s.vmx.u32ExitCtls & VMX_EXIT_CTLS_ACK_EXT_INT) && uIntType != VMX_EXIT_INT_INFO_TYPE_EXT_INT); Assert(VMX_EXIT_INT_INFO_IS_VALID(pVmxTransient->uExitIntInfo)); if (uIntType == VMX_EXIT_INT_INFO_TYPE_NMI) { /* * This cannot be a guest NMI as the only way for the guest to receive an NMI is if we * injected it ourselves and anything we inject is not going to cause a VM-exit directly * for the event being injected[1]. Go ahead and dispatch the NMI to the host[2]. * * [1] -- See Intel spec. 27.2.3 "Information for VM Exits During Event Delivery". * [2] -- See Intel spec. 27.5.5 "Updating Non-Register State". */ VMXDispatchHostNmi(); STAM_REL_COUNTER_INC(&pVCpu->hm.s.StatExitHostNmiInGC); STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); return VINF_SUCCESS; } /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ VBOXSTRICTRC rcStrictRc1 = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient); if (RT_UNLIKELY(rcStrictRc1 == VINF_SUCCESS)) { /* likely */ } else { if (rcStrictRc1 == VINF_HM_DOUBLE_FAULT) rcStrictRc1 = VINF_SUCCESS; STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); return rcStrictRc1; } uint32_t uExitIntInfo = pVmxTransient->uExitIntInfo; uint32_t uVector = VMX_EXIT_INT_INFO_VECTOR(uExitIntInfo); switch (uIntType) { case VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT: /* Privileged software exception. (#DB from ICEBP) */ Assert(uVector == X86_XCPT_DB); RT_FALL_THRU(); case VMX_EXIT_INT_INFO_TYPE_SW_XCPT: /* Software exception. (#BP or #OF) */ Assert(uVector == X86_XCPT_BP || uVector == X86_XCPT_OF || uIntType == VMX_EXIT_INT_INFO_TYPE_PRIV_SW_XCPT); RT_FALL_THRU(); case VMX_EXIT_INT_INFO_TYPE_HW_XCPT: { /* * If there's any exception caused as a result of event injection, the resulting * secondary/final execption will be pending, we shall continue guest execution * after injecting the event. The page-fault case is complicated and we manually * handle any currently pending event in hmR0VmxExitXcptPF. */ if (!pVCpu->hm.s.Event.fPending) { /* likely */ } else if (uVector != X86_XCPT_PF) { rc = VINF_SUCCESS; break; } switch (uVector) { case X86_XCPT_PF: rc = hmR0VmxExitXcptPF(pVCpu, pVmxTransient); break; case X86_XCPT_GP: rc = hmR0VmxExitXcptGP(pVCpu, pVmxTransient); break; case X86_XCPT_MF: rc = hmR0VmxExitXcptMF(pVCpu, pVmxTransient); break; case X86_XCPT_DB: rc = hmR0VmxExitXcptDB(pVCpu, pVmxTransient); break; case X86_XCPT_BP: rc = hmR0VmxExitXcptBP(pVCpu, pVmxTransient); break; case X86_XCPT_AC: rc = hmR0VmxExitXcptAC(pVCpu, pVmxTransient); break; case X86_XCPT_NM: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNM); rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break; case X86_XCPT_XF: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXF); rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break; case X86_XCPT_DE: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDE); rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break; case X86_XCPT_UD: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestUD); rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break; case X86_XCPT_SS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestSS); rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break; case X86_XCPT_NP: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestNP); rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break; case X86_XCPT_TS: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestTS); rc = hmR0VmxExitXcptGeneric(pVCpu, pVmxTransient); break; default: { STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestXcpUnk); if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { Assert(pVCpu->CTX_SUFF(pVM)->hm.s.vmx.pRealModeTSS); Assert(PDMVmmDevHeapIsEnabled(pVCpu->CTX_SUFF(pVM))); Assert(CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx)); rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient); AssertRCReturn(rc, rc); hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(uExitIntInfo), pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); } else { AssertMsgFailed(("Unexpected VM-exit caused by exception %#x\n", uVector)); pVCpu->hm.s.u32HMError = uVector; rc = VERR_VMX_UNEXPECTED_EXCEPTION; } break; } } break; } default: { pVCpu->hm.s.u32HMError = uExitIntInfo; rc = VERR_VMX_UNEXPECTED_INTERRUPTION_EXIT_TYPE; AssertMsgFailed(("Unexpected interruption info %#x\n", VMX_EXIT_INT_INFO_TYPE(uExitIntInfo))); break; } } STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitXcptNmi, y3); return rc; } /** * VM-exit handler for interrupt-window exiting (VMX_EXIT_INT_WINDOW). */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitIntWindow(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /* Indicate that we no longer need to VM-exit when the guest is ready to receive interrupts, it is now ready. */ hmR0VmxClearIntWindowExitVmcs(pVCpu); /* Deliver the pending interrupts via hmR0VmxEvaluatePendingEvent() and resume guest execution. */ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIntWindow); return VINF_SUCCESS; } /** * VM-exit handler for NMI-window exiting (VMX_EXIT_NMI_WINDOW). */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitNmiWindow(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); if (RT_UNLIKELY(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_NMI_WINDOW_EXIT))) { AssertMsgFailed(("Unexpected NMI-window exit.\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } Assert(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS)); /* * If block-by-STI is set when we get this VM-exit, it means the CPU doesn't block NMIs following STI. * It is therefore safe to unblock STI and deliver the NMI ourselves. See @bugref{7445}. */ uint32_t fIntrState = 0; int rc = VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState); AssertRCReturn(rc, rc); Assert(!(fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_MOVSS)); if (fIntrState & VMX_VMCS_GUEST_INT_STATE_BLOCK_STI) { if (VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)) VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS); fIntrState &= ~VMX_VMCS_GUEST_INT_STATE_BLOCK_STI; rc = VMXWriteVmcs32(VMX_VMCS32_GUEST_INT_STATE, fIntrState); AssertRCReturn(rc, rc); } /* Indicate that we no longer need to VM-exit when the guest is ready to receive NMIs, it is now ready */ hmR0VmxClearNmiWindowExitVmcs(pVCpu); /* Deliver the pending NMI via hmR0VmxEvaluatePendingEvent() and resume guest execution. */ return VINF_SUCCESS; } /** * VM-exit handler for WBINVD (VMX_EXIT_WBINVD). Conditional VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitWbinvd(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); } /** * VM-exit handler for INVD (VMX_EXIT_INVD). Unconditional VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitInvd(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); } /** * VM-exit handler for CPUID (VMX_EXIT_CPUID). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitCpuid(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /* * Get the state we need and update the exit history entry. */ int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK); AssertRCReturn(rc, rc); VBOXSTRICTRC rcStrict; PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_CPUID), pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base); if (!pExitRec) { /* * Regular CPUID instruction execution. */ rcStrict = IEMExecDecodedCpuid(pVCpu, pVmxTransient->cbInstr); if (rcStrict == VINF_SUCCESS) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } } else { /* * Frequent exit or something needing probing. Get state and call EMHistoryExec. */ int rc2 = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc2, rc2); Log4(("CpuIdExit/%u: %04x:%08RX64: %#x/%#x -> EMHistoryExec\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, pVCpu->cpum.GstCtx.eax, pVCpu->cpum.GstCtx.ecx)); rcStrict = EMHistoryExec(pVCpu, pExitRec, 0); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); Log4(("CpuIdExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip)); } return rcStrict; } /** * VM-exit handler for GETSEC (VMX_EXIT_GETSEC). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitGetsec(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR4); AssertRCReturn(rc, rc); if (pVCpu->cpum.GstCtx.cr4 & X86_CR4_SMXE) return VINF_EM_RAW_EMULATE_INSTR; AssertMsgFailed(("hmR0VmxExitGetsec: unexpected VM-exit when CR4.SMXE is 0.\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for RDTSC (VMX_EXIT_RDTSC). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitRdtsc(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); AssertRCReturn(rc, rc); VBOXSTRICTRC rcStrict = IEMExecDecodedRdtsc(pVCpu, pVmxTransient->cbInstr); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING) pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); } else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; } /** * VM-exit handler for RDTSCP (VMX_EXIT_RDTSCP). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitRdtscp(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_TSC_AUX); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); AssertRCReturn(rc, rc); VBOXSTRICTRC rcStrict = IEMExecDecodedRdtscp(pVCpu, pVmxTransient->cbInstr); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* If we get a spurious VM-exit when offsetting is enabled, we must reset offsetting on VM-reentry. See @bugref{6634}. */ if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TSC_OFFSETTING) pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); } else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; } /** * VM-exit handler for RDPMC (VMX_EXIT_RDPMC). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitRdpmc(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR4 | CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS); AssertRCReturn(rc, rc); PVM pVM = pVCpu->CTX_SUFF(pVM); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; rc = EMInterpretRdpmc(pVM, pVCpu, CPUMCTX2CORE(pCtx)); if (RT_LIKELY(rc == VINF_SUCCESS)) { rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); Assert(pVmxTransient->cbInstr == 2); } else { AssertMsgFailed(("hmR0VmxExitRdpmc: EMInterpretRdpmc failed with %Rrc\n", rc)); rc = VERR_EM_INTERPRETER; } return rc; } /** * VM-exit handler for VMCALL (VMX_EXIT_VMCALL). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmcall(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); VBOXSTRICTRC rcStrict = VERR_VMX_IPE_3; if (EMAreHypercallInstructionsEnabled(pVCpu)) { int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_SS | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_EFER); AssertRCReturn(rc, rc); /* Perform the hypercall. */ rcStrict = GIMHypercall(pVCpu, &pVCpu->cpum.GstCtx); if (rcStrict == VINF_SUCCESS) { rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); } else Assert( rcStrict == VINF_GIM_R3_HYPERCALL || rcStrict == VINF_GIM_HYPERCALL_CONTINUING || RT_FAILURE(rcStrict)); /* If the hypercall changes anything other than guest's general-purpose registers, we would need to reload the guest changed bits here before VM-entry. */ } else Log4Func(("Hypercalls not enabled\n")); /* If hypercalls are disabled or the hypercall failed for some reason, raise #UD and continue. */ if (RT_FAILURE(rcStrict)) { hmR0VmxSetPendingXcptUD(pVCpu); rcStrict = VINF_SUCCESS; } return rcStrict; } /** * VM-exit handler for INVLPG (VMX_EXIT_INVLPG). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitInvlpg(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging || pVCpu->hm.s.fUsingDebugLoop); int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK); AssertRCReturn(rc, rc); VBOXSTRICTRC rcStrict = IEMExecDecodedInvlpg(pVCpu, pVmxTransient->cbInstr, pVmxTransient->uExitQual); if (rcStrict == VINF_SUCCESS || rcStrict == VINF_PGM_SYNC_CR3) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } else AssertMsgFailed(("Unexpected IEMExecDecodedInvlpg(%#RX64) sttus: %Rrc\n", pVmxTransient->uExitQual, VBOXSTRICTRC_VAL(rcStrict))); return rcStrict; } /** * VM-exit handler for MONITOR (VMX_EXIT_MONITOR). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitMonitor(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS); AssertRCReturn(rc, rc); PVM pVM = pVCpu->CTX_SUFF(pVM); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; rc = EMInterpretMonitor(pVM, pVCpu, CPUMCTX2CORE(pCtx)); if (RT_LIKELY(rc == VINF_SUCCESS)) rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); else { AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMonitor: EMInterpretMonitor failed with %Rrc\n", rc)); rc = VERR_EM_INTERPRETER; } STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMonitor); return rc; } /** * VM-exit handler for MWAIT (VMX_EXIT_MWAIT). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitMwait(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0 | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_SS); AssertRCReturn(rc, rc); PVM pVM = pVCpu->CTX_SUFF(pVM); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; VBOXSTRICTRC rc2 = EMInterpretMWait(pVM, pVCpu, CPUMCTX2CORE(pCtx)); rc = VBOXSTRICTRC_VAL(rc2); if (RT_LIKELY( rc == VINF_SUCCESS || rc == VINF_EM_HALT)) { int rc3 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); AssertRCReturn(rc3, rc3); if ( rc == VINF_EM_HALT && EMMonitorWaitShouldContinue(pVCpu, pCtx)) rc = VINF_SUCCESS; } else { AssertMsg(rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMwait: EMInterpretMWait failed with %Rrc\n", rc)); rc = VERR_EM_INTERPRETER; } AssertMsg(rc == VINF_SUCCESS || rc == VINF_EM_HALT || rc == VERR_EM_INTERPRETER, ("hmR0VmxExitMwait: failed, invalid error code %Rrc\n", rc)); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMwait); return rc; } /** * VM-exit handler for RSM (VMX_EXIT_RSM). Unconditional VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitRsm(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { /* * Execution of RSM outside of SMM mode causes #UD regardless of VMX root or VMX non-root * mode. In theory, we should never get this VM-exit. This can happen only if dual-monitor * treatment of SMI and VMX is enabled, which can (only?) be done by executing VMCALL in * VMX root operation. If we get here, something funny is going on. * * See Intel spec. 33.15.5 "Enabling the Dual-Monitor Treatment". */ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); AssertMsgFailed(("Unexpected RSM VM-exit\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for SMI (VMX_EXIT_SMI). Unconditional VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitSmi(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { /* * This can only happen if we support dual-monitor treatment of SMI, which can be activated * by executing VMCALL in VMX root operation. Only an STM (SMM transfer monitor) would get * this VM-exit when we (the executive monitor) execute a VMCALL in VMX root mode or receive * an SMI. If we get here, something funny is going on. * * See Intel spec. 33.15.6 "Activating the Dual-Monitor Treatment" * See Intel spec. 25.3 "Other Causes of VM-Exits" */ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); AssertMsgFailed(("Unexpected SMI VM-exit\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for IO SMI (VMX_EXIT_IO_SMI). Unconditional VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitIoSmi(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { /* Same treatment as VMX_EXIT_SMI. See comment in hmR0VmxExitSmi(). */ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); AssertMsgFailed(("Unexpected IO SMI VM-exit\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for SIPI (VMX_EXIT_SIPI). Conditional VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitSipi(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { /* * SIPI exits can only occur in VMX non-root operation when the "wait-for-SIPI" guest activity state is used. * We don't make use of it as our guests don't have direct access to the host LAPIC. * See Intel spec. 25.3 "Other Causes of VM-exits". */ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); AssertMsgFailed(("Unexpected SIPI VM-exit\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for INIT signal (VMX_EXIT_INIT_SIGNAL). Unconditional * VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitInitSignal(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { /* * INIT signals are blocked in VMX root operation by VMXON and by SMI in SMM. * See Intel spec. 33.14.1 Default Treatment of SMI Delivery" and Intel spec. 29.3 "VMX Instructions" for "VMXON". * * It is -NOT- blocked in VMX non-root operation so we can, in theory, still get these VM-exits. * See Intel spec. "23.8 Restrictions on VMX operation". */ HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); return VINF_SUCCESS; } /** * VM-exit handler for triple faults (VMX_EXIT_TRIPLE_FAULT). Unconditional * VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitTripleFault(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); return VINF_EM_RESET; } /** * VM-exit handler for HLT (VMX_EXIT_HLT). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitHlt(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_HLT_EXIT); int rc = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RFLAGS); AssertRCReturn(rc, rc); if (EMShouldContinueAfterHalt(pVCpu, &pVCpu->cpum.GstCtx)) /* Requires eflags. */ rc = VINF_SUCCESS; else rc = VINF_EM_HALT; STAM_COUNTER_INC(&pVCpu->hm.s.StatExitHlt); if (rc != VINF_SUCCESS) STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchHltToR3); return rc; } /** * VM-exit handler for instructions that result in a \#UD exception delivered to * the guest. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitSetPendingXcptUD(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); hmR0VmxSetPendingXcptUD(pVCpu); return VINF_SUCCESS; } /** * VM-exit handler for expiry of the VMX preemption timer. */ HMVMX_EXIT_DECL hmR0VmxExitPreemptTimer(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /* If the preemption-timer has expired, reinitialize the preemption timer on next VM-entry. */ pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; /* If there are any timer events pending, fall back to ring-3, otherwise resume guest execution. */ PVM pVM = pVCpu->CTX_SUFF(pVM); bool fTimersPending = TMTimerPollBool(pVM, pVCpu); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitPreemptTimer); return fTimersPending ? VINF_EM_RAW_TIMER_PENDING : VINF_SUCCESS; } /** * VM-exit handler for XSETBV (VMX_EXIT_XSETBV). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitXsetbv(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_CR4); AssertRCReturn(rc, rc); VBOXSTRICTRC rcStrict = IEMExecDecodedXsetbv(pVCpu, pVmxTransient->cbInstr); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, rcStrict != VINF_IEM_RAISED_XCPT ? HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS : HM_CHANGED_RAISED_XCPT_MASK); PCCPUMCTX pCtx = &pVCpu->cpum.GstCtx; pVCpu->hm.s.fLoadSaveGuestXcr0 = (pCtx->cr4 & X86_CR4_OSXSAVE) && pCtx->aXcr[0] != ASMGetXcr0(); return rcStrict; } /** * VM-exit handler for INVPCID (VMX_EXIT_INVPCID). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitInvpcid(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /** @todo Use VM-exit instruction information. */ return VERR_EM_INTERPRETER; } /** * VM-exit handler for invalid-guest-state (VMX_EXIT_ERR_INVALID_GUEST_STATE). * Error VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrInvalidGuestState(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc, rc); rc = hmR0VmxCheckVmcsCtls(pVCpu); if (RT_FAILURE(rc)) return rc; uint32_t uInvalidReason = hmR0VmxCheckGuestState(pVCpu); NOREF(uInvalidReason); #ifdef VBOX_STRICT uint32_t fIntrState; RTHCUINTREG uHCReg; uint64_t u64Val; uint32_t u32Val; rc = hmR0VmxReadEntryIntInfoVmcs(pVmxTransient); rc |= hmR0VmxReadEntryXcptErrorCodeVmcs(pVmxTransient); rc |= hmR0VmxReadEntryInstrLenVmcs(pVmxTransient); rc |= VMXReadVmcs32(VMX_VMCS32_GUEST_INT_STATE, &fIntrState); AssertRCReturn(rc, rc); Log4(("uInvalidReason %u\n", uInvalidReason)); Log4(("VMX_VMCS32_CTRL_ENTRY_INTERRUPTION_INFO %#RX32\n", pVmxTransient->uEntryIntInfo)); Log4(("VMX_VMCS32_CTRL_ENTRY_EXCEPTION_ERRCODE %#RX32\n", pVmxTransient->uEntryXcptErrorCode)); Log4(("VMX_VMCS32_CTRL_ENTRY_INSTR_LENGTH %#RX32\n", pVmxTransient->cbEntryInstr)); Log4(("VMX_VMCS32_GUEST_INT_STATE %#RX32\n", fIntrState)); rc = VMXReadVmcs32(VMX_VMCS_GUEST_CR0, &u32Val); AssertRC(rc); Log4(("VMX_VMCS_GUEST_CR0 %#RX32\n", u32Val)); rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_MASK, &uHCReg); AssertRC(rc); Log4(("VMX_VMCS_CTRL_CR0_MASK %#RHr\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR0_READ_SHADOW, &uHCReg); AssertRC(rc); Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_MASK, &uHCReg); AssertRC(rc); Log4(("VMX_VMCS_CTRL_CR4_MASK %#RHr\n", uHCReg)); rc = VMXReadVmcsHstN(VMX_VMCS_CTRL_CR4_READ_SHADOW, &uHCReg); AssertRC(rc); Log4(("VMX_VMCS_CTRL_CR4_READ_SHADOW %#RHr\n", uHCReg)); rc = VMXReadVmcs64(VMX_VMCS64_CTRL_EPTP_FULL, &u64Val); AssertRC(rc); Log4(("VMX_VMCS64_CTRL_EPTP_FULL %#RX64\n", u64Val)); hmR0DumpRegs(pVCpu); #else NOREF(pVmxTransient); #endif return VERR_VMX_INVALID_GUEST_STATE; } /** * VM-exit handler for VM-entry failure due to an MSR-load * (VMX_EXIT_ERR_MSR_LOAD). Error VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrMsrLoad(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { AssertMsgFailed(("Unexpected MSR-load exit\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for VM-entry failure due to a machine-check event * (VMX_EXIT_ERR_MACHINE_CHECK). Error VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrMachineCheck(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { AssertMsgFailed(("Unexpected machine-check event exit\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for all undefined reasons. Should never ever happen.. in * theory. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitErrUndefined(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { RT_NOREF2(pVCpu, pVmxTransient); AssertMsgFailed(("Huh!? Undefined VM-exit reason %d\n", pVmxTransient->uExitReason)); return VERR_VMX_UNDEFINED_EXIT_CODE; } /** * VM-exit handler for XDTR (LGDT, SGDT, LIDT, SIDT) accesses * (VMX_EXIT_GDTR_IDTR_ACCESS) and LDT and TR access (LLDT, LTR, SLDT, STR). * Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitXdtrAccess(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /* By default, we don't enable VMX_PROC_CTLS2_DESCRIPTOR_TABLE_EXIT. */ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitXdtrAccess); if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_PROC_CTLS2_DESC_TABLE_EXIT) return VERR_EM_INTERPRETER; AssertMsgFailed(("Unexpected XDTR access\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for RDRAND (VMX_EXIT_RDRAND). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitRdrand(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /* By default, we don't enable VMX_PROC_CTLS2_RDRAND_EXIT. */ if (pVCpu->hm.s.vmx.u32ProcCtls2 & VMX_PROC_CTLS2_RDRAND_EXIT) return VERR_EM_INTERPRETER; AssertMsgFailed(("Unexpected RDRAND exit\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /** * VM-exit handler for RDMSR (VMX_EXIT_RDMSR). */ HMVMX_EXIT_DECL hmR0VmxExitRdmsr(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /** @todo Optimize this: We currently drag in in the whole MSR state * (CPUMCTX_EXTRN_ALL_MSRS) here. We should optimize this to only get * MSRs required. That would require changes to IEM and possibly CPUM too. * (Should probably do it lazy fashion from CPUMAllMsrs.cpp). */ uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx; int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS); switch (idMsr) { /* The FS and GS base MSRs are not part of the above all-MSRs mask. */ case MSR_K8_FS_BASE: rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_FS); break; case MSR_K8_GS_BASE: rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_GS); break; } AssertRCReturn(rc, rc); Log4Func(("ecx=%#RX32\n", idMsr)); #ifdef VBOX_STRICT if (pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS) { if ( hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, idMsr) && idMsr != MSR_K6_EFER) { AssertMsgFailed(("Unexpected RDMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n", idMsr)); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr)) { VMXMSREXITREAD enmRead; VMXMSREXITWRITE enmWrite; int rc2 = HMVmxGetMsrPermission(pVCpu->hm.s.vmx.pvMsrBitmap, idMsr, &enmRead, &enmWrite); AssertRCReturn(rc2, rc2); if (enmRead == VMXMSREXIT_PASSTHRU_READ) { AssertMsgFailed(("Unexpected RDMSR for a passthru lazy-restore MSR. ecx=%#RX32\n", idMsr)); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } } } #endif VBOXSTRICTRC rcStrict = IEMExecDecodedRdmsr(pVCpu, pVmxTransient->cbInstr); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitRdmsr); if (rcStrict == VINF_SUCCESS) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_RAX | HM_CHANGED_GUEST_RDX); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } else AssertMsg(rcStrict == VINF_CPUM_R3_MSR_READ, ("Unexpected IEMExecDecodedRdmsr rc (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict))); return rcStrict; } /** * VM-exit handler for WRMSR (VMX_EXIT_WRMSR). */ HMVMX_EXIT_DECL hmR0VmxExitWrmsr(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /** @todo Optimize this: We currently drag in in the whole MSR state * (CPUMCTX_EXTRN_ALL_MSRS) here. We should optimize this to only get * MSRs required. That would require changes to IEM and possibly CPUM too. * (Should probably do it lazy fashion from CPUMAllMsrs.cpp). */ uint32_t const idMsr = pVCpu->cpum.GstCtx.ecx; int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK | CPUMCTX_EXTRN_ALL_MSRS); switch (idMsr) { /* * The FS and GS base MSRs are not part of the above all-MSRs mask. * * Although we don't need to fetch the base as it will be overwritten shortly, while * loading guest-state we would also load the entire segment register including limit * and attributes and thus we need to load them here. */ case MSR_K8_FS_BASE: rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_FS); break; case MSR_K8_GS_BASE: rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_GS); break; } AssertRCReturn(rc, rc); Log4Func(("ecx=%#RX32 edx:eax=%#RX32:%#RX32\n", idMsr, pVCpu->cpum.GstCtx.edx, pVCpu->cpum.GstCtx.eax)); VBOXSTRICTRC rcStrict = IEMExecDecodedWrmsr(pVCpu, pVmxTransient->cbInstr); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitWrmsr); if (rcStrict == VINF_SUCCESS) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); /* If this is an X2APIC WRMSR access, update the APIC state as well. */ if ( idMsr == MSR_IA32_APICBASE || ( idMsr >= MSR_IA32_X2APIC_START && idMsr <= MSR_IA32_X2APIC_END)) { /* * We've already saved the APIC related guest-state (TPR) in hmR0VmxPostRunGuest(). When full APIC register * virtualization is implemented we'll have to make sure APIC state is saved from the VMCS before IEM changes it. */ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_APIC_TPR); } else if (idMsr == MSR_IA32_TSC) /* Windows 7 does this during bootup. See @bugref{6398}. */ pVmxTransient->fUpdateTscOffsettingAndPreemptTimer = true; else if (idMsr == MSR_K6_EFER) { /* * If the guest touches EFER we need to update the VM-Entry and VM-Exit controls as well, * even if it is -not- touching bits that cause paging mode changes (LMA/LME). We care about * the other bits as well, SCE and NXE. See @bugref{7368}. */ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_EFER_MSR | HM_CHANGED_VMX_ENTRY_CTLS | HM_CHANGED_VMX_EXIT_CTLS); } /* Update MSRs that are part of the VMCS and auto-load/store area when MSR-bitmaps are not supported. */ if (!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_MSR_BITMAPS)) { switch (idMsr) { case MSR_IA32_SYSENTER_CS: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_CS_MSR); break; case MSR_IA32_SYSENTER_EIP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_EIP_MSR); break; case MSR_IA32_SYSENTER_ESP: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_SYSENTER_ESP_MSR); break; case MSR_K8_FS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_FS); break; case MSR_K8_GS_BASE: ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_GS); break; case MSR_K6_EFER: /* Nothing to do, already handled above. */ break; default: { if (hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, idMsr)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_VMX_GUEST_AUTO_MSRS); else if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_VMX_GUEST_LAZY_MSRS); break; } } } #ifdef VBOX_STRICT else { /* Paranoia. Validate that MSRs in the MSR-bitmaps with write-passthru are not intercepted. */ switch (idMsr) { case MSR_IA32_SYSENTER_CS: case MSR_IA32_SYSENTER_EIP: case MSR_IA32_SYSENTER_ESP: case MSR_K8_FS_BASE: case MSR_K8_GS_BASE: { AssertMsgFailed(("Unexpected WRMSR for an MSR in the VMCS. ecx=%#RX32\n", idMsr)); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } /* Writes to MSRs in auto-load/store area/swapped MSRs, shouldn't cause VM-exits with MSR-bitmaps. */ default: { if (hmR0VmxIsAutoLoadStoreGuestMsr(pVCpu, idMsr)) { /* EFER writes are always intercepted, see hmR0VmxExportGuestMsrs(). */ if (idMsr != MSR_K6_EFER) { AssertMsgFailed(("Unexpected WRMSR for an MSR in the auto-load/store area in the VMCS. ecx=%#RX32\n", idMsr)); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } } if (hmR0VmxIsLazyGuestMsr(pVCpu, idMsr)) { VMXMSREXITREAD enmRead; VMXMSREXITWRITE enmWrite; int rc2 = HMVmxGetMsrPermission(pVCpu->hm.s.vmx.pvMsrBitmap, idMsr, &enmRead, &enmWrite); AssertRCReturn(rc2, rc2); if (enmWrite == VMXMSREXIT_PASSTHRU_WRITE) { AssertMsgFailed(("Unexpected WRMSR for passthru, lazy-restore MSR. ecx=%#RX32\n", idMsr)); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } } break; } } } #endif /* VBOX_STRICT */ } else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } else AssertMsg(rcStrict == VINF_CPUM_R3_MSR_WRITE, ("Unexpected IEMExecDecodedWrmsr rc (%Rrc)\n", VBOXSTRICTRC_VAL(rcStrict))); return rcStrict; } /** * VM-exit handler for PAUSE (VMX_EXIT_PAUSE). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitPause(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /** @todo The guest has likely hit a contended spinlock. We might want to * poke a schedule different guest VCPU. */ return VINF_EM_RAW_INTERRUPT; } /** * VM-exit handler for when the TPR value is lowered below the specified * threshold (VMX_EXIT_TPR_BELOW_THRESHOLD). Conditional VM-exit. */ HMVMX_EXIT_NSRC_DECL hmR0VmxExitTprBelowThreshold(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW); /* * The TPR shadow would've been synced with the APIC TPR in hmR0VmxPostRunGuest(). We'll re-evaluate * pending interrupts and inject them before the next VM-entry so we can just continue execution here. */ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTprBelowThreshold); return VINF_SUCCESS; } /** * VM-exit handler for control-register accesses (VMX_EXIT_MOV_CRX). Conditional * VM-exit. * * @retval VINF_SUCCESS when guest execution can continue. * @retval VINF_PGM_SYNC_CR3 CR3 sync is required, back to ring-3. * @retval VERR_EM_INTERPRETER when something unexpected happened, fallback to * interpreter. */ HMVMX_EXIT_DECL hmR0VmxExitMovCRx(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitMovCRx, y2); int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); AssertRCReturn(rc, rc); VBOXSTRICTRC rcStrict; PVM pVM = pVCpu->CTX_SUFF(pVM); RTGCUINTPTR const uExitQual = pVmxTransient->uExitQual; uint32_t const uAccessType = VMX_EXIT_QUAL_CRX_ACCESS(uExitQual); switch (uAccessType) { case VMX_EXIT_QUAL_CRX_ACCESS_WRITE: /* MOV to CRx */ { uint32_t const uOldCr0 = pVCpu->cpum.GstCtx.cr0; rcStrict = IEMExecDecodedMovCRxWrite(pVCpu, pVmxTransient->cbInstr, VMX_EXIT_QUAL_CRX_REGISTER(uExitQual), VMX_EXIT_QUAL_CRX_GENREG(uExitQual)); AssertMsg( rcStrict == VINF_SUCCESS || rcStrict == VINF_IEM_RAISED_XCPT || rcStrict == VINF_PGM_SYNC_CR3, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); switch (VMX_EXIT_QUAL_CRX_REGISTER(uExitQual)) { case 0: { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Write); Log4Func(("CR0 write rcStrict=%Rrc CR0=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr0)); /* * This is a kludge for handling switches back to real mode when we try to use * V86 mode to run real mode code directly. Problem is that V86 mode cannot * deal with special selector values, so we have to return to ring-3 and run * there till the selector values are V86 mode compatible. * * Note! Using VINF_EM_RESCHEDULE_REM here rather than VINF_EM_RESCHEDULE since the * latter is an alias for VINF_IEM_RAISED_XCPT which is converted to VINF_SUCCESs * at the end of this function. */ if ( rc == VINF_SUCCESS && !pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest && CPUMIsGuestInRealModeEx(&pVCpu->cpum.GstCtx) && (uOldCr0 & X86_CR0_PE) && !(pVCpu->cpum.GstCtx.cr0 & X86_CR0_PE) ) { /** @todo check selectors rather than returning all the time. */ Log4Func(("CR0 write, back to real mode -> VINF_EM_RESCHEDULE_REM\n")); rcStrict = VINF_EM_RESCHEDULE_REM; } break; } case 2: { STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Write); /* Nothing to do here, CR2 it's not part of the VMCS. */ break; } case 3: { Assert( !pVM->hm.s.fNestedPaging || !CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx) || pVCpu->hm.s.fUsingDebugLoop); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Write); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR3); Log4Func(("CR3 write rcStrict=%Rrc CR3=%#RX64\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr3)); break; } case 4: { STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Write); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR4); Log4Func(("CR4 write rc=%Rrc CR4=%#RX64 fLoadSaveGuestXcr0=%u\n", VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cr4, pVCpu->hm.s.fLoadSaveGuestXcr0)); break; } case 8: { STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Write); Assert(!(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_APIC_TPR); break; } default: AssertMsgFailed(("Invalid CRx register %#x\n", VMX_EXIT_QUAL_CRX_REGISTER(uExitQual))); break; } break; } case VMX_EXIT_QUAL_CRX_ACCESS_READ: /* MOV from CRx */ { Assert( !pVM->hm.s.fNestedPaging || !CPUMIsGuestPagingEnabledEx(&pVCpu->cpum.GstCtx) || pVCpu->hm.s.fUsingDebugLoop || VMX_EXIT_QUAL_CRX_REGISTER(uExitQual) != 3); /* CR8 reads only cause a VM-exit when the TPR shadow feature isn't enabled. */ Assert( VMX_EXIT_QUAL_CRX_REGISTER(uExitQual) != 8 || !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW)); rcStrict = IEMExecDecodedMovCRxRead(pVCpu, pVmxTransient->cbInstr, VMX_EXIT_QUAL_CRX_GENREG(uExitQual), VMX_EXIT_QUAL_CRX_REGISTER(uExitQual)); AssertMsg( rcStrict == VINF_SUCCESS || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); #ifdef VBOX_WITH_STATISTICS switch (VMX_EXIT_QUAL_CRX_REGISTER(uExitQual)) { case 0: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR0Read); break; case 2: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR2Read); break; case 3: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR3Read); break; case 4: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR4Read); break; case 8: STAM_COUNTER_INC(&pVCpu->hm.s.StatExitCR8Read); break; } #endif Log4Func(("CR%d Read access rcStrict=%Rrc\n", VMX_EXIT_QUAL_CRX_REGISTER(uExitQual), VBOXSTRICTRC_VAL(rcStrict))); if (VMX_EXIT_QUAL_CRX_GENREG(uExitQual) == X86_GREG_xSP) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_RSP); else ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS); break; } case VMX_EXIT_QUAL_CRX_ACCESS_CLTS: /* CLTS (Clear Task-Switch Flag in CR0) */ { rcStrict = IEMExecDecodedClts(pVCpu, pVmxTransient->cbInstr); AssertMsg( rcStrict == VINF_SUCCESS || rcStrict == VINF_IEM_RAISED_XCPT, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitClts); Log4Func(("CLTS rcStrict=%d\n", VBOXSTRICTRC_VAL(rcStrict))); break; } case VMX_EXIT_QUAL_CRX_ACCESS_LMSW: /* LMSW (Load Machine-Status Word into CR0) */ { /* Note! LMSW cannot clear CR0.PE, so no fRealOnV86Active kludge needed here. */ rc = hmR0VmxReadGuestLinearAddrVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); rcStrict = IEMExecDecodedLmsw(pVCpu, pVmxTransient->cbInstr, VMX_EXIT_QUAL_CRX_LMSW_DATA(uExitQual), pVmxTransient->uGuestLinearAddr); AssertMsg( rcStrict == VINF_SUCCESS || rcStrict == VINF_IEM_RAISED_XCPT , ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_CR0); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitLmsw); Log4Func(("LMSW rcStrict=%d\n", VBOXSTRICTRC_VAL(rcStrict))); break; } default: AssertMsgFailedReturn(("Invalid access-type in Mov CRx VM-exit qualification %#x\n", uAccessType), VERR_VMX_UNEXPECTED_EXCEPTION); } Assert( (pVCpu->hm.s.fCtxChanged & (HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS)) == (HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS)); if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitMovCRx, y2); NOREF(pVM); return rcStrict; } /** * VM-exit handler for I/O instructions (VMX_EXIT_IO_INSTR). Conditional * VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitIoInstr(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_PROFILE_ADV_START(&pVCpu->hm.s.StatExitIO, y1); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_EFER); /* EFER also required for longmode checks in EMInterpretDisasCurrent(), but it's always up-to-date. */ AssertRCReturn(rc, rc); /* Refer Intel spec. 27-5. "Exit Qualifications for I/O Instructions" for the format. */ uint32_t uIOPort = VMX_EXIT_QUAL_IO_PORT(pVmxTransient->uExitQual); uint8_t uIOWidth = VMX_EXIT_QUAL_IO_WIDTH(pVmxTransient->uExitQual); bool fIOWrite = (VMX_EXIT_QUAL_IO_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_IO_DIRECTION_OUT); bool fIOString = VMX_EXIT_QUAL_IO_IS_STRING(pVmxTransient->uExitQual); bool fGstStepping = RT_BOOL(pCtx->eflags.Bits.u1TF); bool fDbgStepping = pVCpu->hm.s.fSingleInstruction; AssertReturn(uIOWidth <= 3 && uIOWidth != 2, VERR_VMX_IPE_1); /* * Update exit history to see if this exit can be optimized. */ VBOXSTRICTRC rcStrict; PCEMEXITREC pExitRec = NULL; if ( !fGstStepping && !fDbgStepping) pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu, !fIOString ? !fIOWrite ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_READ) : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_WRITE) : !fIOWrite ? EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_READ) : EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_IO_PORT_STR_WRITE), pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base); if (!pExitRec) { /* I/O operation lookup arrays. */ static uint32_t const s_aIOSizes[4] = { 1, 2, 0, 4 }; /* Size of the I/O accesses. */ static uint32_t const s_aIOOpAnd[4] = { 0xff, 0xffff, 0, 0xffffffff }; /* AND masks for saving result in AL/AX/EAX. */ uint32_t const cbValue = s_aIOSizes[uIOWidth]; uint32_t const cbInstr = pVmxTransient->cbInstr; bool fUpdateRipAlready = false; /* ugly hack, should be temporary. */ PVM pVM = pVCpu->CTX_SUFF(pVM); if (fIOString) { /* * INS/OUTS - I/O String instruction. * * Use instruction-information if available, otherwise fall back on * interpreting the instruction. */ Log4Func(("CS:RIP=%04x:%08RX64 %#06x/%u %c str\n", pCtx->cs.Sel, pCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r')); AssertReturn(pCtx->dx == uIOPort, VERR_VMX_IPE_2); bool const fInsOutsInfo = RT_BF_GET(pVM->hm.s.vmx.Msrs.u64Basic, VMX_BF_BASIC_VMCS_INS_OUTS); if (fInsOutsInfo) { int rc2 = hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); AssertRCReturn(rc2, rc2); AssertReturn(pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize <= 2, VERR_VMX_IPE_3); AssertCompile(IEMMODE_16BIT == 0 && IEMMODE_32BIT == 1 && IEMMODE_64BIT == 2); IEMMODE const enmAddrMode = (IEMMODE)pVmxTransient->ExitInstrInfo.StrIo.u3AddrSize; bool const fRep = VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual); if (fIOWrite) rcStrict = IEMExecStringIoWrite(pVCpu, cbValue, enmAddrMode, fRep, cbInstr, pVmxTransient->ExitInstrInfo.StrIo.iSegReg, true /*fIoChecked*/); else { /* * The segment prefix for INS cannot be overridden and is always ES. We can safely assume X86_SREG_ES. * Hence "iSegReg" field is undefined in the instruction-information field in VT-x for INS. * See Intel Instruction spec. for "INS". * See Intel spec. Table 27-8 "Format of the VM-Exit Instruction-Information Field as Used for INS and OUTS". */ rcStrict = IEMExecStringIoRead(pVCpu, cbValue, enmAddrMode, fRep, cbInstr, true /*fIoChecked*/); } } else rcStrict = IEMExecOne(pVCpu); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP); fUpdateRipAlready = true; } else { /* * IN/OUT - I/O instruction. */ Log4Func(("CS:RIP=%04x:%08RX64 %#06x/%u %c\n", pCtx->cs.Sel, pCtx->rip, uIOPort, cbValue, fIOWrite ? 'w' : 'r')); uint32_t const uAndVal = s_aIOOpAnd[uIOWidth]; Assert(!VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual)); if (fIOWrite) { rcStrict = IOMIOPortWrite(pVM, pVCpu, uIOPort, pCtx->eax & uAndVal, cbValue); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIOWrite); if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE && !pCtx->eflags.Bits.u1TF) rcStrict = EMRZSetPendingIoPortWrite(pVCpu, uIOPort, cbInstr, cbValue, pCtx->eax & uAndVal); } else { uint32_t u32Result = 0; rcStrict = IOMIOPortRead(pVM, pVCpu, uIOPort, &u32Result, cbValue); if (IOM_SUCCESS(rcStrict)) { /* Save result of I/O IN instr. in AL/AX/EAX. */ pCtx->eax = (pCtx->eax & ~uAndVal) | (u32Result & uAndVal); } if ( rcStrict == VINF_IOM_R3_IOPORT_READ && !pCtx->eflags.Bits.u1TF) rcStrict = EMRZSetPendingIoPortRead(pVCpu, uIOPort, cbInstr, cbValue); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitIORead); } } if (IOM_SUCCESS(rcStrict)) { if (!fUpdateRipAlready) { hmR0VmxAdvanceGuestRipBy(pVCpu, cbInstr); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP); } /* * INS/OUTS with REP prefix updates RFLAGS, can be observed with triple-fault guru * while booting Fedora 17 64-bit guest. * * See Intel Instruction reference for REP/REPE/REPZ/REPNE/REPNZ. */ if (fIOString) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RFLAGS); /* * If any I/O breakpoints are armed, we need to check if one triggered * and take appropriate action. * Note that the I/O breakpoint type is undefined if CR4.DE is 0. */ rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_DR7); AssertRCReturn(rc, rc); /** @todo Optimize away the DBGFBpIsHwIoArmed call by having DBGF tell the * execution engines about whether hyper BPs and such are pending. */ uint32_t const uDr7 = pCtx->dr[7]; if (RT_UNLIKELY( ( (uDr7 & X86_DR7_ENABLED_MASK) && X86_DR7_ANY_RW_IO(uDr7) && (pCtx->cr4 & X86_CR4_DE)) || DBGFBpIsHwIoArmed(pVM))) { STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxIoCheck); /* We're playing with the host CPU state here, make sure we don't preempt or longjmp. */ VMMRZCallRing3Disable(pVCpu); HM_DISABLE_PREEMPT(pVCpu); bool fIsGuestDbgActive = CPUMR0DebugStateMaybeSaveGuest(pVCpu, true /* fDr6 */); VBOXSTRICTRC rcStrict2 = DBGFBpCheckIo(pVM, pVCpu, pCtx, uIOPort, cbValue); if (rcStrict2 == VINF_EM_RAW_GUEST_TRAP) { /* Raise #DB. */ if (fIsGuestDbgActive) ASMSetDR6(pCtx->dr[6]); if (pCtx->dr[7] != uDr7) pVCpu->hm.s.fCtxChanged |= HM_CHANGED_GUEST_DR7; hmR0VmxSetPendingXcptDB(pVCpu); } /* rcStrict is VINF_SUCCESS, VINF_IOM_R3_IOPORT_COMMIT_WRITE, or in [VINF_EM_FIRST..VINF_EM_LAST], however we can ditch VINF_IOM_R3_IOPORT_COMMIT_WRITE as it has VMCPU_FF_IOM as backup. */ else if ( rcStrict2 != VINF_SUCCESS && (rcStrict == VINF_SUCCESS || rcStrict2 < rcStrict)) rcStrict = rcStrict2; AssertCompile(VINF_EM_LAST < VINF_IOM_R3_IOPORT_COMMIT_WRITE); HM_RESTORE_PREEMPT(); VMMRZCallRing3Enable(pVCpu); } } #ifdef VBOX_STRICT if ( rcStrict == VINF_IOM_R3_IOPORT_READ || rcStrict == VINF_EM_PENDING_R3_IOPORT_READ) Assert(!fIOWrite); else if ( rcStrict == VINF_IOM_R3_IOPORT_WRITE || rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE) Assert(fIOWrite); else { # if 0 /** @todo r=bird: This is missing a bunch of VINF_EM_FIRST..VINF_EM_LAST * statuses, that the VMM device and some others may return. See * IOM_SUCCESS() for guidance. */ AssertMsg( RT_FAILURE(rcStrict) || rcStrict == VINF_SUCCESS || rcStrict == VINF_EM_RAW_EMULATE_INSTR || rcStrict == VINF_EM_DBG_BREAKPOINT || rcStrict == VINF_EM_RAW_GUEST_TRAP || rcStrict == VINF_EM_RAW_TO_R3 || rcStrict == VINF_TRPM_XCPT_DISPATCHED, ("%Rrc\n", VBOXSTRICTRC_VAL(rcStrict))); # endif } #endif STAM_PROFILE_ADV_STOP(&pVCpu->hm.s.StatExitIO, y1); } else { /* * Frequent exit or something needing probing. Get state and call EMHistoryExec. */ int rc2 = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc2, rc2); STAM_COUNTER_INC(!fIOString ? fIOWrite ? &pVCpu->hm.s.StatExitIOWrite : &pVCpu->hm.s.StatExitIORead : fIOWrite ? &pVCpu->hm.s.StatExitIOStringWrite : &pVCpu->hm.s.StatExitIOStringRead); Log4(("IOExit/%u: %04x:%08RX64: %s%s%s %#x LB %u -> EMHistoryExec\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, VMX_EXIT_QUAL_IO_IS_REP(pVmxTransient->uExitQual) ? "REP " : "", fIOWrite ? "OUT" : "IN", fIOString ? "S" : "", uIOPort, uIOWidth)); rcStrict = EMHistoryExec(pVCpu, pExitRec, 0); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); Log4(("IOExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip)); } return rcStrict; } /** * VM-exit handler for task switches (VMX_EXIT_TASK_SWITCH). Unconditional * VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitTaskSwitch(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /* Check if this task-switch occurred while delivery an event through the guest IDT. */ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); if (VMX_EXIT_QUAL_TASK_SWITCH_TYPE(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_TASK_SWITCH_TYPE_IDT) { rc = hmR0VmxReadIdtVectoringInfoVmcs(pVmxTransient); AssertRCReturn(rc, rc); if (VMX_IDT_VECTORING_INFO_IS_VALID(pVmxTransient->uIdtVectoringInfo)) { uint32_t uErrCode; RTGCUINTPTR GCPtrFaultAddress; uint32_t const uIntType = VMX_IDT_VECTORING_INFO_TYPE(pVmxTransient->uIdtVectoringInfo); uint32_t const uVector = VMX_IDT_VECTORING_INFO_VECTOR(pVmxTransient->uIdtVectoringInfo); bool const fErrorCodeValid = VMX_IDT_VECTORING_INFO_IS_ERROR_CODE_VALID(pVmxTransient->uIdtVectoringInfo); if (fErrorCodeValid) { rc = hmR0VmxReadIdtVectoringErrorCodeVmcs(pVmxTransient); AssertRCReturn(rc, rc); uErrCode = pVmxTransient->uIdtVectoringErrorCode; } else uErrCode = 0; if ( uIntType == VMX_IDT_VECTORING_INFO_TYPE_HW_XCPT && uVector == X86_XCPT_PF) GCPtrFaultAddress = pVCpu->cpum.GstCtx.cr2; else GCPtrFaultAddress = 0; rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); AssertRCReturn(rc, rc); hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_IDT_INFO(pVmxTransient->uIdtVectoringInfo), pVmxTransient->cbInstr, uErrCode, GCPtrFaultAddress); Log4Func(("Pending event. uIntType=%#x uVector=%#x\n", uIntType, uVector)); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch); return VINF_EM_RAW_INJECT_TRPM_EVENT; } } /* Fall back to the interpreter to emulate the task-switch. */ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitTaskSwitch); return VERR_EM_INTERPRETER; } /** * VM-exit handler for monitor-trap-flag (VMX_EXIT_MTF). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitMtf(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); Assert(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_MONITOR_TRAP_FLAG); pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_PROC_CTLS_MONITOR_TRAP_FLAG; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRCReturn(rc, rc); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitMtf); return VINF_EM_DBG_STEPPED; } /** * VM-exit handler for APIC access (VMX_EXIT_APIC_ACCESS). Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitApicAccess(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitApicAccess); /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ VBOXSTRICTRC rcStrict1 = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient); if (RT_LIKELY(rcStrict1 == VINF_SUCCESS)) { /* For some crazy guest, if an event delivery causes an APIC-access VM-exit, go to instruction emulation. */ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending)) { STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingInterpret); return VINF_EM_RAW_INJECT_TRPM_EVENT; } } else { if (rcStrict1 == VINF_HM_DOUBLE_FAULT) rcStrict1 = VINF_SUCCESS; return rcStrict1; } /* IOMMIOPhysHandler() below may call into IEM, save the necessary state. */ int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); /* See Intel spec. 27-6 "Exit Qualifications for APIC-access VM-exits from Linear Accesses & Guest-Phyiscal Addresses" */ uint32_t uAccessType = VMX_EXIT_QUAL_APIC_ACCESS_TYPE(pVmxTransient->uExitQual); VBOXSTRICTRC rcStrict2; switch (uAccessType) { case VMX_APIC_ACCESS_TYPE_LINEAR_WRITE: case VMX_APIC_ACCESS_TYPE_LINEAR_READ: { AssertMsg( !(pVCpu->hm.s.vmx.u32ProcCtls & VMX_PROC_CTLS_USE_TPR_SHADOW) || VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual) != XAPIC_OFF_TPR, ("hmR0VmxExitApicAccess: can't access TPR offset while using TPR shadowing.\n")); RTGCPHYS GCPhys = pVCpu->hm.s.vmx.u64MsrApicBase; /* Always up-to-date, u64MsrApicBase is not part of the VMCS. */ GCPhys &= PAGE_BASE_GC_MASK; GCPhys += VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual); PVM pVM = pVCpu->CTX_SUFF(pVM); Log4Func(("Linear access uAccessType=%#x GCPhys=%#RGp Off=%#x\n", uAccessType, GCPhys, VMX_EXIT_QUAL_APIC_ACCESS_OFFSET(pVmxTransient->uExitQual))); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; rcStrict2 = IOMMMIOPhysHandler(pVM, pVCpu, uAccessType == VMX_APIC_ACCESS_TYPE_LINEAR_READ ? 0 : X86_TRAP_PF_RW, CPUMCTX2CORE(pCtx), GCPhys); Log4Func(("IOMMMIOPhysHandler returned %Rrc\n", VBOXSTRICTRC_VAL(rcStrict2))); if ( rcStrict2 == VINF_SUCCESS || rcStrict2 == VERR_PAGE_TABLE_NOT_PRESENT || rcStrict2 == VERR_PAGE_NOT_PRESENT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_APIC_TPR); rcStrict2 = VINF_SUCCESS; } break; } default: Log4Func(("uAccessType=%#x\n", uAccessType)); rcStrict2 = VINF_EM_RAW_EMULATE_INSTR; break; } if (rcStrict2 != VINF_SUCCESS) STAM_COUNTER_INC(&pVCpu->hm.s.StatSwitchApicAccessToR3); return rcStrict2; } /** * VM-exit handler for debug-register accesses (VMX_EXIT_MOV_DRX). Conditional * VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitMovDRx(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); /* We should -not- get this VM-exit if the guest's debug registers were active. */ if (pVmxTransient->fWasGuestDebugStateActive) { AssertMsgFailed(("Unexpected MOV DRx exit\n")); HMVMX_UNEXPECTED_EXIT_RET(pVCpu, pVmxTransient); } if ( !pVCpu->hm.s.fSingleInstruction && !pVmxTransient->fWasHyperDebugStateActive) { Assert(!DBGFIsStepping(pVCpu)); Assert(pVCpu->hm.s.vmx.u32XcptBitmap & RT_BIT_32(X86_XCPT_DB)); /* Don't intercept MOV DRx any more. */ pVCpu->hm.s.vmx.u32ProcCtls &= ~VMX_PROC_CTLS_MOV_DR_EXIT; int rc = VMXWriteVmcs32(VMX_VMCS32_CTRL_PROC_EXEC, pVCpu->hm.s.vmx.u32ProcCtls); AssertRCReturn(rc, rc); /* We're playing with the host CPU state here, make sure we can't preempt or longjmp. */ VMMRZCallRing3Disable(pVCpu); HM_DISABLE_PREEMPT(pVCpu); /* Save the host & load the guest debug state, restart execution of the MOV DRx instruction. */ CPUMR0LoadGuestDebugState(pVCpu, true /* include DR6 */); Assert(CPUMIsGuestDebugStateActive(pVCpu) || HC_ARCH_BITS == 32); HM_RESTORE_PREEMPT(); VMMRZCallRing3Enable(pVCpu); #ifdef VBOX_WITH_STATISTICS rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); if (VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_WRITE) STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite); else STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead); #endif STAM_COUNTER_INC(&pVCpu->hm.s.StatDRxContextSwitch); return VINF_SUCCESS; } /* * EMInterpretDRx[Write|Read]() calls CPUMIsGuestIn64BitCode() which requires EFER, CS. EFER is always up-to-date. * Update the segment registers and DR7 from the CPU. */ PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_SREG_MASK | CPUMCTX_EXTRN_DR7); AssertRCReturn(rc, rc); Log4Func(("CS:RIP=%04x:%08RX64\n", pCtx->cs.Sel, pCtx->rip)); PVM pVM = pVCpu->CTX_SUFF(pVM); if (VMX_EXIT_QUAL_DRX_DIRECTION(pVmxTransient->uExitQual) == VMX_EXIT_QUAL_DRX_DIRECTION_WRITE) { rc = EMInterpretDRxWrite(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual), VMX_EXIT_QUAL_DRX_GENREG(pVmxTransient->uExitQual)); if (RT_SUCCESS(rc)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_DR7); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxWrite); } else { rc = EMInterpretDRxRead(pVM, pVCpu, CPUMCTX2CORE(pCtx), VMX_EXIT_QUAL_DRX_GENREG(pVmxTransient->uExitQual), VMX_EXIT_QUAL_DRX_REGISTER(pVmxTransient->uExitQual)); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitDRxRead); } Assert(rc == VINF_SUCCESS || rc == VERR_EM_INTERPRETER); if (RT_SUCCESS(rc)) { int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); AssertRCReturn(rc2, rc2); return VINF_SUCCESS; } return rc; } /** * VM-exit handler for EPT misconfiguration (VMX_EXIT_EPT_MISCONFIG). * Conditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitEptMisconfig(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging); /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ VBOXSTRICTRC rcStrict1 = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient); if (RT_LIKELY(rcStrict1 == VINF_SUCCESS)) { /* If event delivery causes an EPT misconfig (MMIO), go back to instruction emulation as otherwise injecting the original pending event would most likely cause the same EPT misconfig VM-exit. */ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending)) { STAM_COUNTER_INC(&pVCpu->hm.s.StatInjectPendingInterpret); return VINF_EM_RAW_INJECT_TRPM_EVENT; } } else { if (rcStrict1 == VINF_HM_DOUBLE_FAULT) rcStrict1 = VINF_SUCCESS; return rcStrict1; } /* * Get sufficent state and update the exit history entry. */ RTGCPHYS GCPhys; int rc = VMXReadVmcs64(VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL, &GCPhys); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); AssertRCReturn(rc, rc); VBOXSTRICTRC rcStrict; PCEMEXITREC pExitRec = EMHistoryUpdateFlagsAndTypeAndPC(pVCpu, EMEXIT_MAKE_FT(EMEXIT_F_KIND_EM | EMEXIT_F_HM, EMEXITTYPE_MMIO), pVCpu->cpum.GstCtx.rip + pVCpu->cpum.GstCtx.cs.u64Base); if (!pExitRec) { /* * If we succeed, resume guest execution. * If we fail in interpreting the instruction because we couldn't get the guest physical address * of the page containing the instruction via the guest's page tables (we would invalidate the guest page * in the host TLB), resume execution which would cause a guest page fault to let the guest handle this * weird case. See @bugref{6043}. */ PVM pVM = pVCpu->CTX_SUFF(pVM); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; rcStrict = PGMR0Trap0eHandlerNPMisconfig(pVM, pVCpu, PGMMODE_EPT, CPUMCTX2CORE(pCtx), GCPhys, UINT32_MAX); Log4Func(("At %#RGp RIP=%#RX64 rc=%Rrc\n", GCPhys, pCtx->rip, VBOXSTRICTRC_VAL(rcStrict))); if ( rcStrict == VINF_SUCCESS || rcStrict == VERR_PAGE_TABLE_NOT_PRESENT || rcStrict == VERR_PAGE_NOT_PRESENT) { /* Successfully handled MMIO operation. */ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_APIC_TPR); rcStrict = VINF_SUCCESS; } } else { /* * Frequent exit or something needing probing. Get state and call EMHistoryExec. */ int rc2 = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); AssertRCReturn(rc2, rc2); Log4(("EptMisscfgExit/%u: %04x:%08RX64: %RGp -> EMHistoryExec\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, GCPhys)); rcStrict = EMHistoryExec(pVCpu, pExitRec, 0); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); Log4(("EptMisscfgExit/%u: %04x:%08RX64: EMHistoryExec -> %Rrc + %04x:%08RX64\n", pVCpu->idCpu, pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip, VBOXSTRICTRC_VAL(rcStrict), pVCpu->cpum.GstCtx.cs.Sel, pVCpu->cpum.GstCtx.rip)); } return VBOXSTRICTRC_TODO(rcStrict); } /** * VM-exit handler for EPT violation (VMX_EXIT_EPT_VIOLATION). Conditional * VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitEptViolation(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); Assert(pVCpu->CTX_SUFF(pVM)->hm.s.fNestedPaging); /* If this VM-exit occurred while delivering an event through the guest IDT, handle it accordingly. */ VBOXSTRICTRC rcStrict1 = hmR0VmxCheckExitDueToEventDelivery(pVCpu, pVmxTransient); if (RT_LIKELY(rcStrict1 == VINF_SUCCESS)) { /* In the unlikely case that the EPT violation happened as a result of delivering an event, log it. */ if (RT_UNLIKELY(pVCpu->hm.s.Event.fPending)) Log4Func(("EPT violation with an event pending u64IntInfo=%#RX64\n", pVCpu->hm.s.Event.u64IntInfo)); } else { if (rcStrict1 == VINF_HM_DOUBLE_FAULT) rcStrict1 = VINF_SUCCESS; return rcStrict1; } RTGCPHYS GCPhys; int rc = VMXReadVmcs64(VMX_VMCS64_RO_GUEST_PHYS_ADDR_FULL, &GCPhys); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_MUST_MASK); AssertRCReturn(rc, rc); /* Intel spec. Table 27-7 "Exit Qualifications for EPT violations". */ AssertMsg(((pVmxTransient->uExitQual >> 7) & 3) != 2, ("%#RX64", pVmxTransient->uExitQual)); RTGCUINT uErrorCode = 0; if (pVmxTransient->uExitQual & VMX_EXIT_QUAL_EPT_INSTR_FETCH) uErrorCode |= X86_TRAP_PF_ID; if (pVmxTransient->uExitQual & VMX_EXIT_QUAL_EPT_DATA_WRITE) uErrorCode |= X86_TRAP_PF_RW; if (pVmxTransient->uExitQual & VMX_EXIT_QUAL_EPT_ENTRY_PRESENT) uErrorCode |= X86_TRAP_PF_P; TRPMAssertXcptPF(pVCpu, GCPhys, uErrorCode); /* Handle the pagefault trap for the nested shadow table. */ PVM pVM = pVCpu->CTX_SUFF(pVM); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; Log4Func(("EPT violation %#x at %#RX64 ErrorCode %#x CS:RIP=%04x:%08RX64\n", pVmxTransient->uExitQual, GCPhys, uErrorCode, pCtx->cs.Sel, pCtx->rip)); VBOXSTRICTRC rcStrict2 = PGMR0Trap0eHandlerNestedPaging(pVM, pVCpu, PGMMODE_EPT, uErrorCode, CPUMCTX2CORE(pCtx), GCPhys); TRPMResetTrap(pVCpu); /* Same case as PGMR0Trap0eHandlerNPMisconfig(). See comment above, @bugref{6043}. */ if ( rcStrict2 == VINF_SUCCESS || rcStrict2 == VERR_PAGE_TABLE_NOT_PRESENT || rcStrict2 == VERR_PAGE_NOT_PRESENT) { /* Successfully synced our nested page tables. */ STAM_COUNTER_INC(&pVCpu->hm.s.StatExitReasonNpf); ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RSP | HM_CHANGED_GUEST_RFLAGS); return VINF_SUCCESS; } Log4Func(("EPT return to ring-3 rcStrict2=%Rrc\n", VBOXSTRICTRC_VAL(rcStrict2))); return rcStrict2; } /** @} */ /** @name VM-exit exception handlers. * @{ */ /* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ /* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= VM-exit exception handlers =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=- */ /* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ /** * VM-exit exception handler for \#MF (Math Fault: floating point exception). */ static int hmR0VmxExitXcptMF(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestMF); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR0); AssertRCReturn(rc, rc); if (!(pVCpu->cpum.GstCtx.cr0 & X86_CR0_NE)) { /* Convert a #MF into a FERR -> IRQ 13. See @bugref{6117}. */ rc = PDMIsaSetIrq(pVCpu->CTX_SUFF(pVM), 13, 1, 0 /* uTagSrc */); /** @todo r=ramshankar: The Intel spec. does -not- specify that this VM-exit * provides VM-exit instruction length. If this causes problem later, * disassemble the instruction like it's done on AMD-V. */ int rc2 = hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); AssertRCReturn(rc2, rc2); return rc; } hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); return rc; } /** * VM-exit exception handler for \#BP (Breakpoint exception). */ static int hmR0VmxExitXcptBP(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestBP); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc, rc); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; rc = DBGFRZTrap03Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx)); if (rc == VINF_EM_RAW_GUEST_TRAP) { rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient); AssertRCReturn(rc, rc); hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); } Assert(rc == VINF_SUCCESS || rc == VINF_EM_RAW_GUEST_TRAP || rc == VINF_EM_DBG_BREAKPOINT); return rc; } /** * VM-exit exception handler for \#AC (alignment check exception). */ static int hmR0VmxExitXcptAC(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient); /* * Re-inject it. We'll detect any nesting before getting here. */ int rc = hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); AssertRCReturn(rc, rc); Assert(ASMAtomicUoReadU32(&pVmxTransient->fVmcsFieldsRead) & HMVMX_READ_EXIT_INTERRUPTION_INFO); hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); return VINF_SUCCESS; } /** * VM-exit exception handler for \#DB (Debug exception). */ static int hmR0VmxExitXcptDB(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestDB); /* * Get the DR6-like values from the VM-exit qualification and pass it to DBGF * for processing. */ int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); /* Refer Intel spec. Table 27-1. "Exit Qualifications for debug exceptions" for the format. */ uint64_t uDR6 = X86_DR6_INIT_VAL; uDR6 |= (pVmxTransient->uExitQual & (X86_DR6_B0 | X86_DR6_B1 | X86_DR6_B2 | X86_DR6_B3 | X86_DR6_BD | X86_DR6_BS)); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; rc = DBGFRZTrap01Handler(pVCpu->CTX_SUFF(pVM), pVCpu, CPUMCTX2CORE(pCtx), uDR6, pVCpu->hm.s.fSingleInstruction); Log6Func(("rc=%Rrc\n", rc)); if (rc == VINF_EM_RAW_GUEST_TRAP) { /* * The exception was for the guest. Update DR6, DR7.GD and * IA32_DEBUGCTL.LBR before forwarding it. * (See Intel spec. 27.1 "Architectural State before a VM-Exit".) */ VMMRZCallRing3Disable(pVCpu); HM_DISABLE_PREEMPT(pVCpu); pCtx->dr[6] &= ~X86_DR6_B_MASK; pCtx->dr[6] |= uDR6; if (CPUMIsGuestDebugStateActive(pVCpu)) ASMSetDR6(pCtx->dr[6]); HM_RESTORE_PREEMPT(); VMMRZCallRing3Enable(pVCpu); rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_DR7); AssertRCReturn(rc, rc); /* X86_DR7_GD will be cleared if DRx accesses should be trapped inside the guest. */ pCtx->dr[7] &= ~X86_DR7_GD; /* Paranoia. */ pCtx->dr[7] &= ~X86_DR7_RAZ_MASK; pCtx->dr[7] |= X86_DR7_RA1_MASK; rc = VMXWriteVmcs32(VMX_VMCS_GUEST_DR7, (uint32_t)pCtx->dr[7]); AssertRCReturn(rc, rc); /* * Raise #DB in the guest. * * It is important to reflect exactly what the VM-exit gave us (preserving the * interruption-type) rather than use hmR0VmxSetPendingXcptDB() as the #DB could've * been raised while executing ICEBP (INT1) and not the regular #DB. Thus it may * trigger different handling in the CPU (like skipping DPL checks), see @bugref{6398}. * * Intel re-documented ICEBP/INT1 on May 2018 previously documented as part of * Intel 386, see Intel spec. 24.8.3 "VM-Entry Controls for Event Injection". */ rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient); AssertRCReturn(rc, rc); hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); return VINF_SUCCESS; } /* * Not a guest trap, must be a hypervisor related debug event then. * Update DR6 in case someone is interested in it. */ AssertMsg(rc == VINF_EM_DBG_STEPPED || rc == VINF_EM_DBG_BREAKPOINT, ("%Rrc\n", rc)); AssertReturn(pVmxTransient->fWasHyperDebugStateActive, VERR_HM_IPE_5); CPUMSetHyperDR6(pVCpu, uDR6); return rc; } /** * Hacks its way around the lovely mesa driver's backdoor accesses. * * @sa hmR0SvmHandleMesaDrvGp */ static int hmR0VmxHandleMesaDrvGp(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, PCPUMCTX pCtx) { Log(("hmR0VmxHandleMesaDrvGp: at %04x:%08RX64 rcx=%RX64 rbx=%RX64\n", pCtx->cs.Sel, pCtx->rip, pCtx->rcx, pCtx->rbx)); RT_NOREF(pCtx); /* For now we'll just skip the instruction. */ return hmR0VmxAdvanceGuestRip(pVCpu, pVmxTransient); } /** * Checks if the \#GP'ing instruction is the mesa driver doing it's lovely * backdoor logging w/o checking what it is running inside. * * This recognizes an "IN EAX,DX" instruction executed in flat ring-3, with the * backdoor port and magic numbers loaded in registers. * * @returns true if it is, false if it isn't. * @sa hmR0SvmIsMesaDrvGp */ DECLINLINE(bool) hmR0VmxIsMesaDrvGp(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient, PCPUMCTX pCtx) { /* 0xed: IN eAX,dx */ uint8_t abInstr[1]; if (pVmxTransient->cbInstr != sizeof(abInstr)) return false; /* Check that it is #GP(0). */ if (pVmxTransient->uExitIntErrorCode != 0) return false; /* Check magic and port. */ Assert(!(pCtx->fExtrn & (CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RDX | CPUMCTX_EXTRN_RCX))); /*Log(("hmR0VmxIsMesaDrvGp: rax=%RX64 rdx=%RX64\n", pCtx->rax, pCtx->rdx));*/ if (pCtx->rax != UINT32_C(0x564d5868)) return false; if (pCtx->dx != UINT32_C(0x5658)) return false; /* Flat ring-3 CS. */ AssertCompile(HMVMX_CPUMCTX_EXTRN_ALL & CPUMCTX_EXTRN_CS); Assert(!(pCtx->fExtrn & CPUMCTX_EXTRN_CS)); /*Log(("hmR0VmxIsMesaDrvGp: cs.Attr.n.u2Dpl=%d base=%Rx64\n", pCtx->cs.Attr.n.u2Dpl, pCtx->cs.u64Base));*/ if (pCtx->cs.Attr.n.u2Dpl != 3) return false; if (pCtx->cs.u64Base != 0) return false; /* Check opcode. */ AssertCompile(HMVMX_CPUMCTX_EXTRN_ALL & CPUMCTX_EXTRN_RIP); Assert(!(pCtx->fExtrn & CPUMCTX_EXTRN_RIP)); int rc = PGMPhysSimpleReadGCPtr(pVCpu, abInstr, pCtx->rip, sizeof(abInstr)); /*Log(("hmR0VmxIsMesaDrvGp: PGMPhysSimpleReadGCPtr -> %Rrc %#x\n", rc, abInstr[0]));*/ if (RT_FAILURE(rc)) return false; if (abInstr[0] != 0xed) return false; return true; } /** * VM-exit exception handler for \#GP (General-protection exception). * * @remarks Requires pVmxTransient->uExitIntInfo to be up-to-date. */ static int hmR0VmxExitXcptGP(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestGP); PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; if (pVCpu->hm.s.vmx.RealMode.fRealOnV86Active) { /* likely */ } else { #ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS Assert(pVCpu->hm.s.fUsingDebugLoop || pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv); #endif /* If the guest is not in real-mode or we have unrestricted execution support, reflect #GP to the guest. */ int rc = hmR0VmxReadExitIntInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc, rc); Log4Func(("Gst: CS:RIP %04x:%08RX64 ErrorCode=%#x CR0=%#RX64 CPL=%u TR=%#04x\n", pCtx->cs.Sel, pCtx->rip, pVmxTransient->uExitIntErrorCode, pCtx->cr0, CPUMGetGuestCPL(pVCpu), pCtx->tr.Sel)); if ( !pVCpu->hm.s.fTrapXcptGpForLovelyMesaDrv || !hmR0VmxIsMesaDrvGp(pVCpu, pVmxTransient, pCtx)) hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); else rc = hmR0VmxHandleMesaDrvGp(pVCpu, pVmxTransient, pCtx); return rc; } Assert(CPUMIsGuestInRealModeEx(pCtx)); Assert(!pVCpu->CTX_SUFF(pVM)->hm.s.vmx.fUnrestrictedGuest); int rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc, rc); VBOXSTRICTRC rcStrict = IEMExecOne(pVCpu); if (rcStrict == VINF_SUCCESS) { if (!CPUMIsGuestInRealModeEx(pCtx)) { /* * The guest is no longer in real-mode, check if we can continue executing the * guest using hardware-assisted VMX. Otherwise, fall back to emulation. */ if (HMVmxCanExecuteGuest(pVCpu, pCtx)) { Log4Func(("Mode changed but guest still suitable for executing using VT-x\n")); pVCpu->hm.s.vmx.RealMode.fRealOnV86Active = false; ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); } else { Log4Func(("Mode changed -> VINF_EM_RESCHEDULE\n")); rcStrict = VINF_EM_RESCHEDULE; } } else ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); } else if (rcStrict == VINF_IEM_RAISED_XCPT) { rcStrict = VINF_SUCCESS; ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); } return VBOXSTRICTRC_VAL(rcStrict); } /** * VM-exit exception handler wrapper for generic exceptions. Simply re-injects * the exception reported in the VMX transient structure back into the VM. * * @remarks Requires uExitIntInfo in the VMX transient structure to be * up-to-date. */ static int hmR0VmxExitXcptGeneric(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef HMVMX_ALWAYS_TRAP_ALL_XCPTS AssertMsg(pVCpu->hm.s.fUsingDebugLoop || pVCpu->hm.s.vmx.RealMode.fRealOnV86Active, ("uVector=%#x u32XcptBitmap=%#X32\n", VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo), pVCpu->hm.s.vmx.u32XcptBitmap)); #endif /* Re-inject the exception into the guest. This cannot be a double-fault condition which would have been handled in hmR0VmxCheckExitDueToEventDelivery(). */ int rc = hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient); rc |= hmR0VmxReadExitInstrLenVmcs(pVmxTransient); AssertRCReturn(rc, rc); Assert(ASMAtomicUoReadU32(&pVmxTransient->fVmcsFieldsRead) & HMVMX_READ_EXIT_INTERRUPTION_INFO); #ifdef DEBUG_ramshankar rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RIP); uint8_t uVector = VMX_EXIT_INT_INFO_VECTOR(pVmxTransient->uExitIntInfo); Log(("hmR0VmxExitXcptGeneric: Reinjecting Xcpt. uVector=%#x cs:rip=%#04x:%#RX64\n", uVector, pCtx->cs.Sel, pCtx->rip)); #endif hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), pVmxTransient->cbInstr, pVmxTransient->uExitIntErrorCode, 0 /* GCPtrFaultAddress */); return VINF_SUCCESS; } /** * VM-exit exception handler for \#PF (Page-fault exception). */ static int hmR0VmxExitXcptPF(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_XCPT_HANDLER_PARAMS(pVCpu, pVmxTransient); PVM pVM = pVCpu->CTX_SUFF(pVM); int rc = hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); rc |= hmR0VmxReadExitIntInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitIntErrorCodeVmcs(pVmxTransient); AssertRCReturn(rc, rc); if (!pVM->hm.s.fNestedPaging) { /* likely */ } else { #if !defined(HMVMX_ALWAYS_TRAP_ALL_XCPTS) && !defined(HMVMX_ALWAYS_TRAP_PF) Assert(pVCpu->hm.s.fUsingDebugLoop); #endif pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory or vectoring #PF. */ if (RT_LIKELY(!pVmxTransient->fVectoringDoublePF)) { hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), 0 /* cbInstr */, pVmxTransient->uExitIntErrorCode, pVmxTransient->uExitQual); } else { /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */ hmR0VmxSetPendingXcptDF(pVCpu); Log4Func(("Pending #DF due to vectoring #PF w/ NestedPaging\n")); } STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); return rc; } /* If it's a vectoring #PF, emulate injecting the original event injection as PGMTrap0eHandler() is incapable of differentiating between instruction emulation and event injection that caused a #PF. See @bugref{6607}. */ if (pVmxTransient->fVectoringPF) { Assert(pVCpu->hm.s.Event.fPending); return VINF_EM_RAW_INJECT_TRPM_EVENT; } PCPUMCTX pCtx = &pVCpu->cpum.GstCtx; rc = HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, HMVMX_CPUMCTX_EXTRN_ALL); AssertRCReturn(rc, rc); Log4Func(("#PF: cr2=%#RX64 cs:rip=%#04x:%#RX64 uErrCode %#RX32 cr3=%#RX64\n", pVmxTransient->uExitQual, pCtx->cs.Sel, pCtx->rip, pVmxTransient->uExitIntErrorCode, pCtx->cr3)); TRPMAssertXcptPF(pVCpu, pVmxTransient->uExitQual, (RTGCUINT)pVmxTransient->uExitIntErrorCode); rc = PGMTrap0eHandler(pVCpu, pVmxTransient->uExitIntErrorCode, CPUMCTX2CORE(pCtx), (RTGCPTR)pVmxTransient->uExitQual); Log4Func(("#PF: rc=%Rrc\n", rc)); if (rc == VINF_SUCCESS) { /* * This is typically a shadow page table sync or a MMIO instruction. But we may have * emulated something like LTR or a far jump. Any part of the CPU context may have changed. */ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); TRPMResetTrap(pVCpu); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPF); return rc; } if (rc == VINF_EM_RAW_GUEST_TRAP) { if (!pVmxTransient->fVectoringDoublePF) { /* It's a guest page fault and needs to be reflected to the guest. */ uint32_t uGstErrorCode = TRPMGetErrorCode(pVCpu); TRPMResetTrap(pVCpu); pVCpu->hm.s.Event.fPending = false; /* In case it's a contributory #PF. */ hmR0VmxSetPendingEvent(pVCpu, VMX_ENTRY_INT_INFO_FROM_EXIT_INT_INFO(pVmxTransient->uExitIntInfo), 0 /* cbInstr */, uGstErrorCode, pVmxTransient->uExitQual); } else { /* A guest page-fault occurred during delivery of a page-fault. Inject #DF. */ TRPMResetTrap(pVCpu); pVCpu->hm.s.Event.fPending = false; /* Clear pending #PF to replace it with #DF. */ hmR0VmxSetPendingXcptDF(pVCpu); Log4Func(("#PF: Pending #DF due to vectoring #PF\n")); } STAM_COUNTER_INC(&pVCpu->hm.s.StatExitGuestPF); return VINF_SUCCESS; } TRPMResetTrap(pVCpu); STAM_COUNTER_INC(&pVCpu->hm.s.StatExitShadowPFEM); return rc; } /** @} */ #ifdef VBOX_WITH_NESTED_HWVIRT_VMX /** @name Nested-guest VM-exit handlers. * @{ */ /* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ /* -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Nested-guest VM-exit handlers =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ /* -=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=--=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= */ /** * VM-exit handler for VMCLEAR (VMX_EXIT_VMCLEAR). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmclear(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK); rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VMXVEXITINFO ExitInfo; RT_ZERO(ExitInfo); ExitInfo.uReason = pVmxTransient->uExitReason; ExitInfo.u64Qual = pVmxTransient->uExitQual; ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u; ExitInfo.cbInstr = pVmxTransient->cbInstr; HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr); VBOXSTRICTRC rcStrict = IEMExecDecodedVmclear(pVCpu, &ExitInfo); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** * VM-exit handler for VMLAUNCH (VMX_EXIT_VMLAUNCH). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmlaunch(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_VMX_VMENTRY_MASK); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VBOXSTRICTRC rcStrict = IEMExecDecodedVmlaunchVmresume(pVCpu, pVmxTransient->cbInstr, VMXINSTRID_VMLAUNCH); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); Assert(rcStrict != VINF_IEM_RAISED_XCPT); return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** * VM-exit handler for VMPTRLD (VMX_EXIT_VMPTRLD). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmptrld(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK); rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VMXVEXITINFO ExitInfo; RT_ZERO(ExitInfo); ExitInfo.uReason = pVmxTransient->uExitReason; ExitInfo.u64Qual = pVmxTransient->uExitQual; ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u; ExitInfo.cbInstr = pVmxTransient->cbInstr; HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr); VBOXSTRICTRC rcStrict = IEMExecDecodedVmptrld(pVCpu, &ExitInfo); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** * VM-exit handler for VMPTRST (VMX_EXIT_VMPTRST). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmptrst(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK); rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VMXVEXITINFO ExitInfo; RT_ZERO(ExitInfo); ExitInfo.uReason = pVmxTransient->uExitReason; ExitInfo.u64Qual = pVmxTransient->uExitQual; ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u; ExitInfo.cbInstr = pVmxTransient->cbInstr; HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_WRITE, &ExitInfo.GCPtrEffAddr); VBOXSTRICTRC rcStrict = IEMExecDecodedVmptrst(pVCpu, &ExitInfo); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** * VM-exit handler for VMREAD (VMX_EXIT_VMREAD). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmread(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK); rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VMXVEXITINFO ExitInfo; RT_ZERO(ExitInfo); ExitInfo.uReason = pVmxTransient->uExitReason; ExitInfo.u64Qual = pVmxTransient->uExitQual; ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u; ExitInfo.cbInstr = pVmxTransient->cbInstr; if (!ExitInfo.InstrInfo.VmreadVmwrite.fIsRegOperand) HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_WRITE, &ExitInfo.GCPtrEffAddr); VBOXSTRICTRC rcStrict = IEMExecDecodedVmread(pVCpu, &ExitInfo); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** * VM-exit handler for VMRESUME (VMX_EXIT_VMRESUME). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmresume(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, IEM_CPUMCTX_EXTRN_VMX_VMENTRY_MASK); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VBOXSTRICTRC rcStrict = IEMExecDecodedVmlaunchVmresume(pVCpu, pVmxTransient->cbInstr, VMXINSTRID_VMRESUME); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_ALL_GUEST); Assert(rcStrict != VINF_IEM_RAISED_XCPT); return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** * VM-exit handler for VMWRITE (VMX_EXIT_VMWRITE). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmwrite(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK); rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VMXVEXITINFO ExitInfo; RT_ZERO(ExitInfo); ExitInfo.uReason = pVmxTransient->uExitReason; ExitInfo.u64Qual = pVmxTransient->uExitQual; ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u; ExitInfo.cbInstr = pVmxTransient->cbInstr; if (!ExitInfo.InstrInfo.VmreadVmwrite.fIsRegOperand) HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr); VBOXSTRICTRC rcStrict = IEMExecDecodedVmwrite(pVCpu, &ExitInfo); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** * VM-exit handler for VMXOFF (VMX_EXIT_VMXOFF). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmxoff(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_CR4 | IEM_CPUMCTX_EXTRN_EXEC_DECODED_NO_MEM_MASK); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VBOXSTRICTRC rcStrict = IEMExecDecodedVmxoff(pVCpu, pVmxTransient->cbInstr); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) { /* VMXOFF changes the internal hwvirt. state but not anything that's visible to the guest other than RIP. */ ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_HWVIRT); } else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** * VM-exit handler for VMXON (VMX_EXIT_VMXON). Unconditional VM-exit. */ HMVMX_EXIT_DECL hmR0VmxExitVmxon(PVMCPU pVCpu, PVMXTRANSIENT pVmxTransient) { HMVMX_VALIDATE_EXIT_HANDLER_PARAMS(pVCpu, pVmxTransient); #ifndef VBOX_WITH_NESTED_HWVIRT_ONLY_IN_IEM int rc = hmR0VmxReadExitInstrLenVmcs(pVmxTransient); rc |= HMVMX_CPUMCTX_IMPORT_STATE(pVCpu, CPUMCTX_EXTRN_RSP | CPUMCTX_EXTRN_SREG_MASK | IEM_CPUMCTX_EXTRN_EXEC_DECODED_MEM_MASK); rc |= hmR0VmxReadExitInstrInfoVmcs(pVmxTransient); rc |= hmR0VmxReadExitQualVmcs(pVCpu, pVmxTransient); AssertRCReturn(rc, rc); HMVMX_CHECK_EXIT_DUE_TO_VMX_INSTR(pVCpu, pVmxTransient->uExitReason); VMXVEXITINFO ExitInfo; RT_ZERO(ExitInfo); ExitInfo.uReason = pVmxTransient->uExitReason; ExitInfo.u64Qual = pVmxTransient->uExitQual; ExitInfo.InstrInfo.u = pVmxTransient->ExitInstrInfo.u; ExitInfo.cbInstr = pVmxTransient->cbInstr; HMVMX_DECODE_MEM_OPERAND(pVCpu, ExitInfo.InstrInfo.u, ExitInfo.u64Qual, VMXMEMACCESS_READ, &ExitInfo.GCPtrEffAddr); VBOXSTRICTRC rcStrict = IEMExecDecodedVmxon(pVCpu, &ExitInfo); if (RT_LIKELY(rcStrict == VINF_SUCCESS)) ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_GUEST_RIP | HM_CHANGED_GUEST_RFLAGS | HM_CHANGED_GUEST_HWVIRT); else if (rcStrict == VINF_IEM_RAISED_XCPT) { ASMAtomicUoOrU64(&pVCpu->hm.s.fCtxChanged, HM_CHANGED_RAISED_XCPT_MASK); rcStrict = VINF_SUCCESS; } return rcStrict; #else return VERR_EM_INTERPRETER; #endif } /** @} */ #endif /* VBOX_WITH_NESTED_HWVIRT_VMX */