/* $Id: CPUMR0.cpp 48003 2013-08-22 17:45:07Z vboxsync $ */ /** @file * CPUM - Host Context Ring 0. */ /* * Copyright (C) 2006-2013 Oracle Corporation * * This file is part of VirtualBox Open Source Edition (OSE), as * available from http://www.virtualbox.org. This file is free software; * you can redistribute it and/or modify it under the terms of the GNU * General Public License (GPL) as published by the Free Software * Foundation, in version 2 as it comes in the "COPYING" file of the * VirtualBox OSE distribution. VirtualBox OSE is distributed in the * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind. */ /******************************************************************************* * Header Files * *******************************************************************************/ #define LOG_GROUP LOG_GROUP_CPUM #include #include "CPUMInternal.h" #include #include #include #include #include #include #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI # include # include # include #endif #include /******************************************************************************* * Structures and Typedefs * *******************************************************************************/ #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI /** * Local APIC mappings. */ typedef struct CPUMHOSTLAPIC { /** Indicates that the entry is in use and have valid data. */ bool fEnabled; /** Whether it's operating in X2APIC mode (EXTD). */ bool fX2Apic; /** The APIC version number. */ uint32_t uVersion; /** Has APIC_REG_LVT_THMR. Not used. */ uint32_t fHasThermal; /** The physical address of the APIC registers. */ RTHCPHYS PhysBase; /** The memory object entering the physical address. */ RTR0MEMOBJ hMemObj; /** The mapping object for hMemObj. */ RTR0MEMOBJ hMapObj; /** The mapping address APIC registers. * @remarks Different CPUs may use the same physical address to map their * APICs, so this pointer is only valid when on the CPU owning the * APIC. */ void *pv; } CPUMHOSTLAPIC; #endif /******************************************************************************* * Global Variables * *******************************************************************************/ #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI static CPUMHOSTLAPIC g_aLApics[RTCPUSET_MAX_CPUS]; #endif /******************************************************************************* * Internal Functions * *******************************************************************************/ #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI static int cpumR0MapLocalApics(void); static void cpumR0UnmapLocalApics(void); #endif static int cpumR0SaveHostDebugState(PVMCPU pVCpu); /** * Does the Ring-0 CPU initialization once during module load. * XXX Host-CPU hot-plugging? */ VMMR0_INT_DECL(int) CPUMR0ModuleInit(void) { int rc = VINF_SUCCESS; #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI rc = cpumR0MapLocalApics(); #endif return rc; } /** * Terminate the module. */ VMMR0_INT_DECL(int) CPUMR0ModuleTerm(void) { #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI cpumR0UnmapLocalApics(); #endif return VINF_SUCCESS; } /** * Check the CPUID features of this particular CPU and disable relevant features * for the guest which do not exist on this CPU. We have seen systems where the * X86_CPUID_FEATURE_ECX_MONITOR feature flag is only set on some host CPUs, see * @{bugref 5436}. * * @note This function might be called simultaneously on more than one CPU! * * @param idCpu The identifier for the CPU the function is called on. * @param pvUser1 Pointer to the VM structure. * @param pvUser2 Ignored. */ static DECLCALLBACK(void) cpumR0CheckCpuid(RTCPUID idCpu, void *pvUser1, void *pvUser2) { struct { uint32_t uLeave; /* leave to check */ uint32_t ecx; /* which bits in ecx to unify between CPUs */ uint32_t edx; /* which bits in edx to unify between CPUs */ } aCpuidUnify[] = { { 0x00000001, X86_CPUID_FEATURE_ECX_CX16 | X86_CPUID_FEATURE_ECX_MONITOR, X86_CPUID_FEATURE_EDX_CX8 } }; PVM pVM = (PVM)pvUser1; PCPUM pCPUM = &pVM->cpum.s; for (uint32_t i = 0; i < RT_ELEMENTS(aCpuidUnify); i++) { uint32_t uLeave = aCpuidUnify[i].uLeave; uint32_t eax, ebx, ecx, edx; ASMCpuId_Idx_ECX(uLeave, 0, &eax, &ebx, &ecx, &edx); PCPUMCPUID paLeaves; if (uLeave < 0x80000000) paLeaves = &pCPUM->aGuestCpuIdStd[uLeave - 0x00000000]; else if (uLeave < 0xc0000000) paLeaves = &pCPUM->aGuestCpuIdExt[uLeave - 0x80000000]; else paLeaves = &pCPUM->aGuestCpuIdCentaur[uLeave - 0xc0000000]; /* unify important bits */ ASMAtomicAndU32(&paLeaves->ecx, ecx | ~aCpuidUnify[i].ecx); ASMAtomicAndU32(&paLeaves->edx, edx | ~aCpuidUnify[i].edx); } } /** * Does Ring-0 CPUM initialization. * * This is mainly to check that the Host CPU mode is compatible * with VBox. * * @returns VBox status code. * @param pVM Pointer to the VM. */ VMMR0_INT_DECL(int) CPUMR0InitVM(PVM pVM) { LogFlow(("CPUMR0Init: %p\n", pVM)); /* * Check CR0 & CR4 flags. */ uint32_t u32CR0 = ASMGetCR0(); if ((u32CR0 & (X86_CR0_PE | X86_CR0_PG)) != (X86_CR0_PE | X86_CR0_PG)) /* a bit paranoid perhaps.. */ { Log(("CPUMR0Init: PE or PG not set. cr0=%#x\n", u32CR0)); return VERR_UNSUPPORTED_CPU_MODE; } /* * Check for sysenter and syscall usage. */ if (ASMHasCpuId()) { /* * SYSENTER/SYSEXIT * * Intel docs claim you should test both the flag and family, model & * stepping because some Pentium Pro CPUs have the SEP cpuid flag set, * but don't support it. AMD CPUs may support this feature in legacy * mode, they've banned it from long mode. Since we switch to 32-bit * mode when entering raw-mode context the feature would become * accessible again on AMD CPUs, so we have to check regardless of * host bitness. */ uint32_t u32CpuVersion; uint32_t u32Dummy; uint32_t fFeatures; ASMCpuId(1, &u32CpuVersion, &u32Dummy, &u32Dummy, &fFeatures); uint32_t u32Family = u32CpuVersion >> 8; uint32_t u32Model = (u32CpuVersion >> 4) & 0xF; uint32_t u32Stepping = u32CpuVersion & 0xF; if ( (fFeatures & X86_CPUID_FEATURE_EDX_SEP) && ( u32Family != 6 /* (> pentium pro) */ || u32Model >= 3 || u32Stepping >= 3 || !ASMIsIntelCpu()) ) { /* * Read the MSR and see if it's in use or not. */ uint32_t u32 = ASMRdMsr_Low(MSR_IA32_SYSENTER_CS); if (u32) { pVM->cpum.s.fHostUseFlags |= CPUM_USE_SYSENTER; Log(("CPUMR0Init: host uses sysenter cs=%08x%08x\n", ASMRdMsr_High(MSR_IA32_SYSENTER_CS), u32)); } } /* * SYSCALL/SYSRET * * This feature is indicated by the SEP bit returned in EDX by CPUID * function 0x80000001. Intel CPUs only supports this feature in * long mode. Since we're not running 64-bit guests in raw-mode there * are no issues with 32-bit intel hosts. */ uint32_t cExt = 0; ASMCpuId(0x80000000, &cExt, &u32Dummy, &u32Dummy, &u32Dummy); if ( cExt >= 0x80000001 && cExt <= 0x8000ffff) { uint32_t fExtFeaturesEDX = ASMCpuId_EDX(0x80000001); if (fExtFeaturesEDX & X86_CPUID_EXT_FEATURE_EDX_SYSCALL) { #ifdef RT_ARCH_X86 # ifdef VBOX_WITH_HYBRID_32BIT_KERNEL if (fExtFeaturesEDX & X86_CPUID_EXT_FEATURE_EDX_LONG_MODE) # else if (!ASMIsIntelCpu()) # endif #endif { uint64_t fEfer = ASMRdMsr(MSR_K6_EFER); if (fEfer & MSR_K6_EFER_SCE) { pVM->cpum.s.fHostUseFlags |= CPUM_USE_SYSCALL; Log(("CPUMR0Init: host uses syscall\n")); } } } } RTMpOnAll(cpumR0CheckCpuid, pVM, NULL); } /* * Check if debug registers are armed. * This ASSUMES that DR7.GD is not set, or that it's handled transparently! */ uint32_t u32DR7 = ASMGetDR7(); if (u32DR7 & X86_DR7_ENABLED_MASK) { for (VMCPUID i = 0; i < pVM->cCpus; i++) pVM->aCpus[i].cpum.s.fUseFlags |= CPUM_USE_DEBUG_REGS_HOST; Log(("CPUMR0Init: host uses debug registers (dr7=%x)\n", u32DR7)); } return VINF_SUCCESS; } /** * Lazily sync in the FPU/XMM state * * @returns VBox status code. * @param pVM Pointer to the VM. * @param pVCpu Pointer to the VMCPU. * @param pCtx Pointer to the guest CPU context. */ VMMR0_INT_DECL(int) CPUMR0LoadGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { Assert(pVM->cpum.s.CPUFeatures.edx.u1FXSR); Assert(ASMGetCR4() & X86_CR4_OSFSXR); /* If the FPU state has already been loaded, then it's a guest trap. */ if (CPUMIsGuestFPUStateActive(pVCpu)) { Assert( ((pCtx->cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS | X86_CR0_EM)) || ((pCtx->cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) == (X86_CR0_MP | X86_CR0_TS))); return VINF_EM_RAW_GUEST_TRAP; } /* * There are two basic actions: * 1. Save host fpu and restore guest fpu. * 2. Generate guest trap. * * When entering the hypervisor we'll always enable MP (for proper wait * trapping) and TS (for intercepting all fpu/mmx/sse stuff). The EM flag * is taken from the guest OS in order to get proper SSE handling. * * * Actions taken depending on the guest CR0 flags: * * 3 2 1 * TS | EM | MP | FPUInstr | WAIT :: VMM Action * ------------------------------------------------------------------------ * 0 | 0 | 0 | Exec | Exec :: Clear TS & MP, Save HC, Load GC. * 0 | 0 | 1 | Exec | Exec :: Clear TS, Save HC, Load GC. * 0 | 1 | 0 | #NM | Exec :: Clear TS & MP, Save HC, Load GC. * 0 | 1 | 1 | #NM | Exec :: Clear TS, Save HC, Load GC. * 1 | 0 | 0 | #NM | Exec :: Clear MP, Save HC, Load GC. (EM is already cleared.) * 1 | 0 | 1 | #NM | #NM :: Go to guest taking trap there. * 1 | 1 | 0 | #NM | Exec :: Clear MP, Save HC, Load GC. (EM is already set.) * 1 | 1 | 1 | #NM | #NM :: Go to guest taking trap there. */ switch (pCtx->cr0 & (X86_CR0_MP | X86_CR0_EM | X86_CR0_TS)) { case X86_CR0_MP | X86_CR0_TS: case X86_CR0_MP | X86_CR0_TS | X86_CR0_EM: return VINF_EM_RAW_GUEST_TRAP; default: break; } #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) if (CPUMIsGuestInLongModeEx(pCtx)) { Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_SYNC_FPU_STATE)); /* Save the host state and record the fact (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM). */ cpumR0SaveHostFPUState(&pVCpu->cpum.s); /* Restore the state on entry as we need to be in 64 bits mode to access the full state. */ pVCpu->cpum.s.fUseFlags |= CPUM_SYNC_FPU_STATE; } else #endif { #ifndef CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE # if defined(VBOX_WITH_HYBRID_32BIT_KERNEL) || defined(VBOX_WITH_KERNEL_USING_XMM) /** @todo remove the #else here and move cpumHandleLazyFPUAsm back to VMMGC after branching out 3.0!!. */ Assert(!(pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE)); /** @todo Move the FFXR handling down into * cpumR0SaveHostRestoreguestFPUState to optimize the * VBOX_WITH_KERNEL_USING_XMM handling. */ /* Clear MSR_K6_EFER_FFXSR or else we'll be unable to save/restore the XMM state with fxsave/fxrstor. */ uint64_t SavedEFER = 0; if (pVM->cpum.s.CPUFeaturesExt.edx & X86_CPUID_AMD_FEATURE_EDX_FFXSR) { SavedEFER = ASMRdMsr(MSR_K6_EFER); if (SavedEFER & MSR_K6_EFER_FFXSR) { ASMWrMsr(MSR_K6_EFER, SavedEFER & ~MSR_K6_EFER_FFXSR); pVCpu->cpum.s.fUseFlags |= CPUM_USED_MANUAL_XMM_RESTORE; } } /* Do the job and record that we've switched FPU state. */ cpumR0SaveHostRestoreGuestFPUState(&pVCpu->cpum.s); /* Restore EFER. */ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE) ASMWrMsr(MSR_K6_EFER, SavedEFER); # else uint64_t oldMsrEFERHost = 0; uint32_t oldCR0 = ASMGetCR0(); /* Clear MSR_K6_EFER_FFXSR or else we'll be unable to save/restore the XMM state with fxsave/fxrstor. */ if (pVM->cpum.s.CPUFeaturesExt.edx & X86_CPUID_AMD_FEATURE_EDX_FFXSR) { /** @todo Do we really need to read this every time?? The host could change this on the fly though. * bird: what about starting by skipping the ASMWrMsr below if we didn't * change anything? Ditto for the stuff in CPUMR0SaveGuestFPU. */ oldMsrEFERHost = ASMRdMsr(MSR_K6_EFER); if (oldMsrEFERHost & MSR_K6_EFER_FFXSR) { ASMWrMsr(MSR_K6_EFER, oldMsrEFERHost & ~MSR_K6_EFER_FFXSR); pVCpu->cpum.s.fUseFlags |= CPUM_USED_MANUAL_XMM_RESTORE; } } /* If we sync the FPU/XMM state on-demand, then we can continue execution as if nothing has happened. */ int rc = CPUMHandleLazyFPU(pVCpu); AssertRC(rc); Assert(CPUMIsGuestFPUStateActive(pVCpu)); /* Restore EFER MSR */ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE) ASMWrMsr(MSR_K6_EFER, oldMsrEFERHost); /* CPUMHandleLazyFPU could have changed CR0; restore it. */ ASMSetCR0(oldCR0); # endif #else /* CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE */ /* * Save the FPU control word and MXCSR, so we can restore the state properly afterwards. * We don't want the guest to be able to trigger floating point/SSE exceptions on the host. */ pVCpu->cpum.s.Host.fpu.FCW = CPUMGetFCW(); if (pVM->cpum.s.CPUFeatures.edx.u1SSE) pVCpu->cpum.s.Host.fpu.MXCSR = CPUMGetMXCSR(); cpumR0LoadFPU(pCtx); /* * The MSR_K6_EFER_FFXSR feature is AMD only so far, but check the cpuid just in case Intel adds it in the future. * * MSR_K6_EFER_FFXSR changes the behaviour of fxsave and fxrstore: the XMM state isn't saved/restored */ if (pVM->cpum.s.CPUFeaturesExt.edx & X86_CPUID_AMD_FEATURE_EDX_FFXSR) { /** @todo Do we really need to read this every time?? The host could change this on the fly though. */ uint64_t msrEFERHost = ASMRdMsr(MSR_K6_EFER); if (msrEFERHost & MSR_K6_EFER_FFXSR) { /* fxrstor doesn't restore the XMM state! */ cpumR0LoadXMM(pCtx); pVCpu->cpum.s.fUseFlags |= CPUM_USED_MANUAL_XMM_RESTORE; } } #endif /* CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE */ } Assert((pVCpu->cpum.s.fUseFlags & (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM)) == (CPUM_USED_FPU | CPUM_USED_FPU_SINCE_REM)); return VINF_SUCCESS; } /** * Save guest FPU/XMM state * * @returns VBox status code. * @param pVM Pointer to the VM. * @param pVCpu Pointer to the VMCPU. * @param pCtx Pointer to the guest CPU context. */ VMMR0_INT_DECL(int) CPUMR0SaveGuestFPU(PVM pVM, PVMCPU pVCpu, PCPUMCTX pCtx) { Assert(pVM->cpum.s.CPUFeatures.edx.u1FXSR); Assert(ASMGetCR4() & X86_CR4_OSFSXR); AssertReturn((pVCpu->cpum.s.fUseFlags & CPUM_USED_FPU), VINF_SUCCESS); NOREF(pCtx); #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) if (CPUMIsGuestInLongModeEx(pCtx)) { if (!(pVCpu->cpum.s.fUseFlags & CPUM_SYNC_FPU_STATE)) { HMR0SaveFPUState(pVM, pVCpu, pCtx); cpumR0RestoreHostFPUState(&pVCpu->cpum.s); } /* else nothing to do; we didn't perform a world switch */ } else #endif { #ifndef CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE # ifdef VBOX_WITH_KERNEL_USING_XMM /* * We've already saved the XMM registers in the assembly wrapper, so * we have to save them before saving the entire FPU state and put them * back afterwards. */ /** @todo This could be skipped if MSR_K6_EFER_FFXSR is set, but * I'm not able to test such an optimization tonight. * We could just all this in assembly. */ uint128_t aGuestXmmRegs[16]; memcpy(&aGuestXmmRegs[0], &pVCpu->cpum.s.Guest.fpu.aXMM[0], sizeof(aGuestXmmRegs)); # endif /* Clear MSR_K6_EFER_FFXSR or else we'll be unable to save/restore the XMM state with fxsave/fxrstor. */ uint64_t oldMsrEFERHost = 0; if (pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE) { oldMsrEFERHost = ASMRdMsr(MSR_K6_EFER); ASMWrMsr(MSR_K6_EFER, oldMsrEFERHost & ~MSR_K6_EFER_FFXSR); } cpumR0SaveGuestRestoreHostFPUState(&pVCpu->cpum.s); /* Restore EFER MSR */ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE) ASMWrMsr(MSR_K6_EFER, oldMsrEFERHost | MSR_K6_EFER_FFXSR); # ifdef VBOX_WITH_KERNEL_USING_XMM memcpy(&pVCpu->cpum.s.Guest.fpu.aXMM[0], &aGuestXmmRegs[0], sizeof(aGuestXmmRegs)); # endif #else /* CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE */ # ifdef VBOX_WITH_KERNEL_USING_XMM # error "Fix all the NM_TRAPS_IN_KERNEL_MODE code path. I'm not going to fix unused code now." # endif cpumR0SaveFPU(pCtx); if (pVCpu->cpum.s.fUseFlags & CPUM_USED_MANUAL_XMM_RESTORE) { /* fxsave doesn't save the XMM state! */ cpumR0SaveXMM(pCtx); } /* * Restore the original FPU control word and MXCSR. * We don't want the guest to be able to trigger floating point/SSE exceptions on the host. */ cpumR0SetFCW(pVCpu->cpum.s.Host.fpu.FCW); if (pVM->cpum.s.CPUFeatures.edx.u1SSE) cpumR0SetMXCSR(pVCpu->cpum.s.Host.fpu.MXCSR); #endif /* CPUM_CAN_HANDLE_NM_TRAPS_IN_KERNEL_MODE */ } pVCpu->cpum.s.fUseFlags &= ~(CPUM_USED_FPU | CPUM_SYNC_FPU_STATE | CPUM_USED_MANUAL_XMM_RESTORE); return VINF_SUCCESS; } /** * Saves the host debug state, setting CPUM_USED_HOST_DEBUG_STATE and loading * DR7 with safe values. * * @returns VBox status code. * @param pVCpu Pointer to the VMCPU. */ static int cpumR0SaveHostDebugState(PVMCPU pVCpu) { /* * Save the host state. */ #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL AssertCompile((uintptr_t)&pVCpu->cpum.s.Host.dr3 - (uintptr_t)&pVCpu->cpum.s.Host.dr0 == sizeof(uint64_t) * 3); cpumR0SaveDRx(&pVCpu->cpum.s.Host.dr0); #else pVCpu->cpum.s.Host.dr0 = ASMGetDR0(); pVCpu->cpum.s.Host.dr1 = ASMGetDR1(); pVCpu->cpum.s.Host.dr2 = ASMGetDR2(); pVCpu->cpum.s.Host.dr3 = ASMGetDR3(); #endif pVCpu->cpum.s.Host.dr6 = ASMGetDR6(); /** @todo dr7 might already have been changed to 0x400; don't care right now as it's harmless. */ pVCpu->cpum.s.Host.dr7 = ASMGetDR7(); /* Preemption paranoia. */ ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_HOST); /* * Make sure DR7 is harmless or else we could trigger breakpoints when * load guest or hypervisor DRx values later. */ if (pVCpu->cpum.s.Host.dr7 != X86_DR7_INIT_VAL) ASMSetDR7(X86_DR7_INIT_VAL); return VINF_SUCCESS; } /** * Saves the guest DRx state residing in host registers and restore the host * register values. * * The guest DRx state is only saved if CPUMR0LoadGuestDebugState was called, * since it's assumed that we're shadowing the guest DRx register values * accurately when using the combined hypervisor debug register values * (CPUMR0LoadHyperDebugState). * * @returns true if either guest or hypervisor debug registers were loaded. * @param pVCpu The cross context CPU structure for the calling EMT. * @param fDr6 Whether to include DR6 or not. * @thread EMT(pVCpu) */ VMMR0_INT_DECL(bool) CPUMR0DebugStateMaybeSaveGuestAndRestoreHost(PVMCPU pVCpu, bool fDr6) { bool const fDrXLoaded = RT_BOOL(pVCpu->cpum.s.fUseFlags & (CPUM_USED_DEBUG_REGS_GUEST | CPUM_USED_DEBUG_REGS_HYPER)); /* * Do we need to save the guest DRx registered loaded into host registers? * (DR7 and DR6 (if fDr6 is true) are left to the caller.) */ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_GUEST) { #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest)) { uint64_t uDr6 = pVCpu->cpum.s.Guest.dr[6]; HMR0SaveDebugState(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->cpum.s.Guest); if (!fDr6) pVCpu->cpum.s.Guest.dr[6] = uDr6; } else #endif { #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL cpumR0SaveDRx(&pVCpu->cpum.s.Guest.dr[0]); #else pVCpu->cpum.s.Guest.dr[0] = ASMGetDR0(); pVCpu->cpum.s.Guest.dr[1] = ASMGetDR1(); pVCpu->cpum.s.Guest.dr[2] = ASMGetDR2(); pVCpu->cpum.s.Guest.dr[3] = ASMGetDR3(); #endif if (fDr6) pVCpu->cpum.s.Guest.dr[6] = ASMGetDR6(); } } ASMAtomicAndU32(&pVCpu->cpum.s.fUseFlags, ~( CPUM_USED_DEBUG_REGS_GUEST | CPUM_USED_DEBUG_REGS_HYPER | CPUM_SYNC_DEBUG_REGS_GUEST | CPUM_SYNC_DEBUG_REGS_HYPER)); /* * Restore the host's debug state. DR0-3, DR6 and only then DR7! */ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_HOST) { /* A bit of paranoia first... */ uint64_t uCurDR7 = ASMGetDR7(); if (uCurDR7 != X86_DR7_INIT_VAL) ASMSetDR7(X86_DR7_INIT_VAL); #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL AssertCompile((uintptr_t)&pVCpu->cpum.s.Host.dr3 - (uintptr_t)&pVCpu->cpum.s.Host.dr0 == sizeof(uint64_t) * 3); cpumR0LoadDRx(&pVCpu->cpum.s.Host.dr0); #else ASMSetDR0(pVCpu->cpum.s.Host.dr0); ASMSetDR1(pVCpu->cpum.s.Host.dr1); ASMSetDR2(pVCpu->cpum.s.Host.dr2); ASMSetDR3(pVCpu->cpum.s.Host.dr3); #endif /** @todo consider only updating if they differ, esp. DR6. Need to figure how * expensive DRx reads are over DRx writes. */ ASMSetDR6(pVCpu->cpum.s.Host.dr6); ASMSetDR7(pVCpu->cpum.s.Host.dr7); ASMAtomicAndU32(&pVCpu->cpum.s.fUseFlags, ~CPUM_USED_DEBUG_REGS_HOST); } return fDrXLoaded; } /** * Saves the guest DRx state if it resides host registers. * * This does NOT clear any use flags, so the host registers remains loaded with * the guest DRx state upon return. The purpose is only to make sure the values * in the CPU context structure is up to date. * * @returns true if the host registers contains guest values, false if not. * @param pVCpu The cross context CPU structure for the calling EMT. * @param fDr6 Whether to include DR6 or not. * @thread EMT(pVCpu) */ VMMR0_INT_DECL(bool) CPUMR0DebugStateMaybeSaveGuest(PVMCPU pVCpu, bool fDr6) { /* * Do we need to save the guest DRx registered loaded into host registers? * (DR7 and DR6 (if fDr6 is true) are left to the caller.) */ if (pVCpu->cpum.s.fUseFlags & CPUM_USED_DEBUG_REGS_GUEST) { #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest)) { uint64_t uDr6 = pVCpu->cpum.s.Guest.dr[6]; HMR0SaveDebugState(pVCpu->CTX_SUFF(pVM), pVCpu, &pVCpu->cpum.s.Guest); if (!fDr6) pVCpu->cpum.s.Guest.dr[6] = uDr6; } else #endif { #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL cpumR0SaveDRx(&pVCpu->cpum.s.Guest.dr[0]); #else pVCpu->cpum.s.Guest.dr[0] = ASMGetDR0(); pVCpu->cpum.s.Guest.dr[1] = ASMGetDR1(); pVCpu->cpum.s.Guest.dr[2] = ASMGetDR2(); pVCpu->cpum.s.Guest.dr[3] = ASMGetDR3(); #endif if (fDr6) pVCpu->cpum.s.Guest.dr[6] = ASMGetDR6(); } return true; } return false; } /** * Lazily sync in the debug state. * * @param pVCpu The cross context CPU structure for the calling EMT. * @param fDr6 Whether to include DR6 or not. * @thread EMT(pVCpu) */ VMMR0_INT_DECL(void) CPUMR0LoadGuestDebugState(PVMCPU pVCpu, bool fDr6) { /* * Save the host state and disarm all host BPs. */ cpumR0SaveHostDebugState(pVCpu); Assert(ASMGetDR7() == X86_DR7_INIT_VAL); /* * Activate the guest state DR0-3. * DR7 and DR6 (if fDr6 is true) are left to the caller. */ #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest)) ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_SYNC_DEBUG_REGS_GUEST); /* Postpone it to the world switch. */ else #endif { #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL cpumR0LoadDRx(&pVCpu->cpum.s.Guest.dr[0]); #else ASMSetDR0(pVCpu->cpum.s.Guest.dr[0]); ASMSetDR1(pVCpu->cpum.s.Guest.dr[1]); ASMSetDR2(pVCpu->cpum.s.Guest.dr[2]); ASMSetDR3(pVCpu->cpum.s.Guest.dr[3]); #endif if (fDr6) ASMSetDR6(pVCpu->cpum.s.Guest.dr[6]); ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_GUEST); } } /** * Lazily sync in the hypervisor debug state * * @returns VBox status code. * @param pVCpu The cross context CPU structure for the calling EMT. * @param fDr6 Whether to include DR6 or not. * @thread EMT(pVCpu) */ VMMR0_INT_DECL(void) CPUMR0LoadHyperDebugState(PVMCPU pVCpu, bool fDr6) { /* * Save the host state and disarm all host BPs. */ cpumR0SaveHostDebugState(pVCpu); Assert(ASMGetDR7() == X86_DR7_INIT_VAL); /* * Make sure the hypervisor values are up to date. */ CPUMRecalcHyperDRx(pVCpu, UINT8_MAX /* no loading, please */, true); /* * Activate the guest state DR0-3. * DR7 and DR6 (if fDr6 is true) are left to the caller. */ #if HC_ARCH_BITS == 32 && defined(VBOX_WITH_64_BITS_GUESTS) && !defined(VBOX_WITH_HYBRID_32BIT_KERNEL) if (CPUMIsGuestInLongModeEx(&pVCpu->cpum.s.Guest)) ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_SYNC_DEBUG_REGS_HYPER); /* Postpone it. */ else #endif { #ifdef VBOX_WITH_HYBRID_32BIT_KERNEL cpumR0LoadDRx(&pVCpu->cpum.s.Hyper.dr[0]); #else ASMSetDR0(pVCpu->cpum.s.Hyper.dr[0]); ASMSetDR1(pVCpu->cpum.s.Hyper.dr[1]); ASMSetDR2(pVCpu->cpum.s.Hyper.dr[2]); ASMSetDR3(pVCpu->cpum.s.Hyper.dr[3]); #endif if (fDr6) ASMSetDR6(X86_DR6_INIT_VAL); ASMAtomicOrU32(&pVCpu->cpum.s.fUseFlags, CPUM_USED_DEBUG_REGS_HYPER); } } #ifdef VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI /** * Per-CPU callback that probes the CPU for APIC support. * * @param idCpu The identifier for the CPU the function is called on. * @param pvUser1 Ignored. * @param pvUser2 Ignored. */ static DECLCALLBACK(void) cpumR0MapLocalApicCpuProber(RTCPUID idCpu, void *pvUser1, void *pvUser2) { NOREF(pvUser1); NOREF(pvUser2); int iCpu = RTMpCpuIdToSetIndex(idCpu); AssertReturnVoid(iCpu >= 0 && (unsigned)iCpu < RT_ELEMENTS(g_aLApics)); /* * Check for APIC support. */ uint32_t uMaxLeaf, u32EBX, u32ECX, u32EDX; ASMCpuId(0, &uMaxLeaf, &u32EBX, &u32ECX, &u32EDX); if ( ( ASMIsIntelCpuEx(u32EBX, u32ECX, u32EDX) || ASMIsAmdCpuEx(u32EBX, u32ECX, u32EDX) || ASMIsViaCentaurCpuEx(u32EBX, u32ECX, u32EDX)) && ASMIsValidStdRange(uMaxLeaf)) { uint32_t uDummy; ASMCpuId(1, &uDummy, &u32EBX, &u32ECX, &u32EDX); if ( (u32EDX & X86_CPUID_FEATURE_EDX_APIC) && (u32EDX & X86_CPUID_FEATURE_EDX_MSR)) { /* * Safe to access the MSR. Read it and calc the BASE (a little complicated). */ uint64_t u64ApicBase = ASMRdMsr(MSR_IA32_APICBASE); uint64_t u64Mask = MSR_IA32_APICBASE_BASE_MIN; /* see Intel Manual: Local APIC Status and Location: MAXPHYADDR default is bit 36 */ uint32_t uMaxExtLeaf; ASMCpuId(0x80000000, &uMaxExtLeaf, &u32EBX, &u32ECX, &u32EDX); if ( uMaxExtLeaf >= UINT32_C(0x80000008) && ASMIsValidExtRange(uMaxExtLeaf)) { uint32_t u32PhysBits; ASMCpuId(0x80000008, &u32PhysBits, &u32EBX, &u32ECX, &u32EDX); u32PhysBits &= 0xff; u64Mask = ((UINT64_C(1) << u32PhysBits) - 1) & UINT64_C(0xfffffffffffff000); } AssertCompile(sizeof(g_aLApics[iCpu].PhysBase) == sizeof(u64ApicBase)); g_aLApics[iCpu].PhysBase = u64ApicBase & u64Mask; g_aLApics[iCpu].fEnabled = RT_BOOL(u64ApicBase & MSR_IA32_APICBASE_EN); g_aLApics[iCpu].fX2Apic = (u64ApicBase & (MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_EN)) == (MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_EN); } } } /** * Per-CPU callback that verifies our APIC expectations. * * @param idCpu The identifier for the CPU the function is called on. * @param pvUser1 Ignored. * @param pvUser2 Ignored. */ static DECLCALLBACK(void) cpumR0MapLocalApicCpuChecker(RTCPUID idCpu, void *pvUser1, void *pvUser2) { int iCpu = RTMpCpuIdToSetIndex(idCpu); AssertReturnVoid(iCpu >= 0 && (unsigned)iCpu < RT_ELEMENTS(g_aLApics)); if (!g_aLApics[iCpu].fEnabled) return; /* * 0x0X 82489 external APIC * 0x1X Local APIC * 0x2X..0xFF reserved */ uint32_t uApicVersion; if (g_aLApics[iCpu].fX2Apic) uApicVersion = ApicX2RegRead32(APIC_REG_VERSION); else uApicVersion = ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_VERSION); if ((APIC_REG_VERSION_GET_VER(uApicVersion) & 0xF0) == 0x10) { g_aLApics[iCpu].uVersion = uApicVersion; g_aLApics[iCpu].fHasThermal = APIC_REG_VERSION_GET_MAX_LVT(uApicVersion) >= 5; #if 0 /* enable if you need it. */ if (g_aLApics[iCpu].fX2Apic) SUPR0Printf("CPUM: X2APIC %02u - ver %#010x, lint0=%#07x lint1=%#07x pc=%#07x thmr=%#07x\n", iCpu, uApicVersion, ApicX2RegRead32(APIC_REG_LVT_LINT0), ApicX2RegRead32(APIC_REG_LVT_LINT1), ApicX2RegRead32(APIC_REG_LVT_PC), ApicX2RegRead32(APIC_REG_LVT_THMR) ); else SUPR0Printf("CPUM: APIC %02u at %RGp (mapped at %p) - ver %#010x, lint0=%#07x lint1=%#07x pc=%#07x thmr=%#07x\n", iCpu, g_aLApics[iCpu].PhysBase, g_aLApics[iCpu].pv, uApicVersion, ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_LINT0), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_LINT1), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_PC), ApicRegRead(g_aLApics[iCpu].pv, APIC_REG_LVT_THMR) ); #endif } else { g_aLApics[iCpu].fEnabled = false; g_aLApics[iCpu].fX2Apic = false; SUPR0Printf("VBox/CPUM: Unsupported APIC version %#x (iCpu=%d)\n", uApicVersion, iCpu); } } /** * Map the MMIO page of each local APIC in the system. */ static int cpumR0MapLocalApics(void) { /* * Check that we'll always stay within the array bounds. */ if (RTMpGetArraySize() > RT_ELEMENTS(g_aLApics)) { LogRel(("CPUM: Too many real CPUs/cores/threads - %u, max %u\n", RTMpGetArraySize(), RT_ELEMENTS(g_aLApics))); return VERR_TOO_MANY_CPUS; } /* * Create mappings for all online CPUs we think have legacy APICs. */ int rc = RTMpOnAll(cpumR0MapLocalApicCpuProber, NULL, NULL); for (unsigned iCpu = 0; RT_SUCCESS(rc) && iCpu < RT_ELEMENTS(g_aLApics); iCpu++) { if (g_aLApics[iCpu].fEnabled && !g_aLApics[iCpu].fX2Apic) { rc = RTR0MemObjEnterPhys(&g_aLApics[iCpu].hMemObj, g_aLApics[iCpu].PhysBase, PAGE_SIZE, RTMEM_CACHE_POLICY_MMIO); if (RT_SUCCESS(rc)) { rc = RTR0MemObjMapKernel(&g_aLApics[iCpu].hMapObj, g_aLApics[iCpu].hMemObj, (void *)-1, PAGE_SIZE, RTMEM_PROT_READ | RTMEM_PROT_WRITE); if (RT_SUCCESS(rc)) { g_aLApics[iCpu].pv = RTR0MemObjAddress(g_aLApics[iCpu].hMapObj); continue; } RTR0MemObjFree(g_aLApics[iCpu].hMemObj, true /* fFreeMappings */); } g_aLApics[iCpu].fEnabled = false; } g_aLApics[iCpu].pv = NULL; } /* * Check the APICs. */ if (RT_SUCCESS(rc)) rc = RTMpOnAll(cpumR0MapLocalApicCpuChecker, NULL, NULL); if (RT_FAILURE(rc)) { cpumR0UnmapLocalApics(); return rc; } #ifdef LOG_ENABLED /* * Log the result (pretty useless, requires enabling CPUM in VBoxDrv * and !VBOX_WITH_R0_LOGGING). */ if (LogIsEnabled()) { uint32_t cEnabled = 0; uint32_t cX2Apics = 0; for (unsigned iCpu = 0; iCpu < RT_ELEMENTS(g_aLApics); iCpu++) if (g_aLApics[iCpu].fEnabled) { cEnabled++; cX2Apics += g_aLApics[iCpu].fX2Apic; } Log(("CPUM: %u APICs, %u X2APICs\n", cEnabled, cX2Apics)); } #endif return VINF_SUCCESS; } /** * Unmap the Local APIC of all host CPUs. */ static void cpumR0UnmapLocalApics(void) { for (unsigned iCpu = RT_ELEMENTS(g_aLApics); iCpu-- > 0;) { if (g_aLApics[iCpu].pv) { RTR0MemObjFree(g_aLApics[iCpu].hMapObj, true /* fFreeMappings */); RTR0MemObjFree(g_aLApics[iCpu].hMemObj, true /* fFreeMappings */); g_aLApics[iCpu].hMapObj = NIL_RTR0MEMOBJ; g_aLApics[iCpu].hMemObj = NIL_RTR0MEMOBJ; g_aLApics[iCpu].fEnabled = false; g_aLApics[iCpu].fX2Apic = false; g_aLApics[iCpu].pv = NULL; } } } /** * Updates CPUMCPU::pvApicBase and CPUMCPU::fX2Apic prior to world switch. * * Writes the Local APIC mapping address of the current host CPU to CPUMCPU so * the world switchers can access the APIC registers for the purpose of * disabling and re-enabling the NMIs. Must be called with disabled preemption * or disabled interrupts! * * @param pVCpu Pointer to the cross context CPU structure of the * calling EMT. * @param idHostCpu The ID of the current host CPU. */ VMMR0_INT_DECL(void) CPUMR0SetLApic(PVMCPU pVCpu, RTCPUID idHostCpu) { int idxCpu = RTMpCpuIdToSetIndex(idHostCpu); pVCpu->cpum.s.pvApicBase = g_aLApics[idxCpu].pv; pVCpu->cpum.s.fX2Apic = g_aLApics[idxCpu].fX2Apic; // Log6(("CPUMR0SetLApic: pvApicBase=%p fX2Apic=%d\n", g_aLApics[idxCpu].pv, g_aLApics[idxCpu].fX2Apic)); } #endif /* VBOX_WITH_VMMR0_DISABLE_LAPIC_NMI */