VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/GIMKvm.cpp@ 55808

Last change on this file since 55808 was 55688, checked in by vboxsync, 10 years ago

VMM/GIM: comment.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 17.2 KB
Line 
1/* $Id: GIMKvm.cpp 55688 2015-05-06 09:22:10Z vboxsync $ */
2/** @file
3 * GIM - Guest Interface Manager, KVM implementation.
4 */
5
6/*
7 * Copyright (C) 2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18/*******************************************************************************
19* Header Files *
20*******************************************************************************/
21#define LOG_GROUP LOG_GROUP_GIM
22#include "GIMInternal.h"
23
24#include <iprt/asm-math.h>
25#include <iprt/assert.h>
26#include <iprt/err.h>
27#include <iprt/string.h>
28#include <iprt/mem.h>
29#include <iprt/spinlock.h>
30
31#include <VBox/vmm/cpum.h>
32#include <VBox/disopcode.h>
33#include <VBox/vmm/ssm.h>
34#include <VBox/vmm/vm.h>
35#include <VBox/vmm/hm.h>
36#include <VBox/vmm/pdmapi.h>
37#include <VBox/version.h>
38
39
40/*******************************************************************************
41* Defined Constants And Macros *
42*******************************************************************************/
43
/**
 * GIM KVM saved-state version.
 *
 * Written as the first item of the KVM unit by gimR3KvmSave() and compared
 * for equality by gimR3KvmLoad(); any other value makes the load fail with
 * VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION.
 */
#define GIM_KVM_SAVED_STATE_VERSION UINT32_C(1)
48
49/**
50 * VBox internal struct. to passback to EMT rendezvous callback while enabling
51 * the KVM wall-clock.
52 */
53typedef struct KVMWALLCLOCKINFO
54{
55 /** Guest physical address of the wall-clock struct. */
56 RTGCPHYS GCPhysWallClock;
57} KVMWALLCLOCKINFO;
58/** Pointer to the wall-clock info. struct. */
59typedef KVMWALLCLOCKINFO *PKVMWALLCLOCKINFO;
60
61/*******************************************************************************
62* Global Variables *
63*******************************************************************************/
64#ifdef VBOX_WITH_STATISTICS
65# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
66 { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName, { 0 }, { 0 }, { 0 }, { 0 } }
67#else
68# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
69 { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName }
70#endif
71
72/**
73 * Array of MSR ranges supported by KVM.
74 */
75static CPUMMSRRANGE const g_aMsrRanges_Kvm[] =
76{
77 GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE0_START, MSR_GIM_KVM_RANGE0_END, "KVM range 0"),
78 GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE1_START, MSR_GIM_KVM_RANGE1_END, "KVM range 1")
79};
80#undef GIMKVM_MSRRANGE
81
82
83/**
84 * Initializes the KVM GIM provider.
85 *
86 * @returns VBox status code.
87 * @param pVM Pointer to the VM.
88 * @param uVersion The interface version this VM should use.
89 */
90VMMR3_INT_DECL(int) gimR3KvmInit(PVM pVM)
91{
92 AssertReturn(pVM, VERR_INVALID_PARAMETER);
93 AssertReturn(pVM->gim.s.enmProviderId == GIMPROVIDERID_KVM, VERR_INTERNAL_ERROR_5);
94
95 int rc;
96 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
97
98 /*
99 * Determine interface capabilities based on the version.
100 */
101 if (!pVM->gim.s.u32Version)
102 {
103 /* Basic features. */
104 pKvm->uBaseFeat = 0
105 | GIM_KVM_BASE_FEAT_CLOCK_OLD
106 //| GIM_KVM_BASE_FEAT_NOP_IO_DELAY
107 //| GIM_KVM_BASE_FEAT_MMU_OP
108 | GIM_KVM_BASE_FEAT_CLOCK
109 //| GIM_KVM_BASE_FEAT_ASYNC_PF
110 //| GIM_KVM_BASE_FEAT_STEAL_TIME
111 //| GIM_KVM_BASE_FEAT_PV_EOI
112 | GIM_KVM_BASE_FEAT_PV_UNHALT
113 ;
114 /* Rest of the features are determined in gimR3KvmInitCompleted(). */
115 }
116
117 /*
118 * Expose HVP (Hypervisor Present) bit to the guest.
119 */
120 CPUMSetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_HVP);
121
122 /*
123 * Modify the standard hypervisor leaves for KVM.
124 */
125 CPUMCPUIDLEAF HyperLeaf;
126 RT_ZERO(HyperLeaf);
127 HyperLeaf.uLeaf = UINT32_C(0x40000000);
128 HyperLeaf.uEax = UINT32_C(0x40000001); /* Minimum value for KVM is 0x40000001. */
129 HyperLeaf.uEbx = 0x4B4D564B; /* 'KVMK' */
130 HyperLeaf.uEcx = 0x564B4D56; /* 'VMKV' */
131 HyperLeaf.uEdx = 0x0000004D; /* 'M000' */
132 rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
133 AssertLogRelRCReturn(rc, rc);
134
135 /*
136 * Add KVM specific leaves.
137 */
138 HyperLeaf.uLeaf = UINT32_C(0x40000001);
139 HyperLeaf.uEax = pKvm->uBaseFeat;
140 HyperLeaf.uEbx = 0; /* Reserved */
141 HyperLeaf.uEcx = 0; /* Reserved */
142 HyperLeaf.uEdx = 0; /* Reserved */
143 rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
144 AssertLogRelRCReturn(rc, rc);
145
146 /*
147 * Insert all MSR ranges of KVM.
148 */
149 for (unsigned i = 0; i < RT_ELEMENTS(g_aMsrRanges_Kvm); i++)
150 {
151 rc = CPUMR3MsrRangesInsert(pVM, &g_aMsrRanges_Kvm[i]);
152 AssertLogRelRCReturn(rc, rc);
153 }
154
155 /*
156 * Setup hypercall and #UD handling.
157 */
158 for (VMCPUID i = 0; i < pVM->cCpus; i++)
159 VMMHypercallsEnable(&pVM->aCpus[i]);
160
161 if (ASMIsAmdCpu())
162 {
163 pKvm->fTrapXcptUD = true;
164 pKvm->uOpCodeNative = OP_VMMCALL;
165 }
166 else
167 {
168 Assert(ASMIsIntelCpu() || ASMIsViaCentaurCpu());
169 pKvm->fTrapXcptUD = false;
170 pKvm->uOpCodeNative = OP_VMCALL;
171 }
172
173 /* We always need to trap VMCALL/VMMCALL hypercall using #UDs for raw-mode VMs. */
174 if (!HMIsEnabled(pVM))
175 pKvm->fTrapXcptUD = true;
176
177 return VINF_SUCCESS;
178}
179
180
181/**
182 * Initializes remaining bits of the KVM provider.
183 *
184 * This is called after initializing HM and almost all other VMM components.
185 *
186 * @returns VBox status code.
187 * @param pVM Pointer to the VM.
188 */
189VMMR3_INT_DECL(int) gimR3KvmInitCompleted(PVM pVM)
190{
191 if (TMR3CpuTickIsFixedRateMonotonic(pVM, true /* fWithParavirtEnabled */))
192 {
193 /** @todo We might want to consider just enabling this bit *always*. As far
194 * as I can see in the Linux guest, the "TSC_STABLE" bit is only
195 * translated as a "monotonic" bit which even in Async systems we
196 * -should- be reporting a strictly monotonic TSC to the guest. */
197 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
198 pKvm->uBaseFeat |= GIM_KVM_BASE_FEAT_TSC_STABLE;
199
200 CPUMCPUIDLEAF HyperLeaf;
201 RT_ZERO(HyperLeaf);
202 HyperLeaf.uLeaf = UINT32_C(0x40000001);
203 HyperLeaf.uEax = pKvm->uBaseFeat;
204 HyperLeaf.uEbx = 0;
205 HyperLeaf.uEcx = 0;
206 HyperLeaf.uEdx = 0;
207 int rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
208 AssertLogRelRCReturn(rc, rc);
209 }
210
211 return VINF_SUCCESS;
212}
213
214
215/**
216 * Terminates the KVM GIM provider.
217 *
218 * @returns VBox status code.
219 * @param pVM Pointer to the VM.
220 */
221VMMR3_INT_DECL(int) gimR3KvmTerm(PVM pVM)
222{
223 gimR3KvmReset(pVM);
224 return VINF_SUCCESS;
225}
226
227
228/**
229 * Applies relocations to data and code managed by this component.
230 *
231 * This function will be called at init and whenever the VMM need to relocate
232 * itself inside the GC.
233 *
234 * @param pVM Pointer to the VM.
235 * @param offDelta Relocation delta relative to old location.
236 */
237VMMR3_INT_DECL(void) gimR3KvmRelocate(PVM pVM, RTGCINTPTR offDelta)
238{
239 NOREF(pVM); NOREF(offDelta);
240}
241
242
243/**
244 * This resets KVM provider MSRs and unmaps whatever KVM regions that
245 * the guest may have mapped.
246 *
247 * This is called when the VM is being reset.
248 *
249 * @param pVM Pointer to the VM.
250 * @thread EMT(0).
251 */
252VMMR3_INT_DECL(void) gimR3KvmReset(PVM pVM)
253{
254 VM_ASSERT_EMT0(pVM);
255 LogRel(("GIM: KVM: Resetting MSRs\n"));
256
257 /*
258 * Reset MSRs.
259 */
260 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
261 pKvm->u64WallClockMsr = 0;
262 for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
263 {
264 PGIMKVMCPU pKvmCpu = &pVM->aCpus[iCpu].gim.s.u.KvmCpu;
265 pKvmCpu->u64SystemTimeMsr = 0;
266 }
267}
268
269
270/**
271 * KVM state-save operation.
272 *
273 * @returns VBox status code.
274 * @param pVM Pointer to the VM.
275 * @param pSSM Pointer to the SSM handle.
276 */
277VMMR3_INT_DECL(int) gimR3KvmSave(PVM pVM, PSSMHANDLE pSSM)
278{
279 PCGIMKVM pcKvm = &pVM->gim.s.u.Kvm;
280
281 /*
282 * Save the KVM SSM version.
283 */
284 SSMR3PutU32(pSSM, GIM_KVM_SAVED_STATE_VERSION);
285
286 /*
287 * Save per-VCPU data.
288 */
289 for (uint32_t i = 0; i < pVM->cCpus; i++)
290 {
291 PCGIMKVMCPU pcKvmCpu = &pVM->aCpus[i].gim.s.u.KvmCpu;
292
293 /* Guest may alter flags (namely GIM_KVM_SYSTEM_TIME_FLAGS_GUEST_PAUSED bit). So re-read them from guest-memory. */
294 GIMKVMSYSTEMTIME SystemTime;
295 RT_ZERO(SystemTime);
296 if (MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(pcKvmCpu->u64SystemTimeMsr))
297 {
298 int rc = PGMPhysSimpleReadGCPhys(pVM, &SystemTime, pcKvmCpu->GCPhysSystemTime, sizeof(GIMKVMSYSTEMTIME));
299 AssertRCReturn(rc, rc);
300 }
301
302 SSMR3PutU64(pSSM, pcKvmCpu->u64SystemTimeMsr);
303 SSMR3PutU64(pSSM, pcKvmCpu->uTsc);
304 SSMR3PutU64(pSSM, pcKvmCpu->uVirtNanoTS);
305 SSMR3PutGCPhys(pSSM, pcKvmCpu->GCPhysSystemTime);
306 SSMR3PutU32(pSSM, pcKvmCpu->u32SystemTimeVersion);
307 SSMR3PutU8(pSSM, SystemTime.fFlags);
308 }
309
310 /*
311 * Save per-VM data.
312 */
313 SSMR3PutU64(pSSM, pcKvm->u64WallClockMsr);
314 return SSMR3PutU32(pSSM, pcKvm->uBaseFeat);
315}
316
317
318/**
319 * KVM state-load operation, final pass.
320 *
321 * @returns VBox status code.
322 * @param pVM Pointer to the VM.
323 * @param pSSM Pointer to the SSM handle.
324 * @param uSSMVersion The GIM saved-state version.
325 */
326VMMR3_INT_DECL(int) gimR3KvmLoad(PVM pVM, PSSMHANDLE pSSM, uint32_t uSSMVersion)
327{
328 /*
329 * Load the KVM SSM version first.
330 */
331 uint32_t uKvmSavedStatVersion;
332 int rc = SSMR3GetU32(pSSM, &uKvmSavedStatVersion);
333 AssertRCReturn(rc, rc);
334 if (uKvmSavedStatVersion != GIM_KVM_SAVED_STATE_VERSION)
335 return SSMR3SetLoadError(pSSM, VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION, RT_SRC_POS,
336 N_("Unsupported KVM saved-state version %u (expected %u)."), uKvmSavedStatVersion,
337 GIM_KVM_SAVED_STATE_VERSION);
338
339 /*
340 * Load per-VCPU data.
341 */
342 for (uint32_t i = 0; i < pVM->cCpus; i++)
343 {
344 PVMCPU pVCpu = &pVM->aCpus[i];
345 PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
346
347 uint8_t fSystemTimeFlags = 0;
348 SSMR3GetU64(pSSM, &pKvmCpu->u64SystemTimeMsr);
349 SSMR3GetU64(pSSM, &pKvmCpu->uTsc);
350 SSMR3GetU64(pSSM, &pKvmCpu->uVirtNanoTS);
351 SSMR3GetGCPhys(pSSM, &pKvmCpu->GCPhysSystemTime);
352 SSMR3GetU32(pSSM, &pKvmCpu->u32SystemTimeVersion);
353 SSMR3GetU8(pSSM, &fSystemTimeFlags);
354
355 /* Enable the system-time struct. if necessary. */
356 if (MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(pKvmCpu->u64SystemTimeMsr))
357 {
358 Assert(!TMVirtualIsTicking(pVM)); /* paranoia. */
359 Assert(!TMCpuTickIsTicking(pVCpu));
360 rc = gimR3KvmEnableSystemTime(pVM, pVCpu, pKvmCpu, fSystemTimeFlags);
361 AssertRCReturn(rc, rc);
362 }
363 }
364
365 /*
366 * Load per-VM data.
367 */
368 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
369 SSMR3GetU64(pSSM, &pKvm->u64WallClockMsr);
370 rc = SSMR3GetU32(pSSM, &pKvm->uBaseFeat);
371 AssertRCReturn(rc, rc);
372
373 return VINF_SUCCESS;
374}
375
376
377/**
378 * Enables the KVM VCPU system-time structure.
379 *
380 * @returns VBox status code.
381 * @param pVM Pointer to the VM.
382 * @param pVCpu Pointer to the VMCPU.
383 * @param pKvmCpu Pointer to the GIMKVMCPU with all fields
384 * populated by the caller.
385 * @param fFlags The system-time struct. flags.
386 *
387 * @remarks Don't do any release assertions here, these can be triggered by
388 * guest R0 code.
389 */
390VMMR3_INT_DECL(int) gimR3KvmEnableSystemTime(PVM pVM, PVMCPU pVCpu, PGIMKVMCPU pKvmCpu, uint8_t fFlags)
391{
392 GIMKVMSYSTEMTIME SystemTime;
393 RT_ZERO(SystemTime);
394 SystemTime.u32Version = pKvmCpu->u32SystemTimeVersion;
395 SystemTime.u64NanoTS = pKvmCpu->uVirtNanoTS;
396 SystemTime.u64Tsc = pKvmCpu->uTsc;
397 SystemTime.fFlags = fFlags | GIM_KVM_SYSTEM_TIME_FLAGS_TSC_STABLE;
398
399 /*
400 * How the guest calculates the system time (nanoseconds):
401 *
402 * tsc = rdtsc - SysTime.u64Tsc
403 * if (SysTime.i8TscShift >= 0)
404 * tsc <<= i8TscShift;
405 * else
406 * tsc >>= -i8TscShift;
407 * time = ((tsc * SysTime.u32TscScale) >> 32) + SysTime.u64NanoTS
408 */
409 uint64_t u64TscFreq = TMCpuTicksPerSecond(pVM);
410 SystemTime.i8TscShift = 0;
411 while (u64TscFreq > 2 * RT_NS_1SEC_64)
412 {
413 u64TscFreq >>= 1;
414 SystemTime.i8TscShift--;
415 }
416 uint32_t uTscFreqLo = (uint32_t)u64TscFreq;
417 while (uTscFreqLo <= RT_NS_1SEC)
418 {
419 uTscFreqLo <<= 1;
420 SystemTime.i8TscShift++;
421 }
422 SystemTime.u32TscScale = ASMDivU64ByU32RetU32(RT_NS_1SEC_64 << 32, uTscFreqLo);
423
424 Assert(!(SystemTime.u32Version & UINT32_C(1)));
425 Assert(PGMPhysIsGCPhysNormal(pVM, pKvmCpu->GCPhysSystemTime));
426 int rc = PGMPhysSimpleWriteGCPhys(pVM, pKvmCpu->GCPhysSystemTime, &SystemTime, sizeof(GIMKVMSYSTEMTIME));
427 if (RT_SUCCESS(rc))
428 {
429 LogRel(("GIM: KVM: VCPU%3d: Enabled system-time struct. at %#RGp - u32TscScale=%#RX32 i8TscShift=%d uVersion=%#RU32 "
430 "fFlags=%#x uTsc=%#RX64 uVirtNanoTS=%#RX64\n", pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, SystemTime.u32TscScale,
431 SystemTime.i8TscShift, SystemTime.u32Version, SystemTime.fFlags, pKvmCpu->uTsc, pKvmCpu->uVirtNanoTS));
432 TMR3CpuTickParavirtEnable(pVM);
433 }
434 else
435 LogRel(("GIM: KVM: VCPU%3d: Failed to write system-time struct. at %#RGp. rc=%Rrc\n", pKvmCpu->GCPhysSystemTime, rc));
436
437 return rc;
438}
439
440
441/**
442 * Disables the KVM system-time struct.
443 *
444 * @returns VBox status code.
445 * @param pVM Pointer to the VM.
446 */
447VMMR3_INT_DECL(int) gimR3KvmDisableSystemTime(PVM pVM)
448{
449 TMR3CpuTickParavirtDisable(pVM);
450 return VINF_SUCCESS;
451}
452
453
454/**
455 * @callback_method_impl{PFNVMMEMTRENDEZVOUS,
456 * Worker for gimR3KvmEnableWallClock}
457 */
458static DECLCALLBACK(VBOXSTRICTRC) gimR3KvmEnableWallClockCallback(PVM pVM, PVMCPU pVCpu, void *pvData)
459{
460 Assert(pvData);
461 PKVMWALLCLOCKINFO pWallClockInfo = (PKVMWALLCLOCKINFO)pvData;
462 RTGCPHYS GCPhysWallClock = pWallClockInfo->GCPhysWallClock;
463
464 /*
465 * Read the wall-clock version (sequence) from the guest.
466 */
467 uint32_t uVersion;
468 Assert(PGMPhysIsGCPhysNormal(pVM, GCPhysWallClock));
469 int rc = PGMPhysSimpleReadGCPhys(pVM, &uVersion, GCPhysWallClock, sizeof(uVersion));
470 if (RT_FAILURE(rc))
471 {
472 LogRel(("GIM: KVM: Failed to read wall-clock struct. version at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
473 return rc;
474 }
475
476 /*
477 * Ensure the version is incrementally even.
478 */
479 if (!(uVersion & 1))
480 ++uVersion;
481 ++uVersion;
482
483 /*
484 * Update wall-clock guest struct. with UTC information.
485 */
486 RTTIMESPEC TimeSpec;
487 int32_t iSec;
488 int32_t iNano;
489 TMR3UtcNow(pVM, &TimeSpec);
490 RTTimeSpecGetSecondsAndNano(&TimeSpec, &iSec, &iNano);
491
492 GIMKVMWALLCLOCK WallClock;
493 RT_ZERO(WallClock);
494 AssertCompile(sizeof(uVersion) == sizeof(WallClock.u32Version));
495 WallClock.u32Version = uVersion;
496 WallClock.u32Sec = iSec;
497 WallClock.u32Nano = iNano;
498
499 /*
500 * Write out the wall-clock struct. to guest memory.
501 */
502 Assert(!(WallClock.u32Version & 1));
503 rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysWallClock, &WallClock, sizeof(GIMKVMWALLCLOCK));
504 if (RT_SUCCESS(rc))
505 {
506 LogRel(("GIM: KVM: Enabled wall-clock struct. at %#RGp - u32Sec=%u u32Nano=%u uVersion=%#RU32\n", GCPhysWallClock,
507 WallClock.u32Sec, WallClock.u32Nano, WallClock.u32Version));
508 }
509 else
510 LogRel(("GIM: KVM: Failed to write wall-clock struct. at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
511 return rc;
512}
513
514
515/**
516 * Enables the KVM wall-clock structure.
517 *
518 * Since the wall-clock can be read by any VCPU but it is a global struct. in
519 * guest-memory, we do an EMT rendezvous here to be on the safe side. The
520 * alternative is to use an MMIO2 region and use the WallClock.u32Version field
521 * for transactional update. However, this MSR is rarely written to (typically
522 * once during bootup) it's currently not a performance issue especially since
523 * we're already in ring-3. If we really wanted better performance in this code
524 * path, we should be doing it in ring-0 with transactional update while make
525 * sure there is only 1 writer as well.
526 *
527 * @returns VBox status code.
528 * @param pVM Pointer to the VM.
529 * @param GCPhysWallClock Where the guest wall-clock structure is located.
530 * @param uVersion The version (sequence number) value to use.
531 *
532 * @remarks Don't do any release assertions here, these can be triggered by
533 * guest R0 code.
534 */
535VMMR3_INT_DECL(int) gimR3KvmEnableWallClock(PVM pVM, RTGCPHYS GCPhysWallClock)
536{
537 KVMWALLCLOCKINFO WallClockInfo;
538 WallClockInfo.GCPhysWallClock = GCPhysWallClock;
539 return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, gimR3KvmEnableWallClockCallback, &WallClockInfo);
540}
541
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette