VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/GIMKvm.cpp@ 70290

Last change on this file since 70290 was 69111, checked in by vboxsync, 7 years ago

(C) year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 17.6 KB
Line 
1/* $Id: GIMKvm.cpp 69111 2017-10-17 14:26:02Z vboxsync $ */
2/** @file
3 * GIM - Guest Interface Manager, KVM implementation.
4 */
5
6/*
7 * Copyright (C) 2015-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_GIM
23#include <VBox/vmm/gim.h>
24#include <VBox/vmm/cpum.h>
25#include <VBox/vmm/hm.h>
26#include <VBox/vmm/pdmapi.h>
27#include <VBox/vmm/ssm.h>
28#include "GIMInternal.h"
29#include <VBox/vmm/vm.h>
30
31#include <VBox/disopcode.h>
32#include <VBox/version.h>
33
34#include <iprt/asm-math.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/string.h>
38#include <iprt/mem.h>
39#include <iprt/spinlock.h>
40
41
42
43/*********************************************************************************************************************************
44* Defined Constants And Macros *
45*********************************************************************************************************************************/
46
47/**
48 * GIM KVM saved-state version.
49 */
50#define GIM_KVM_SAVED_STATE_VERSION UINT32_C(1)
51
52/**
53 * VBox internal struct. to passback to EMT rendezvous callback while enabling
54 * the KVM wall-clock.
55 */
56typedef struct KVMWALLCLOCKINFO
57{
58 /** Guest physical address of the wall-clock struct. */
59 RTGCPHYS GCPhysWallClock;
60} KVMWALLCLOCKINFO;
61/** Pointer to the wall-clock info. struct. */
62typedef KVMWALLCLOCKINFO *PKVMWALLCLOCKINFO;
63
64
65/*********************************************************************************************************************************
66* Global Variables *
67*********************************************************************************************************************************/
68#ifdef VBOX_WITH_STATISTICS
69# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
70 { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName, { 0 }, { 0 }, { 0 }, { 0 } }
71#else
72# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
73 { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName }
74#endif
75
76/**
77 * Array of MSR ranges supported by KVM.
78 */
79static CPUMMSRRANGE const g_aMsrRanges_Kvm[] =
80{
81 GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE0_START, MSR_GIM_KVM_RANGE0_END, "KVM range 0"),
82 GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE1_START, MSR_GIM_KVM_RANGE1_END, "KVM range 1")
83};
84#undef GIMKVM_MSRRANGE
85
86
87/**
88 * Initializes the KVM GIM provider.
89 *
90 * @returns VBox status code.
91 * @param pVM The cross context VM structure.
92 */
93VMMR3_INT_DECL(int) gimR3KvmInit(PVM pVM)
94{
95 AssertReturn(pVM, VERR_INVALID_PARAMETER);
96 AssertReturn(pVM->gim.s.enmProviderId == GIMPROVIDERID_KVM, VERR_INTERNAL_ERROR_5);
97
98 int rc;
99 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
100
101 /*
102 * Determine interface capabilities based on the version.
103 */
104 if (!pVM->gim.s.u32Version)
105 {
106 /* Basic features. */
107 pKvm->uBaseFeat = 0
108 | GIM_KVM_BASE_FEAT_CLOCK_OLD
109 //| GIM_KVM_BASE_FEAT_NOP_IO_DELAY
110 //| GIM_KVM_BASE_FEAT_MMU_OP
111 | GIM_KVM_BASE_FEAT_CLOCK
112 //| GIM_KVM_BASE_FEAT_ASYNC_PF
113 //| GIM_KVM_BASE_FEAT_STEAL_TIME
114 //| GIM_KVM_BASE_FEAT_PV_EOI
115 | GIM_KVM_BASE_FEAT_PV_UNHALT
116 ;
117 /* Rest of the features are determined in gimR3KvmInitCompleted(). */
118 }
119
120 /*
121 * Expose HVP (Hypervisor Present) bit to the guest.
122 */
123 CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_HVP);
124
125 /*
126 * Modify the standard hypervisor leaves for KVM.
127 */
128 CPUMCPUIDLEAF HyperLeaf;
129 RT_ZERO(HyperLeaf);
130 HyperLeaf.uLeaf = UINT32_C(0x40000000);
131 HyperLeaf.uEax = UINT32_C(0x40000001); /* Minimum value for KVM is 0x40000001. */
132 HyperLeaf.uEbx = 0x4B4D564B; /* 'KVMK' */
133 HyperLeaf.uEcx = 0x564B4D56; /* 'VMKV' */
134 HyperLeaf.uEdx = 0x0000004D; /* 'M000' */
135 rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
136 AssertLogRelRCReturn(rc, rc);
137
138 /*
139 * Add KVM specific leaves.
140 */
141 HyperLeaf.uLeaf = UINT32_C(0x40000001);
142 HyperLeaf.uEax = pKvm->uBaseFeat;
143 HyperLeaf.uEbx = 0; /* Reserved */
144 HyperLeaf.uEcx = 0; /* Reserved */
145 HyperLeaf.uEdx = 0; /* Reserved */
146 rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
147 AssertLogRelRCReturn(rc, rc);
148
149 /*
150 * Insert all MSR ranges of KVM.
151 */
152 for (unsigned i = 0; i < RT_ELEMENTS(g_aMsrRanges_Kvm); i++)
153 {
154 rc = CPUMR3MsrRangesInsert(pVM, &g_aMsrRanges_Kvm[i]);
155 AssertLogRelRCReturn(rc, rc);
156 }
157
158 /*
159 * Setup hypercall and #UD handling.
160 */
161 for (VMCPUID i = 0; i < pVM->cCpus; i++)
162 VMMHypercallsEnable(&pVM->aCpus[i]);
163
164 if (ASMIsAmdCpu())
165 {
166 pKvm->fTrapXcptUD = true;
167 pKvm->uOpCodeNative = OP_VMMCALL;
168 }
169 else
170 {
171 Assert(ASMIsIntelCpu() || ASMIsViaCentaurCpu());
172 pKvm->fTrapXcptUD = false;
173 pKvm->uOpCodeNative = OP_VMCALL;
174 }
175
176 /* We always need to trap VMCALL/VMMCALL hypercall using #UDs for raw-mode VMs. */
177 if (!HMIsEnabled(pVM))
178 pKvm->fTrapXcptUD = true;
179
180 return VINF_SUCCESS;
181}
182
183
184/**
185 * Initializes remaining bits of the KVM provider.
186 *
187 * This is called after initializing HM and almost all other VMM components.
188 *
189 * @returns VBox status code.
190 * @param pVM The cross context VM structure.
191 */
192VMMR3_INT_DECL(int) gimR3KvmInitCompleted(PVM pVM)
193{
194 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
195 pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM);
196
197 if (TMR3CpuTickIsFixedRateMonotonic(pVM, true /* fWithParavirtEnabled */))
198 {
199 /** @todo We might want to consider just enabling this bit *always*. As far
200 * as I can see in the Linux guest, the "TSC_STABLE" bit is only
201 * translated as a "monotonic" bit which even in Async systems we
202 * -should- be reporting a strictly monotonic TSC to the guest. */
203 pKvm->uBaseFeat |= GIM_KVM_BASE_FEAT_TSC_STABLE;
204
205 CPUMCPUIDLEAF HyperLeaf;
206 RT_ZERO(HyperLeaf);
207 HyperLeaf.uLeaf = UINT32_C(0x40000001);
208 HyperLeaf.uEax = pKvm->uBaseFeat;
209 HyperLeaf.uEbx = 0;
210 HyperLeaf.uEcx = 0;
211 HyperLeaf.uEdx = 0;
212 int rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
213 AssertLogRelRCReturn(rc, rc);
214 }
215 return VINF_SUCCESS;
216}
217
218
219/**
220 * Terminates the KVM GIM provider.
221 *
222 * @returns VBox status code.
223 * @param pVM The cross context VM structure.
224 */
225VMMR3_INT_DECL(int) gimR3KvmTerm(PVM pVM)
226{
227 gimR3KvmReset(pVM);
228 return VINF_SUCCESS;
229}
230
231
232/**
233 * This resets KVM provider MSRs and unmaps whatever KVM regions that
234 * the guest may have mapped.
235 *
236 * This is called when the VM is being reset.
237 *
238 * @param pVM The cross context VM structure.
239 * @thread EMT(0)
240 */
241VMMR3_INT_DECL(void) gimR3KvmReset(PVM pVM)
242{
243 VM_ASSERT_EMT0(pVM);
244 LogRel(("GIM: KVM: Resetting MSRs\n"));
245
246 /*
247 * Reset MSRs.
248 */
249 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
250 pKvm->u64WallClockMsr = 0;
251 for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
252 {
253 PGIMKVMCPU pKvmCpu = &pVM->aCpus[iCpu].gim.s.u.KvmCpu;
254 pKvmCpu->u64SystemTimeMsr = 0;
255 pKvmCpu->u32SystemTimeVersion = 0;
256 pKvmCpu->fSystemTimeFlags = 0;
257 pKvmCpu->GCPhysSystemTime = 0;
258 pKvmCpu->uTsc = 0;
259 pKvmCpu->uVirtNanoTS = 0;
260 }
261}
262
263
264/**
265 * KVM state-save operation.
266 *
267 * @returns VBox status code.
268 * @param pVM The cross context VM structure.
269 * @param pSSM The saved state handle.
270 */
271VMMR3_INT_DECL(int) gimR3KvmSave(PVM pVM, PSSMHANDLE pSSM)
272{
273 PCGIMKVM pKvm = &pVM->gim.s.u.Kvm;
274
275 /*
276 * Save the KVM SSM version.
277 */
278 SSMR3PutU32(pSSM, GIM_KVM_SAVED_STATE_VERSION);
279
280 /*
281 * Save per-VCPU data.
282 */
283 for (uint32_t i = 0; i < pVM->cCpus; i++)
284 {
285 PCGIMKVMCPU pKvmCpu = &pVM->aCpus[i].gim.s.u.KvmCpu;
286 SSMR3PutU64(pSSM, pKvmCpu->u64SystemTimeMsr);
287 SSMR3PutU64(pSSM, pKvmCpu->uTsc);
288 SSMR3PutU64(pSSM, pKvmCpu->uVirtNanoTS);
289 SSMR3PutGCPhys(pSSM, pKvmCpu->GCPhysSystemTime);
290 SSMR3PutU32(pSSM, pKvmCpu->u32SystemTimeVersion);
291 SSMR3PutU8(pSSM, pKvmCpu->fSystemTimeFlags);
292 }
293
294 /*
295 * Save per-VM data.
296 */
297 SSMR3PutU64(pSSM, pKvm->u64WallClockMsr);
298 return SSMR3PutU32(pSSM, pKvm->uBaseFeat);
299}
300
301
302/**
303 * KVM state-load operation, final pass.
304 *
305 * @returns VBox status code.
306 * @param pVM The cross context VM structure.
307 * @param pSSM The saved state handle.
308 */
309VMMR3_INT_DECL(int) gimR3KvmLoad(PVM pVM, PSSMHANDLE pSSM)
310{
311 /*
312 * Load the KVM SSM version first.
313 */
314 uint32_t uKvmSavedStatVersion;
315 int rc = SSMR3GetU32(pSSM, &uKvmSavedStatVersion);
316 AssertRCReturn(rc, rc);
317 if (uKvmSavedStatVersion != GIM_KVM_SAVED_STATE_VERSION)
318 return SSMR3SetLoadError(pSSM, VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION, RT_SRC_POS,
319 N_("Unsupported KVM saved-state version %u (expected %u)."),
320 uKvmSavedStatVersion, GIM_KVM_SAVED_STATE_VERSION);
321
322 /*
323 * Update the TSC frequency from TM.
324 */
325 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
326 pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM);
327
328 /*
329 * Load per-VCPU data.
330 */
331 for (uint32_t i = 0; i < pVM->cCpus; i++)
332 {
333 PVMCPU pVCpu = &pVM->aCpus[i];
334 PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
335
336 SSMR3GetU64(pSSM, &pKvmCpu->u64SystemTimeMsr);
337 SSMR3GetU64(pSSM, &pKvmCpu->uTsc);
338 SSMR3GetU64(pSSM, &pKvmCpu->uVirtNanoTS);
339 SSMR3GetGCPhys(pSSM, &pKvmCpu->GCPhysSystemTime);
340 SSMR3GetU32(pSSM, &pKvmCpu->u32SystemTimeVersion);
341 rc = SSMR3GetU8(pSSM, &pKvmCpu->fSystemTimeFlags);
342 AssertRCReturn(rc, rc);
343
344 /* Enable the system-time struct. if necessary. */
345 /** @todo update guest struct only if cTscTicksPerSecond doesn't match host
346 * anymore. */
347 if (MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(pKvmCpu->u64SystemTimeMsr))
348 {
349 Assert(!TMVirtualIsTicking(pVM)); /* paranoia. */
350 Assert(!TMCpuTickIsTicking(pVCpu));
351 gimR3KvmEnableSystemTime(pVM, pVCpu);
352 }
353 }
354
355 /*
356 * Load per-VM data.
357 */
358 SSMR3GetU64(pSSM, &pKvm->u64WallClockMsr);
359 rc = SSMR3GetU32(pSSM, &pKvm->uBaseFeat);
360 AssertRCReturn(rc, rc);
361
362 return VINF_SUCCESS;
363}
364
365
366/**
367 * Enables the KVM VCPU system-time structure.
368 *
369 * @returns VBox status code.
370 * @param pVM The cross context VM structure.
371 * @param pVCpu The cross context virtual CPU structure.
372 *
373 * @remarks Don't do any release assertions here, these can be triggered by
374 * guest R0 code.
375 */
376VMMR3_INT_DECL(int) gimR3KvmEnableSystemTime(PVM pVM, PVMCPU pVCpu)
377{
378 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
379 PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
380
381 /*
382 * Validate the mapping address first.
383 */
384 if (!PGMPhysIsGCPhysNormal(pVM, pKvmCpu->GCPhysSystemTime))
385 {
386 LogRel(("GIM: KVM: VCPU%3d: Invalid physical addr requested for mapping system-time struct. GCPhysSystemTime=%#RGp\n",
387 pVCpu->idCpu, pKvmCpu->GCPhysSystemTime));
388 return VERR_GIM_OPERATION_FAILED;
389 }
390
391 /*
392 * Construct the system-time struct.
393 */
394 GIMKVMSYSTEMTIME SystemTime;
395 RT_ZERO(SystemTime);
396 SystemTime.u32Version = pKvmCpu->u32SystemTimeVersion;
397 SystemTime.u64NanoTS = pKvmCpu->uVirtNanoTS;
398 SystemTime.u64Tsc = pKvmCpu->uTsc;
399 SystemTime.fFlags = pKvmCpu->fSystemTimeFlags | GIM_KVM_SYSTEM_TIME_FLAGS_TSC_STABLE;
400
401 /*
402 * How the guest calculates the system time (nanoseconds):
403 *
404 * tsc = rdtsc - SysTime.u64Tsc
405 * if (SysTime.i8TscShift >= 0)
406 * tsc <<= i8TscShift;
407 * else
408 * tsc >>= -i8TscShift;
409 * time = ((tsc * SysTime.u32TscScale) >> 32) + SysTime.u64NanoTS
410 */
411 uint64_t u64TscFreq = pKvm->cTscTicksPerSecond;
412 SystemTime.i8TscShift = 0;
413 while (u64TscFreq > 2 * RT_NS_1SEC_64)
414 {
415 u64TscFreq >>= 1;
416 SystemTime.i8TscShift--;
417 }
418 uint32_t uTscFreqLo = (uint32_t)u64TscFreq;
419 while (uTscFreqLo <= RT_NS_1SEC)
420 {
421 uTscFreqLo <<= 1;
422 SystemTime.i8TscShift++;
423 }
424 SystemTime.u32TscScale = ASMDivU64ByU32RetU32(RT_NS_1SEC_64 << 32, uTscFreqLo);
425
426 /*
427 * Update guest memory with the system-time struct.
428 */
429 Assert(!(SystemTime.u32Version & UINT32_C(1)));
430 int rc = PGMPhysSimpleWriteGCPhys(pVM, pKvmCpu->GCPhysSystemTime, &SystemTime, sizeof(GIMKVMSYSTEMTIME));
431 if (RT_SUCCESS(rc))
432 {
433 LogRel(("GIM: KVM: VCPU%3d: Enabled system-time struct. at %#RGp - u32TscScale=%#RX32 i8TscShift=%d uVersion=%#RU32 "
434 "fFlags=%#x uTsc=%#RX64 uVirtNanoTS=%#RX64\n", pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, SystemTime.u32TscScale,
435 SystemTime.i8TscShift, SystemTime.u32Version, SystemTime.fFlags, pKvmCpu->uTsc, pKvmCpu->uVirtNanoTS));
436 TMR3CpuTickParavirtEnable(pVM);
437 }
438 else
439 LogRel(("GIM: KVM: VCPU%3d: Failed to write system-time struct. at %#RGp. rc=%Rrc\n",
440 pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, rc));
441
442 return rc;
443}
444
445
446/**
447 * Disables the KVM system-time struct.
448 *
449 * @returns VBox status code.
450 * @param pVM The cross context VM structure.
451 */
452VMMR3_INT_DECL(int) gimR3KvmDisableSystemTime(PVM pVM)
453{
454 TMR3CpuTickParavirtDisable(pVM);
455 return VINF_SUCCESS;
456}
457
458
459/**
460 * @callback_method_impl{PFNVMMEMTRENDEZVOUS,
461 * Worker for gimR3KvmEnableWallClock}
462 */
463static DECLCALLBACK(VBOXSTRICTRC) gimR3KvmEnableWallClockCallback(PVM pVM, PVMCPU pVCpu, void *pvUser)
464{
465 PKVMWALLCLOCKINFO pWallClockInfo = (PKVMWALLCLOCKINFO)pvUser; AssertPtr(pWallClockInfo);
466 RTGCPHYS GCPhysWallClock = pWallClockInfo->GCPhysWallClock;
467 RT_NOREF1(pVCpu);
468
469 /*
470 * Read the wall-clock version (sequence) from the guest.
471 */
472 uint32_t uVersion;
473 Assert(PGMPhysIsGCPhysNormal(pVM, GCPhysWallClock));
474 int rc = PGMPhysSimpleReadGCPhys(pVM, &uVersion, GCPhysWallClock, sizeof(uVersion));
475 if (RT_FAILURE(rc))
476 {
477 LogRel(("GIM: KVM: Failed to read wall-clock struct. version at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
478 return rc;
479 }
480
481 /*
482 * Ensure the version is incrementally even.
483 */
484 /* faster: uVersion = (uVersion | 1) + 1; */
485 if (!(uVersion & 1))
486 ++uVersion;
487 ++uVersion;
488
489 /*
490 * Update wall-clock guest struct. with UTC information.
491 */
492 RTTIMESPEC TimeSpec;
493 int32_t iSec;
494 int32_t iNano;
495 TMR3UtcNow(pVM, &TimeSpec);
496 RTTimeSpecGetSecondsAndNano(&TimeSpec, &iSec, &iNano);
497
498 GIMKVMWALLCLOCK WallClock;
499 RT_ZERO(WallClock);
500 AssertCompile(sizeof(uVersion) == sizeof(WallClock.u32Version));
501 WallClock.u32Version = uVersion;
502 WallClock.u32Sec = iSec;
503 WallClock.u32Nano = iNano;
504
505 /*
506 * Write out the wall-clock struct. to guest memory.
507 */
508 Assert(!(WallClock.u32Version & 1));
509 rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysWallClock, &WallClock, sizeof(GIMKVMWALLCLOCK));
510 if (RT_SUCCESS(rc))
511 LogRel(("GIM: KVM: Enabled wall-clock struct. at %#RGp - u32Sec=%u u32Nano=%u uVersion=%#RU32\n", GCPhysWallClock,
512 WallClock.u32Sec, WallClock.u32Nano, WallClock.u32Version));
513 else
514 LogRel(("GIM: KVM: Failed to write wall-clock struct. at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
515 return rc;
516}
517
518
519/**
520 * Enables the KVM wall-clock structure.
521 *
522 * Since the wall-clock can be read by any VCPU but it is a global struct. in
523 * guest-memory, we do an EMT rendezvous here to be on the safe side. The
524 * alternative is to use an MMIO2 region and use the WallClock.u32Version field
525 * for transactional update. However, this MSR is rarely written to (typically
526 * once during bootup) it's currently not a performance issue especially since
527 * we're already in ring-3. If we really wanted better performance in this code
528 * path, we should be doing it in ring-0 with transactional update while make
529 * sure there is only 1 writer as well.
530 *
531 * @returns VBox status code.
532 * @param pVM The cross context VM structure.
533 * @param GCPhysWallClock Where the guest wall-clock structure is located.
534 *
535 * @remarks Don't do any release assertions here, these can be triggered by
536 * guest R0 code.
537 */
538VMMR3_INT_DECL(int) gimR3KvmEnableWallClock(PVM pVM, RTGCPHYS GCPhysWallClock)
539{
540 KVMWALLCLOCKINFO WallClockInfo;
541 WallClockInfo.GCPhysWallClock = GCPhysWallClock;
542 return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, gimR3KvmEnableWallClockCallback, &WallClockInfo);
543}
544
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette