VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR3/GIMKvm.cpp@ 72403

Last change on this file since 72403 was 71800, checked in by vboxsync, 7 years ago

VMM/GIM: KVM: Unused header.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Author Date Id Revision
File size: 17.6 KB
Line 
1/* $Id: GIMKvm.cpp 71800 2018-04-10 06:14:02Z vboxsync $ */
2/** @file
3 * GIM - Guest Interface Manager, KVM implementation.
4 */
5
6/*
7 * Copyright (C) 2015-2017 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/*********************************************************************************************************************************
20* Header Files *
21*********************************************************************************************************************************/
22#define LOG_GROUP LOG_GROUP_GIM
23#include <VBox/vmm/gim.h>
24#include <VBox/vmm/cpum.h>
25#include <VBox/vmm/hm.h>
26#include <VBox/vmm/pdmapi.h>
27#include <VBox/vmm/ssm.h>
28#include "GIMInternal.h"
29#include <VBox/vmm/vm.h>
30
31#include <VBox/disopcode.h>
32#include <VBox/version.h>
33
34#include <iprt/asm-math.h>
35#include <iprt/assert.h>
36#include <iprt/err.h>
37#include <iprt/string.h>
38#include <iprt/mem.h>
39
40
41
42/*********************************************************************************************************************************
43* Defined Constants And Macros *
44*********************************************************************************************************************************/
45
/**
 * Version tag of the KVM provider's saved-state unit.
 *
 * Written first by gimR3KvmSave() and checked for an exact match by
 * gimR3KvmLoad().
 */
#define GIM_KVM_SAVED_STATE_VERSION             UINT32_C(1)
50
51/**
52 * VBox internal struct. to passback to EMT rendezvous callback while enabling
53 * the KVM wall-clock.
54 */
55typedef struct KVMWALLCLOCKINFO
56{
57 /** Guest physical address of the wall-clock struct. */
58 RTGCPHYS GCPhysWallClock;
59} KVMWALLCLOCKINFO;
60/** Pointer to the wall-clock info. struct. */
61typedef KVMWALLCLOCKINFO *PKVMWALLCLOCKINFO;
62
63
64/*********************************************************************************************************************************
65* Global Variables *
66*********************************************************************************************************************************/
67#ifdef VBOX_WITH_STATISTICS
68# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
69 { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName, { 0 }, { 0 }, { 0 }, { 0 } }
70#else
71# define GIMKVM_MSRRANGE(a_uFirst, a_uLast, a_szName) \
72 { (a_uFirst), (a_uLast), kCpumMsrRdFn_Gim, kCpumMsrWrFn_Gim, 0, 0, 0, 0, 0, a_szName }
73#endif
74
75/**
76 * Array of MSR ranges supported by KVM.
77 */
78static CPUMMSRRANGE const g_aMsrRanges_Kvm[] =
79{
80 GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE0_START, MSR_GIM_KVM_RANGE0_END, "KVM range 0"),
81 GIMKVM_MSRRANGE(MSR_GIM_KVM_RANGE1_START, MSR_GIM_KVM_RANGE1_END, "KVM range 1")
82};
83#undef GIMKVM_MSRRANGE
84
85
86/**
87 * Initializes the KVM GIM provider.
88 *
89 * @returns VBox status code.
90 * @param pVM The cross context VM structure.
91 */
92VMMR3_INT_DECL(int) gimR3KvmInit(PVM pVM)
93{
94 AssertReturn(pVM, VERR_INVALID_PARAMETER);
95 AssertReturn(pVM->gim.s.enmProviderId == GIMPROVIDERID_KVM, VERR_INTERNAL_ERROR_5);
96
97 int rc;
98 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
99
100 /*
101 * Determine interface capabilities based on the version.
102 */
103 if (!pVM->gim.s.u32Version)
104 {
105 /* Basic features. */
106 pKvm->uBaseFeat = 0
107 | GIM_KVM_BASE_FEAT_CLOCK_OLD
108 //| GIM_KVM_BASE_FEAT_NOP_IO_DELAY
109 //| GIM_KVM_BASE_FEAT_MMU_OP
110 | GIM_KVM_BASE_FEAT_CLOCK
111 //| GIM_KVM_BASE_FEAT_ASYNC_PF
112 //| GIM_KVM_BASE_FEAT_STEAL_TIME
113 //| GIM_KVM_BASE_FEAT_PV_EOI
114 | GIM_KVM_BASE_FEAT_PV_UNHALT
115 ;
116 /* Rest of the features are determined in gimR3KvmInitCompleted(). */
117 }
118
119 /*
120 * Expose HVP (Hypervisor Present) bit to the guest.
121 */
122 CPUMR3SetGuestCpuIdFeature(pVM, CPUMCPUIDFEATURE_HVP);
123
124 /*
125 * Modify the standard hypervisor leaves for KVM.
126 */
127 CPUMCPUIDLEAF HyperLeaf;
128 RT_ZERO(HyperLeaf);
129 HyperLeaf.uLeaf = UINT32_C(0x40000000);
130 HyperLeaf.uEax = UINT32_C(0x40000001); /* Minimum value for KVM is 0x40000001. */
131 HyperLeaf.uEbx = 0x4B4D564B; /* 'KVMK' */
132 HyperLeaf.uEcx = 0x564B4D56; /* 'VMKV' */
133 HyperLeaf.uEdx = 0x0000004D; /* 'M000' */
134 rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
135 AssertLogRelRCReturn(rc, rc);
136
137 /*
138 * Add KVM specific leaves.
139 */
140 HyperLeaf.uLeaf = UINT32_C(0x40000001);
141 HyperLeaf.uEax = pKvm->uBaseFeat;
142 HyperLeaf.uEbx = 0; /* Reserved */
143 HyperLeaf.uEcx = 0; /* Reserved */
144 HyperLeaf.uEdx = 0; /* Reserved */
145 rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
146 AssertLogRelRCReturn(rc, rc);
147
148 /*
149 * Insert all MSR ranges of KVM.
150 */
151 for (unsigned i = 0; i < RT_ELEMENTS(g_aMsrRanges_Kvm); i++)
152 {
153 rc = CPUMR3MsrRangesInsert(pVM, &g_aMsrRanges_Kvm[i]);
154 AssertLogRelRCReturn(rc, rc);
155 }
156
157 /*
158 * Setup hypercall and #UD handling.
159 */
160 for (VMCPUID i = 0; i < pVM->cCpus; i++)
161 VMMHypercallsEnable(&pVM->aCpus[i]);
162
163 if (ASMIsAmdCpu())
164 {
165 pKvm->fTrapXcptUD = true;
166 pKvm->uOpCodeNative = OP_VMMCALL;
167 }
168 else
169 {
170 Assert(ASMIsIntelCpu() || ASMIsViaCentaurCpu());
171 pKvm->fTrapXcptUD = false;
172 pKvm->uOpCodeNative = OP_VMCALL;
173 }
174
175 /* We always need to trap VMCALL/VMMCALL hypercall using #UDs for raw-mode VMs. */
176 if (VM_IS_RAW_MODE_ENABLED(pVM))
177 pKvm->fTrapXcptUD = true;
178
179 return VINF_SUCCESS;
180}
181
182
183/**
184 * Initializes remaining bits of the KVM provider.
185 *
186 * This is called after initializing HM and almost all other VMM components.
187 *
188 * @returns VBox status code.
189 * @param pVM The cross context VM structure.
190 */
191VMMR3_INT_DECL(int) gimR3KvmInitCompleted(PVM pVM)
192{
193 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
194 pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM);
195
196 if (TMR3CpuTickIsFixedRateMonotonic(pVM, true /* fWithParavirtEnabled */))
197 {
198 /** @todo We might want to consider just enabling this bit *always*. As far
199 * as I can see in the Linux guest, the "TSC_STABLE" bit is only
200 * translated as a "monotonic" bit which even in Async systems we
201 * -should- be reporting a strictly monotonic TSC to the guest. */
202 pKvm->uBaseFeat |= GIM_KVM_BASE_FEAT_TSC_STABLE;
203
204 CPUMCPUIDLEAF HyperLeaf;
205 RT_ZERO(HyperLeaf);
206 HyperLeaf.uLeaf = UINT32_C(0x40000001);
207 HyperLeaf.uEax = pKvm->uBaseFeat;
208 HyperLeaf.uEbx = 0;
209 HyperLeaf.uEcx = 0;
210 HyperLeaf.uEdx = 0;
211 int rc = CPUMR3CpuIdInsert(pVM, &HyperLeaf);
212 AssertLogRelRCReturn(rc, rc);
213 }
214 return VINF_SUCCESS;
215}
216
217
218/**
219 * Terminates the KVM GIM provider.
220 *
221 * @returns VBox status code.
222 * @param pVM The cross context VM structure.
223 */
224VMMR3_INT_DECL(int) gimR3KvmTerm(PVM pVM)
225{
226 gimR3KvmReset(pVM);
227 return VINF_SUCCESS;
228}
229
230
231/**
232 * This resets KVM provider MSRs and unmaps whatever KVM regions that
233 * the guest may have mapped.
234 *
235 * This is called when the VM is being reset.
236 *
237 * @param pVM The cross context VM structure.
238 * @thread EMT(0)
239 */
240VMMR3_INT_DECL(void) gimR3KvmReset(PVM pVM)
241{
242 VM_ASSERT_EMT0(pVM);
243 LogRel(("GIM: KVM: Resetting MSRs\n"));
244
245 /*
246 * Reset MSRs.
247 */
248 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
249 pKvm->u64WallClockMsr = 0;
250 for (VMCPUID iCpu = 0; iCpu < pVM->cCpus; iCpu++)
251 {
252 PGIMKVMCPU pKvmCpu = &pVM->aCpus[iCpu].gim.s.u.KvmCpu;
253 pKvmCpu->u64SystemTimeMsr = 0;
254 pKvmCpu->u32SystemTimeVersion = 0;
255 pKvmCpu->fSystemTimeFlags = 0;
256 pKvmCpu->GCPhysSystemTime = 0;
257 pKvmCpu->uTsc = 0;
258 pKvmCpu->uVirtNanoTS = 0;
259 }
260}
261
262
263/**
264 * KVM state-save operation.
265 *
266 * @returns VBox status code.
267 * @param pVM The cross context VM structure.
268 * @param pSSM The saved state handle.
269 */
270VMMR3_INT_DECL(int) gimR3KvmSave(PVM pVM, PSSMHANDLE pSSM)
271{
272 PCGIMKVM pKvm = &pVM->gim.s.u.Kvm;
273
274 /*
275 * Save the KVM SSM version.
276 */
277 SSMR3PutU32(pSSM, GIM_KVM_SAVED_STATE_VERSION);
278
279 /*
280 * Save per-VCPU data.
281 */
282 for (uint32_t i = 0; i < pVM->cCpus; i++)
283 {
284 PCGIMKVMCPU pKvmCpu = &pVM->aCpus[i].gim.s.u.KvmCpu;
285 SSMR3PutU64(pSSM, pKvmCpu->u64SystemTimeMsr);
286 SSMR3PutU64(pSSM, pKvmCpu->uTsc);
287 SSMR3PutU64(pSSM, pKvmCpu->uVirtNanoTS);
288 SSMR3PutGCPhys(pSSM, pKvmCpu->GCPhysSystemTime);
289 SSMR3PutU32(pSSM, pKvmCpu->u32SystemTimeVersion);
290 SSMR3PutU8(pSSM, pKvmCpu->fSystemTimeFlags);
291 }
292
293 /*
294 * Save per-VM data.
295 */
296 SSMR3PutU64(pSSM, pKvm->u64WallClockMsr);
297 return SSMR3PutU32(pSSM, pKvm->uBaseFeat);
298}
299
300
301/**
302 * KVM state-load operation, final pass.
303 *
304 * @returns VBox status code.
305 * @param pVM The cross context VM structure.
306 * @param pSSM The saved state handle.
307 */
308VMMR3_INT_DECL(int) gimR3KvmLoad(PVM pVM, PSSMHANDLE pSSM)
309{
310 /*
311 * Load the KVM SSM version first.
312 */
313 uint32_t uKvmSavedStatVersion;
314 int rc = SSMR3GetU32(pSSM, &uKvmSavedStatVersion);
315 AssertRCReturn(rc, rc);
316 if (uKvmSavedStatVersion != GIM_KVM_SAVED_STATE_VERSION)
317 return SSMR3SetLoadError(pSSM, VERR_SSM_UNSUPPORTED_DATA_UNIT_VERSION, RT_SRC_POS,
318 N_("Unsupported KVM saved-state version %u (expected %u)."),
319 uKvmSavedStatVersion, GIM_KVM_SAVED_STATE_VERSION);
320
321 /*
322 * Update the TSC frequency from TM.
323 */
324 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
325 pKvm->cTscTicksPerSecond = TMCpuTicksPerSecond(pVM);
326
327 /*
328 * Load per-VCPU data.
329 */
330 for (uint32_t i = 0; i < pVM->cCpus; i++)
331 {
332 PVMCPU pVCpu = &pVM->aCpus[i];
333 PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
334
335 SSMR3GetU64(pSSM, &pKvmCpu->u64SystemTimeMsr);
336 SSMR3GetU64(pSSM, &pKvmCpu->uTsc);
337 SSMR3GetU64(pSSM, &pKvmCpu->uVirtNanoTS);
338 SSMR3GetGCPhys(pSSM, &pKvmCpu->GCPhysSystemTime);
339 SSMR3GetU32(pSSM, &pKvmCpu->u32SystemTimeVersion);
340 rc = SSMR3GetU8(pSSM, &pKvmCpu->fSystemTimeFlags);
341 AssertRCReturn(rc, rc);
342
343 /* Enable the system-time struct. if necessary. */
344 /** @todo update guest struct only if cTscTicksPerSecond doesn't match host
345 * anymore. */
346 if (MSR_GIM_KVM_SYSTEM_TIME_IS_ENABLED(pKvmCpu->u64SystemTimeMsr))
347 {
348 Assert(!TMVirtualIsTicking(pVM)); /* paranoia. */
349 Assert(!TMCpuTickIsTicking(pVCpu));
350 gimR3KvmEnableSystemTime(pVM, pVCpu);
351 }
352 }
353
354 /*
355 * Load per-VM data.
356 */
357 SSMR3GetU64(pSSM, &pKvm->u64WallClockMsr);
358 rc = SSMR3GetU32(pSSM, &pKvm->uBaseFeat);
359 AssertRCReturn(rc, rc);
360
361 return VINF_SUCCESS;
362}
363
364
365/**
366 * Enables the KVM VCPU system-time structure.
367 *
368 * @returns VBox status code.
369 * @param pVM The cross context VM structure.
370 * @param pVCpu The cross context virtual CPU structure.
371 *
372 * @remarks Don't do any release assertions here, these can be triggered by
373 * guest R0 code.
374 */
375VMMR3_INT_DECL(int) gimR3KvmEnableSystemTime(PVM pVM, PVMCPU pVCpu)
376{
377 PGIMKVM pKvm = &pVM->gim.s.u.Kvm;
378 PGIMKVMCPU pKvmCpu = &pVCpu->gim.s.u.KvmCpu;
379
380 /*
381 * Validate the mapping address first.
382 */
383 if (!PGMPhysIsGCPhysNormal(pVM, pKvmCpu->GCPhysSystemTime))
384 {
385 LogRel(("GIM: KVM: VCPU%3d: Invalid physical addr requested for mapping system-time struct. GCPhysSystemTime=%#RGp\n",
386 pVCpu->idCpu, pKvmCpu->GCPhysSystemTime));
387 return VERR_GIM_OPERATION_FAILED;
388 }
389
390 /*
391 * Construct the system-time struct.
392 */
393 GIMKVMSYSTEMTIME SystemTime;
394 RT_ZERO(SystemTime);
395 SystemTime.u32Version = pKvmCpu->u32SystemTimeVersion;
396 SystemTime.u64NanoTS = pKvmCpu->uVirtNanoTS;
397 SystemTime.u64Tsc = pKvmCpu->uTsc;
398 SystemTime.fFlags = pKvmCpu->fSystemTimeFlags | GIM_KVM_SYSTEM_TIME_FLAGS_TSC_STABLE;
399
400 /*
401 * How the guest calculates the system time (nanoseconds):
402 *
403 * tsc = rdtsc - SysTime.u64Tsc
404 * if (SysTime.i8TscShift >= 0)
405 * tsc <<= i8TscShift;
406 * else
407 * tsc >>= -i8TscShift;
408 * time = ((tsc * SysTime.u32TscScale) >> 32) + SysTime.u64NanoTS
409 */
410 uint64_t u64TscFreq = pKvm->cTscTicksPerSecond;
411 SystemTime.i8TscShift = 0;
412 while (u64TscFreq > 2 * RT_NS_1SEC_64)
413 {
414 u64TscFreq >>= 1;
415 SystemTime.i8TscShift--;
416 }
417 uint32_t uTscFreqLo = (uint32_t)u64TscFreq;
418 while (uTscFreqLo <= RT_NS_1SEC)
419 {
420 uTscFreqLo <<= 1;
421 SystemTime.i8TscShift++;
422 }
423 SystemTime.u32TscScale = ASMDivU64ByU32RetU32(RT_NS_1SEC_64 << 32, uTscFreqLo);
424
425 /*
426 * Update guest memory with the system-time struct.
427 */
428 Assert(!(SystemTime.u32Version & UINT32_C(1)));
429 int rc = PGMPhysSimpleWriteGCPhys(pVM, pKvmCpu->GCPhysSystemTime, &SystemTime, sizeof(GIMKVMSYSTEMTIME));
430 if (RT_SUCCESS(rc))
431 {
432 LogRel(("GIM: KVM: VCPU%3d: Enabled system-time struct. at %#RGp - u32TscScale=%#RX32 i8TscShift=%d uVersion=%#RU32 "
433 "fFlags=%#x uTsc=%#RX64 uVirtNanoTS=%#RX64\n", pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, SystemTime.u32TscScale,
434 SystemTime.i8TscShift, SystemTime.u32Version, SystemTime.fFlags, pKvmCpu->uTsc, pKvmCpu->uVirtNanoTS));
435 TMR3CpuTickParavirtEnable(pVM);
436 }
437 else
438 LogRel(("GIM: KVM: VCPU%3d: Failed to write system-time struct. at %#RGp. rc=%Rrc\n",
439 pVCpu->idCpu, pKvmCpu->GCPhysSystemTime, rc));
440
441 return rc;
442}
443
444
445/**
446 * Disables the KVM system-time struct.
447 *
448 * @returns VBox status code.
449 * @param pVM The cross context VM structure.
450 */
451VMMR3_INT_DECL(int) gimR3KvmDisableSystemTime(PVM pVM)
452{
453 TMR3CpuTickParavirtDisable(pVM);
454 return VINF_SUCCESS;
455}
456
457
458/**
459 * @callback_method_impl{PFNVMMEMTRENDEZVOUS,
460 * Worker for gimR3KvmEnableWallClock}
461 */
462static DECLCALLBACK(VBOXSTRICTRC) gimR3KvmEnableWallClockCallback(PVM pVM, PVMCPU pVCpu, void *pvUser)
463{
464 PKVMWALLCLOCKINFO pWallClockInfo = (PKVMWALLCLOCKINFO)pvUser; AssertPtr(pWallClockInfo);
465 RTGCPHYS GCPhysWallClock = pWallClockInfo->GCPhysWallClock;
466 RT_NOREF1(pVCpu);
467
468 /*
469 * Read the wall-clock version (sequence) from the guest.
470 */
471 uint32_t uVersion;
472 Assert(PGMPhysIsGCPhysNormal(pVM, GCPhysWallClock));
473 int rc = PGMPhysSimpleReadGCPhys(pVM, &uVersion, GCPhysWallClock, sizeof(uVersion));
474 if (RT_FAILURE(rc))
475 {
476 LogRel(("GIM: KVM: Failed to read wall-clock struct. version at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
477 return rc;
478 }
479
480 /*
481 * Ensure the version is incrementally even.
482 */
483 /* faster: uVersion = (uVersion | 1) + 1; */
484 if (!(uVersion & 1))
485 ++uVersion;
486 ++uVersion;
487
488 /*
489 * Update wall-clock guest struct. with UTC information.
490 */
491 RTTIMESPEC TimeSpec;
492 int32_t iSec;
493 int32_t iNano;
494 TMR3UtcNow(pVM, &TimeSpec);
495 RTTimeSpecGetSecondsAndNano(&TimeSpec, &iSec, &iNano);
496
497 GIMKVMWALLCLOCK WallClock;
498 RT_ZERO(WallClock);
499 AssertCompile(sizeof(uVersion) == sizeof(WallClock.u32Version));
500 WallClock.u32Version = uVersion;
501 WallClock.u32Sec = iSec;
502 WallClock.u32Nano = iNano;
503
504 /*
505 * Write out the wall-clock struct. to guest memory.
506 */
507 Assert(!(WallClock.u32Version & 1));
508 rc = PGMPhysSimpleWriteGCPhys(pVM, GCPhysWallClock, &WallClock, sizeof(GIMKVMWALLCLOCK));
509 if (RT_SUCCESS(rc))
510 LogRel(("GIM: KVM: Enabled wall-clock struct. at %#RGp - u32Sec=%u u32Nano=%u uVersion=%#RU32\n", GCPhysWallClock,
511 WallClock.u32Sec, WallClock.u32Nano, WallClock.u32Version));
512 else
513 LogRel(("GIM: KVM: Failed to write wall-clock struct. at %#RGp. rc=%Rrc\n", GCPhysWallClock, rc));
514 return rc;
515}
516
517
518/**
519 * Enables the KVM wall-clock structure.
520 *
521 * Since the wall-clock can be read by any VCPU but it is a global struct. in
522 * guest-memory, we do an EMT rendezvous here to be on the safe side. The
523 * alternative is to use an MMIO2 region and use the WallClock.u32Version field
524 * for transactional update. However, this MSR is rarely written to (typically
525 * once during bootup) it's currently not a performance issue especially since
526 * we're already in ring-3. If we really wanted better performance in this code
527 * path, we should be doing it in ring-0 with transactional update while make
528 * sure there is only 1 writer as well.
529 *
530 * @returns VBox status code.
531 * @param pVM The cross context VM structure.
532 * @param GCPhysWallClock Where the guest wall-clock structure is located.
533 *
534 * @remarks Don't do any release assertions here, these can be triggered by
535 * guest R0 code.
536 */
537VMMR3_INT_DECL(int) gimR3KvmEnableWallClock(PVM pVM, RTGCPHYS GCPhysWallClock)
538{
539 KVMWALLCLOCKINFO WallClockInfo;
540 WallClockInfo.GCPhysWallClock = GCPhysWallClock;
541 return VMMR3EmtRendezvous(pVM, VMMEMTRENDEZVOUS_FLAGS_TYPE_ONCE, gimR3KvmEnableWallClockCallback, &WallClockInfo);
542}
543
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette