VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 58591

Last change on this file since 58591 was 58126, checked in by vboxsync, 9 years ago

VMM: Fixed almost all the Doxygen warnings.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 91.1 KB
Line 
1/* $Id: GVMMR0.cpp 58126 2015-10-08 20:59:48Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2015 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered. This
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * TMCalcHostTimerFrequency(), for its part, takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#include <VBox/param.h>
61#include <VBox/err.h>
62
63#include <iprt/asm.h>
64#include <iprt/asm-amd64-x86.h>
65#include <iprt/mem.h>
66#include <iprt/semaphore.h>
67#include <iprt/time.h>
68#include <VBox/log.h>
69#include <iprt/thread.h>
70#include <iprt/process.h>
71#include <iprt/param.h>
72#include <iprt/string.h>
73#include <iprt/assert.h>
74#include <iprt/mem.h>
75#include <iprt/memobj.h>
76#include <iprt/mp.h>
77#include <iprt/cpuset.h>
78#include <iprt/spinlock.h>
79#include <iprt/timer.h>
80
81#include "dtrace/VBoxVMM.h"
82
83
84/*********************************************************************************************************************************
85* Defined Constants And Macros *
86*********************************************************************************************************************************/
#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
/** Enables the periodic preemption timer (PPT) code.  Only defined on Linux
 * and Solaris hosts (and while generating the documentation). */
# define GVMM_SCHED_WITH_PPT
#endif


/** @def GVMM_CHECK_SMAP_SETUP
 * Declares the local fKernelFeatures variable that the check macros below
 * read; must appear once in a function before either check macro is used. */
/** @def GVMM_CHECK_SMAP_CHECK
 * If the kernel reports SMAP, verifies that EFLAGS.AC is set.  When it is
 * clear, the condition is printed via SUPR0Printf and @a a_BadExpr is
 * executed. */
/** @def GVMM_CHECK_SMAP_CHECK2
 * If the kernel reports SMAP, verifies that EFLAGS.AC is set.  When it is
 * clear, the condition is reported via SUPR0BadContext using the session of
 * @a a_pVM (NULL if @a a_pVM is NULL) and @a a_BadExpr is executed. */
#if defined(VBOX_STRICT) || 1 /* the '|| 1' keeps the checks on in all builds */
# define GVMM_CHECK_SMAP_SETUP()    uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
    do { \
        if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
        { \
            RTCCUINTREG fEflCheck = ASMGetFlags(); \
            if (RT_UNLIKELY(!(fEflCheck & X86_EFL_AC))) \
            { \
                SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
                a_BadExpr; \
            } \
        } \
    } while (0)
# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr) \
    do { \
        if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
        { \
            RTCCUINTREG fEflCheck = ASMGetFlags(); \
            if (RT_UNLIKELY(!(fEflCheck & X86_EFL_AC))) \
            { \
                SUPR0BadContext((a_pVM) ? (a_pVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
                a_BadExpr; \
            } \
        } \
    } while (0)
#else
/* Checks compiled out: keep the variable so call sites build unchanged. */
# define GVMM_CHECK_SMAP_SETUP()                    uint32_t const fKernelFeatures = 0
# define GVMM_CHECK_SMAP_CHECK(a_BadExpr)           NOREF(fKernelFeatures)
# define GVMM_CHECK_SMAP_CHECK2(a_pVM, a_BadExpr)   NOREF(fKernelFeatures)
#endif
137
138
139
140/*********************************************************************************************************************************
141* Structures and Typedefs *
142*********************************************************************************************************************************/
143
144/**
145 * Global VM handle.
146 */
147typedef struct GVMHANDLE
148{
149 /** The index of the next handle in the list (free or used). (0 is nil.) */
150 uint16_t volatile iNext;
151 /** Our own index / handle value. */
152 uint16_t iSelf;
153 /** The process ID of the handle owner.
154 * This is used for access checks. */
155 RTPROCESS ProcId;
156 /** The pointer to the ring-0 only (aka global) VM structure. */
157 PGVM pGVM;
158 /** The ring-0 mapping of the shared VM instance data. */
159 PVM pVM;
160 /** The virtual machine object. */
161 void *pvObj;
162 /** The session this VM is associated with. */
163 PSUPDRVSESSION pSession;
164 /** The ring-0 handle of the EMT0 thread.
165 * This is used for ownership checks as well as looking up a VM handle by thread
166 * at times like assertions. */
167 RTNATIVETHREAD hEMT0;
168} GVMHANDLE;
169/** Pointer to a global VM handle. */
170typedef GVMHANDLE *PGVMHANDLE;
171
172/** Number of GVM handles (including the NIL handle). */
173#if HC_ARCH_BITS == 64
174# define GVMM_MAX_HANDLES 8192
175#else
176# define GVMM_MAX_HANDLES 128
177#endif
178
179/**
180 * Per host CPU GVMM data.
181 */
182typedef struct GVMMHOSTCPU
183{
184 /** Magic number (GVMMHOSTCPU_MAGIC). */
185 uint32_t volatile u32Magic;
186 /** The CPU ID. */
187 RTCPUID idCpu;
188 /** The CPU set index. */
189 uint32_t idxCpuSet;
190
191#ifdef GVMM_SCHED_WITH_PPT
192 /** Periodic preemption timer data. */
193 struct
194 {
195 /** The handle to the periodic preemption timer. */
196 PRTTIMER pTimer;
197 /** Spinlock protecting the data below. */
198 RTSPINLOCK hSpinlock;
199 /** The smalles Hz that we need to care about. (static) */
200 uint32_t uMinHz;
201 /** The number of ticks between each historization. */
202 uint32_t cTicksHistoriziationInterval;
203 /** The current historization tick (counting up to
204 * cTicksHistoriziationInterval and then resetting). */
205 uint32_t iTickHistorization;
206 /** The current timer interval. This is set to 0 when inactive. */
207 uint32_t cNsInterval;
208 /** The current timer frequency. This is set to 0 when inactive. */
209 uint32_t uTimerHz;
210 /** The current max frequency reported by the EMTs.
211 * This gets historicize and reset by the timer callback. This is
212 * read without holding the spinlock, so needs atomic updating. */
213 uint32_t volatile uDesiredHz;
214 /** Whether the timer was started or not. */
215 bool volatile fStarted;
216 /** Set if we're starting timer. */
217 bool volatile fStarting;
218 /** The index of the next history entry (mod it). */
219 uint32_t iHzHistory;
220 /** Historicized uDesiredHz values. The array wraps around, new entries
221 * are added at iHzHistory. This is updated approximately every
222 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
223 uint32_t aHzHistory[8];
224 /** Statistics counter for recording the number of interval changes. */
225 uint32_t cChanges;
226 /** Statistics counter for recording the number of timer starts. */
227 uint32_t cStarts;
228 } Ppt;
229#endif /* GVMM_SCHED_WITH_PPT */
230
231} GVMMHOSTCPU;
232/** Pointer to the per host CPU GVMM data. */
233typedef GVMMHOSTCPU *PGVMMHOSTCPU;
234/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
235#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
236/** The interval on history entry should cover (approximately) give in
237 * nanoseconds. */
238#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
239
240
241/**
242 * The GVMM instance data.
243 */
244typedef struct GVMM
245{
246 /** Eyecatcher / magic. */
247 uint32_t u32Magic;
248 /** The index of the head of the free handle chain. (0 is nil.) */
249 uint16_t volatile iFreeHead;
250 /** The index of the head of the active handle chain. (0 is nil.) */
251 uint16_t volatile iUsedHead;
252 /** The number of VMs. */
253 uint16_t volatile cVMs;
254 /** Alignment padding. */
255 uint16_t u16Reserved;
256 /** The number of EMTs. */
257 uint32_t volatile cEMTs;
258 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
259 uint32_t volatile cHaltedEMTs;
260 /** Alignment padding. */
261 uint32_t u32Alignment;
262 /** When the next halted or sleeping EMT will wake up.
263 * This is set to 0 when it needs recalculating and to UINT64_MAX when
264 * there are no halted or sleeping EMTs in the GVMM. */
265 uint64_t uNsNextEmtWakeup;
266 /** The lock used to serialize VM creation, destruction and associated events that
267 * isn't performance critical. Owners may acquire the list lock. */
268 RTSEMFASTMUTEX CreateDestroyLock;
269 /** The lock used to serialize used list updates and accesses.
270 * This indirectly includes scheduling since the scheduler will have to walk the
271 * used list to examin running VMs. Owners may not acquire any other locks. */
272 RTSEMFASTMUTEX UsedLock;
273 /** The handle array.
274 * The size of this array defines the maximum number of currently running VMs.
275 * The first entry is unused as it represents the NIL handle. */
276 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
277
278 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
279 * The number of EMTs that means we no longer consider ourselves alone on a
280 * CPU/Core.
281 */
282 uint32_t cEMTsMeansCompany;
283 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
284 * The minimum sleep time for when we're alone, in nano seconds.
285 */
286 uint32_t nsMinSleepAlone;
287 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
288 * The minimum sleep time for when we've got company, in nano seconds.
289 */
290 uint32_t nsMinSleepCompany;
291 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
292 * The limit for the first round of early wakeups, given in nano seconds.
293 */
294 uint32_t nsEarlyWakeUp1;
295 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
296 * The limit for the second round of early wakeups, given in nano seconds.
297 */
298 uint32_t nsEarlyWakeUp2;
299
300 /** The number of entries in the host CPU array (aHostCpus). */
301 uint32_t cHostCpus;
302 /** Per host CPU data (variable length). */
303 GVMMHOSTCPU aHostCpus[1];
304} GVMM;
305/** Pointer to the GVMM instance data. */
306typedef GVMM *PGVMM;
307
308/** The GVMM::u32Magic value (Charlie Haden). */
309#define GVMM_MAGIC UINT32_C(0x19370806)
310
311
312
313/*********************************************************************************************************************************
314* Global Variables *
315*********************************************************************************************************************************/
316/** Pointer to the GVMM instance data.
317 * (Just my general dislike for global variables.) */
318static PGVMM g_pGVMM = NULL;
319
320/** Macro for obtaining and validating the g_pGVMM pointer.
321 * On failure it will return from the invoking function with the specified return value.
322 *
323 * @param pGVMM The name of the pGVMM variable.
324 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
325 * status codes.
326 */
327#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
328 do { \
329 (pGVMM) = g_pGVMM;\
330 AssertPtrReturn((pGVMM), (rc)); \
331 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
332 } while (0)
333
334/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
335 * On failure it will return from the invoking function.
336 *
337 * @param pGVMM The name of the pGVMM variable.
338 */
339#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
340 do { \
341 (pGVMM) = g_pGVMM;\
342 AssertPtrReturnVoid((pGVMM)); \
343 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
344 } while (0)
345
346
347/*********************************************************************************************************************************
348* Internal Functions *
349*********************************************************************************************************************************/
350static void gvmmR0InitPerVMData(PGVM pGVM);
351static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
352static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
353static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
354#ifdef GVMM_SCHED_WITH_PPT
355static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
356#endif
357
358
359/**
360 * Initializes the GVMM.
361 *
362 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
363 *
364 * @returns VBox status code.
365 */
366GVMMR0DECL(int) GVMMR0Init(void)
367{
368 LogFlow(("GVMMR0Init:\n"));
369
370 /*
371 * Allocate and initialize the instance data.
372 */
373 uint32_t cHostCpus = RTMpGetArraySize();
374 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
375
376 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
377 if (!pGVMM)
378 return VERR_NO_MEMORY;
379 int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock);
380 if (RT_SUCCESS(rc))
381 {
382 rc = RTSemFastMutexCreate(&pGVMM->UsedLock);
383 if (RT_SUCCESS(rc))
384 {
385 pGVMM->u32Magic = GVMM_MAGIC;
386 pGVMM->iUsedHead = 0;
387 pGVMM->iFreeHead = 1;
388
389 /* the nil handle */
390 pGVMM->aHandles[0].iSelf = 0;
391 pGVMM->aHandles[0].iNext = 0;
392
393 /* the tail */
394 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
395 pGVMM->aHandles[i].iSelf = i;
396 pGVMM->aHandles[i].iNext = 0; /* nil */
397
398 /* the rest */
399 while (i-- > 1)
400 {
401 pGVMM->aHandles[i].iSelf = i;
402 pGVMM->aHandles[i].iNext = i + 1;
403 }
404
405 /* The default configuration values. */
406 uint32_t cNsResolution = RTSemEventMultiGetResolution();
407 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
408 if (cNsResolution >= 5*RT_NS_100US)
409 {
410 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
411 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
412 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
413 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
414 }
415 else if (cNsResolution > RT_NS_100US)
416 {
417 pGVMM->nsMinSleepAlone = cNsResolution / 2;
418 pGVMM->nsMinSleepCompany = cNsResolution / 4;
419 pGVMM->nsEarlyWakeUp1 = 0;
420 pGVMM->nsEarlyWakeUp2 = 0;
421 }
422 else
423 {
424 pGVMM->nsMinSleepAlone = 2000;
425 pGVMM->nsMinSleepCompany = 2000;
426 pGVMM->nsEarlyWakeUp1 = 0;
427 pGVMM->nsEarlyWakeUp2 = 0;
428 }
429
430 /* The host CPU data. */
431 pGVMM->cHostCpus = cHostCpus;
432 uint32_t iCpu = cHostCpus;
433 RTCPUSET PossibleSet;
434 RTMpGetSet(&PossibleSet);
435 while (iCpu-- > 0)
436 {
437 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
438#ifdef GVMM_SCHED_WITH_PPT
439 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
440 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
441 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
442 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
443 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
444 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
445 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
446 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
447 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
448 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
449 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
450 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
451#endif
452
453 if (RTCpuSetIsMember(&PossibleSet, iCpu))
454 {
455 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
456 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
457
458#ifdef GVMM_SCHED_WITH_PPT
459 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
460 50*1000*1000 /* whatever */,
461 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
462 gvmmR0SchedPeriodicPreemptionTimerCallback,
463 &pGVMM->aHostCpus[iCpu]);
464 if (RT_SUCCESS(rc))
465 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
466 if (RT_FAILURE(rc))
467 {
468 while (iCpu < cHostCpus)
469 {
470 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
471 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
472 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
473 iCpu++;
474 }
475 break;
476 }
477#endif
478 }
479 else
480 {
481 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
482 pGVMM->aHostCpus[iCpu].u32Magic = 0;
483 }
484 }
485 if (RT_SUCCESS(rc))
486 {
487 g_pGVMM = pGVMM;
488 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
489 return VINF_SUCCESS;
490 }
491
492 /* bail out. */
493 RTSemFastMutexDestroy(pGVMM->UsedLock);
494 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
495 }
496 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
497 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
498 }
499
500 RTMemFree(pGVMM);
501 return rc;
502}
503
504
505/**
506 * Terminates the GVM.
507 *
508 * This is called while owning the loader semaphore (see supdrvLdrFree()).
509 * And unless something is wrong, there should be absolutely no VMs
510 * registered at this point.
511 */
512GVMMR0DECL(void) GVMMR0Term(void)
513{
514 LogFlow(("GVMMR0Term:\n"));
515
516 PGVMM pGVMM = g_pGVMM;
517 g_pGVMM = NULL;
518 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
519 {
520 SUPR0Printf("GVMMR0Term: pGVMM=%p\n", pGVMM);
521 return;
522 }
523
524 /*
525 * First of all, stop all active timers.
526 */
527 uint32_t cActiveTimers = 0;
528 uint32_t iCpu = pGVMM->cHostCpus;
529 while (iCpu-- > 0)
530 {
531 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
532#ifdef GVMM_SCHED_WITH_PPT
533 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
534 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
535 cActiveTimers++;
536#endif
537 }
538 if (cActiveTimers)
539 RTThreadSleep(1); /* fudge */
540
541 /*
542 * Invalidate the and free resources.
543 */
544 pGVMM->u32Magic = ~GVMM_MAGIC;
545 RTSemFastMutexDestroy(pGVMM->UsedLock);
546 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
547 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
548 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
549
550 pGVMM->iFreeHead = 0;
551 if (pGVMM->iUsedHead)
552 {
553 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
554 pGVMM->iUsedHead = 0;
555 }
556
557#ifdef GVMM_SCHED_WITH_PPT
558 iCpu = pGVMM->cHostCpus;
559 while (iCpu-- > 0)
560 {
561 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
562 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
563 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
564 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
565 }
566#endif
567
568 RTMemFree(pGVMM);
569}
570
571
572/**
573 * A quick hack for setting global config values.
574 *
575 * @returns VBox status code.
576 *
577 * @param pSession The session handle. Used for authentication.
578 * @param pszName The variable name.
579 * @param u64Value The new value.
580 */
581GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
582{
583 /*
584 * Validate input.
585 */
586 PGVMM pGVMM;
587 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
588 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
589 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
590
591 /*
592 * String switch time!
593 */
594 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
595 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
596 int rc = VINF_SUCCESS;
597 pszName += sizeof("/GVMM/") - 1;
598 if (!strcmp(pszName, "cEMTsMeansCompany"))
599 {
600 if (u64Value <= UINT32_MAX)
601 pGVMM->cEMTsMeansCompany = u64Value;
602 else
603 rc = VERR_OUT_OF_RANGE;
604 }
605 else if (!strcmp(pszName, "MinSleepAlone"))
606 {
607 if (u64Value <= RT_NS_100MS)
608 pGVMM->nsMinSleepAlone = u64Value;
609 else
610 rc = VERR_OUT_OF_RANGE;
611 }
612 else if (!strcmp(pszName, "MinSleepCompany"))
613 {
614 if (u64Value <= RT_NS_100MS)
615 pGVMM->nsMinSleepCompany = u64Value;
616 else
617 rc = VERR_OUT_OF_RANGE;
618 }
619 else if (!strcmp(pszName, "EarlyWakeUp1"))
620 {
621 if (u64Value <= RT_NS_100MS)
622 pGVMM->nsEarlyWakeUp1 = u64Value;
623 else
624 rc = VERR_OUT_OF_RANGE;
625 }
626 else if (!strcmp(pszName, "EarlyWakeUp2"))
627 {
628 if (u64Value <= RT_NS_100MS)
629 pGVMM->nsEarlyWakeUp2 = u64Value;
630 else
631 rc = VERR_OUT_OF_RANGE;
632 }
633 else
634 rc = VERR_CFGM_VALUE_NOT_FOUND;
635 return rc;
636}
637
638
639/**
640 * A quick hack for getting global config values.
641 *
642 * @returns VBox status code.
643 *
644 * @param pSession The session handle. Used for authentication.
645 * @param pszName The variable name.
646 * @param pu64Value Where to return the value.
647 */
648GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
649{
650 /*
651 * Validate input.
652 */
653 PGVMM pGVMM;
654 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
655 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
656 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
657 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
658
659 /*
660 * String switch time!
661 */
662 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
663 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
664 int rc = VINF_SUCCESS;
665 pszName += sizeof("/GVMM/") - 1;
666 if (!strcmp(pszName, "cEMTsMeansCompany"))
667 *pu64Value = pGVMM->cEMTsMeansCompany;
668 else if (!strcmp(pszName, "MinSleepAlone"))
669 *pu64Value = pGVMM->nsMinSleepAlone;
670 else if (!strcmp(pszName, "MinSleepCompany"))
671 *pu64Value = pGVMM->nsMinSleepCompany;
672 else if (!strcmp(pszName, "EarlyWakeUp1"))
673 *pu64Value = pGVMM->nsEarlyWakeUp1;
674 else if (!strcmp(pszName, "EarlyWakeUp2"))
675 *pu64Value = pGVMM->nsEarlyWakeUp2;
676 else
677 rc = VERR_CFGM_VALUE_NOT_FOUND;
678 return rc;
679}
680
681
682/**
683 * Try acquire the 'used' lock.
684 *
685 * @returns IPRT status code, see RTSemFastMutexRequest.
686 * @param pGVMM The GVMM instance data.
687 */
688DECLINLINE(int) gvmmR0UsedLock(PGVMM pGVMM)
689{
690 LogFlow(("++gvmmR0UsedLock(%p)\n", pGVMM));
691 int rc = RTSemFastMutexRequest(pGVMM->UsedLock);
692 LogFlow(("gvmmR0UsedLock(%p)->%Rrc\n", pGVMM, rc));
693 return rc;
694}
695
696
697/**
698 * Release the 'used' lock.
699 *
700 * @returns IPRT status code, see RTSemFastMutexRelease.
701 * @param pGVMM The GVMM instance data.
702 */
703DECLINLINE(int) gvmmR0UsedUnlock(PGVMM pGVMM)
704{
705 LogFlow(("--gvmmR0UsedUnlock(%p)\n", pGVMM));
706 int rc = RTSemFastMutexRelease(pGVMM->UsedLock);
707 AssertRC(rc);
708 return rc;
709}
710
711
712/**
713 * Try acquire the 'create & destroy' lock.
714 *
715 * @returns IPRT status code, see RTSemFastMutexRequest.
716 * @param pGVMM The GVMM instance data.
717 */
718DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
719{
720 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
721 int rc = RTSemFastMutexRequest(pGVMM->CreateDestroyLock);
722 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
723 return rc;
724}
725
726
727/**
728 * Release the 'create & destroy' lock.
729 *
730 * @returns IPRT status code, see RTSemFastMutexRequest.
731 * @param pGVMM The GVMM instance data.
732 */
733DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
734{
735 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
736 int rc = RTSemFastMutexRelease(pGVMM->CreateDestroyLock);
737 AssertRC(rc);
738 return rc;
739}
740
741
742/**
743 * Request wrapper for the GVMMR0CreateVM API.
744 *
745 * @returns VBox status code.
746 * @param pReq The request buffer.
747 */
748GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq)
749{
750 /*
751 * Validate the request.
752 */
753 if (!VALID_PTR(pReq))
754 return VERR_INVALID_POINTER;
755 if (pReq->Hdr.cbReq != sizeof(*pReq))
756 return VERR_INVALID_PARAMETER;
757 if (!VALID_PTR(pReq->pSession))
758 return VERR_INVALID_POINTER;
759
760 /*
761 * Execute it.
762 */
763 PVM pVM;
764 pReq->pVMR0 = NULL;
765 pReq->pVMR3 = NIL_RTR3PTR;
766 int rc = GVMMR0CreateVM(pReq->pSession, pReq->cCpus, &pVM);
767 if (RT_SUCCESS(rc))
768 {
769 pReq->pVMR0 = pVM;
770 pReq->pVMR3 = pVM->pVMR3;
771 }
772 return rc;
773}
774
775
776/**
777 * Allocates the VM structure and registers it with GVM.
778 *
779 * The caller will become the VM owner and there by the EMT.
780 *
781 * @returns VBox status code.
782 * @param pSession The support driver session.
783 * @param cCpus Number of virtual CPUs for the new VM.
784 * @param ppVM Where to store the pointer to the VM structure.
785 *
786 * @thread EMT.
787 */
788GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
789{
790 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
791 PGVMM pGVMM;
792 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
793
794 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
795 *ppVM = NULL;
796
797 if ( cCpus == 0
798 || cCpus > VMM_MAX_CPU_COUNT)
799 return VERR_INVALID_PARAMETER;
800
801 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
802 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
803 RTPROCESS ProcId = RTProcSelf();
804 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
805
806 /*
807 * The whole allocation process is protected by the lock.
808 */
809 int rc = gvmmR0CreateDestroyLock(pGVMM);
810 AssertRCReturn(rc, rc);
811
812 /*
813 * Allocate a handle first so we don't waste resources unnecessarily.
814 */
815 uint16_t iHandle = pGVMM->iFreeHead;
816 if (iHandle)
817 {
818 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
819
820 /* consistency checks, a bit paranoid as always. */
821 if ( !pHandle->pVM
822 && !pHandle->pGVM
823 && !pHandle->pvObj
824 && pHandle->iSelf == iHandle)
825 {
826 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
827 if (pHandle->pvObj)
828 {
829 /*
830 * Move the handle from the free to used list and perform permission checks.
831 */
832 rc = gvmmR0UsedLock(pGVMM);
833 AssertRC(rc);
834
835 pGVMM->iFreeHead = pHandle->iNext;
836 pHandle->iNext = pGVMM->iUsedHead;
837 pGVMM->iUsedHead = iHandle;
838 pGVMM->cVMs++;
839
840 pHandle->pVM = NULL;
841 pHandle->pGVM = NULL;
842 pHandle->pSession = pSession;
843 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
844 pHandle->ProcId = NIL_RTPROCESS;
845
846 gvmmR0UsedUnlock(pGVMM);
847
848 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
849 if (RT_SUCCESS(rc))
850 {
851 /*
852 * Allocate the global VM structure (GVM) and initialize it.
853 */
854 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
855 if (pGVM)
856 {
857 pGVM->u32Magic = GVM_MAGIC;
858 pGVM->hSelf = iHandle;
859 pGVM->pVM = NULL;
860 pGVM->cCpus = cCpus;
861
862 gvmmR0InitPerVMData(pGVM);
863 GMMR0InitPerVMData(pGVM);
864
865 /*
866 * Allocate the shared VM structure and associated page array.
867 */
868 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
869 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
870 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
871 if (RT_SUCCESS(rc))
872 {
873 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
874 memset(pVM, 0, cPages << PAGE_SHIFT);
875 pVM->enmVMState = VMSTATE_CREATING;
876 pVM->pVMR0 = pVM;
877 pVM->pSession = pSession;
878 pVM->hSelf = iHandle;
879 pVM->cbSelf = cbVM;
880 pVM->cCpus = cCpus;
881 pVM->uCpuExecutionCap = 100; /* default is no cap. */
882 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
883 AssertCompileMemberAlignment(VM, cpum, 64);
884 AssertCompileMemberAlignment(VM, tm, 64);
885 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
886
887 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
888 if (RT_SUCCESS(rc))
889 {
890 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
891 for (uint32_t iPage = 0; iPage < cPages; iPage++)
892 {
893 paPages[iPage].uReserved = 0;
894 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
895 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
896 }
897
898 /*
899 * Map them into ring-3.
900 */
901 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
902 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
903 if (RT_SUCCESS(rc))
904 {
905 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
906 AssertPtr((void *)pVM->pVMR3);
907
908 /* Initialize all the VM pointers. */
909 for (uint32_t i = 0; i < cCpus; i++)
910 {
911 pVM->aCpus[i].pVMR0 = pVM;
912 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
913 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
914 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
915 }
916
917 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
918 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
919 NIL_RTR0PROCESS);
920 if (RT_SUCCESS(rc))
921 {
922 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
923 AssertPtr((void *)pVM->paVMPagesR3);
924
925 /* complete the handle - take the UsedLock sem just to be careful. */
926 rc = gvmmR0UsedLock(pGVMM);
927 AssertRC(rc);
928
929 pHandle->pVM = pVM;
930 pHandle->pGVM = pGVM;
931 pHandle->hEMT0 = hEMT0;
932 pHandle->ProcId = ProcId;
933 pGVM->pVM = pVM;
934 pGVM->aCpus[0].hEMT = hEMT0;
935 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
936 pGVMM->cEMTs += cCpus;
937
938 rc = VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[0]);
939 if (RT_SUCCESS(rc))
940 {
941 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
942
943 gvmmR0UsedUnlock(pGVMM);
944 gvmmR0CreateDestroyUnlock(pGVMM);
945
946 *ppVM = pVM;
947 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
948 return VINF_SUCCESS;
949 }
950 }
951
952 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
953 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
954 }
955 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
956 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
957 }
958 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
959 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
960 }
961 }
962 }
963 /* else: The user wasn't permitted to create this VM. */
964
965 /*
966 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
967 * object reference here. A little extra mess because of non-recursive lock.
968 */
969 void *pvObj = pHandle->pvObj;
970 pHandle->pvObj = NULL;
971 gvmmR0CreateDestroyUnlock(pGVMM);
972
973 SUPR0ObjRelease(pvObj, pSession);
974
975 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
976 return rc;
977 }
978
979 rc = VERR_NO_MEMORY;
980 }
981 else
982 rc = VERR_GVMM_IPE_1;
983 }
984 else
985 rc = VERR_GVM_TOO_MANY_VMS;
986
987 gvmmR0CreateDestroyUnlock(pGVMM);
988 return rc;
989}
990
991
992/**
993 * Initializes the per VM data belonging to GVMM.
994 *
995 * @param pGVM Pointer to the global VM structure.
996 */
997static void gvmmR0InitPerVMData(PGVM pGVM)
998{
999 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1000 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1001 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1002 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1003 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1004 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1005 pGVM->gvmm.s.fDoneVMMR0Init = false;
1006 pGVM->gvmm.s.fDoneVMMR0Term = false;
1007
1008 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1009 {
1010 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1011 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1012 }
1013}
1014
1015
1016/**
1017 * Does the VM initialization.
1018 *
1019 * @returns VBox status code.
1020 * @param pVM The cross context VM structure.
1021 */
1022GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
1023{
1024 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
1025
1026 /*
1027 * Validate the VM structure, state and handle.
1028 */
1029 PGVM pGVM;
1030 PGVMM pGVMM;
1031 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1032 if (RT_SUCCESS(rc))
1033 {
1034 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1035 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1036 {
1037 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1038 {
1039 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1040 if (RT_FAILURE(rc))
1041 {
1042 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1043 break;
1044 }
1045 }
1046 }
1047 else
1048 rc = VERR_WRONG_ORDER;
1049 }
1050
1051 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1052 return rc;
1053}
1054
1055
1056/**
1057 * Indicates that we're done with the ring-0 initialization
1058 * of the VM.
1059 *
1060 * @param pVM The cross context VM structure.
1061 * @thread EMT(0)
1062 */
1063GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1064{
1065 /* Validate the VM structure, state and handle. */
1066 PGVM pGVM;
1067 PGVMM pGVMM;
1068 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1069 AssertRCReturnVoid(rc);
1070
1071 /* Set the indicator. */
1072 pGVM->gvmm.s.fDoneVMMR0Init = true;
1073}
1074
1075
1076/**
1077 * Indicates that we're doing the ring-0 termination of the VM.
1078 *
1079 * @returns true if termination hasn't been done already, false if it has.
1080 * @param pVM The cross context VM structure.
1081 * @param pGVM Pointer to the global VM structure. Optional.
1082 * @thread EMT(0)
1083 */
1084GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1085{
1086 /* Validate the VM structure, state and handle. */
1087 AssertPtrNullReturn(pGVM, false);
1088 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1089 if (!pGVM)
1090 {
1091 PGVMM pGVMM;
1092 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1093 AssertRCReturn(rc, false);
1094 }
1095
1096 /* Set the indicator. */
1097 if (pGVM->gvmm.s.fDoneVMMR0Term)
1098 return false;
1099 pGVM->gvmm.s.fDoneVMMR0Term = true;
1100 return true;
1101}
1102
1103
1104/**
1105 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1106 *
1107 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1108 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1109 * would've been nice if the caller was actually the EMT thread or that we somehow
1110 * could've associated the calling thread with the VM up front.
1111 *
1112 * @returns VBox status code.
1113 * @param pVM The cross context VM structure.
1114 *
1115 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1116 */
1117GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM)
1118{
1119 LogFlow(("GVMMR0DestroyVM: pVM=%p\n", pVM));
1120 PGVMM pGVMM;
1121 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1122
1123 /*
1124 * Validate the VM structure, state and caller.
1125 */
1126 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1127 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1128 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1129 VERR_WRONG_ORDER);
1130
1131 uint32_t hGVM = pVM->hSelf;
1132 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1133 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1134
1135 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1136 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1137
1138 RTPROCESS ProcId = RTProcSelf();
1139 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1140 AssertReturn( ( pHandle->hEMT0 == hSelf
1141 && pHandle->ProcId == ProcId)
1142 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1143
1144 /*
1145 * Lookup the handle and destroy the object.
1146 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1147 * object, we take some precautions against racing callers just in case...
1148 */
1149 int rc = gvmmR0CreateDestroyLock(pGVMM);
1150 AssertRC(rc);
1151
1152 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1153 if ( pHandle->pVM == pVM
1154 && ( ( pHandle->hEMT0 == hSelf
1155 && pHandle->ProcId == ProcId)
1156 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1157 && VALID_PTR(pHandle->pvObj)
1158 && VALID_PTR(pHandle->pSession)
1159 && VALID_PTR(pHandle->pGVM)
1160 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1161 {
1162 void *pvObj = pHandle->pvObj;
1163 pHandle->pvObj = NULL;
1164 gvmmR0CreateDestroyUnlock(pGVMM);
1165
1166 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1167 {
1168 /** @todo Can we busy wait here for all thread-context hooks to be
1169 * deregistered before releasing (destroying) it? Only until we find a
1170 * solution for not deregistering hooks everytime we're leaving HMR0
1171 * context. */
1172 VMMR0ThreadCtxHookDestroyForEmt(&pVM->aCpus[idCpu]);
1173 }
1174
1175 SUPR0ObjRelease(pvObj, pHandle->pSession);
1176 }
1177 else
1178 {
1179 SUPR0Printf("GVMMR0DestroyVM: pHandle=%p:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1180 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1181 gvmmR0CreateDestroyUnlock(pGVMM);
1182 rc = VERR_GVMM_IPE_2;
1183 }
1184
1185 return rc;
1186}
1187
1188
1189/**
1190 * Performs VM cleanup task as part of object destruction.
1191 *
1192 * @param pGVM The GVM pointer.
1193 */
1194static void gvmmR0CleanupVM(PGVM pGVM)
1195{
1196 if ( pGVM->gvmm.s.fDoneVMMR0Init
1197 && !pGVM->gvmm.s.fDoneVMMR0Term)
1198 {
1199 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1200 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1201 {
1202 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1203 VMMR0TermVM(pGVM->pVM, pGVM);
1204 }
1205 else
1206 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1207 }
1208
1209 GMMR0CleanupVM(pGVM);
1210}
1211
1212
1213/**
1214 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1215 *
1216 * pvUser1 is the GVM instance pointer.
1217 * pvUser2 is the handle pointer.
1218 */
1219static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1220{
1221 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1222
1223 NOREF(pvObj);
1224
1225 /*
1226 * Some quick, paranoid, input validation.
1227 */
1228 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1229 AssertPtr(pHandle);
1230 PGVMM pGVMM = (PGVMM)pvUser1;
1231 Assert(pGVMM == g_pGVMM);
1232 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1233 if ( !iHandle
1234 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1235 || iHandle != pHandle->iSelf)
1236 {
1237 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1238 return;
1239 }
1240
1241 int rc = gvmmR0CreateDestroyLock(pGVMM);
1242 AssertRC(rc);
1243 rc = gvmmR0UsedLock(pGVMM);
1244 AssertRC(rc);
1245
1246 /*
1247 * This is a tad slow but a doubly linked list is too much hassle.
1248 */
1249 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1250 {
1251 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1252 gvmmR0UsedUnlock(pGVMM);
1253 gvmmR0CreateDestroyUnlock(pGVMM);
1254 return;
1255 }
1256
1257 if (pGVMM->iUsedHead == iHandle)
1258 pGVMM->iUsedHead = pHandle->iNext;
1259 else
1260 {
1261 uint16_t iPrev = pGVMM->iUsedHead;
1262 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1263 while (iPrev)
1264 {
1265 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1266 {
1267 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1268 gvmmR0UsedUnlock(pGVMM);
1269 gvmmR0CreateDestroyUnlock(pGVMM);
1270 return;
1271 }
1272 if (RT_UNLIKELY(c-- <= 0))
1273 {
1274 iPrev = 0;
1275 break;
1276 }
1277
1278 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1279 break;
1280 iPrev = pGVMM->aHandles[iPrev].iNext;
1281 }
1282 if (!iPrev)
1283 {
1284 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1285 gvmmR0UsedUnlock(pGVMM);
1286 gvmmR0CreateDestroyUnlock(pGVMM);
1287 return;
1288 }
1289
1290 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1291 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1292 }
1293 pHandle->iNext = 0;
1294 pGVMM->cVMs--;
1295
1296 /*
1297 * Do the global cleanup round.
1298 */
1299 PGVM pGVM = pHandle->pGVM;
1300 if ( VALID_PTR(pGVM)
1301 && pGVM->u32Magic == GVM_MAGIC)
1302 {
1303 pGVMM->cEMTs -= pGVM->cCpus;
1304 gvmmR0UsedUnlock(pGVMM);
1305
1306 gvmmR0CleanupVM(pGVM);
1307
1308 /*
1309 * Do the GVMM cleanup - must be done last.
1310 */
1311 /* The VM and VM pages mappings/allocations. */
1312 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1313 {
1314 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1315 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1316 }
1317
1318 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1319 {
1320 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1321 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1322 }
1323
1324 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1325 {
1326 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1327 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1328 }
1329
1330 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1331 {
1332 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1333 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1334 }
1335
1336 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1337 {
1338 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1339 {
1340 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1341 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1342 }
1343 }
1344
1345 /* the GVM structure itself. */
1346 pGVM->u32Magic |= UINT32_C(0x80000000);
1347 RTMemFree(pGVM);
1348
1349 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1350 rc = gvmmR0UsedLock(pGVMM);
1351 AssertRC(rc);
1352 }
1353 /* else: GVMMR0CreateVM cleanup. */
1354
1355 /*
1356 * Free the handle.
1357 */
1358 pHandle->iNext = pGVMM->iFreeHead;
1359 pGVMM->iFreeHead = iHandle;
1360 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1361 ASMAtomicWriteNullPtr(&pHandle->pVM);
1362 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1363 ASMAtomicWriteNullPtr(&pHandle->pSession);
1364 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1365 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1366
1367 gvmmR0UsedUnlock(pGVMM);
1368 gvmmR0CreateDestroyUnlock(pGVMM);
1369 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1370}
1371
1372
1373/**
1374 * Registers the calling thread as the EMT of a Virtual CPU.
1375 *
1376 * Note that VCPU 0 is automatically registered during VM creation.
1377 *
1378 * @returns VBox status code
1379 * @param pVM The cross context VM structure.
1380 * @param idCpu VCPU id.
1381 */
1382GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu)
1383{
1384 AssertReturn(idCpu != 0, VERR_NOT_OWNER);
1385
1386 /*
1387 * Validate the VM structure, state and handle.
1388 */
1389 PGVM pGVM;
1390 PGVMM pGVMM;
1391 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /* fTakeUsedLock */);
1392 if (RT_FAILURE(rc))
1393 return rc;
1394
1395 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1396 AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED);
1397 Assert(pGVM->cCpus == pVM->cCpus);
1398 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1399
1400 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1401
1402 return VMMR0ThreadCtxHookCreateForEmt(&pVM->aCpus[idCpu]);
1403}
1404
1405
1406/**
1407 * Lookup a GVM structure by its handle.
1408 *
1409 * @returns The GVM pointer on success, NULL on failure.
1410 * @param hGVM The global VM handle. Asserts on bad handle.
1411 */
1412GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1413{
1414 PGVMM pGVMM;
1415 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1416
1417 /*
1418 * Validate.
1419 */
1420 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1421 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1422
1423 /*
1424 * Look it up.
1425 */
1426 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1427 AssertPtrReturn(pHandle->pVM, NULL);
1428 AssertPtrReturn(pHandle->pvObj, NULL);
1429 PGVM pGVM = pHandle->pGVM;
1430 AssertPtrReturn(pGVM, NULL);
1431 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1432
1433 return pHandle->pGVM;
1434}
1435
1436
1437/**
1438 * Lookup a GVM structure by the shared VM structure.
1439 *
1440 * The calling thread must be in the same process as the VM. All current lookups
1441 * are by threads inside the same process, so this will not be an issue.
1442 *
1443 * @returns VBox status code.
1444 * @param pVM The cross context VM structure.
1445 * @param ppGVM Where to store the GVM pointer.
1446 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1447 * @param fTakeUsedLock Whether to take the used lock or not.
1448 * Be very careful if not taking the lock as it's possible that
1449 * the VM will disappear then.
1450 *
1451 * @remark This will not assert on an invalid pVM but try return silently.
1452 */
1453static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1454{
1455 RTPROCESS ProcId = RTProcSelf();
1456 PGVMM pGVMM;
1457 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1458
1459 /*
1460 * Validate.
1461 */
1462 if (RT_UNLIKELY( !VALID_PTR(pVM)
1463 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1464 return VERR_INVALID_POINTER;
1465 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1466 || pVM->enmVMState >= VMSTATE_TERMINATED))
1467 return VERR_INVALID_POINTER;
1468
1469 uint16_t hGVM = pVM->hSelf;
1470 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1471 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1472 return VERR_INVALID_HANDLE;
1473
1474 /*
1475 * Look it up.
1476 */
1477 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1478 PGVM pGVM;
1479 if (fTakeUsedLock)
1480 {
1481 int rc = gvmmR0UsedLock(pGVMM);
1482 AssertRCReturn(rc, rc);
1483
1484 pGVM = pHandle->pGVM;
1485 if (RT_UNLIKELY( pHandle->pVM != pVM
1486 || pHandle->ProcId != ProcId
1487 || !VALID_PTR(pHandle->pvObj)
1488 || !VALID_PTR(pGVM)
1489 || pGVM->pVM != pVM))
1490 {
1491 gvmmR0UsedUnlock(pGVMM);
1492 return VERR_INVALID_HANDLE;
1493 }
1494 }
1495 else
1496 {
1497 if (RT_UNLIKELY(pHandle->pVM != pVM))
1498 return VERR_INVALID_HANDLE;
1499 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1500 return VERR_INVALID_HANDLE;
1501 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1502 return VERR_INVALID_HANDLE;
1503
1504 pGVM = pHandle->pGVM;
1505 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1506 return VERR_INVALID_HANDLE;
1507 if (RT_UNLIKELY(pGVM->pVM != pVM))
1508 return VERR_INVALID_HANDLE;
1509 }
1510
1511 *ppGVM = pGVM;
1512 *ppGVMM = pGVMM;
1513 return VINF_SUCCESS;
1514}
1515
1516
1517/**
1518 * Lookup a GVM structure by the shared VM structure.
1519 *
1520 * @returns VBox status code.
1521 * @param pVM The cross context VM structure.
1522 * @param ppGVM Where to store the GVM pointer.
1523 *
1524 * @remark This will not take the 'used'-lock because it doesn't do
1525 * nesting and this function will be used from under the lock.
1526 */
1527GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1528{
1529 PGVMM pGVMM;
1530 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1531}
1532
1533
1534/**
1535 * Lookup a GVM structure by the shared VM structure and ensuring that the
1536 * caller is an EMT thread.
1537 *
1538 * @returns VBox status code.
1539 * @param pVM The cross context VM structure.
1540 * @param idCpu The Virtual CPU ID of the calling EMT.
1541 * @param ppGVM Where to store the GVM pointer.
1542 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1543 * @thread EMT
1544 *
1545 * @remark This will assert in all failure paths.
1546 */
1547static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1548{
1549 PGVMM pGVMM;
1550 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1551
1552 /*
1553 * Validate.
1554 */
1555 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1556 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1557
1558 uint16_t hGVM = pVM->hSelf;
1559 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1560 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1561
1562 /*
1563 * Look it up.
1564 */
1565 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1566 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1567 RTPROCESS ProcId = RTProcSelf();
1568 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1569 AssertPtrReturn(pHandle->pvObj, VERR_NOT_OWNER);
1570
1571 PGVM pGVM = pHandle->pGVM;
1572 AssertPtrReturn(pGVM, VERR_NOT_OWNER);
1573 AssertReturn(pGVM->pVM == pVM, VERR_NOT_OWNER);
1574 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1575 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1576 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1577
1578 *ppGVM = pGVM;
1579 *ppGVMM = pGVMM;
1580 return VINF_SUCCESS;
1581}
1582
1583
1584/**
1585 * Lookup a GVM structure by the shared VM structure
1586 * and ensuring that the caller is the EMT thread.
1587 *
1588 * @returns VBox status code.
1589 * @param pVM The cross context VM structure.
1590 * @param idCpu The Virtual CPU ID of the calling EMT.
1591 * @param ppGVM Where to store the GVM pointer.
1592 * @thread EMT
1593 */
1594GVMMR0DECL(int) GVMMR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM)
1595{
1596 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
1597 PGVMM pGVMM;
1598 return gvmmR0ByVMAndEMT(pVM, idCpu, ppGVM, &pGVMM);
1599}
1600
1601
1602/**
1603 * Lookup a VM by its global handle.
1604 *
1605 * @returns Pointer to the VM on success, NULL on failure.
1606 * @param hGVM The global VM handle. Asserts on bad handle.
1607 */
1608GVMMR0DECL(PVM) GVMMR0GetVMByHandle(uint32_t hGVM)
1609{
1610 PGVM pGVM = GVMMR0ByHandle(hGVM);
1611 return pGVM ? pGVM->pVM : NULL;
1612}
1613
1614
1615/**
1616 * Looks up the VM belonging to the specified EMT thread.
1617 *
1618 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1619 * unnecessary kernel panics when the EMT thread hits an assertion. The
1620 * call may or not be an EMT thread.
1621 *
1622 * @returns Pointer to the VM on success, NULL on failure.
1623 * @param hEMT The native thread handle of the EMT.
1624 * NIL_RTNATIVETHREAD means the current thread
1625 */
1626GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1627{
1628 /*
1629 * No Assertions here as we're usually called in a AssertMsgN or
1630 * RTAssert* context.
1631 */
1632 PGVMM pGVMM = g_pGVMM;
1633 if ( !VALID_PTR(pGVMM)
1634 || pGVMM->u32Magic != GVMM_MAGIC)
1635 return NULL;
1636
1637 if (hEMT == NIL_RTNATIVETHREAD)
1638 hEMT = RTThreadNativeSelf();
1639 RTPROCESS ProcId = RTProcSelf();
1640
1641 /*
1642 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1643 */
1644 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1645 {
1646 if ( pGVMM->aHandles[i].iSelf == i
1647 && pGVMM->aHandles[i].ProcId == ProcId
1648 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1649 && VALID_PTR(pGVMM->aHandles[i].pVM)
1650 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1651 {
1652 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1653 return pGVMM->aHandles[i].pVM;
1654
1655 /* This is fearly safe with the current process per VM approach. */
1656 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1657 VMCPUID const cCpus = pGVM->cCpus;
1658 if ( cCpus < 1
1659 || cCpus > VMM_MAX_CPU_COUNT)
1660 continue;
1661 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1662 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1663 return pGVMM->aHandles[i].pVM;
1664 }
1665 }
1666 return NULL;
1667}
1668
1669
1670/**
1671 * This is will wake up expired and soon-to-be expired VMs.
1672 *
1673 * @returns Number of VMs that has been woken up.
1674 * @param pGVMM Pointer to the GVMM instance data.
1675 * @param u64Now The current time.
1676 */
1677static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1678{
1679 /*
1680 * Skip this if we've got disabled because of high resolution wakeups or by
1681 * the user.
1682 */
1683 if ( !pGVMM->nsEarlyWakeUp1
1684 && !pGVMM->nsEarlyWakeUp2)
1685 return 0;
1686
1687/** @todo Rewrite this algorithm. See performance defect XYZ. */
1688
1689 /*
1690 * A cheap optimization to stop wasting so much time here on big setups.
1691 */
1692 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1693 if ( pGVMM->cHaltedEMTs == 0
1694 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1695 return 0;
1696
1697 /*
1698 * The first pass will wake up VMs which have actually expired
1699 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1700 */
1701 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1702 uint64_t u64Min = UINT64_MAX;
1703 unsigned cWoken = 0;
1704 unsigned cHalted = 0;
1705 unsigned cTodo2nd = 0;
1706 unsigned cTodo3rd = 0;
1707 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1708 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1709 i = pGVMM->aHandles[i].iNext)
1710 {
1711 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1712 if ( VALID_PTR(pCurGVM)
1713 && pCurGVM->u32Magic == GVM_MAGIC)
1714 {
1715 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1716 {
1717 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1718 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1719 if (u64)
1720 {
1721 if (u64 <= u64Now)
1722 {
1723 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1724 {
1725 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1726 AssertRC(rc);
1727 cWoken++;
1728 }
1729 }
1730 else
1731 {
1732 cHalted++;
1733 if (u64 <= uNsEarlyWakeUp1)
1734 cTodo2nd++;
1735 else if (u64 <= uNsEarlyWakeUp2)
1736 cTodo3rd++;
1737 else if (u64 < u64Min)
1738 u64 = u64Min;
1739 }
1740 }
1741 }
1742 }
1743 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1744 }
1745
1746 if (cTodo2nd)
1747 {
1748 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1749 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1750 i = pGVMM->aHandles[i].iNext)
1751 {
1752 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1753 if ( VALID_PTR(pCurGVM)
1754 && pCurGVM->u32Magic == GVM_MAGIC)
1755 {
1756 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1757 {
1758 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1759 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1760 if ( u64
1761 && u64 <= uNsEarlyWakeUp1)
1762 {
1763 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1764 {
1765 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1766 AssertRC(rc);
1767 cWoken++;
1768 }
1769 }
1770 }
1771 }
1772 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1773 }
1774 }
1775
1776 if (cTodo3rd)
1777 {
1778 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1779 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1780 i = pGVMM->aHandles[i].iNext)
1781 {
1782 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1783 if ( VALID_PTR(pCurGVM)
1784 && pCurGVM->u32Magic == GVM_MAGIC)
1785 {
1786 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1787 {
1788 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1789 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1790 if ( u64
1791 && u64 <= uNsEarlyWakeUp2)
1792 {
1793 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1794 {
1795 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1796 AssertRC(rc);
1797 cWoken++;
1798 }
1799 }
1800 }
1801 }
1802 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1803 }
1804 }
1805
1806 /*
1807 * Set the minimum value.
1808 */
1809 pGVMM->uNsNextEmtWakeup = u64Min;
1810
1811 return cWoken;
1812}
1813
1814
1815/**
1816 * Halt the EMT thread.
1817 *
1818 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
1819 * VERR_INTERRUPTED if a signal was scheduled for the thread.
1820 * @param pVM The cross context VM structure.
1821 * @param idCpu The Virtual CPU ID of the calling EMT.
1822 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
1823 * @thread EMT(idCpu).
1824 */
1825GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
1826{
1827 LogFlow(("GVMMR0SchedHalt: pVM=%p\n", pVM));
1828 GVMM_CHECK_SMAP_SETUP();
1829 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1830
1831 /*
1832 * Validate the VM structure, state and handle.
1833 */
1834 PGVM pGVM;
1835 PGVMM pGVMM;
1836 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
1837 if (RT_FAILURE(rc))
1838 return rc;
1839 pGVM->gvmm.s.StatsSched.cHaltCalls++;
1840 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1841
1842 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
1843 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
1844
1845 /*
1846 * Take the UsedList semaphore, get the current time
1847 * and check if anyone needs waking up.
1848 * Interrupts must NOT be disabled at this point because we ask for GIP time!
1849 */
1850 rc = gvmmR0UsedLock(pGVMM);
1851 AssertRC(rc);
1852 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1853
1854 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
1855
1856 /* GIP hack: We might are frequently sleeping for short intervals where the
1857 difference between GIP and system time matters on systems with high resolution
1858 system time. So, convert the input from GIP to System time in that case. */
1859 Assert(ASMGetFlags() & X86_EFL_IF);
1860 const uint64_t u64NowSys = RTTimeSystemNanoTS();
1861 const uint64_t u64NowGip = RTTimeNanoTS();
1862 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1863 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
1864 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1865
1866 /*
1867 * Go to sleep if we must...
1868 * Cap the sleep time to 1 second to be on the safe side.
1869 */
1870 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
1871 if ( u64NowGip < u64ExpireGipTime
1872 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
1873 ? pGVMM->nsMinSleepCompany
1874 : pGVMM->nsMinSleepAlone))
1875 {
1876 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
1877 if (cNsInterval > RT_NS_1SEC)
1878 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
1879 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
1880 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
1881 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
1882 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
1883 gvmmR0UsedUnlock(pGVMM);
1884 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1885
1886 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
1887 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
1888 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
1889 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1890
1891 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
1892 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
1893
1894 /* Reset the semaphore to try prevent a few false wake-ups. */
1895 if (rc == VINF_SUCCESS)
1896 {
1897 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1898 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1899 }
1900 else if (rc == VERR_TIMEOUT)
1901 {
1902 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
1903 rc = VINF_SUCCESS;
1904 }
1905 }
1906 else
1907 {
1908 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
1909 gvmmR0UsedUnlock(pGVMM);
1910 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1911 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1912 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1913 }
1914
1915 return rc;
1916}
1917
1918
1919/**
1920 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
1921 * the a sleeping EMT.
1922 *
1923 * @retval VINF_SUCCESS if successfully woken up.
1924 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1925 *
1926 * @param pGVM The global (ring-0) VM structure.
1927 * @param pGVCpu The global (ring-0) VCPU structure.
1928 */
1929DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
1930{
1931 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
1932
1933 /*
1934 * Signal the semaphore regardless of whether it's current blocked on it.
1935 *
1936 * The reason for this is that there is absolutely no way we can be 100%
1937 * certain that it isn't *about* go to go to sleep on it and just got
1938 * delayed a bit en route. So, we will always signal the semaphore when
1939 * the it is flagged as halted in the VMM.
1940 */
1941/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
1942 int rc;
1943 if (pGVCpu->gvmm.s.u64HaltExpire)
1944 {
1945 rc = VINF_SUCCESS;
1946 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
1947 }
1948 else
1949 {
1950 rc = VINF_GVM_NOT_BLOCKED;
1951 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
1952 }
1953
1954 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
1955 AssertRC(rc2);
1956
1957 return rc;
1958}
1959
1960
1961/**
1962 * Wakes up the halted EMT thread so it can service a pending request.
1963 *
1964 * @returns VBox status code.
1965 * @retval VINF_SUCCESS if successfully woken up.
1966 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1967 *
1968 * @param pVM The cross context VM structure.
1969 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1970 * @param fTakeUsedLock Take the used lock or not
1971 * @thread Any but EMT.
1972 */
1973GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1974{
1975 GVMM_CHECK_SMAP_SETUP();
1976 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1977
1978 /*
1979 * Validate input and take the UsedLock.
1980 */
1981 PGVM pGVM;
1982 PGVMM pGVMM;
1983 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1984 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1985 if (RT_SUCCESS(rc))
1986 {
1987 if (idCpu < pGVM->cCpus)
1988 {
1989 /*
1990 * Do the actual job.
1991 */
1992 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
1993 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
1994
1995 if (fTakeUsedLock)
1996 {
1997 /*
1998 * While we're here, do a round of scheduling.
1999 */
2000 Assert(ASMGetFlags() & X86_EFL_IF);
2001 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2002 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2003 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2004 }
2005 }
2006 else
2007 rc = VERR_INVALID_CPU_ID;
2008
2009 if (fTakeUsedLock)
2010 {
2011 int rc2 = gvmmR0UsedUnlock(pGVMM);
2012 AssertRC(rc2);
2013 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2014 }
2015 }
2016
2017 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2018 return rc;
2019}
2020
2021
2022/**
2023 * Wakes up the halted EMT thread so it can service a pending request.
2024 *
2025 * @returns VBox status code.
2026 * @retval VINF_SUCCESS if successfully woken up.
2027 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2028 *
2029 * @param pVM The cross context VM structure.
2030 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2031 * @thread Any but EMT.
2032 */
2033GVMMR0DECL(int) GVMMR0SchedWakeUp(PVM pVM, VMCPUID idCpu)
2034{
2035 return GVMMR0SchedWakeUpEx(pVM, idCpu, true /* fTakeUsedLock */);
2036}
2037
2038/**
2039 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2040 * the Virtual CPU if it's still busy executing guest code.
2041 *
2042 * @returns VBox status code.
2043 * @retval VINF_SUCCESS if poked successfully.
2044 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2045 *
2046 * @param pGVM The global (ring-0) VM structure.
2047 * @param pVCpu The cross context virtual CPU structure.
2048 */
2049DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
2050{
2051 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2052
2053 RTCPUID idHostCpu = pVCpu->idHostCpu;
2054 if ( idHostCpu == NIL_RTCPUID
2055 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2056 {
2057 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2058 return VINF_GVM_NOT_BUSY_IN_GC;
2059 }
2060
2061 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2062 RTMpPokeCpu(idHostCpu);
2063 return VINF_SUCCESS;
2064}
2065
2066/**
2067 * Pokes an EMT if it's still busy running guest code.
2068 *
2069 * @returns VBox status code.
2070 * @retval VINF_SUCCESS if poked successfully.
2071 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2072 *
2073 * @param pVM The cross context VM structure.
2074 * @param idCpu The ID of the virtual CPU to poke.
2075 * @param fTakeUsedLock Take the used lock or not
2076 */
2077GVMMR0DECL(int) GVMMR0SchedPokeEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2078{
2079 /*
2080 * Validate input and take the UsedLock.
2081 */
2082 PGVM pGVM;
2083 PGVMM pGVMM;
2084 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
2085 if (RT_SUCCESS(rc))
2086 {
2087 if (idCpu < pGVM->cCpus)
2088 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2089 else
2090 rc = VERR_INVALID_CPU_ID;
2091
2092 if (fTakeUsedLock)
2093 {
2094 int rc2 = gvmmR0UsedUnlock(pGVMM);
2095 AssertRC(rc2);
2096 }
2097 }
2098
2099 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2100 return rc;
2101}
2102
2103
2104/**
2105 * Pokes an EMT if it's still busy running guest code.
2106 *
2107 * @returns VBox status code.
2108 * @retval VINF_SUCCESS if poked successfully.
2109 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2110 *
2111 * @param pVM The cross context VM structure.
2112 * @param idCpu The ID of the virtual CPU to poke.
2113 */
2114GVMMR0DECL(int) GVMMR0SchedPoke(PVM pVM, VMCPUID idCpu)
2115{
2116 return GVMMR0SchedPokeEx(pVM, idCpu, true /* fTakeUsedLock */);
2117}
2118
2119
2120/**
2121 * Wakes up a set of halted EMT threads so they can service pending request.
2122 *
2123 * @returns VBox status code, no informational stuff.
2124 *
2125 * @param pVM The cross context VM structure.
2126 * @param pSleepSet The set of sleepers to wake up.
2127 * @param pPokeSet The set of CPUs to poke.
2128 */
2129GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2130{
2131 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2132 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2133 GVMM_CHECK_SMAP_SETUP();
2134 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2135 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2136
2137 /*
2138 * Validate input and take the UsedLock.
2139 */
2140 PGVM pGVM;
2141 PGVMM pGVMM;
2142 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /* fTakeUsedLock */);
2143 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2144 if (RT_SUCCESS(rc))
2145 {
2146 rc = VINF_SUCCESS;
2147 VMCPUID idCpu = pGVM->cCpus;
2148 while (idCpu-- > 0)
2149 {
2150 /* Don't try poke or wake up ourselves. */
2151 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2152 continue;
2153
2154 /* just ignore errors for now. */
2155 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2156 {
2157 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2158 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2159 }
2160 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2161 {
2162 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2163 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2164 }
2165 }
2166
2167 int rc2 = gvmmR0UsedUnlock(pGVMM);
2168 AssertRC(rc2);
2169 GVMM_CHECK_SMAP_CHECK2(pVM, RT_NOTHING);
2170 }
2171
2172 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2173 return rc;
2174}
2175
2176
2177/**
2178 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2179 *
2180 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2181 * @param pVM The cross context VM structure.
2182 * @param pReq Pointer to the request packet.
2183 */
2184GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2185{
2186 /*
2187 * Validate input and pass it on.
2188 */
2189 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2190 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2191
2192 return GVMMR0SchedWakeUpAndPokeCpus(pVM, &pReq->SleepSet, &pReq->PokeSet);
2193}
2194
2195
2196
2197/**
2198 * Poll the schedule to see if someone else should get a chance to run.
2199 *
2200 * This is a bit hackish and will not work too well if the machine is
2201 * under heavy load from non-VM processes.
2202 *
2203 * @returns VINF_SUCCESS if not yielded.
2204 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2205 * @param pVM The cross context VM structure.
2206 * @param idCpu The Virtual CPU ID of the calling EMT.
2207 * @param fYield Whether to yield or not.
2208 * This is for when we're spinning in the halt loop.
2209 * @thread EMT(idCpu).
2210 */
2211GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield)
2212{
2213 /*
2214 * Validate input.
2215 */
2216 PGVM pGVM;
2217 PGVMM pGVMM;
2218 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
2219 if (RT_SUCCESS(rc))
2220 {
2221 rc = gvmmR0UsedLock(pGVMM);
2222 AssertRC(rc);
2223 pGVM->gvmm.s.StatsSched.cPollCalls++;
2224
2225 Assert(ASMGetFlags() & X86_EFL_IF);
2226 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2227
2228 if (!fYield)
2229 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2230 else
2231 {
2232 /** @todo implement this... */
2233 rc = VERR_NOT_IMPLEMENTED;
2234 }
2235
2236 gvmmR0UsedUnlock(pGVMM);
2237 }
2238
2239 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2240 return rc;
2241}
2242
2243
#ifdef GVMM_SCHED_WITH_PPT
/**
 * Timer callback for the periodic preemption timer.
 *
 * Every cTicksHistoriziationInterval ticks the desired frequency collected
 * since the last round is stored in the history ring.  The timer is then left
 * alone, reprogrammed to the highest frequency in the history, or stopped if
 * the whole history is zero.
 *
 * @param   pTimer  The timer handle.
 * @param   pvUser  Pointer to the per cpu structure.
 * @param   iTick   The current tick.
 */
static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
    NOREF(pTimer); NOREF(iTick);

    /*
     * Termination check
     */
    if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
        return;

    /*
     * All the bookkeeping below is done while holding the per-CPU spinlock;
     * the lock is always released before the timer API is called.
     */
    RTSpinlockAcquire(pCpu->Ppt.hSpinlock);

    /* Not yet time to historicize?  Then we're done. */
    if (++pCpu->Ppt.iTickHistorization < pCpu->Ppt.cTicksHistoriziationInterval)
    {
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);
        return;
    }

    /*
     * Push the desired frequency into the next history slot and start a new
     * collection round.
     */
    uint32_t const idxSlot = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
    pCpu->Ppt.aHzHistory[idxSlot] = pCpu->Ppt.uDesiredHz;
    pCpu->Ppt.iTickHistorization  = 0;
    pCpu->Ppt.uDesiredHz          = 0;

    /*
     * Find the highest frequency in the history.
     */
    uint32_t uPeakHz = 0;
    for (uint32_t idx = 0; idx < RT_ELEMENTS(pCpu->Ppt.aHzHistory); idx++)
        if (uPeakHz < pCpu->Ppt.aHzHistory[idx])
            uPeakHz = pCpu->Ppt.aHzHistory[idx];

    /*
     * Already running at that frequency: nothing to change.
     */
    if (uPeakHz == pCpu->Ppt.uTimerHz)
    {
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);
        return;
    }

    /*
     * Nobody wants the timer any longer: stop it.
     */
    if (!uPeakHz)
    {
        pCpu->Ppt.fStarted    = false;
        pCpu->Ppt.uTimerHz    = 0;
        pCpu->Ppt.cNsInterval = 0;
        RTSpinlockRelease(pCpu->Ppt.hSpinlock);

        /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
        RTTimerStop(pTimer);
        return;
    }

    /*
     * Otherwise reprogram the timer to the new peak frequency.
     */
    pCpu->Ppt.cChanges++;
    pCpu->Ppt.iTickHistorization = 0;
    pCpu->Ppt.uTimerHz           = uPeakHz;
    uint32_t const cNsNewInterval = RT_NS_1SEC / uPeakHz;
    pCpu->Ppt.cNsInterval        = cNsNewInterval;
    if (cNsNewInterval >= GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
        pCpu->Ppt.cTicksHistoriziationInterval = 1;
    else
        pCpu->Ppt.cTicksHistoriziationInterval = (  GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                                  + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
                                               / cNsNewInterval;
    RTSpinlockRelease(pCpu->Ppt.hSpinlock);

    /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
    RTTimerChangeInterval(pTimer, cNsNewInterval);
}
#endif /* GVMM_SCHED_WITH_PPT */
2326
2327
2328/**
2329 * Updates the periodic preemption timer for the calling CPU.
2330 *
2331 * The caller must have disabled preemption!
2332 * The caller must check that the host can do high resolution timers.
2333 *
2334 * @param pVM The cross context VM structure.
2335 * @param idHostCpu The current host CPU id.
2336 * @param uHz The desired frequency.
2337 */
2338GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2339{
2340 NOREF(pVM);
2341#ifdef GVMM_SCHED_WITH_PPT
2342 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2343 Assert(RTTimerCanDoHighResolution());
2344
2345 /*
2346 * Resolve the per CPU data.
2347 */
2348 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2349 PGVMM pGVMM = g_pGVMM;
2350 if ( !VALID_PTR(pGVMM)
2351 || pGVMM->u32Magic != GVMM_MAGIC)
2352 return;
2353 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2354 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2355 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2356 && pCpu->idCpu == idHostCpu,
2357 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2358
2359 /*
2360 * Check whether we need to do anything about the timer.
2361 * We have to be a little bit careful since we might be race the timer
2362 * callback here.
2363 */
2364 if (uHz > 16384)
2365 uHz = 16384; /** @todo add a query method for this! */
2366 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2367 && uHz >= pCpu->Ppt.uMinHz
2368 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2369 {
2370 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2371
2372 pCpu->Ppt.uDesiredHz = uHz;
2373 uint32_t cNsInterval = 0;
2374 if (!pCpu->Ppt.fStarted)
2375 {
2376 pCpu->Ppt.cStarts++;
2377 pCpu->Ppt.fStarted = true;
2378 pCpu->Ppt.fStarting = true;
2379 pCpu->Ppt.iTickHistorization = 0;
2380 pCpu->Ppt.uTimerHz = uHz;
2381 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2382 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2383 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2384 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2385 / cNsInterval;
2386 else
2387 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2388 }
2389
2390 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2391
2392 if (cNsInterval)
2393 {
2394 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2395 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2396 AssertRC(rc);
2397
2398 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2399 if (RT_FAILURE(rc))
2400 pCpu->Ppt.fStarted = false;
2401 pCpu->Ppt.fStarting = false;
2402 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2403 }
2404 }
2405#else /* !GVMM_SCHED_WITH_PPT */
2406 NOREF(idHostCpu); NOREF(uHz);
2407#endif /* !GVMM_SCHED_WITH_PPT */
2408}
2409
2410
2411/**
2412 * Retrieves the GVMM statistics visible to the caller.
2413 *
2414 * @returns VBox status code.
2415 *
2416 * @param pStats Where to put the statistics.
2417 * @param pSession The current session.
2418 * @param pVM The VM to obtain statistics for. Optional.
2419 */
2420GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2421{
2422 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2423
2424 /*
2425 * Validate input.
2426 */
2427 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2428 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2429 pStats->cVMs = 0; /* (crash before taking the sem...) */
2430
2431 /*
2432 * Take the lock and get the VM statistics.
2433 */
2434 PGVMM pGVMM;
2435 if (pVM)
2436 {
2437 PGVM pGVM;
2438 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2439 if (RT_FAILURE(rc))
2440 return rc;
2441 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2442 }
2443 else
2444 {
2445 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2446 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2447
2448 int rc = gvmmR0UsedLock(pGVMM);
2449 AssertRCReturn(rc, rc);
2450 }
2451
2452 /*
2453 * Enumerate the VMs and add the ones visible to the statistics.
2454 */
2455 pStats->cVMs = 0;
2456 pStats->cEMTs = 0;
2457 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2458
2459 for (unsigned i = pGVMM->iUsedHead;
2460 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2461 i = pGVMM->aHandles[i].iNext)
2462 {
2463 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2464 void *pvObj = pGVMM->aHandles[i].pvObj;
2465 if ( VALID_PTR(pvObj)
2466 && VALID_PTR(pGVM)
2467 && pGVM->u32Magic == GVM_MAGIC
2468 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2469 {
2470 pStats->cVMs++;
2471 pStats->cEMTs += pGVM->cCpus;
2472
2473 pStats->SchedSum.cHaltCalls += pGVM->gvmm.s.StatsSched.cHaltCalls;
2474 pStats->SchedSum.cHaltBlocking += pGVM->gvmm.s.StatsSched.cHaltBlocking;
2475 pStats->SchedSum.cHaltTimeouts += pGVM->gvmm.s.StatsSched.cHaltTimeouts;
2476 pStats->SchedSum.cHaltNotBlocking += pGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2477 pStats->SchedSum.cHaltWakeUps += pGVM->gvmm.s.StatsSched.cHaltWakeUps;
2478
2479 pStats->SchedSum.cWakeUpCalls += pGVM->gvmm.s.StatsSched.cWakeUpCalls;
2480 pStats->SchedSum.cWakeUpNotHalted += pGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2481 pStats->SchedSum.cWakeUpWakeUps += pGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2482
2483 pStats->SchedSum.cPokeCalls += pGVM->gvmm.s.StatsSched.cPokeCalls;
2484 pStats->SchedSum.cPokeNotBusy += pGVM->gvmm.s.StatsSched.cPokeNotBusy;
2485
2486 pStats->SchedSum.cPollCalls += pGVM->gvmm.s.StatsSched.cPollCalls;
2487 pStats->SchedSum.cPollHalts += pGVM->gvmm.s.StatsSched.cPollHalts;
2488 pStats->SchedSum.cPollWakeUps += pGVM->gvmm.s.StatsSched.cPollWakeUps;
2489 }
2490 }
2491
2492 /*
2493 * Copy out the per host CPU statistics.
2494 */
2495 uint32_t iDstCpu = 0;
2496 uint32_t cSrcCpus = pGVMM->cHostCpus;
2497 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2498 {
2499 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2500 {
2501 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2502 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2503#ifdef GVMM_SCHED_WITH_PPT
2504 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2505 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2506 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2507 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2508#else
2509 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2510 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2511 pStats->aHostCpus[iDstCpu].cChanges = 0;
2512 pStats->aHostCpus[iDstCpu].cStarts = 0;
2513#endif
2514 iDstCpu++;
2515 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2516 break;
2517 }
2518 }
2519 pStats->cHostCpus = iDstCpu;
2520
2521 gvmmR0UsedUnlock(pGVMM);
2522
2523 return VINF_SUCCESS;
2524}
2525
2526
2527/**
2528 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2529 *
2530 * @returns see GVMMR0QueryStatistics.
2531 * @param pVM The cross context VM structure. Optional.
2532 * @param pReq Pointer to the request packet.
2533 */
2534GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq)
2535{
2536 /*
2537 * Validate input and pass it on.
2538 */
2539 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2540 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2541
2542 return GVMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pVM);
2543}
2544
2545
2546/**
2547 * Resets the specified GVMM statistics.
2548 *
2549 * @returns VBox status code.
2550 *
2551 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2552 * @param pSession The current session.
2553 * @param pVM The VM to reset statistics for. Optional.
2554 */
2555GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2556{
2557 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2558
2559 /*
2560 * Validate input.
2561 */
2562 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2563 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2564
2565 /*
2566 * Take the lock and get the VM statistics.
2567 */
2568 PGVMM pGVMM;
2569 if (pVM)
2570 {
2571 PGVM pGVM;
2572 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2573 if (RT_FAILURE(rc))
2574 return rc;
2575# define MAYBE_RESET_FIELD(field) \
2576 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2577 MAYBE_RESET_FIELD(cHaltCalls);
2578 MAYBE_RESET_FIELD(cHaltBlocking);
2579 MAYBE_RESET_FIELD(cHaltTimeouts);
2580 MAYBE_RESET_FIELD(cHaltNotBlocking);
2581 MAYBE_RESET_FIELD(cHaltWakeUps);
2582 MAYBE_RESET_FIELD(cWakeUpCalls);
2583 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2584 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2585 MAYBE_RESET_FIELD(cPokeCalls);
2586 MAYBE_RESET_FIELD(cPokeNotBusy);
2587 MAYBE_RESET_FIELD(cPollCalls);
2588 MAYBE_RESET_FIELD(cPollHalts);
2589 MAYBE_RESET_FIELD(cPollWakeUps);
2590# undef MAYBE_RESET_FIELD
2591 }
2592 else
2593 {
2594 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2595
2596 int rc = gvmmR0UsedLock(pGVMM);
2597 AssertRCReturn(rc, rc);
2598 }
2599
2600 /*
2601 * Enumerate the VMs and add the ones visible to the statistics.
2602 */
2603 if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0))
2604 {
2605 for (unsigned i = pGVMM->iUsedHead;
2606 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2607 i = pGVMM->aHandles[i].iNext)
2608 {
2609 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2610 void *pvObj = pGVMM->aHandles[i].pvObj;
2611 if ( VALID_PTR(pvObj)
2612 && VALID_PTR(pGVM)
2613 && pGVM->u32Magic == GVM_MAGIC
2614 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2615 {
2616# define MAYBE_RESET_FIELD(field) \
2617 do { if (pStats->SchedSum. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2618 MAYBE_RESET_FIELD(cHaltCalls);
2619 MAYBE_RESET_FIELD(cHaltBlocking);
2620 MAYBE_RESET_FIELD(cHaltTimeouts);
2621 MAYBE_RESET_FIELD(cHaltNotBlocking);
2622 MAYBE_RESET_FIELD(cHaltWakeUps);
2623 MAYBE_RESET_FIELD(cWakeUpCalls);
2624 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2625 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2626 MAYBE_RESET_FIELD(cPokeCalls);
2627 MAYBE_RESET_FIELD(cPokeNotBusy);
2628 MAYBE_RESET_FIELD(cPollCalls);
2629 MAYBE_RESET_FIELD(cPollHalts);
2630 MAYBE_RESET_FIELD(cPollWakeUps);
2631# undef MAYBE_RESET_FIELD
2632 }
2633 }
2634 }
2635
2636 gvmmR0UsedUnlock(pGVMM);
2637
2638 return VINF_SUCCESS;
2639}
2640
2641
2642/**
2643 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2644 *
2645 * @returns see GVMMR0ResetStatistics.
2646 * @param pVM The cross context VM structure. Optional.
2647 * @param pReq Pointer to the request packet.
2648 */
2649GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PVM pVM, PGVMMRESETSTATISTICSSREQ pReq)
2650{
2651 /*
2652 * Validate input and pass it on.
2653 */
2654 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2655 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2656
2657 return GVMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pVM);
2658}
2659
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette