VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@37462

Last change on this file since 37462 was 37462, checked in by vboxsync, 13 years ago

Assert compile time sanity.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 87.0 KB
1/* $Id: GVMMR0.cpp 37462 2011-06-15 09:59:37Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*******************************************************************************
50* Header Files *
51*******************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmm.h>
59#include <VBox/param.h>
60#include <VBox/err.h>
61#include <iprt/asm.h>
62#include <iprt/asm-amd64-x86.h>
63#include <iprt/mem.h>
64#include <iprt/semaphore.h>
65#include <iprt/time.h>
66#include <VBox/log.h>
67#include <iprt/thread.h>
68#include <iprt/process.h>
69#include <iprt/param.h>
70#include <iprt/string.h>
71#include <iprt/assert.h>
72#include <iprt/mem.h>
73#include <iprt/memobj.h>
74#include <iprt/mp.h>
75#include <iprt/cpuset.h>
76#include <iprt/spinlock.h>
77#include <iprt/timer.h>
78
79
80/*******************************************************************************
81* Defined Constants And Macros *
82*******************************************************************************/
83#if defined(RT_OS_LINUX) || defined(DOXYGEN_RUNNING)
84/** Define this to enable the periodic preemption timer. */
85# define GVMM_SCHED_WITH_PPT
86#endif
87
88
89/*******************************************************************************
90* Structures and Typedefs *
91*******************************************************************************/
92
93/**
94 * Global VM handle.
95 */
96typedef struct GVMHANDLE
97{
98 /** The index of the next handle in the list (free or used). (0 is nil.) */
99 uint16_t volatile iNext;
100 /** Our own index / handle value. */
101 uint16_t iSelf;
102 /** The process ID of the handle owner.
103 * This is used for access checks. */
104 RTPROCESS ProcId;
105 /** The pointer to the ring-0 only (aka global) VM structure. */
106 PGVM pGVM;
107 /** The ring-0 mapping of the shared VM instance data. */
108 PVM pVM;
109 /** The virtual machine object. */
110 void *pvObj;
111 /** The session this VM is associated with. */
112 PSUPDRVSESSION pSession;
113 /** The ring-0 handle of the EMT0 thread.
114 * This is used for ownership checks as well as looking up a VM handle by thread
115 * at times like assertions. */
116 RTNATIVETHREAD hEMT0;
117} GVMHANDLE;
118/** Pointer to a global VM handle. */
119typedef GVMHANDLE *PGVMHANDLE;
120
121/** Number of GVM handles (including the NIL handle). */
122#if HC_ARCH_BITS == 64
123# define GVMM_MAX_HANDLES 8192
124#else
125# define GVMM_MAX_HANDLES 128
126#endif
127
128/**
129 * Per host CPU GVMM data.
130 */
131typedef struct GVMMHOSTCPU
132{
133 /** Magic number (GVMMHOSTCPU_MAGIC). */
134 uint32_t volatile u32Magic;
135 /** The CPU ID. */
136 RTCPUID idCpu;
137 /** The CPU set index. */
138 uint32_t idxCpuSet;
139
140#ifdef GVMM_SCHED_WITH_PPT
141 /** Periodic preemption timer data. */
142 struct
143 {
144 /** The handle to the periodic preemption timer. */
145 PRTTIMER pTimer;
146 /** Spinlock protecting the data below. */
147 RTSPINLOCK hSpinlock;
149 /** The smallest Hz that we need to care about. (static) */
149 uint32_t uMinHz;
150 /** The number of ticks between each historization. */
151 uint32_t cTicksHistoriziationInterval;
152 /** The current historization tick (counting up to
153 * cTicksHistoriziationInterval and then resetting). */
154 uint32_t iTickHistorization;
155 /** The current timer interval. This is set to 0 when inactive. */
156 uint32_t cNsInterval;
157 /** The current timer frequency. This is set to 0 when inactive. */
158 uint32_t uTimerHz;
159 /** The current max frequency reported by the EMTs.
160 * This gets historicized and reset by the timer callback. This is
161 * read without holding the spinlock, so needs atomic updating. */
162 uint32_t volatile uDesiredHz;
163 /** Whether the timer was started or not. */
164 bool volatile fStarted;
165 /** Set if we're starting the timer. */
166 bool volatile fStarting;
167 /** The index of the next history entry (mod it). */
168 uint32_t iHzHistory;
169 /** Historicized uDesiredHz values. The array wraps around, new entries
170 * are added at iHzHistory. This is updated approximately every
171 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
172 uint32_t aHzHistory[8];
173 /** Statistics counter for recording the number of interval changes. */
174 uint32_t cChanges;
175 /** Statistics counter for recording the number of timer starts. */
176 uint32_t cStarts;
177 } Ppt;
178#endif /* GVMM_SCHED_WITH_PPT */
179
180} GVMMHOSTCPU;
181/** Pointer to the per host CPU GVMM data. */
182typedef GVMMHOSTCPU *PGVMMHOSTCPU;
183/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
184#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
185 /** The interval each history entry should cover (approximately), given in
186 * nanoseconds. */
187#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
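/* [Editor's illustration - not part of the original file] A minimal
 * compile-time sketch spelling out the ~160 ms history window mentioned in
 * the PPT overview above: 8 aHzHistory entries times the 20 ms historization
 * interval. It uses only definitions already present in this file. */
#ifdef GVMM_SCHED_WITH_PPT
AssertCompile(  RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory)
              * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS == UINT32_C(160000000)); /* ~160 ms */
#endif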
188
189
190/**
191 * The GVMM instance data.
192 */
193typedef struct GVMM
194{
195 /** Eyecatcher / magic. */
196 uint32_t u32Magic;
197 /** The index of the head of the free handle chain. (0 is nil.) */
198 uint16_t volatile iFreeHead;
199 /** The index of the head of the active handle chain. (0 is nil.) */
200 uint16_t volatile iUsedHead;
201 /** The number of VMs. */
202 uint16_t volatile cVMs;
203 /** Alignment padding. */
204 uint16_t u16Reserved;
205 /** The number of EMTs. */
206 uint32_t volatile cEMTs;
207 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
208 uint32_t volatile cHaltedEMTs;
209 /** Alignment padding. */
210 uint32_t u32Alignment;
211 /** When the next halted or sleeping EMT will wake up.
212 * This is set to 0 when it needs recalculating and to UINT64_MAX when
213 * there are no halted or sleeping EMTs in the GVMM. */
214 uint64_t uNsNextEmtWakeup;
215 /** The lock used to serialize VM creation, destruction and associated events that
216 * aren't performance critical. Owners may acquire the list lock (UsedLock). */
217 RTSEMFASTMUTEX CreateDestroyLock;
218 /** The lock used to serialize used list updates and accesses.
219 * This indirectly includes scheduling since the scheduler will have to walk the
220 * used list to examine running VMs. Owners may not acquire any other locks. */
221 RTSEMFASTMUTEX UsedLock;
222 /** The handle array.
223 * The size of this array defines the maximum number of currently running VMs.
224 * The first entry is unused as it represents the NIL handle. */
225 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
226
227 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
228 * The number of EMTs that means we no longer consider ourselves alone on a
229 * CPU/Core.
230 */
231 uint32_t cEMTsMeansCompany;
232 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
233 * The minimum sleep time for when we're alone, in nanoseconds.
234 */
235 uint32_t nsMinSleepAlone;
236 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
237 * The minimum sleep time for when we've got company, in nanoseconds.
238 */
239 uint32_t nsMinSleepCompany;
240 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
241 * The limit for the first round of early wakeups, given in nanoseconds.
242 */
243 uint32_t nsEarlyWakeUp1;
244 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
245 * The limit for the second round of early wakeups, given in nanoseconds.
246 */
247 uint32_t nsEarlyWakeUp2;
248
249 /** The number of entries in the host CPU array (aHostCpus). */
250 uint32_t cHostCpus;
251 /** Per host CPU data (variable length). */
252 GVMMHOSTCPU aHostCpus[1];
253} GVMM;
254/** Pointer to the GVMM instance data. */
255typedef GVMM *PGVMM;
256
257/** The GVMM::u32Magic value (Charlie Haden). */
258#define GVMM_MAGIC 0x19370806
259
260
261
262/*******************************************************************************
263* Global Variables *
264*******************************************************************************/
265/** Pointer to the GVMM instance data.
266 * (Just my general dislike for global variables.) */
267static PGVMM g_pGVMM = NULL;
268
269/** Macro for obtaining and validating the g_pGVMM pointer.
270 * On failure it will return from the invoking function with the specified return value.
271 *
272 * @param pGVMM The name of the pGVMM variable.
273 * @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
274 * VBox status codes.
275 */
276#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
277 do { \
278 (pGVMM) = g_pGVMM;\
279 AssertPtrReturn((pGVMM), (rc)); \
280 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
281 } while (0)
282
283/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
284 * On failure it will return from the invoking function.
285 *
286 * @param pGVMM The name of the pGVMM variable.
287 */
288#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
289 do { \
290 (pGVMM) = g_pGVMM;\
291 AssertPtrReturnVoid((pGVMM)); \
292 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
293 } while (0)
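/* [Editor's illustration - not part of the original file] A hypothetical
 * sketch of how the two instance macros above are meant to be used; the
 * function name is made up, but the pattern mirrors the exported GVMMR0*
 * APIs further down in this file. */
#if 0 /* illustration only */
static int gvmmR0IllustrateInstanceAccess(void)
{
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR); /* bails out on a missing/corrupt instance */
    /* ... pGVMM is now safe to dereference ... */
    NOREF(pGVMM);
    return VINF_SUCCESS;
}
#endif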
294
295
296/*******************************************************************************
297* Internal Functions *
298*******************************************************************************/
299static void gvmmR0InitPerVMData(PGVM pGVM);
300static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
301static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
302static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
303#ifdef GVMM_SCHED_WITH_PPT
304static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
305#endif
306
307
308/**
309 * Initializes the GVMM.
310 *
311 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
312 *
313 * @returns VBox status code.
314 */
315GVMMR0DECL(int) GVMMR0Init(void)
316{
317 LogFlow(("GVMMR0Init:\n"));
318
319 /*
320 * Allocate and initialize the instance data.
321 */
322 uint32_t cHostCpus = RTMpGetArraySize();
323 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_INTERNAL_ERROR_2);
324
325 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
326 if (!pGVMM)
327 return VERR_NO_MEMORY;
328 int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock);
329 if (RT_SUCCESS(rc))
330 {
331 rc = RTSemFastMutexCreate(&pGVMM->UsedLock);
332 if (RT_SUCCESS(rc))
333 {
334 pGVMM->u32Magic = GVMM_MAGIC;
335 pGVMM->iUsedHead = 0;
336 pGVMM->iFreeHead = 1;
337
338 /* the nil handle */
339 pGVMM->aHandles[0].iSelf = 0;
340 pGVMM->aHandles[0].iNext = 0;
341
342 /* the tail */
343 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
344 pGVMM->aHandles[i].iSelf = i;
345 pGVMM->aHandles[i].iNext = 0; /* nil */
346
347 /* the rest */
348 while (i-- > 1)
349 {
350 pGVMM->aHandles[i].iSelf = i;
351 pGVMM->aHandles[i].iNext = i + 1;
352 }
353
354 /* The default configuration values. */
355 uint32_t cNsResolution = RTSemEventMultiGetResolution();
356 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
357 if (cNsResolution >= 5*RT_NS_100US)
358 {
359 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
360 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
361 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
362 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
363 }
364 else if (cNsResolution > RT_NS_100US)
365 {
366 pGVMM->nsMinSleepAlone = cNsResolution / 2;
367 pGVMM->nsMinSleepCompany = cNsResolution / 4;
368 pGVMM->nsEarlyWakeUp1 = 0;
369 pGVMM->nsEarlyWakeUp2 = 0;
370 }
371 else
372 {
373 pGVMM->nsMinSleepAlone = 2000;
374 pGVMM->nsMinSleepCompany = 2000;
375 pGVMM->nsEarlyWakeUp1 = 0;
376 pGVMM->nsEarlyWakeUp2 = 0;
377 }
378
379 /* The host CPU data. */
380 pGVMM->cHostCpus = cHostCpus;
381 uint32_t iCpu = cHostCpus;
382 RTCPUSET PossibleSet;
383 RTMpGetSet(&PossibleSet);
384 while (iCpu-- > 0)
385 {
386 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
387#ifdef GVMM_SCHED_WITH_PPT
388 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
389 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
390 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
391 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
392 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
393 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
394 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
395 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
396 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
397 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
398 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
399 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
400#endif
401
402 if (RTCpuSetIsMember(&PossibleSet, iCpu))
403 {
404 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
405 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
406
407#ifdef GVMM_SCHED_WITH_PPT
408 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
409 50*1000*1000 /* whatever */,
410 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
411 gvmmR0SchedPeriodicPreemptionTimerCallback,
412 &pGVMM->aHostCpus[iCpu]);
413 if (RT_SUCCESS(rc))
414 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
415 if (RT_FAILURE(rc))
416 {
417 while (iCpu < cHostCpus)
418 {
419 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
420 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
421 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
422 iCpu++;
423 }
424 break;
425 }
426#endif
427 }
428 else
429 {
430 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
431 pGVMM->aHostCpus[iCpu].u32Magic = 0;
432 }
433 }
434 if (RT_SUCCESS(rc))
435 {
436 g_pGVMM = pGVMM;
437 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
438 return VINF_SUCCESS;
439 }
440
441 /* bail out. */
442 RTSemFastMutexDestroy(pGVMM->UsedLock);
443 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
444 }
445 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
446 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
447 }
448
449 RTMemFree(pGVMM);
450 return rc;
451}
452
453
454/**
455 * Terminates the GVMM.
456 *
457 * This is called while owning the loader semaphore (see supdrvLdrFree()).
458 * And unless something is wrong, there should be absolutely no VMs
459 * registered at this point.
460 */
461GVMMR0DECL(void) GVMMR0Term(void)
462{
463 LogFlow(("GVMMR0Term:\n"));
464
465 PGVMM pGVMM = g_pGVMM;
466 g_pGVMM = NULL;
467 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
468 {
469 SUPR0Printf("GVMMR0Term: pGVMM=%p\n", pGVMM);
470 return;
471 }
472
473 /*
474 * First of all, stop all active timers.
475 */
476 uint32_t cActiveTimers = 0;
477 uint32_t iCpu = pGVMM->cHostCpus;
478 while (iCpu-- > 0)
479 {
480 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
481#ifdef GVMM_SCHED_WITH_PPT
482 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
483 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
484 cActiveTimers++;
485#endif
486 }
487 if (cActiveTimers)
488 RTThreadSleep(1); /* fudge */
489
490 /*
491 * Invalidate the instance data and free resources.
492 */
493 pGVMM->u32Magic = ~GVMM_MAGIC;
494 RTSemFastMutexDestroy(pGVMM->UsedLock);
495 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
496 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
497 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
498
499 pGVMM->iFreeHead = 0;
500 if (pGVMM->iUsedHead)
501 {
502 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
503 pGVMM->iUsedHead = 0;
504 }
505
506#ifdef GVMM_SCHED_WITH_PPT
507 iCpu = pGVMM->cHostCpus;
508 while (iCpu-- > 0)
509 {
510 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
511 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
512 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
513 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
514 }
515#endif
516
517 RTMemFree(pGVMM);
518}
519
520
521/**
522 * A quick hack for setting global config values.
523 *
524 * @returns VBox status code.
525 *
526 * @param pSession The session handle. Used for authentication.
527 * @param pszName The variable name.
528 * @param u64Value The new value.
529 */
530GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
531{
532 /*
533 * Validate input.
534 */
535 PGVMM pGVMM;
536 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
537 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
538 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
539
540 /*
541 * String switch time!
542 */
543 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
544 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
545 int rc = VINF_SUCCESS;
546 pszName += sizeof("/GVMM/") - 1;
547 if (!strcmp(pszName, "cEMTsMeansCompany"))
548 {
549 if (u64Value <= UINT32_MAX)
550 pGVMM->cEMTsMeansCompany = u64Value;
551 else
552 rc = VERR_OUT_OF_RANGE;
553 }
554 else if (!strcmp(pszName, "MinSleepAlone"))
555 {
556 if (u64Value <= RT_NS_100MS)
557 pGVMM->nsMinSleepAlone = u64Value;
558 else
559 rc = VERR_OUT_OF_RANGE;
560 }
561 else if (!strcmp(pszName, "MinSleepCompany"))
562 {
563 if (u64Value <= RT_NS_100MS)
564 pGVMM->nsMinSleepCompany = u64Value;
565 else
566 rc = VERR_OUT_OF_RANGE;
567 }
568 else if (!strcmp(pszName, "EarlyWakeUp1"))
569 {
570 if (u64Value <= RT_NS_100MS)
571 pGVMM->nsEarlyWakeUp1 = u64Value;
572 else
573 rc = VERR_OUT_OF_RANGE;
574 }
575 else if (!strcmp(pszName, "EarlyWakeUp2"))
576 {
577 if (u64Value <= RT_NS_100MS)
578 pGVMM->nsEarlyWakeUp2 = u64Value;
579 else
580 rc = VERR_OUT_OF_RANGE;
581 }
582 else
583 rc = VERR_CFGM_VALUE_NOT_FOUND;
584 return rc;
585}
586
587
588/**
589 * A quick hack for getting global config values.
590 *
591 * @returns VBox status code.
592 *
593 * @param pSession The session handle. Used for authentication.
594 * @param pszName The variable name.
595 * @param pu64Value Where to store the current value.
596 */
597GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
598{
599 /*
600 * Validate input.
601 */
602 PGVMM pGVMM;
603 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
604 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
605 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
606 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
607
608 /*
609 * String switch time!
610 */
611 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
612 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
613 int rc = VINF_SUCCESS;
614 pszName += sizeof("/GVMM/") - 1;
615 if (!strcmp(pszName, "cEMTsMeansCompany"))
616 *pu64Value = pGVMM->cEMTsMeansCompany;
617 else if (!strcmp(pszName, "MinSleepAlone"))
618 *pu64Value = pGVMM->nsMinSleepAlone;
619 else if (!strcmp(pszName, "MinSleepCompany"))
620 *pu64Value = pGVMM->nsMinSleepCompany;
621 else if (!strcmp(pszName, "EarlyWakeUp1"))
622 *pu64Value = pGVMM->nsEarlyWakeUp1;
623 else if (!strcmp(pszName, "EarlyWakeUp2"))
624 *pu64Value = pGVMM->nsEarlyWakeUp2;
625 else
626 rc = VERR_CFGM_VALUE_NOT_FOUND;
627 return rc;
628}
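/* [Editor's illustration - not part of the original file] A hypothetical
 * ring-0 caller of the two config hacks above; the "/GVMM/" prefix and the
 * "MinSleepAlone" name are taken straight from the string switches. */
#if 0 /* illustration only */
static int gvmmR0IllustrateConfig(PSUPDRVSESSION pSession)
{
    uint64_t u64Value;
    int rc = GVMMR0QueryConfig(pSession, "/GVMM/MinSleepAlone", &u64Value);
    if (RT_SUCCESS(rc))
        rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", u64Value * 2); /* double the solo minimum sleep */
    return rc;
}
#endif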
629
630
631/**
632 * Try acquire the 'used' lock.
633 *
634 * @returns IPRT status code, see RTSemFastMutexRequest.
635 * @param pGVMM The GVMM instance data.
636 */
637DECLINLINE(int) gvmmR0UsedLock(PGVMM pGVMM)
638{
639 LogFlow(("++gvmmR0UsedLock(%p)\n", pGVMM));
640 int rc = RTSemFastMutexRequest(pGVMM->UsedLock);
641 LogFlow(("gvmmR0UsedLock(%p)->%Rrc\n", pGVMM, rc));
642 return rc;
643}
644
645
646/**
647 * Release the 'used' lock.
648 *
649 * @returns IPRT status code, see RTSemFastMutexRelease.
650 * @param pGVMM The GVMM instance data.
651 */
652DECLINLINE(int) gvmmR0UsedUnlock(PGVMM pGVMM)
653{
654 LogFlow(("--gvmmR0UsedUnlock(%p)\n", pGVMM));
655 int rc = RTSemFastMutexRelease(pGVMM->UsedLock);
656 AssertRC(rc);
657 return rc;
658}
659
660
661/**
662 * Try acquire the 'create & destroy' lock.
663 *
664 * @returns IPRT status code, see RTSemFastMutexRequest.
665 * @param pGVMM The GVMM instance data.
666 */
667DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
668{
669 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
670 int rc = RTSemFastMutexRequest(pGVMM->CreateDestroyLock);
671 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
672 return rc;
673}
674
675
676/**
677 * Release the 'create & destroy' lock.
678 *
679 * @returns IPRT status code, see RTSemFastMutexRequest.
680 * @param pGVMM The GVMM instance data.
681 */
682DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
683{
684 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
685 int rc = RTSemFastMutexRelease(pGVMM->CreateDestroyLock);
686 AssertRC(rc);
687 return rc;
688}
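/* [Editor's illustration - not part of the original file] A minimal sketch of
 * the lock ordering documented in the GVMM structure: CreateDestroyLock may
 * be followed by UsedLock, never the other way around. This mirrors what
 * GVMMR0CreateVM and gvmmR0HandleObjDestructor do below. */
#if 0 /* illustration only */
static void gvmmR0IllustrateLockOrder(PGVMM pGVMM)
{
    int rc = gvmmR0CreateDestroyLock(pGVMM);    /* outer lock */
    AssertRC(rc);
    rc = gvmmR0UsedLock(pGVMM);                 /* inner lock - allowed while holding the outer one */
    AssertRC(rc);

    /* ... walk or update the used list here ... */

    gvmmR0UsedUnlock(pGVMM);
    gvmmR0CreateDestroyUnlock(pGVMM);
}
#endif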
689
690
691/**
692 * Request wrapper for the GVMMR0CreateVM API.
693 *
694 * @returns VBox status code.
695 * @param pReq The request buffer.
696 */
697GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq)
698{
699 /*
700 * Validate the request.
701 */
702 if (!VALID_PTR(pReq))
703 return VERR_INVALID_POINTER;
704 if (pReq->Hdr.cbReq != sizeof(*pReq))
705 return VERR_INVALID_PARAMETER;
706 if (!VALID_PTR(pReq->pSession))
707 return VERR_INVALID_POINTER;
708
709 /*
710 * Execute it.
711 */
712 PVM pVM;
713 pReq->pVMR0 = NULL;
714 pReq->pVMR3 = NIL_RTR3PTR;
715 int rc = GVMMR0CreateVM(pReq->pSession, pReq->cCpus, &pVM);
716 if (RT_SUCCESS(rc))
717 {
718 pReq->pVMR0 = pVM;
719 pReq->pVMR3 = pVM->pVMR3;
720 }
721 return rc;
722}
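/* [Editor's illustration - not part of the original file] A hypothetical
 * caller of the request wrapper above, setting only the fields the wrapper
 * itself validates and consumes (Hdr.cbReq, pSession, cCpus). In practice the
 * request is built in ring-3 and arrives through the VMMR0 request path. */
#if 0 /* illustration only */
static int gvmmR0IllustrateCreateVMReq(PSUPDRVSESSION pSession, uint32_t cCpus)
{
    GVMMCREATEVMREQ Req;
    memset(&Req, 0, sizeof(Req));
    Req.Hdr.cbReq = sizeof(Req);            /* the only header field the wrapper checks */
    Req.pSession  = pSession;
    Req.cCpus     = cCpus;
    int rc = GVMMR0CreateVMReq(&Req);
    /* On success Req.pVMR0 and Req.pVMR3 hold the ring-0 and ring-3 mappings of the new VM. */
    return rc;
}
#endif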
723
724
725/**
726 * Allocates the VM structure and registers it with GVM.
727 *
728 * The caller will become the VM owner and thereby the EMT.
729 *
730 * @returns VBox status code.
731 * @param pSession The support driver session.
732 * @param cCpus Number of virtual CPUs for the new VM.
733 * @param ppVM Where to store the pointer to the VM structure.
734 *
735 * @thread EMT.
736 */
737GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
738{
739 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
740 PGVMM pGVMM;
741 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
742
743 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
744 *ppVM = NULL;
745
746 if ( cCpus == 0
747 || cCpus > VMM_MAX_CPU_COUNT)
748 return VERR_INVALID_PARAMETER;
749
750 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
751 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR);
752 RTPROCESS ProcId = RTProcSelf();
753 AssertReturn(ProcId != NIL_RTPROCESS, VERR_INTERNAL_ERROR);
754
755 /*
756 * The whole allocation process is protected by the lock.
757 */
758 int rc = gvmmR0CreateDestroyLock(pGVMM);
759 AssertRCReturn(rc, rc);
760
761 /*
762 * Allocate a handle first so we don't waste resources unnecessarily.
763 */
764 uint16_t iHandle = pGVMM->iFreeHead;
765 if (iHandle)
766 {
767 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
768
769 /* consistency checks, a bit paranoid as always. */
770 if ( !pHandle->pVM
771 && !pHandle->pGVM
772 && !pHandle->pvObj
773 && pHandle->iSelf == iHandle)
774 {
775 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
776 if (pHandle->pvObj)
777 {
778 /*
779 * Move the handle from the free to used list and perform permission checks.
780 */
781 rc = gvmmR0UsedLock(pGVMM);
782 AssertRC(rc);
783
784 pGVMM->iFreeHead = pHandle->iNext;
785 pHandle->iNext = pGVMM->iUsedHead;
786 pGVMM->iUsedHead = iHandle;
787 pGVMM->cVMs++;
788
789 pHandle->pVM = NULL;
790 pHandle->pGVM = NULL;
791 pHandle->pSession = pSession;
792 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
793 pHandle->ProcId = NIL_RTPROCESS;
794
795 gvmmR0UsedUnlock(pGVMM);
796
797 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
798 if (RT_SUCCESS(rc))
799 {
800 /*
801 * Allocate the global VM structure (GVM) and initialize it.
802 */
803 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
804 if (pGVM)
805 {
806 pGVM->u32Magic = GVM_MAGIC;
807 pGVM->hSelf = iHandle;
808 pGVM->pVM = NULL;
809 pGVM->cCpus = cCpus;
810
811 gvmmR0InitPerVMData(pGVM);
812 GMMR0InitPerVMData(pGVM);
813
814 /*
815 * Allocate the shared VM structure and associated page array.
816 */
817 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
818 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
819#ifdef RT_OS_DARWIN /** @todo Figure out why this is broken. Is it only on snow leopard? */
820 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, (cPages + 1) << PAGE_SHIFT, false /* fExecutable */);
821#else
822 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
823#endif
824 if (RT_SUCCESS(rc))
825 {
826 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
827 memset(pVM, 0, cPages << PAGE_SHIFT);
828 pVM->enmVMState = VMSTATE_CREATING;
829 pVM->pVMR0 = pVM;
830 pVM->pSession = pSession;
831 pVM->hSelf = iHandle;
832 pVM->cbSelf = cbVM;
833 pVM->cCpus = cCpus;
834 pVM->uCpuExecutionCap = 100; /* default is no cap. */
835 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
836 AssertCompileMemberAlignment(VM, cpum, 64);
837 AssertCompileMemberAlignment(VM, tm, 64);
838 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
839 AssertCompileMemberAlignment(VM, aCpus[1], PAGE_SIZE);
840
841 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
842 if (RT_SUCCESS(rc))
843 {
844 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
845 for (uint32_t iPage = 0; iPage < cPages; iPage++)
846 {
847 paPages[iPage].uReserved = 0;
848 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
849 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
850 }
851
852 /*
853 * Map them into ring-3.
854 */
855 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
856 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
857 if (RT_SUCCESS(rc))
858 {
859 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
860 AssertPtr((void *)pVM->pVMR3);
861
862 /* Initialize all the VM pointers. */
863 for (uint32_t i = 0; i < cCpus; i++)
864 {
865 pVM->aCpus[i].pVMR0 = pVM;
866 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
867 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
868 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
869 }
870
871 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, 0,
872 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
873 if (RT_SUCCESS(rc))
874 {
875 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
876 AssertPtr((void *)pVM->paVMPagesR3);
877
878 /* complete the handle - take the UsedLock sem just to be careful. */
879 rc = gvmmR0UsedLock(pGVMM);
880 AssertRC(rc);
881
882 pHandle->pVM = pVM;
883 pHandle->pGVM = pGVM;
884 pHandle->hEMT0 = hEMT0;
885 pHandle->ProcId = ProcId;
886 pGVM->pVM = pVM;
887 pGVM->aCpus[0].hEMT = hEMT0;
888 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
889 pGVMM->cEMTs += cCpus;
890
891 gvmmR0UsedUnlock(pGVMM);
892 gvmmR0CreateDestroyUnlock(pGVMM);
893
894 *ppVM = pVM;
895 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
896 return VINF_SUCCESS;
897 }
898
899 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
900 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
901 }
902 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
903 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
904 }
905 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
906 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
907 }
908 }
909 }
910 /* else: The user wasn't permitted to create this VM. */
911
912 /*
913 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
914 * object reference here. A little extra mess because of non-recursive lock.
915 */
916 void *pvObj = pHandle->pvObj;
917 pHandle->pvObj = NULL;
918 gvmmR0CreateDestroyUnlock(pGVMM);
919
920 SUPR0ObjRelease(pvObj, pSession);
921
922 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
923 return rc;
924 }
925
926 rc = VERR_NO_MEMORY;
927 }
928 else
929 rc = VERR_INTERNAL_ERROR;
930 }
931 else
932 rc = VERR_GVM_TOO_MANY_VMS;
933
934 gvmmR0CreateDestroyUnlock(pGVMM);
935 return rc;
936}
937
938
939/**
940 * Initializes the per VM data belonging to GVMM.
941 *
942 * @param pGVM Pointer to the global VM structure.
943 */
944static void gvmmR0InitPerVMData(PGVM pGVM)
945{
946 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
947 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
948 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
949 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
950 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
951 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
952 pGVM->gvmm.s.fDoneVMMR0Init = false;
953 pGVM->gvmm.s.fDoneVMMR0Term = false;
954
955 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
956 {
957 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
958 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
959 }
960}
961
962
963/**
964 * Does the VM initialization.
965 *
966 * @returns VBox status code.
967 * @param pVM Pointer to the shared VM structure.
968 */
969GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
970{
971 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
972
973 /*
974 * Validate the VM structure, state and handle.
975 */
976 PGVM pGVM;
977 PGVMM pGVMM;
978 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
979 if (RT_SUCCESS(rc))
980 {
981 if ( !pGVM->gvmm.s.fDoneVMMR0Init
982 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
983 {
984 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
985 {
986 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
987 if (RT_FAILURE(rc))
988 {
989 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
990 break;
991 }
992 }
993 }
994 else
995 rc = VERR_WRONG_ORDER;
996 }
997
998 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
999 return rc;
1000}
1001
1002
1003/**
1004 * Indicates that we're done with the ring-0 initialization
1005 * of the VM.
1006 *
1007 * @param pVM Pointer to the shared VM structure.
1008 * @thread EMT(0)
1009 */
1010GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1011{
1012 /* Validate the VM structure, state and handle. */
1013 PGVM pGVM;
1014 PGVMM pGVMM;
1015 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1016 AssertRCReturnVoid(rc);
1017
1018 /* Set the indicator. */
1019 pGVM->gvmm.s.fDoneVMMR0Init = true;
1020}
1021
1022
1023/**
1024 * Indicates that we're doing the ring-0 termination of the VM.
1025 *
1026 * @returns true if termination hasn't been done already, false if it has.
1027 * @param pVM Pointer to the shared VM structure.
1028 * @param pGVM Pointer to the global VM structure. Optional.
1029 * @thread EMT(0)
1030 */
1031GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1032{
1033 /* Validate the VM structure, state and handle. */
1034 AssertPtrNullReturn(pGVM, false);
1035 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1036 if (!pGVM)
1037 {
1038 PGVMM pGVMM;
1039 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1040 AssertRCReturn(rc, false);
1041 }
1042
1043 /* Set the indicator. */
1044 if (pGVM->gvmm.s.fDoneVMMR0Term)
1045 return false;
1046 pGVM->gvmm.s.fDoneVMMR0Term = true;
1047 return true;
1048}
1049
1050
1051/**
1052 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1053 *
1054 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1055 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1056 * would've been nice if the caller was actually the EMT thread or that we somehow
1057 * could've associated the calling thread with the VM up front.
1058 *
1059 * @returns VBox status code.
1060 * @param pVM Pointer to the shared VM structure.
1061 *
1062 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1063 */
1064GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM)
1065{
1066 LogFlow(("GVMMR0DestroyVM: pVM=%p\n", pVM));
1067 PGVMM pGVMM;
1068 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1069
1070
1071 /*
1072 * Validate the VM structure, state and caller.
1073 */
1074 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1075 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1076 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), VERR_WRONG_ORDER);
1077
1078 uint32_t hGVM = pVM->hSelf;
1079 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1080 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1081
1082 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1083 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1084
1085 RTPROCESS ProcId = RTProcSelf();
1086 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1087 AssertReturn( ( pHandle->hEMT0 == hSelf
1088 && pHandle->ProcId == ProcId)
1089 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1090
1091 /*
1092 * Lookup the handle and destroy the object.
1093 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1094 * object, we take some precautions against racing callers just in case...
1095 */
1096 int rc = gvmmR0CreateDestroyLock(pGVMM);
1097 AssertRC(rc);
1098
1099 /* be careful here because we might theoretically be racing someone else cleaning up. */
1100 if ( pHandle->pVM == pVM
1101 && ( ( pHandle->hEMT0 == hSelf
1102 && pHandle->ProcId == ProcId)
1103 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1104 && VALID_PTR(pHandle->pvObj)
1105 && VALID_PTR(pHandle->pSession)
1106 && VALID_PTR(pHandle->pGVM)
1107 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1108 {
1109 void *pvObj = pHandle->pvObj;
1110 pHandle->pvObj = NULL;
1111 gvmmR0CreateDestroyUnlock(pGVMM);
1112
1113 SUPR0ObjRelease(pvObj, pHandle->pSession);
1114 }
1115 else
1116 {
1117 SUPR0Printf("GVMMR0DestroyVM: pHandle=%p:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1118 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1119 gvmmR0CreateDestroyUnlock(pGVMM);
1120 rc = VERR_INTERNAL_ERROR;
1121 }
1122
1123 return rc;
1124}
1125
1126
1127/**
1128 * Performs VM cleanup task as part of object destruction.
1129 *
1130 * @param pGVM The GVM pointer.
1131 */
1132static void gvmmR0CleanupVM(PGVM pGVM)
1133{
1134 if ( pGVM->gvmm.s.fDoneVMMR0Init
1135 && !pGVM->gvmm.s.fDoneVMMR0Term)
1136 {
1137 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1138 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1139 {
1140 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1141 VMMR0TermVM(pGVM->pVM, pGVM);
1142 }
1143 else
1144 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1145 }
1146
1147 GMMR0CleanupVM(pGVM);
1148}
1149
1150
1151/**
1152 * Handle destructor.
1153 *
1154 * @param pvGVMM The GVMM instance pointer.
1155 * @param pvHandle The handle pointer.
1156 */
1157static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle)
1158{
1159 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvGVMM, pvHandle));
1160
1161 /*
1162 * Some quick, paranoid, input validation.
1163 */
1164 PGVMHANDLE pHandle = (PGVMHANDLE)pvHandle;
1165 AssertPtr(pHandle);
1166 PGVMM pGVMM = (PGVMM)pvGVMM;
1167 Assert(pGVMM == g_pGVMM);
1168 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1169 if ( !iHandle
1170 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1171 || iHandle != pHandle->iSelf)
1172 {
1173 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1174 return;
1175 }
1176
1177 int rc = gvmmR0CreateDestroyLock(pGVMM);
1178 AssertRC(rc);
1179 rc = gvmmR0UsedLock(pGVMM);
1180 AssertRC(rc);
1181
1182 /*
1183 * This is a tad slow but a doubly linked list is too much hassle.
1184 */
1185 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1186 {
1187 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1188 gvmmR0UsedUnlock(pGVMM);
1189 gvmmR0CreateDestroyUnlock(pGVMM);
1190 return;
1191 }
1192
1193 if (pGVMM->iUsedHead == iHandle)
1194 pGVMM->iUsedHead = pHandle->iNext;
1195 else
1196 {
1197 uint16_t iPrev = pGVMM->iUsedHead;
1198 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1199 while (iPrev)
1200 {
1201 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1202 {
1203 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1204 gvmmR0UsedUnlock(pGVMM);
1205 gvmmR0CreateDestroyUnlock(pGVMM);
1206 return;
1207 }
1208 if (RT_UNLIKELY(c-- <= 0))
1209 {
1210 iPrev = 0;
1211 break;
1212 }
1213
1214 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1215 break;
1216 iPrev = pGVMM->aHandles[iPrev].iNext;
1217 }
1218 if (!iPrev)
1219 {
1220 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1221 gvmmR0UsedUnlock(pGVMM);
1222 gvmmR0CreateDestroyUnlock(pGVMM);
1223 return;
1224 }
1225
1226 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1227 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1228 }
1229 pHandle->iNext = 0;
1230 pGVMM->cVMs--;
1231
1232 /*
1233 * Do the global cleanup round.
1234 */
1235 PGVM pGVM = pHandle->pGVM;
1236 if ( VALID_PTR(pGVM)
1237 && pGVM->u32Magic == GVM_MAGIC)
1238 {
1239 pGVMM->cEMTs -= pGVM->cCpus;
1240 gvmmR0UsedUnlock(pGVMM);
1241
1242 gvmmR0CleanupVM(pGVM);
1243
1244 /*
1245 * Do the GVMM cleanup - must be done last.
1246 */
1247 /* The VM and VM pages mappings/allocations. */
1248 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1249 {
1250 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1251 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1252 }
1253
1254 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1255 {
1256 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1257 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1258 }
1259
1260 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1261 {
1262 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1263 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1264 }
1265
1266 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1267 {
1268 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1269 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1270 }
1271
1272 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1273 {
1274 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1275 {
1276 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1277 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1278 }
1279 }
1280
1281 /* the GVM structure itself. */
1282 pGVM->u32Magic |= UINT32_C(0x80000000);
1283 RTMemFree(pGVM);
1284
1285 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1286 rc = gvmmR0UsedLock(pGVMM);
1287 AssertRC(rc);
1288 }
1289 /* else: GVMMR0CreateVM cleanup. */
1290
1291 /*
1292 * Free the handle.
1293 */
1294 pHandle->iNext = pGVMM->iFreeHead;
1295 pGVMM->iFreeHead = iHandle;
1296 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1297 ASMAtomicWriteNullPtr(&pHandle->pVM);
1298 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1299 ASMAtomicWriteNullPtr(&pHandle->pSession);
1300 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1301 ASMAtomicWriteSize(&pHandle->ProcId, NIL_RTPROCESS);
1302
1303 gvmmR0UsedUnlock(pGVMM);
1304 gvmmR0CreateDestroyUnlock(pGVMM);
1305 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1306}
1307
1308
1309/**
1310 * Registers the calling thread as the EMT of a Virtual CPU.
1311 *
1312 * Note that VCPU 0 is automatically registered during VM creation.
1313 *
1314 * @returns VBox status code
1315 * @param pVM The shared VM structure (the ring-0 mapping).
1316 * @param idCpu VCPU id.
1317 */
1318GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu)
1319{
1320 AssertReturn(idCpu != 0, VERR_NOT_OWNER);
1321
1322 /*
1323 * Validate the VM structure, state and handle.
1324 */
1325 PGVM pGVM;
1326 PGVMM pGVMM;
1327 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /* fTakeUsedLock */);
1328 if (RT_FAILURE(rc))
1329 return rc;
1330
1331 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1332 AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED);
1333 Assert(pGVM->cCpus == pVM->cCpus);
1334 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1335
1336 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1337
1338 return VINF_SUCCESS;
1339}
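/* [Editor's illustration - not part of the original file] A hypothetical
 * sketch of an additional EMT announcing itself. VCPU 0 is registered
 * implicitly by GVMMR0CreateVM, so only idCpu >= 1 does this, and the call
 * must be made on the EMT thread itself since it records RTThreadNativeSelf(). */
#if 0 /* illustration only */
static int gvmmR0IllustrateRegisterVCpu(PVM pVM, VMCPUID idCpu)
{
    Assert(idCpu != 0); /* EMT(0) was registered at VM creation time. */
    return GVMMR0RegisterVCpu(pVM, idCpu);
}
#endif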
1340
1341
1342/**
1343 * Lookup a GVM structure by its handle.
1344 *
1345 * @returns The GVM pointer on success, NULL on failure.
1346 * @param hGVM The global VM handle. Asserts on bad handle.
1347 */
1348GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1349{
1350 PGVMM pGVMM;
1351 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1352
1353 /*
1354 * Validate.
1355 */
1356 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1357 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1358
1359 /*
1360 * Look it up.
1361 */
1362 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1363 AssertPtrReturn(pHandle->pVM, NULL);
1364 AssertPtrReturn(pHandle->pvObj, NULL);
1365 PGVM pGVM = pHandle->pGVM;
1366 AssertPtrReturn(pGVM, NULL);
1367 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1368
1369 return pHandle->pGVM;
1370}
1371
1372
1373/**
1374 * Lookup a GVM structure by the shared VM structure.
1375 *
1376 * The calling thread must be in the same process as the VM. All current lookups
1377 * are by threads inside the same process, so this will not be an issue.
1378 *
1379 * @returns VBox status code.
1380 * @param pVM The shared VM structure (the ring-0 mapping).
1381 * @param ppGVM Where to store the GVM pointer.
1382 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1383 * @param fTakeUsedLock Whether to take the used lock or not.
1384 * Be very careful if not taking the lock as it's possible that
1385 * the VM will disappear then.
1386 *
1387 * @remark This will not assert on an invalid pVM but will return silently.
1388 */
1389static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1390{
1391 RTPROCESS ProcId = RTProcSelf();
1392 PGVMM pGVMM;
1393 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1394
1395 /*
1396 * Validate.
1397 */
1398 if (RT_UNLIKELY( !VALID_PTR(pVM)
1399 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1400 return VERR_INVALID_POINTER;
1401 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1402 || pVM->enmVMState >= VMSTATE_TERMINATED))
1403 return VERR_INVALID_POINTER;
1404
1405 uint16_t hGVM = pVM->hSelf;
1406 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1407 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1408 return VERR_INVALID_HANDLE;
1409
1410 /*
1411 * Look it up.
1412 */
1413 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1414 PGVM pGVM;
1415 if (fTakeUsedLock)
1416 {
1417 int rc = gvmmR0UsedLock(pGVMM);
1418 AssertRCReturn(rc, rc);
1419
1420 pGVM = pHandle->pGVM;
1421 if (RT_UNLIKELY( pHandle->pVM != pVM
1422 || pHandle->ProcId != ProcId
1423 || !VALID_PTR(pHandle->pvObj)
1424 || !VALID_PTR(pGVM)
1425 || pGVM->pVM != pVM))
1426 {
1427 gvmmR0UsedUnlock(pGVMM);
1428 return VERR_INVALID_HANDLE;
1429 }
1430 }
1431 else
1432 {
1433 if (RT_UNLIKELY(pHandle->pVM != pVM))
1434 return VERR_INVALID_HANDLE;
1435 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1436 return VERR_INVALID_HANDLE;
1437 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1438 return VERR_INVALID_HANDLE;
1439
1440 pGVM = pHandle->pGVM;
1441 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1442 return VERR_INVALID_HANDLE;
1443 if (RT_UNLIKELY(pGVM->pVM != pVM))
1444 return VERR_INVALID_HANDLE;
1445 }
1446
1447 *ppGVM = pGVM;
1448 *ppGVMM = pGVMM;
1449 return VINF_SUCCESS;
1450}
1451
1452
1453/**
1454 * Lookup a GVM structure by the shared VM structure.
1455 *
1456 * @returns VBox status code.
1457 * @param pVM The shared VM structure (the ring-0 mapping).
1458 * @param ppGVM Where to store the GVM pointer.
1459 *
1460 * @remark This will not take the 'used'-lock because it doesn't do
1461 * nesting and this function will be used from under the lock.
1462 */
1463GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1464{
1465 PGVMM pGVMM;
1466 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1467}
1468
1469
1470/**
1471 * Lookup a GVM structure by the shared VM structure and ensuring that the
1472 * caller is an EMT thread.
1473 *
1474 * @returns VBox status code.
1475 * @param pVM The shared VM structure (the ring-0 mapping).
1476 * @param idCpu The Virtual CPU ID of the calling EMT.
1477 * @param ppGVM Where to store the GVM pointer.
1478 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1479 * @thread EMT
1480 *
1481 * @remark This will assert in all failure paths.
1482 */
1483static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1484{
1485 PGVMM pGVMM;
1486 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1487
1488 /*
1489 * Validate.
1490 */
1491 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1492 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1493
1494 uint16_t hGVM = pVM->hSelf;
1495 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1496 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1497
1498 /*
1499 * Look it up.
1500 */
1501 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1502 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1503 RTPROCESS ProcId = RTProcSelf();
1504 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1505 AssertPtrReturn(pHandle->pvObj, VERR_INTERNAL_ERROR);
1506
1507 PGVM pGVM = pHandle->pGVM;
1508 AssertPtrReturn(pGVM, VERR_INTERNAL_ERROR);
1509 AssertReturn(pGVM->pVM == pVM, VERR_INTERNAL_ERROR);
1510 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1511 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1512 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_INTERNAL_ERROR);
1513
1514 *ppGVM = pGVM;
1515 *ppGVMM = pGVMM;
1516 return VINF_SUCCESS;
1517}
1518
1519
1520/**
1521 * Lookup a GVM structure by the shared VM structure
1522 * and ensuring that the caller is the EMT thread.
1523 *
1524 * @returns VBox status code.
1525 * @param pVM The shared VM structure (the ring-0 mapping).
1526 * @param idCpu The Virtual CPU ID of the calling EMT.
1527 * @param ppGVM Where to store the GVM pointer.
1528 * @thread EMT
1529 */
1530GVMMR0DECL(int) GVMMR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM)
1531{
1532 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
1533 PGVMM pGVMM;
1534 return gvmmR0ByVMAndEMT(pVM, idCpu, ppGVM, &pGVMM);
1535}
1536
1537
1538/**
1539 * Lookup a VM by its global handle.
1540 *
1541 * @returns The VM handle on success, NULL on failure.
1542 * @param hGVM The global VM handle. Asserts on bad handle.
1543 */
1544GVMMR0DECL(PVM) GVMMR0GetVMByHandle(uint32_t hGVM)
1545{
1546 PGVM pGVM = GVMMR0ByHandle(hGVM);
1547 return pGVM ? pGVM->pVM : NULL;
1548}
1549
1550
1551/**
1552 * Looks up the VM belonging to the specified EMT thread.
1553 *
1554 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1555 * unnecessary kernel panics when the EMT thread hits an assertion. The
1556 * caller may or may not be an EMT thread.
1557 *
1558 * @returns The VM handle on success, NULL on failure.
1559 * @param hEMT The native thread handle of the EMT.
1560 * NIL_RTNATIVETHREAD means the current thread
1561 */
1562GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1563{
1564 /*
1565 * No Assertions here as we're usually called in an AssertMsgN or
1566 * RTAssert* context.
1567 */
1568 PGVMM pGVMM = g_pGVMM;
1569 if ( !VALID_PTR(pGVMM)
1570 || pGVMM->u32Magic != GVMM_MAGIC)
1571 return NULL;
1572
1573 if (hEMT == NIL_RTNATIVETHREAD)
1574 hEMT = RTThreadNativeSelf();
1575 RTPROCESS ProcId = RTProcSelf();
1576
1577 /*
1578 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1579 */
1580 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1581 {
1582 if ( pGVMM->aHandles[i].iSelf == i
1583 && pGVMM->aHandles[i].ProcId == ProcId
1584 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1585 && VALID_PTR(pGVMM->aHandles[i].pVM)
1586 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1587 {
1588 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1589 return pGVMM->aHandles[i].pVM;
1590
1591 /* This is fairly safe with the current process per VM approach. */
1592 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1593 VMCPUID const cCpus = pGVM->cCpus;
1594 if ( cCpus < 1
1595 || cCpus > VMM_MAX_CPU_COUNT)
1596 continue;
1597 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1598 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1599 return pGVMM->aHandles[i].pVM;
1600 }
1601 }
1602 return NULL;
1603}
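/* [Editor's illustration - not part of the original file] A hypothetical use
 * of the lookup above from an assertion path, similar to what the comment
 * describes for VMMR0.cpp. */
#if 0 /* illustration only */
static void gvmmR0IllustrateAssertLookup(void)
{
    PVM pVM = GVMMR0GetVMByEMT(NIL_RTNATIVETHREAD /* i.e. the current thread */);
    if (pVM)
        SUPR0Printf("Assertion on an EMT of VM %p (state %d)\n", pVM, pVM->enmVMState);
}
#endif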
1604
1605
1606/**
1607 * This will wake up expired and soon-to-be expired VMs.
1608 *
1609 * @returns Number of VMs that have been woken up.
1610 * @param pGVMM Pointer to the GVMM instance data.
1611 * @param u64Now The current time.
1612 */
1613static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1614{
1615 /*
1616 * Skip this if we've been disabled, either because of high resolution
1617 * wakeups or by the user.
1618 */
1619 if ( !pGVMM->nsEarlyWakeUp1
1620 && !pGVMM->nsEarlyWakeUp2)
1621 return 0;
1622
1623/** @todo Rewrite this algorithm. See performance defect XYZ. */
1624
1625 /*
1626 * A cheap optimization to stop wasting so much time here on big setups.
1627 */
1628 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1629 if ( pGVMM->cHaltedEMTs == 0
1630 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1631 return 0;
1632
1633 /*
1634 * The first pass will wake up VMs which have actually expired
1635 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1636 */
1637 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1638 uint64_t u64Min = UINT64_MAX;
1639 unsigned cWoken = 0;
1640 unsigned cHalted = 0;
1641 unsigned cTodo2nd = 0;
1642 unsigned cTodo3rd = 0;
1643 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1644 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1645 i = pGVMM->aHandles[i].iNext)
1646 {
1647 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1648 if ( VALID_PTR(pCurGVM)
1649 && pCurGVM->u32Magic == GVM_MAGIC)
1650 {
1651 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1652 {
1653 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1654 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1655 if (u64)
1656 {
1657 if (u64 <= u64Now)
1658 {
1659 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1660 {
1661 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1662 AssertRC(rc);
1663 cWoken++;
1664 }
1665 }
1666 else
1667 {
1668 cHalted++;
1669 if (u64 <= uNsEarlyWakeUp1)
1670 cTodo2nd++;
1671 else if (u64 <= uNsEarlyWakeUp2)
1672 cTodo3rd++;
1673 else if (u64 < u64Min)
1674 u64Min = u64;
1675 }
1676 }
1677 }
1678 }
1679 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1680 }
1681
1682 if (cTodo2nd)
1683 {
1684 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1685 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1686 i = pGVMM->aHandles[i].iNext)
1687 {
1688 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1689 if ( VALID_PTR(pCurGVM)
1690 && pCurGVM->u32Magic == GVM_MAGIC)
1691 {
1692 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1693 {
1694 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1695 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1696 if ( u64
1697 && u64 <= uNsEarlyWakeUp1)
1698 {
1699 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1700 {
1701 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1702 AssertRC(rc);
1703 cWoken++;
1704 }
1705 }
1706 }
1707 }
1708 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1709 }
1710 }
1711
1712 if (cTodo3rd)
1713 {
1714 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1715 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1716 i = pGVMM->aHandles[i].iNext)
1717 {
1718 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1719 if ( VALID_PTR(pCurGVM)
1720 && pCurGVM->u32Magic == GVM_MAGIC)
1721 {
1722 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1723 {
1724 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1725 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1726 if ( u64
1727 && u64 <= uNsEarlyWakeUp2)
1728 {
1729 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1730 {
1731 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1732 AssertRC(rc);
1733 cWoken++;
1734 }
1735 }
1736 }
1737 }
1738 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1739 }
1740 }
1741
1742 /*
1743 * Set the minimum value.
1744 */
1745 pGVMM->uNsNextEmtWakeup = u64Min;
1746
1747 return cWoken;
1748}
1749
1750
1751/**
1752 * Halt the EMT thread.
1753 *
1754 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
1755 * VERR_INTERRUPTED if a signal was scheduled for the thread.
1756 * @param pVM Pointer to the shared VM structure.
1757 * @param idCpu The Virtual CPU ID of the calling EMT.
1758 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
1759 * @thread EMT(idCpu).
1760 */
1761GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
1762{
1763 LogFlow(("GVMMR0SchedHalt: pVM=%p\n", pVM));
1764
1765 /*
1766 * Validate the VM structure, state and handle.
1767 */
1768 PGVM pGVM;
1769 PGVMM pGVMM;
1770 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
1771 if (RT_FAILURE(rc))
1772 return rc;
1773 pGVM->gvmm.s.StatsSched.cHaltCalls++;
1774
1775 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
1776 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
1777
1778 /*
1779 * Take the UsedList semaphore, get the current time
1780 * and check if anyone needs waking up.
1781 * Interrupts must NOT be disabled at this point because we ask for GIP time!
1782 */
1783 rc = gvmmR0UsedLock(pGVMM);
1784 AssertRC(rc);
1785
1786 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
1787
1788 /* GIP hack: We might frequently be sleeping for short intervals where the
1789 difference between GIP and system time matters on systems with high resolution
1790 system time. So, convert the input from GIP to System time in that case. */
1791 Assert(ASMGetFlags() & X86_EFL_IF);
1792 const uint64_t u64NowSys = RTTimeSystemNanoTS();
1793 const uint64_t u64NowGip = RTTimeNanoTS();
1794 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
1795
1796 /*
1797 * Go to sleep if we must...
1798 * Cap the sleep time to 1 second to be on the safe side.
1799 */
1800 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
1801 if ( u64NowGip < u64ExpireGipTime
1802 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
1803 ? pGVMM->nsMinSleepCompany
1804 : pGVMM->nsMinSleepAlone))
1805 {
1806 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
1807 if (cNsInterval > RT_NS_1SEC)
1808 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
1809 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
1810 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
1811 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
1812 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
1813 gvmmR0UsedUnlock(pGVMM);
1814
1815 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
1816 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
1817 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
1818
1819 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
1820 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
1821
1822        /* Reset the semaphore to try to prevent a few false wake-ups. */
1823 if (rc == VINF_SUCCESS)
1824 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1825 else if (rc == VERR_TIMEOUT)
1826 {
1827 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
1828 rc = VINF_SUCCESS;
1829 }
1830 }
1831 else
1832 {
1833 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
1834 gvmmR0UsedUnlock(pGVMM);
1835 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1836 }
1837
1838 return rc;
1839}
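
/*
 * Illustrative sketch (not part of the original source): how ring-0 code on
 * EMT(idCpu) might use GVMMR0SchedHalt to block until a GIP-relative
 * deadline.  The local names are hypothetical and only the two interesting
 * status codes are looked at.
 *
 *      uint64_t const cNsTimeout = RT_NS_1SEC / 100;            // at most ~10 ms
 *      uint64_t const u64Expire  = RTTimeNanoTS() + cNsTimeout; // deadline in GIP time
 *      int rc = GVMMR0SchedHalt(pVM, idCpu, u64Expire);
 *      if (rc == VERR_INTERRUPTED)
 *      {
 *          // A signal is pending for the thread; typically return to ring-3.
 *      }
 *      else
 *      {
 *          // VINF_SUCCESS: woken up, timed out, or the interval was too short to block.
 *      }
 */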
1840
1841
1842/**
1843 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
1844 * a sleeping EMT.
1845 *
1846 * @retval VINF_SUCCESS if successfully woken up.
1847 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1848 *
1849 * @param pGVM The global (ring-0) VM structure.
1850 * @param pGVCpu The global (ring-0) VCPU structure.
1851 */
1852DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
1853{
1854 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
1855
1856 /*
1857     * Signal the semaphore regardless of whether the EMT is currently blocked on it.
1858     *
1859     * The reason for this is that there is absolutely no way we can be 100%
1860     * certain that it isn't *about* to go to sleep on it and just got
1861     * delayed a bit en route. So, we will always signal the semaphore when
1862     * the EMT is flagged as halted in the VMM.
1863 */
1864/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
1865 int rc;
1866 if (pGVCpu->gvmm.s.u64HaltExpire)
1867 {
1868 rc = VINF_SUCCESS;
1869 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
1870 }
1871 else
1872 {
1873 rc = VINF_GVM_NOT_BLOCKED;
1874 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
1875 }
1876
1877 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
1878 AssertRC(rc2);
1879
1880 return rc;
1881}
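
/*
 * Illustrative timeline (not part of the original source) of the race the
 * unconditional signal above guards against; t0..t3 are hypothetical points
 * in time:
 *
 *      t0  EMT:   writes u64HaltExpire in GVMMR0SchedHalt and drops the used lock.
 *      t1  EMT:   is preempted before it reaches RTSemEventMultiWaitEx.
 *      t2  Waker: sees u64HaltExpire != 0, clears it and signals HaltEventMulti.
 *      t3  EMT:   finally calls RTSemEventMultiWaitEx; the multi-release event is
 *                 still signalled, so the wait returns at once and no wakeup is lost.
 */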
1882
1883
1884/**
1885 * Wakes up the halted EMT thread so it can service a pending request.
1886 *
1887 * @returns VBox status code.
1888 * @retval VINF_SUCCESS if successfully woken up.
1889 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1890 *
1891 * @param pVM Pointer to the shared VM structure.
1892 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1893 * @param fTakeUsedLock Take the used lock or not
1894 * @thread Any but EMT.
1895 */
1896GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1897{
1898 /*
1899 * Validate input and take the UsedLock.
1900 */
1901 PGVM pGVM;
1902 PGVMM pGVMM;
1903 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1904 if (RT_SUCCESS(rc))
1905 {
1906 if (idCpu < pGVM->cCpus)
1907 {
1908 /*
1909 * Do the actual job.
1910 */
1911 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
1912
1913 if (fTakeUsedLock)
1914 {
1915 /*
1916 * While we're here, do a round of scheduling.
1917 */
1918 Assert(ASMGetFlags() & X86_EFL_IF);
1919 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
1920 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
1921 }
1922 }
1923 else
1924 rc = VERR_INVALID_CPU_ID;
1925
1926 if (fTakeUsedLock)
1927 {
1928 int rc2 = gvmmR0UsedUnlock(pGVMM);
1929 AssertRC(rc2);
1930 }
1931 }
1932
1933 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
1934 return rc;
1935}
1936
1937
1938/**
1939 * Wakes up the halted EMT thread so it can service a pending request.
1940 *
1941 * @returns VBox status code.
1942 * @retval VINF_SUCCESS if successfully woken up.
1943 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1944 *
1945 * @param pVM Pointer to the shared VM structure.
1946 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1947 * @thread Any but EMT.
1948 */
1949GVMMR0DECL(int) GVMMR0SchedWakeUp(PVM pVM, VMCPUID idCpu)
1950{
1951 return GVMMR0SchedWakeUpEx(pVM, idCpu, true /* fTakeUsedLock */);
1952}
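
/*
 * Illustrative sketch (not part of the original source): waking a halted EMT
 * from some other ring-0 thread after queueing work for it; idCpuTarget is a
 * hypothetical name.
 *
 *      int rc = GVMMR0SchedWakeUp(pVM, idCpuTarget);
 *      if (rc == VINF_GVM_NOT_BLOCKED)
 *      {
 *          // The EMT wasn't halted; it will notice the queued work on its own.
 *      }
 *      else
 *          AssertRC(rc); // VINF_SUCCESS: it was halted and has been signalled.
 */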
1953
1954/**
1955 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
1956 * the Virtual CPU if it's still busy executing guest code.
1957 *
1958 * @returns VBox status code.
1959 * @retval VINF_SUCCESS if poked successfully.
1960 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1961 *
1962 * @param pGVM The global (ring-0) VM structure.
1963 * @param pVCpu The Virtual CPU handle.
1964 */
1965DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
1966{
1967 pGVM->gvmm.s.StatsSched.cPokeCalls++;
1968
1969 RTCPUID idHostCpu = pVCpu->idHostCpu;
1970 if ( idHostCpu == NIL_RTCPUID
1971 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
1972 {
1973 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
1974 return VINF_GVM_NOT_BUSY_IN_GC;
1975 }
1976
1977 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
1978 RTMpPokeCpu(idHostCpu);
1979 return VINF_SUCCESS;
1980}
1981
1982/**
1983 * Pokes an EMT if it's still busy running guest code.
1984 *
1985 * @returns VBox status code.
1986 * @retval VINF_SUCCESS if poked successfully.
1987 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1988 *
1989 * @param pVM Pointer to the shared VM structure.
1990 * @param idCpu The ID of the virtual CPU to poke.
1991 * @param fTakeUsedLock Take the used lock or not
1992 */
1993GVMMR0DECL(int) GVMMR0SchedPokeEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1994{
1995 /*
1996 * Validate input and take the UsedLock.
1997 */
1998 PGVM pGVM;
1999 PGVMM pGVMM;
2000 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
2001 if (RT_SUCCESS(rc))
2002 {
2003 if (idCpu < pGVM->cCpus)
2004 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2005 else
2006 rc = VERR_INVALID_CPU_ID;
2007
2008 if (fTakeUsedLock)
2009 {
2010 int rc2 = gvmmR0UsedUnlock(pGVMM);
2011 AssertRC(rc2);
2012 }
2013 }
2014
2015    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2016 return rc;
2017}
2018
2019
2020/**
2021 * Pokes an EMT if it's still busy running guest code.
2022 *
2023 * @returns VBox status code.
2024 * @retval VINF_SUCCESS if poked successfully.
2025 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2026 *
2027 * @param pVM Pointer to the shared VM structure.
2028 * @param idCpu The ID of the virtual CPU to poke.
2029 */
2030GVMMR0DECL(int) GVMMR0SchedPoke(PVM pVM, VMCPUID idCpu)
2031{
2032 return GVMMR0SchedPokeEx(pVM, idCpu, true /* fTakeUsedLock */);
2033}
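
/*
 * Illustrative sketch (not part of the original source): getting the
 * attention of one virtual CPU regardless of whether it is halted in ring-0
 * or busy executing guest code, by combining the wake-up and poke calls;
 * idCpuTarget is a hypothetical name.
 *
 *      if (GVMMR0SchedWakeUp(pVM, idCpuTarget) == VINF_GVM_NOT_BLOCKED)
 *          GVMMR0SchedPoke(pVM, idCpuTarget); // still busy in GC? force a VM-exit.
 */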
2034
2035
2036/**
2037 * Wakes up a set of halted EMT threads so they can service pending requests.
2038 *
2039 * @returns VBox status code, no informational stuff.
2040 *
2041 * @param pVM Pointer to the shared VM structure.
2042 * @param pSleepSet The set of sleepers to wake up.
2043 * @param pPokeSet The set of CPUs to poke.
2044 */
2045GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2046{
2047 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2048 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2049 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2050
2051 /*
2052 * Validate input and take the UsedLock.
2053 */
2054 PGVM pGVM;
2055 PGVMM pGVMM;
2056 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /* fTakeUsedLock */);
2057 if (RT_SUCCESS(rc))
2058 {
2059 rc = VINF_SUCCESS;
2060 VMCPUID idCpu = pGVM->cCpus;
2061 while (idCpu-- > 0)
2062 {
2063            /* Don't try to poke or wake up ourselves. */
2064 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2065 continue;
2066
2067 /* just ignore errors for now. */
2068 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2069 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2070 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2071 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2072 }
2073
2074 int rc2 = gvmmR0UsedUnlock(pGVMM);
2075 AssertRC(rc2);
2076 }
2077
2078 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2079 return rc;
2080}
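
/*
 * Illustrative sketch (not part of the original source): building the two
 * CPU sets for a batched wake-up/poke.  It assumes VMCPUSET_EMPTY and
 * VMCPUSET_ADD helpers exist alongside VMCPUSET_IS_PRESENT; if the names
 * differ, the idea is simply to empty both sets and add each target VCPU to
 * exactly one of them.
 *
 *      VMCPUSET SleepSet, PokeSet;
 *      VMCPUSET_EMPTY(&SleepSet);
 *      VMCPUSET_EMPTY(&PokeSet);
 *      for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
 *          if (VMCPU_GET_STATE(&pVM->aCpus[idCpu]) == VMCPUSTATE_STARTED_EXEC)
 *              VMCPUSET_ADD(&PokeSet, idCpu);   // executing guest code -> poke
 *          else
 *              VMCPUSET_ADD(&SleepSet, idCpu);  // possibly halted -> wake up
 *      int rc = GVMMR0SchedWakeUpAndPokeCpus(pVM, &SleepSet, &PokeSet);
 */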
2081
2082
2083/**
2084 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2085 *
2086 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2087 * @param pVM Pointer to the shared VM structure.
2088 * @param pReq The request packet.
2089 */
2090GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2091{
2092 /*
2093 * Validate input and pass it on.
2094 */
2095 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2096 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2097
2098 return GVMMR0SchedWakeUpAndPokeCpus(pVM, &pReq->SleepSet, &pReq->PokeSet);
2099}
2100
2101
2102
2103/**
2104 * Poll the schedule to see if someone else should get a chance to run.
2105 *
2106 * This is a bit hackish and will not work too well if the machine is
2107 * under heavy load from non-VM processes.
2108 *
2109 * @returns VINF_SUCCESS if not yielded.
2110 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2111 * @param pVM Pointer to the shared VM structure.
2112 * @param idCpu The Virtual CPU ID of the calling EMT.
2114 * @param fYield Whether to yield or not.
2115 * This is for when we're spinning in the halt loop.
2116 * @thread EMT(idCpu).
2117 */
2118GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield)
2119{
2120 /*
2121 * Validate input.
2122 */
2123 PGVM pGVM;
2124 PGVMM pGVMM;
2125 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
2126 if (RT_SUCCESS(rc))
2127 {
2128 rc = gvmmR0UsedLock(pGVMM);
2129 AssertRC(rc);
2130 pGVM->gvmm.s.StatsSched.cPollCalls++;
2131
2132 Assert(ASMGetFlags() & X86_EFL_IF);
2133 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2134
2135 if (!fYield)
2136 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2137 else
2138 {
2139 /** @todo implement this... */
2140 rc = VERR_NOT_IMPLEMENTED;
2141 }
2142
2143 gvmmR0UsedUnlock(pGVMM);
2144 }
2145
2146    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2147 return rc;
2148}
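
/*
 * Illustrative sketch (not part of the original source): an EMT spinning in
 * its halt loop can use GVMMR0SchedPoll between iterations.  With fYield set
 * to false only the early wake-up pass for other EMTs is run; the yielding
 * path is not implemented and returns VERR_NOT_IMPLEMENTED.
 *
 *      int rc = GVMMR0SchedPoll(pVM, idCpu, false); // fYield = false
 *      AssertRC(rc);
 */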
2149
2150
2151#ifdef GVMM_SCHED_WITH_PPT
2152/**
2153 * Timer callback for the periodic preemption timer.
2154 *
2155 * @param pTimer The timer handle.
2156 * @param pvUser Pointer to the per cpu structure.
2157 * @param iTick The current tick.
2158 */
2159static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2160{
2161 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2162
2163 /*
2164 * Termination check
2165 */
2166 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2167 return;
2168
2169 /*
2170     * Do the housekeeping.
2171 */
2172 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2173 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2174
2175 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2176 {
2177 /*
2178 * Historicize the max frequency.
2179 */
2180 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2181 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2182 pCpu->Ppt.iTickHistorization = 0;
2183 pCpu->Ppt.uDesiredHz = 0;
2184
2185 /*
2186             * Check if the current timer frequency needs changing.
2187 */
2188 uint32_t uHistMaxHz = 0;
2189 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2190 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2191 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2192 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2193 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2194 else if (uHistMaxHz)
2195 {
2196 /*
2197 * Reprogram it.
2198 */
2199 pCpu->Ppt.cChanges++;
2200 pCpu->Ppt.iTickHistorization = 0;
2201 pCpu->Ppt.uTimerHz = uHistMaxHz;
2202 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2203 pCpu->Ppt.cNsInterval = cNsInterval;
2204 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2205 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2206 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2207 / cNsInterval;
2208 else
2209 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2210 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2211
2212 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2213 RTTimerChangeInterval(pTimer, cNsInterval);
2214 }
2215 else
2216 {
2217 /*
2218 * Stop it.
2219 */
2220 pCpu->Ppt.fStarted = false;
2221 pCpu->Ppt.uTimerHz = 0;
2222 pCpu->Ppt.cNsInterval = 0;
2223 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2224
2225 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2226 RTTimerStop(pTimer);
2227 }
2228 }
2229 else
2230 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2231}
2232#endif /* GVMM_SCHED_WITH_PPT */
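
/*
 * Worked example (not from the original source) of the historization math
 * used above.  With a hypothetical GVMMHOSTCPU_PPT_HIST_INTERVAL_NS of
 * 20 000 000 ns and uHistMaxHz = 2000 (cNsInterval = 500 000 ns):
 *
 *      cTicksHistoriziationInterval = (20 000 000 + 10 000 000 - 1) / 500 000 = 59
 *
 * i.e. the frequency history advances roughly every 1.5 times the history
 * interval worth of timer ticks while the timer fires faster than once per
 * interval, and on every tick otherwise.
 */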
2233
2234
2235/**
2236 * Updates the periodic preemption timer for the calling CPU.
2237 *
2238 * The caller must have disabled preemption!
2239 * The caller must check that the host can do high resolution timers.
2240 *
2241 * @param pVM The VM handle.
2242 * @param idHostCpu The current host CPU id.
2243 * @param uHz The desired frequency.
2244 */
2245GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2246{
2247#ifdef GVMM_SCHED_WITH_PPT
2248 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2249 Assert(RTTimerCanDoHighResolution());
2250
2251 /*
2252 * Resolve the per CPU data.
2253 */
2254 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2255 PGVMM pGVMM = g_pGVMM;
2256 if ( !VALID_PTR(pGVMM)
2257 || pGVMM->u32Magic != GVMM_MAGIC)
2258 return;
2259 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2260 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2261 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2262 && pCpu->idCpu == idHostCpu,
2263                         ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2264
2265 /*
2266 * Check whether we need to do anything about the timer.
2267     * We have to be a little bit careful since we might be racing the timer
2268 * callback here.
2269 */
2270 if (uHz > 16384)
2271 uHz = 16384; /** @todo add a query method for this! */
2272 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2273 && uHz >= pCpu->Ppt.uMinHz
2274 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2275 {
2276 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2277 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2278
2279 pCpu->Ppt.uDesiredHz = uHz;
2280 uint32_t cNsInterval = 0;
2281 if (!pCpu->Ppt.fStarted)
2282 {
2283 pCpu->Ppt.cStarts++;
2284 pCpu->Ppt.fStarted = true;
2285 pCpu->Ppt.fStarting = true;
2286 pCpu->Ppt.iTickHistorization = 0;
2287 pCpu->Ppt.uTimerHz = uHz;
2288 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2289 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2290 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2291 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2292 / cNsInterval;
2293 else
2294 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2295 }
2296
2297 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2298
2299 if (cNsInterval)
2300 {
2301 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2302 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2303 AssertRC(rc);
2304
2305 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2306 if (RT_FAILURE(rc))
2307 pCpu->Ppt.fStarted = false;
2308 pCpu->Ppt.fStarting = false;
2309 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2310 }
2311 }
2312#endif /* GVMM_SCHED_WITH_PPT */
2313}
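
/*
 * Illustrative sketch (not part of the original source) of the calling
 * convention the asserts above imply; uHzHint is a hypothetical name.
 *
 *      RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
 *      RTThreadPreemptDisable(&PreemptState);
 *      if (RTTimerCanDoHighResolution())
 *          GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, RTMpCpuId(), uHzHint);
 *      RTThreadPreemptRestore(&PreemptState);
 */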
2314
2315
2316/**
2317 * Retrieves the GVMM statistics visible to the caller.
2318 *
2319 * @returns VBox status code.
2320 *
2321 * @param pStats Where to put the statistics.
2322 * @param pSession The current session.
2323 * @param pVM The VM to obtain statistics for. Optional.
2324 */
2325GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2326{
2327 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2328
2329 /*
2330 * Validate input.
2331 */
2332 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2333 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2334 pStats->cVMs = 0; /* (crash before taking the sem...) */
2335
2336 /*
2337 * Take the lock and get the VM statistics.
2338 */
2339 PGVMM pGVMM;
2340 if (pVM)
2341 {
2342 PGVM pGVM;
2343 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2344 if (RT_FAILURE(rc))
2345 return rc;
2346 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2347 }
2348 else
2349 {
2350 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
2351 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2352
2353 int rc = gvmmR0UsedLock(pGVMM);
2354 AssertRCReturn(rc, rc);
2355 }
2356
2357 /*
2358     * Enumerate the VMs and add up the statistics of the ones visible to the caller.
2359 */
2360 pStats->cVMs = 0;
2361 pStats->cEMTs = 0;
2362 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2363
2364 for (unsigned i = pGVMM->iUsedHead;
2365 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2366 i = pGVMM->aHandles[i].iNext)
2367 {
2368 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2369 void *pvObj = pGVMM->aHandles[i].pvObj;
2370 if ( VALID_PTR(pvObj)
2371 && VALID_PTR(pGVM)
2372 && pGVM->u32Magic == GVM_MAGIC
2373 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2374 {
2375 pStats->cVMs++;
2376 pStats->cEMTs += pGVM->cCpus;
2377
2378 pStats->SchedSum.cHaltCalls += pGVM->gvmm.s.StatsSched.cHaltCalls;
2379 pStats->SchedSum.cHaltBlocking += pGVM->gvmm.s.StatsSched.cHaltBlocking;
2380 pStats->SchedSum.cHaltTimeouts += pGVM->gvmm.s.StatsSched.cHaltTimeouts;
2381 pStats->SchedSum.cHaltNotBlocking += pGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2382 pStats->SchedSum.cHaltWakeUps += pGVM->gvmm.s.StatsSched.cHaltWakeUps;
2383
2384 pStats->SchedSum.cWakeUpCalls += pGVM->gvmm.s.StatsSched.cWakeUpCalls;
2385 pStats->SchedSum.cWakeUpNotHalted += pGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2386 pStats->SchedSum.cWakeUpWakeUps += pGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2387
2388 pStats->SchedSum.cPokeCalls += pGVM->gvmm.s.StatsSched.cPokeCalls;
2389 pStats->SchedSum.cPokeNotBusy += pGVM->gvmm.s.StatsSched.cPokeNotBusy;
2390
2391 pStats->SchedSum.cPollCalls += pGVM->gvmm.s.StatsSched.cPollCalls;
2392 pStats->SchedSum.cPollHalts += pGVM->gvmm.s.StatsSched.cPollHalts;
2393 pStats->SchedSum.cPollWakeUps += pGVM->gvmm.s.StatsSched.cPollWakeUps;
2394 }
2395 }
2396
2397 /*
2398 * Copy out the per host CPU statistics.
2399 */
2400 uint32_t iDstCpu = 0;
2401 uint32_t cSrcCpus = pGVMM->cHostCpus;
2402 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2403 {
2404 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2405 {
2406 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2407 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2408#ifdef GVMM_SCHED_WITH_PPT
2409 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2410 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2411 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2412 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2413#else
2414 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2415 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2416 pStats->aHostCpus[iDstCpu].cChanges = 0;
2417 pStats->aHostCpus[iDstCpu].cStarts = 0;
2418#endif
2419 iDstCpu++;
2420 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2421 break;
2422 }
2423 }
2424 pStats->cHostCpus = iDstCpu;
2425
2426 gvmmR0UsedUnlock(pGVMM);
2427
2428 return VINF_SUCCESS;
2429}
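
/*
 * Illustrative sketch (not part of the original source): a direct ring-0
 * query of the global scheduler statistics (passing NULL to skip the per-VM
 * part).  Ring-3 callers go through the GVMMR0QueryStatisticsReq wrapper
 * below instead.
 *
 *      GVMMSTATS Stats;   // note: carries per-host-CPU entries, so it is not small
 *      int rc = GVMMR0QueryStatistics(&Stats, pSession, NULL);
 *      if (RT_SUCCESS(rc))
 *          SUPR0Printf("GVMM: %u VMs, %u EMTs\n", Stats.cVMs, Stats.cEMTs);
 */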
2430
2431
2432/**
2433 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2434 *
2435 * @returns see GVMMR0QueryStatistics.
2436 * @param pVM Pointer to the shared VM structure. Optional.
2437 * @param pReq The request packet.
2438 */
2439GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq)
2440{
2441 /*
2442 * Validate input and pass it on.
2443 */
2444 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2445 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2446
2447 return GVMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pVM);
2448}
2449
2450
2451/**
2452 * Resets the specified GVMM statistics.
2453 *
2454 * @returns VBox status code.
2455 *
2456 * @param pStats Which statistics to reset; non-zero fields indicate the ones to reset.
2457 * @param pSession The current session.
2458 * @param pVM The VM to reset statistics for. Optional.
2459 */
2460GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2461{
2462 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2463
2464 /*
2465 * Validate input.
2466 */
2467 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2468 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2469
2470 /*
2471 * Take the lock and get the VM statistics.
2472 */
2473 PGVMM pGVMM;
2474 if (pVM)
2475 {
2476 PGVM pGVM;
2477 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2478 if (RT_FAILURE(rc))
2479 return rc;
2480# define MAYBE_RESET_FIELD(field) \
2481 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2482 MAYBE_RESET_FIELD(cHaltCalls);
2483 MAYBE_RESET_FIELD(cHaltBlocking);
2484 MAYBE_RESET_FIELD(cHaltTimeouts);
2485 MAYBE_RESET_FIELD(cHaltNotBlocking);
2486 MAYBE_RESET_FIELD(cHaltWakeUps);
2487 MAYBE_RESET_FIELD(cWakeUpCalls);
2488 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2489 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2490 MAYBE_RESET_FIELD(cPokeCalls);
2491 MAYBE_RESET_FIELD(cPokeNotBusy);
2492 MAYBE_RESET_FIELD(cPollCalls);
2493 MAYBE_RESET_FIELD(cPollHalts);
2494 MAYBE_RESET_FIELD(cPollWakeUps);
2495# undef MAYBE_RESET_FIELD
2496 }
2497 else
2498 {
2499 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
2500
2501 int rc = gvmmR0UsedLock(pGVMM);
2502 AssertRCReturn(rc, rc);
2503 }
2504
2505 /*
2506     * Enumerate the VMs and reset the requested statistics of the ones visible to the caller.
2507 */
2508 if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0))
2509 {
2510 for (unsigned i = pGVMM->iUsedHead;
2511 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2512 i = pGVMM->aHandles[i].iNext)
2513 {
2514 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2515 void *pvObj = pGVMM->aHandles[i].pvObj;
2516 if ( VALID_PTR(pvObj)
2517 && VALID_PTR(pGVM)
2518 && pGVM->u32Magic == GVM_MAGIC
2519 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2520 {
2521# define MAYBE_RESET_FIELD(field) \
2522 do { if (pStats->SchedSum. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2523 MAYBE_RESET_FIELD(cHaltCalls);
2524 MAYBE_RESET_FIELD(cHaltBlocking);
2525 MAYBE_RESET_FIELD(cHaltTimeouts);
2526 MAYBE_RESET_FIELD(cHaltNotBlocking);
2527 MAYBE_RESET_FIELD(cHaltWakeUps);
2528 MAYBE_RESET_FIELD(cWakeUpCalls);
2529 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2530 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2531 MAYBE_RESET_FIELD(cPokeCalls);
2532 MAYBE_RESET_FIELD(cPokeNotBusy);
2533 MAYBE_RESET_FIELD(cPollCalls);
2534 MAYBE_RESET_FIELD(cPollHalts);
2535 MAYBE_RESET_FIELD(cPollWakeUps);
2536# undef MAYBE_RESET_FIELD
2537 }
2538 }
2539 }
2540
2541 gvmmR0UsedUnlock(pGVMM);
2542
2543 return VINF_SUCCESS;
2544}
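
/*
 * Illustrative sketch (not part of the original source): resetting only the
 * halt counters for every VM visible to the session by flagging the fields
 * with non-zero values, as described above.
 *
 *      GVMMSTATS Stats;
 *      RT_ZERO(Stats);
 *      Stats.SchedSum.cHaltCalls    = 1;   // non-zero => reset this field
 *      Stats.SchedSum.cHaltBlocking = 1;
 *      Stats.SchedSum.cHaltTimeouts = 1;
 *      int rc = GVMMR0ResetStatistics(&Stats, pSession, NULL); // NULL: all VMs visible to pSession
 */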
2545
2546
2547/**
2548 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2549 *
2550 * @returns see GVMMR0ResetStatistics.
2551 * @param pVM Pointer to the shared VM structure. Optional.
2552 * @param pReq The request packet.
2553 */
2554GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PVM pVM, PGVMMRESETSTATISTICSSREQ pReq)
2555{
2556 /*
2557 * Validate input and pass it on.
2558 */
2559 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2560 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2561
2562 return GVMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pVM);
2563}
2564