source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 33170

Last change on this file since 33170 was 33170, checked in by vboxsync, 14 years ago

GVMM,VMEmt: Use RTSemEventWaitEx, avoid the syrup algorithm in gvmmR0SchedDoWakeUps.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 86.5 KB
1/* $Id: GVMMR0.cpp 33170 2010-10-15 22:51:56Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of this takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
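/*
 * For reference, the ~160 ms window mentioned above follows from the
 * definitions further down in this file: GVMMHOSTCPU::Ppt.aHzHistory has
 * 8 entries and each entry covers GVMMHOSTCPU_PPT_HIST_INTERVAL_NS =
 * 20 000 000 ns (20 ms), i.e. 8 * 20 ms = ~160 ms of frequency history.
 */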
47
48
49/*******************************************************************************
50* Header Files *
51*******************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/gvmm.h>
54#include <VBox/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/gvm.h>
57#include <VBox/vm.h>
58#include <VBox/vmm.h>
59#include <VBox/param.h>
60#include <VBox/err.h>
61#include <iprt/asm.h>
62#include <iprt/asm-amd64-x86.h>
63#include <iprt/mem.h>
64#include <iprt/semaphore.h>
65#include <iprt/time.h>
66#include <VBox/log.h>
67#include <iprt/thread.h>
68#include <iprt/process.h>
69#include <iprt/param.h>
70#include <iprt/string.h>
71#include <iprt/assert.h>
72#include <iprt/mem.h>
73#include <iprt/memobj.h>
74#include <iprt/mp.h>
75#include <iprt/cpuset.h>
76#include <iprt/spinlock.h>
77#include <iprt/timer.h>
78
79
80/*******************************************************************************
81* Defined Constants And Macros *
82*******************************************************************************/
83#if defined(RT_OS_LINUX) || defined(DOXYGEN_RUNNING)
84/** Define this to enable the periodic preemption timer. */
85# define GVMM_SCHED_WITH_PPT
86#endif
87
88
89/*******************************************************************************
90* Structures and Typedefs *
91*******************************************************************************/
92
93/**
94 * Global VM handle.
95 */
96typedef struct GVMHANDLE
97{
98 /** The index of the next handle in the list (free or used). (0 is nil.) */
99 uint16_t volatile iNext;
100 /** Our own index / handle value. */
101 uint16_t iSelf;
102 /** The pointer to the ring-0 only (aka global) VM structure. */
103 PGVM pGVM;
104 /** The ring-0 mapping of the shared VM instance data. */
105 PVM pVM;
106 /** The virtual machine object. */
107 void *pvObj;
108 /** The session this VM is associated with. */
109 PSUPDRVSESSION pSession;
110 /** The ring-0 handle of the EMT0 thread.
111 * This is used for ownership checks as well as looking up a VM handle by thread
112 * at times like assertions. */
113 RTNATIVETHREAD hEMT0;
114 /** The process ID of the handle owner.
115 * This is used for access checks. */
116 RTPROCESS ProcId;
117} GVMHANDLE;
118/** Pointer to a global VM handle. */
119typedef GVMHANDLE *PGVMHANDLE;
120
121/** Number of GVM handles (including the NIL handle). */
122#if HC_ARCH_BITS == 64
123# define GVMM_MAX_HANDLES 1024
124#else
125# define GVMM_MAX_HANDLES 128
126#endif
127
128/**
129 * Per host CPU GVMM data.
130 */
131typedef struct GVMMHOSTCPU
132{
133 /** Magic number (GVMMHOSTCPU_MAGIC). */
134 uint32_t volatile u32Magic;
135 /** The CPU ID. */
136 RTCPUID idCpu;
137 /** The CPU set index. */
138 uint32_t idxCpuSet;
139
140#ifdef GVMM_SCHED_WITH_PPT
141 /** Periodic preemption timer data. */
142 struct
143 {
144 /** The handle to the periodic preemption timer. */
145 PRTTIMER pTimer;
146 /** Spinlock protecting the data below. */
147 RTSPINLOCK hSpinlock;
148 /** The smallest Hz that we need to care about. (static) */
149 uint32_t uMinHz;
150 /** The number of ticks between each historization. */
151 uint32_t cTicksHistoriziationInterval;
152 /** The current historization tick (counting up to
153 * cTicksHistoriziationInterval and then resetting). */
154 uint32_t iTickHistorization;
155 /** The current timer interval. This is set to 0 when inactive. */
156 uint32_t cNsInterval;
157 /** The current timer frequency. This is set to 0 when inactive. */
158 uint32_t uTimerHz;
159 /** The current max frequency reported by the EMTs.
160 * This gets historicized and reset by the timer callback. This is
161 * read without holding the spinlock, so needs atomic updating. */
162 uint32_t volatile uDesiredHz;
163 /** Whether the timer was started or not. */
164 bool volatile fStarted;
165 /** Set if we're starting the timer. */
166 bool volatile fStarting;
167 /** The index of the next history entry (mod it). */
168 uint32_t iHzHistory;
169 /** Historicized uDesiredHz values. The array wraps around, new entries
170 * are added at iHzHistory. This is updated approximately every
171 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
172 uint32_t aHzHistory[8];
173 /** Statistics counter for recording the number of interval changes. */
174 uint32_t cChanges;
175 /** Statistics counter for recording the number of timer starts. */
176 uint32_t cStarts;
177 } Ppt;
178#endif /* GVMM_SCHED_WITH_PPT */
179
180} GVMMHOSTCPU;
181/** Pointer to the per host CPU GVMM data. */
182typedef GVMMHOSTCPU *PGVMMHOSTCPU;
183/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
184#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
185 /** The interval one history entry should cover (approximately), given in
186 * nanoseconds. */
187#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
188
189
190/**
191 * The GVMM instance data.
192 */
193typedef struct GVMM
194{
195 /** Eyecatcher / magic. */
196 uint32_t u32Magic;
197 /** The index of the head of the free handle chain. (0 is nil.) */
198 uint16_t volatile iFreeHead;
199 /** The index of the head of the active handle chain. (0 is nil.) */
200 uint16_t volatile iUsedHead;
201 /** The number of VMs. */
202 uint16_t volatile cVMs;
203 /** Alignment padding. */
204 uint16_t u16Reserved;
205 /** The number of EMTs. */
206 uint32_t volatile cEMTs;
207 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
208 uint32_t volatile cHaltedEMTs;
209 /** Alignment padding. */
210 uint32_t u32Alignment;
211 /** When the next halted or sleeping EMT will wake up.
212 * This is set to 0 when it needs recalculating and to UINT64_MAX when
213 * there are no halted or sleeping EMTs in the GVMM. */
214 uint64_t uNsNextEmtWakeup;
215 /** The lock used to serialize VM creation, destruction and associated events that
216 * aren't performance critical. Owners may acquire the list lock. */
217 RTSEMFASTMUTEX CreateDestroyLock;
218 /** The lock used to serialize used list updates and accesses.
219 * This indirectly includes scheduling since the scheduler will have to walk the
220 * used list to examine running VMs. Owners may not acquire any other locks. */
221 RTSEMFASTMUTEX UsedLock;
222 /** The handle array.
223 * The size of this array defines the maximum number of currently running VMs.
224 * The first entry is unused as it represents the NIL handle. */
225 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
226
227 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
228 * The number of EMTs that means we no longer consider ourselves alone on a
229 * CPU/Core.
230 */
231 uint32_t cEMTsMeansCompany;
232 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
233 * The minimum sleep time for when we're alone, in nanoseconds.
234 */
235 uint32_t nsMinSleepAlone;
236 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
237 * The minimum sleep time for when we've got company, in nanoseconds.
238 */
239 uint32_t nsMinSleepCompany;
240 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
241 * The limit for the first round of early wakeups, given in nanoseconds.
242 */
243 uint32_t nsEarlyWakeUp1;
244 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
245 * The limit for the second round of early wakeups, given in nanoseconds.
246 */
247 uint32_t nsEarlyWakeUp2;
248
249 /** The number of entries in the host CPU array (aHostCpus). */
250 uint32_t cHostCpus;
251 /** Per host CPU data (variable length). */
252 GVMMHOSTCPU aHostCpus[1];
253} GVMM;
254/** Pointer to the GVMM instance data. */
255typedef GVMM *PGVMM;
256
257/** The GVMM::u32Magic value (Charlie Haden). */
258#define GVMM_MAGIC 0x19370806
259
260
261
262/*******************************************************************************
263* Global Variables *
264*******************************************************************************/
265/** Pointer to the GVMM instance data.
266 * (Just my general dislike for global variables.) */
267static PGVMM g_pGVMM = NULL;
268
269/** Macro for obtaining and validating the g_pGVMM pointer.
270 * On failure it will return from the invoking function with the specified return value.
271 *
272 * @param pGVMM The name of the pGVMM variable.
273 * @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
274 * VBox status codes.
275 */
276#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
277 do { \
278 (pGVMM) = g_pGVMM;\
279 AssertPtrReturn((pGVMM), (rc)); \
280 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
281 } while (0)
282
283/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
284 * On failure it will return from the invoking function.
285 *
286 * @param pGVMM The name of the pGVMM variable.
287 */
288#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
289 do { \
290 (pGVMM) = g_pGVMM;\
291 AssertPtrReturnVoid((pGVMM)); \
292 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
293 } while (0)
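/* A minimal usage sketch of the instance macros, mirroring the pattern of the
 * GVMMR0* entry points below (not additional production code):
 *
 *      PGVMM pGVMM;
 *      GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
 *      // ... pGVMM is now validated and safe to dereference ...
 */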
294
295
296/*******************************************************************************
297* Internal Functions *
298*******************************************************************************/
299static void gvmmR0InitPerVMData(PGVM pGVM);
300static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
301static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
302static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
303#ifdef GVMM_SCHED_WITH_PPT
304static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
305#endif
306
307
308/**
309 * Initializes the GVMM.
310 *
311 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
312 *
313 * @returns VBox status code.
314 */
315GVMMR0DECL(int) GVMMR0Init(void)
316{
317 LogFlow(("GVMMR0Init:\n"));
318
319 /*
320 * Allocate and initialize the instance data.
321 */
322 uint32_t cHostCpus = RTMpGetArraySize();
323 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_INTERNAL_ERROR_2);
324
325 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
326 if (!pGVMM)
327 return VERR_NO_MEMORY;
328 int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock);
329 if (RT_SUCCESS(rc))
330 {
331 rc = RTSemFastMutexCreate(&pGVMM->UsedLock);
332 if (RT_SUCCESS(rc))
333 {
334 pGVMM->u32Magic = GVMM_MAGIC;
335 pGVMM->iUsedHead = 0;
336 pGVMM->iFreeHead = 1;
337
338 /* the nil handle */
339 pGVMM->aHandles[0].iSelf = 0;
340 pGVMM->aHandles[0].iNext = 0;
341
342 /* the tail */
343 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
344 pGVMM->aHandles[i].iSelf = i;
345 pGVMM->aHandles[i].iNext = 0; /* nil */
346
347 /* the rest */
348 while (i-- > 1)
349 {
350 pGVMM->aHandles[i].iSelf = i;
351 pGVMM->aHandles[i].iNext = i + 1;
352 }
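            /* The free chain now runs 1 -> 2 -> ... -> GVMM_MAX_HANDLES - 1 -> 0 (nil),
               with iFreeHead = 1; entry 0 stays reserved as the nil handle. */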
353
354 /* The default configuration values. */
355 uint32_t cNsResolution = RTSemEventMultiGetResolution();
356 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
357 if (cNsResolution >= 5*RT_NS_100US)
358 {
359 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
360 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
361 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
362 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
363 }
364 else
365 {
366 cNsResolution = RT_MIN(cNsResolution, 10000);
367 pGVMM->nsMinSleepAlone = cNsResolution - cNsResolution / 3;
368 pGVMM->nsMinSleepCompany = cNsResolution - cNsResolution / 3;
369 pGVMM->nsEarlyWakeUp1 = 0;
370 pGVMM->nsEarlyWakeUp2 = 0;
371 }
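            /* Note: with nsEarlyWakeUp1/2 forced to zero here, gvmmR0SchedDoWakeUps
               below returns immediately, i.e. the early wake-up rounds are disabled
               on hosts with such coarse timer resolution. */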
372
373 /* The host CPU data. */
374 pGVMM->cHostCpus = cHostCpus;
375 uint32_t iCpu = cHostCpus;
376 RTCPUSET PossibleSet;
377 RTMpGetSet(&PossibleSet);
378 while (iCpu-- > 0)
379 {
380 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
381#ifdef GVMM_SCHED_WITH_PPT
382 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
383 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
384 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
385 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
386 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
387 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
388 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
389 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
390 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
391 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
392 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
393 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
394#endif
395
396 if (RTCpuSetIsMember(&PossibleSet, iCpu))
397 {
398 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
399 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
400
401#ifdef GVMM_SCHED_WITH_PPT
402 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
403 50*1000*1000 /* whatever */,
404 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
405 gvmmR0SchedPeriodicPreemptionTimerCallback,
406 &pGVMM->aHostCpus[iCpu]);
407 if (RT_SUCCESS(rc))
408 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
409 if (RT_FAILURE(rc))
410 {
411 while (iCpu < cHostCpus)
412 {
413 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
414 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
415 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
416 iCpu++;
417 }
418 break;
419 }
420#endif
421 }
422 else
423 {
424 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
425 pGVMM->aHostCpus[iCpu].u32Magic = 0;
426 }
427 }
428 if (RT_SUCCESS(rc))
429 {
430 g_pGVMM = pGVMM;
431 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
432 return VINF_SUCCESS;
433 }
434
435 /* bail out. */
436 RTSemFastMutexDestroy(pGVMM->UsedLock);
437 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
438 }
439 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
440 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
441 }
442
443 RTMemFree(pGVMM);
444 return rc;
445}
446
447
448/**
449 * Terminates the GVMM.
450 *
451 * This is called while owning the loader semaphore (see supdrvLdrFree()).
452 * And unless something is wrong, there should be absolutely no VMs
453 * registered at this point.
454 */
455GVMMR0DECL(void) GVMMR0Term(void)
456{
457 LogFlow(("GVMMR0Term:\n"));
458
459 PGVMM pGVMM = g_pGVMM;
460 g_pGVMM = NULL;
461 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
462 {
463 SUPR0Printf("GVMMR0Term: pGVMM=%p\n", pGVMM);
464 return;
465 }
466
467 /*
468 * First of all, stop all active timers.
469 */
470 uint32_t cActiveTimers = 0;
471 uint32_t iCpu = pGVMM->cHostCpus;
472 while (iCpu-- > 0)
473 {
474 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
475#ifdef GVMM_SCHED_WITH_PPT
476 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
477 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
478 cActiveTimers++;
479#endif
480 }
481 if (cActiveTimers)
482 RTThreadSleep(1); /* fudge */
483
484 /*
485 * Invalidate the instance data and free the resources.
486 */
487 pGVMM->u32Magic = ~GVMM_MAGIC;
488 RTSemFastMutexDestroy(pGVMM->UsedLock);
489 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
490 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
491 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
492
493 pGVMM->iFreeHead = 0;
494 if (pGVMM->iUsedHead)
495 {
496 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
497 pGVMM->iUsedHead = 0;
498 }
499
500#ifdef GVMM_SCHED_WITH_PPT
501 iCpu = pGVMM->cHostCpus;
502 while (iCpu-- > 0)
503 {
504 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
505 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
506 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
507 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
508 }
509#endif
510
511 RTMemFree(pGVMM);
512}
513
514
515/**
516 * A quick hack for setting global config values.
517 *
518 * @returns VBox status code.
519 *
520 * @param pSession The session handle. Used for authentication.
521 * @param pszName The variable name.
522 * @param u64Value The new value.
523 */
524GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
525{
526 /*
527 * Validate input.
528 */
529 PGVMM pGVMM;
530 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
531 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
532 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
533
534 /*
535 * String switch time!
536 */
537 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
538 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
539 int rc = VINF_SUCCESS;
540 pszName += sizeof("/GVMM/") - 1;
541 if (!strcmp(pszName, "cEMTsMeansCompany"))
542 {
543 if (u64Value <= UINT32_MAX)
544 pGVMM->cEMTsMeansCompany = u64Value;
545 else
546 rc = VERR_OUT_OF_RANGE;
547 }
548 else if (!strcmp(pszName, "MinSleepAlone"))
549 {
550 if (u64Value <= RT_NS_100MS)
551 pGVMM->nsMinSleepAlone = u64Value;
552 else
553 rc = VERR_OUT_OF_RANGE;
554 }
555 else if (!strcmp(pszName, "MinSleepCompany"))
556 {
557 if (u64Value <= RT_NS_100MS)
558 pGVMM->nsMinSleepCompany = u64Value;
559 else
560 rc = VERR_OUT_OF_RANGE;
561 }
562 else if (!strcmp(pszName, "EarlyWakeUp1"))
563 {
564 if (u64Value <= RT_NS_100MS)
565 pGVMM->nsEarlyWakeUp1 = u64Value;
566 else
567 rc = VERR_OUT_OF_RANGE;
568 }
569 else if (!strcmp(pszName, "EarlyWakeUp2"))
570 {
571 if (u64Value <= RT_NS_100MS)
572 pGVMM->nsEarlyWakeUp2 = u64Value;
573 else
574 rc = VERR_OUT_OF_RANGE;
575 }
576 else
577 rc = VERR_CFGM_VALUE_NOT_FOUND;
578 return rc;
579}
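/* An illustrative call, using hypothetical values (any of the "/GVMM/" names
 * handled above works the same way; out-of-range values yield VERR_OUT_OF_RANGE
 * and unknown names VERR_CFGM_VALUE_NOT_FOUND):
 *
 *      int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000);  (0.5 ms, in ns)
 */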
580
581
582/**
583 * A quick hack for getting global config values.
584 *
585 * @returns VBox status code.
586 *
587 * @param pSession The session handle. Used for authentication.
588 * @param pszName The variable name.
589 * @param pu64Value Where to store the value.
590 */
591GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
592{
593 /*
594 * Validate input.
595 */
596 PGVMM pGVMM;
597 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
598 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
599 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
600 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
601
602 /*
603 * String switch time!
604 */
605 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
606 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
607 int rc = VINF_SUCCESS;
608 pszName += sizeof("/GVMM/") - 1;
609 if (!strcmp(pszName, "cEMTsMeansCompany"))
610 *pu64Value = pGVMM->cEMTsMeansCompany;
611 else if (!strcmp(pszName, "MinSleepAlone"))
612 *pu64Value = pGVMM->nsMinSleepAlone;
613 else if (!strcmp(pszName, "MinSleepCompany"))
614 *pu64Value = pGVMM->nsMinSleepCompany;
615 else if (!strcmp(pszName, "EarlyWakeUp1"))
616 *pu64Value = pGVMM->nsEarlyWakeUp1;
617 else if (!strcmp(pszName, "EarlyWakeUp2"))
618 *pu64Value = pGVMM->nsEarlyWakeUp2;
619 else
620 rc = VERR_CFGM_VALUE_NOT_FOUND;
621 return rc;
622}
623
624
625/**
626 * Tries to acquire the 'used' lock.
627 *
628 * @returns IPRT status code, see RTSemFastMutexRequest.
629 * @param pGVMM The GVMM instance data.
630 */
631DECLINLINE(int) gvmmR0UsedLock(PGVMM pGVMM)
632{
633 LogFlow(("++gvmmR0UsedLock(%p)\n", pGVMM));
634 int rc = RTSemFastMutexRequest(pGVMM->UsedLock);
635 LogFlow(("gvmmR0UsedLock(%p)->%Rrc\n", pGVMM, rc));
636 return rc;
637}
638
639
640/**
641 * Release the 'used' lock.
642 *
643 * @returns IPRT status code, see RTSemFastMutexRelease.
644 * @param pGVMM The GVMM instance data.
645 */
646DECLINLINE(int) gvmmR0UsedUnlock(PGVMM pGVMM)
647{
648 LogFlow(("--gvmmR0UsedUnlock(%p)\n", pGVMM));
649 int rc = RTSemFastMutexRelease(pGVMM->UsedLock);
650 AssertRC(rc);
651 return rc;
652}
653
654
655/**
656 * Tries to acquire the 'create & destroy' lock.
657 *
658 * @returns IPRT status code, see RTSemFastMutexRequest.
659 * @param pGVMM The GVMM instance data.
660 */
661DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
662{
663 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
664 int rc = RTSemFastMutexRequest(pGVMM->CreateDestroyLock);
665 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
666 return rc;
667}
668
669
670/**
671 * Release the 'create & destroy' lock.
672 *
673 * @returns IPRT status code, see RTSemFastMutexRelease.
674 * @param pGVMM The GVMM instance data.
675 */
676DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
677{
678 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
679 int rc = RTSemFastMutexRelease(pGVMM->CreateDestroyLock);
680 AssertRC(rc);
681 return rc;
682}
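/*
 * Lock ordering reminder (see the GVMM structure comments above): the
 * CreateDestroyLock may be taken first and the UsedLock acquired while holding
 * it, but never the other way around, and UsedLock owners must not take any
 * other locks. Neither mutex is recursive.
 */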
683
684
685/**
686 * Request wrapper for the GVMMR0CreateVM API.
687 *
688 * @returns VBox status code.
689 * @param pReq The request buffer.
690 */
691GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq)
692{
693 /*
694 * Validate the request.
695 */
696 if (!VALID_PTR(pReq))
697 return VERR_INVALID_POINTER;
698 if (pReq->Hdr.cbReq != sizeof(*pReq))
699 return VERR_INVALID_PARAMETER;
700 if (!VALID_PTR(pReq->pSession))
701 return VERR_INVALID_POINTER;
702
703 /*
704 * Execute it.
705 */
706 PVM pVM;
707 pReq->pVMR0 = NULL;
708 pReq->pVMR3 = NIL_RTR3PTR;
709 int rc = GVMMR0CreateVM(pReq->pSession, pReq->cCpus, &pVM);
710 if (RT_SUCCESS(rc))
711 {
712 pReq->pVMR0 = pVM;
713 pReq->pVMR3 = pVM->pVMR3;
714 }
715 return rc;
716}
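/* A rough caller-side sketch of the request (only the fields referenced above
 * are shown; the request is normally dispatched through the support driver
 * rather than invoked directly):
 *
 *      GVMMCREATEVMREQ Req;
 *      Req.Hdr.cbReq = sizeof(Req);
 *      Req.pSession  = pSession;
 *      Req.cCpus     = 1;
 *      Req.pVMR0     = NULL;
 *      Req.pVMR3     = NIL_RTR3PTR;
 *      int rc = GVMMR0CreateVMReq(&Req);   (on success Req.pVMR0 / Req.pVMR3 are set)
 */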
717
718
719/**
720 * Allocates the VM structure and registers it with GVM.
721 *
722 * The caller will become the VM owner and thereby the EMT.
723 *
724 * @returns VBox status code.
725 * @param pSession The support driver session.
726 * @param cCpus Number of virtual CPUs for the new VM.
727 * @param ppVM Where to store the pointer to the VM structure.
728 *
729 * @thread EMT.
730 */
731GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
732{
733 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
734 PGVMM pGVMM;
735 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
736
737 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
738 *ppVM = NULL;
739
740 if ( cCpus == 0
741 || cCpus > VMM_MAX_CPU_COUNT)
742 return VERR_INVALID_PARAMETER;
743
744 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
745 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR);
746 RTPROCESS ProcId = RTProcSelf();
747 AssertReturn(ProcId != NIL_RTPROCESS, VERR_INTERNAL_ERROR);
748
749 /*
750 * The whole allocation process is protected by the lock.
751 */
752 int rc = gvmmR0CreateDestroyLock(pGVMM);
753 AssertRCReturn(rc, rc);
754
755 /*
756 * Allocate a handle first so we don't waste resources unnecessarily.
757 */
758 uint16_t iHandle = pGVMM->iFreeHead;
759 if (iHandle)
760 {
761 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
762
763 /* consistency checks, a bit paranoid as always. */
764 if ( !pHandle->pVM
765 && !pHandle->pGVM
766 && !pHandle->pvObj
767 && pHandle->iSelf == iHandle)
768 {
769 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
770 if (pHandle->pvObj)
771 {
772 /*
773 * Move the handle from the free to used list and perform permission checks.
774 */
775 rc = gvmmR0UsedLock(pGVMM);
776 AssertRC(rc);
777
778 pGVMM->iFreeHead = pHandle->iNext;
779 pHandle->iNext = pGVMM->iUsedHead;
780 pGVMM->iUsedHead = iHandle;
781 pGVMM->cVMs++;
782
783 pHandle->pVM = NULL;
784 pHandle->pGVM = NULL;
785 pHandle->pSession = pSession;
786 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
787 pHandle->ProcId = NIL_RTPROCESS;
788
789 gvmmR0UsedUnlock(pGVMM);
790
791 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
792 if (RT_SUCCESS(rc))
793 {
794 /*
795 * Allocate the global VM structure (GVM) and initialize it.
796 */
797 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
798 if (pGVM)
799 {
800 pGVM->u32Magic = GVM_MAGIC;
801 pGVM->hSelf = iHandle;
802 pGVM->pVM = NULL;
803 pGVM->cCpus = cCpus;
804
805 gvmmR0InitPerVMData(pGVM);
806 GMMR0InitPerVMData(pGVM);
807
808 /*
809 * Allocate the shared VM structure and associated page array.
810 */
811 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
812 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
813#ifdef RT_OS_DARWIN /** @todo Figure out why this is broken. Is it only on snow leopard? */
814 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, (cPages + 1) << PAGE_SHIFT, false /* fExecutable */);
815#else
816 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
817#endif
818 if (RT_SUCCESS(rc))
819 {
820 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
821 memset(pVM, 0, cPages << PAGE_SHIFT);
822 pVM->enmVMState = VMSTATE_CREATING;
823 pVM->pVMR0 = pVM;
824 pVM->pSession = pSession;
825 pVM->hSelf = iHandle;
826 pVM->cbSelf = cbVM;
827 pVM->cCpus = cCpus;
828 pVM->uCpuExecutionCap = 100; /* default is no cap. */
829 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
830
831 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
832 if (RT_SUCCESS(rc))
833 {
834 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
835 for (uint32_t iPage = 0; iPage < cPages; iPage++)
836 {
837 paPages[iPage].uReserved = 0;
838 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
839 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
840 }
841
842 /*
843 * Map them into ring-3.
844 */
845 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
846 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
847 if (RT_SUCCESS(rc))
848 {
849 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
850 AssertPtr((void *)pVM->pVMR3);
851
852 /* Initialize all the VM pointers. */
853 for (uint32_t i = 0; i < cCpus; i++)
854 {
855 pVM->aCpus[i].pVMR0 = pVM;
856 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
857 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
858 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
859 }
860
861 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, 0,
862 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
863 if (RT_SUCCESS(rc))
864 {
865 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
866 AssertPtr((void *)pVM->paVMPagesR3);
867
868 /* complete the handle - take the UsedLock sem just to be careful. */
869 rc = gvmmR0UsedLock(pGVMM);
870 AssertRC(rc);
871
872 pHandle->pVM = pVM;
873 pHandle->pGVM = pGVM;
874 pHandle->hEMT0 = hEMT0;
875 pHandle->ProcId = ProcId;
876 pGVM->pVM = pVM;
877 pGVM->aCpus[0].hEMT = hEMT0;
878 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
879 pGVMM->cEMTs += cCpus;
880
881 gvmmR0UsedUnlock(pGVMM);
882 gvmmR0CreateDestroyUnlock(pGVMM);
883
884 *ppVM = pVM;
885 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
886 return VINF_SUCCESS;
887 }
888
889 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
890 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
891 }
892 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
893 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
894 }
895 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
896 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
897 }
898 }
899 }
900 /* else: The user wasn't permitted to create this VM. */
901
902 /*
903 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
904 * object reference here. A little extra mess because of non-recursive lock.
905 */
906 void *pvObj = pHandle->pvObj;
907 pHandle->pvObj = NULL;
908 gvmmR0CreateDestroyUnlock(pGVMM);
909
910 SUPR0ObjRelease(pvObj, pSession);
911
912 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
913 return rc;
914 }
915
916 rc = VERR_NO_MEMORY;
917 }
918 else
919 rc = VERR_INTERNAL_ERROR;
920 }
921 else
922 rc = VERR_GVM_TOO_MANY_VMS;
923
924 gvmmR0CreateDestroyUnlock(pGVMM);
925 return rc;
926}
927
928
929/**
930 * Initializes the per VM data belonging to GVMM.
931 *
932 * @param pGVM Pointer to the global VM structure.
933 */
934static void gvmmR0InitPerVMData(PGVM pGVM)
935{
936 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
937 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
938 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
939 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
940 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
941 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
942 pGVM->gvmm.s.fDoneVMMR0Init = false;
943 pGVM->gvmm.s.fDoneVMMR0Term = false;
944
945 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
946 {
947 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
948 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
949 }
950}
951
952
953/**
954 * Does the VM initialization.
955 *
956 * @returns VBox status code.
957 * @param pVM Pointer to the shared VM structure.
958 */
959GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
960{
961 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
962
963 /*
964 * Validate the VM structure, state and handle.
965 */
966 PGVM pGVM;
967 PGVMM pGVMM;
968 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
969 if (RT_SUCCESS(rc))
970 {
971 if ( !pGVM->gvmm.s.fDoneVMMR0Init
972 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
973 {
974 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
975 {
976 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
977 if (RT_FAILURE(rc))
978 {
979 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
980 break;
981 }
982 }
983 }
984 else
985 rc = VERR_WRONG_ORDER;
986 }
987
988 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
989 return rc;
990}
991
992
993/**
994 * Indicates that we're done with the ring-0 initialization
995 * of the VM.
996 *
997 * @param pVM Pointer to the shared VM structure.
998 * @thread EMT(0)
999 */
1000GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1001{
1002 /* Validate the VM structure, state and handle. */
1003 PGVM pGVM;
1004 PGVMM pGVMM;
1005 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1006 AssertRCReturnVoid(rc);
1007
1008 /* Set the indicator. */
1009 pGVM->gvmm.s.fDoneVMMR0Init = true;
1010}
1011
1012
1013/**
1014 * Indicates that we're doing the ring-0 termination of the VM.
1015 *
1016 * @returns true if termination hasn't been done already, false if it has.
1017 * @param pVM Pointer to the shared VM structure.
1018 * @param pGVM Pointer to the global VM structure. Optional.
1019 * @thread EMT(0)
1020 */
1021GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1022{
1023 /* Validate the VM structure, state and handle. */
1024 AssertPtrNullReturn(pGVM, false);
1025 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1026 if (!pGVM)
1027 {
1028 PGVMM pGVMM;
1029 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1030 AssertRCReturn(rc, false);
1031 }
1032
1033 /* Set the indicator. */
1034 if (pGVM->gvmm.s.fDoneVMMR0Term)
1035 return false;
1036 pGVM->gvmm.s.fDoneVMMR0Term = true;
1037 return true;
1038}
1039
1040
1041/**
1042 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1043 *
1044 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1045 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1046 * would've been nice if the caller was actually the EMT thread or that we somehow
1047 * could've associated the calling thread with the VM up front.
1048 *
1049 * @returns VBox status code.
1050 * @param pVM Pointer to the shared VM structure.
1051 *
1052 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1053 */
1054GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM)
1055{
1056 LogFlow(("GVMMR0DestroyVM: pVM=%p\n", pVM));
1057 PGVMM pGVMM;
1058 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1059
1060
1061 /*
1062 * Validate the VM structure, state and caller.
1063 */
1064 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1065 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1066 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), VERR_WRONG_ORDER);
1067
1068 uint32_t hGVM = pVM->hSelf;
1069 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1070 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1071
1072 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1073 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1074
1075 RTPROCESS ProcId = RTProcSelf();
1076 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1077 AssertReturn( ( pHandle->hEMT0 == hSelf
1078 && pHandle->ProcId == ProcId)
1079 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1080
1081 /*
1082 * Lookup the handle and destroy the object.
1083 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1084 * object, we take some precautions against racing callers just in case...
1085 */
1086 int rc = gvmmR0CreateDestroyLock(pGVMM);
1087 AssertRC(rc);
1088
1089 /* be careful here because we might theoretically be racing someone else cleaning up. */
1090 if ( pHandle->pVM == pVM
1091 && ( ( pHandle->hEMT0 == hSelf
1092 && pHandle->ProcId == ProcId)
1093 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1094 && VALID_PTR(pHandle->pvObj)
1095 && VALID_PTR(pHandle->pSession)
1096 && VALID_PTR(pHandle->pGVM)
1097 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1098 {
1099 void *pvObj = pHandle->pvObj;
1100 pHandle->pvObj = NULL;
1101 gvmmR0CreateDestroyUnlock(pGVMM);
1102
1103 SUPR0ObjRelease(pvObj, pHandle->pSession);
1104 }
1105 else
1106 {
1107 SUPR0Printf("GVMMR0DestroyVM: pHandle=%p:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1108 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1109 gvmmR0CreateDestroyUnlock(pGVMM);
1110 rc = VERR_INTERNAL_ERROR;
1111 }
1112
1113 return rc;
1114}
1115
1116
1117/**
1118 * Performs VM cleanup tasks as part of object destruction.
1119 *
1120 * @param pGVM The GVM pointer.
1121 */
1122static void gvmmR0CleanupVM(PGVM pGVM)
1123{
1124 if ( pGVM->gvmm.s.fDoneVMMR0Init
1125 && !pGVM->gvmm.s.fDoneVMMR0Term)
1126 {
1127 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1128 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1129 {
1130 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1131 VMMR0TermVM(pGVM->pVM, pGVM);
1132 }
1133 else
1134 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1135 }
1136
1137 GMMR0CleanupVM(pGVM);
1138}
1139
1140
1141/**
1142 * Handle destructor.
1143 *
1144 * @param pvGVMM The GVM instance pointer.
1145 * @param pvHandle The handle pointer.
1146 */
1147static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle)
1148{
1149 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvGVMM, pvHandle));
1150
1151 /*
1152 * Some quick, paranoid, input validation.
1153 */
1154 PGVMHANDLE pHandle = (PGVMHANDLE)pvHandle;
1155 AssertPtr(pHandle);
1156 PGVMM pGVMM = (PGVMM)pvGVMM;
1157 Assert(pGVMM == g_pGVMM);
1158 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1159 if ( !iHandle
1160 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1161 || iHandle != pHandle->iSelf)
1162 {
1163 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1164 return;
1165 }
1166
1167 int rc = gvmmR0CreateDestroyLock(pGVMM);
1168 AssertRC(rc);
1169 rc = gvmmR0UsedLock(pGVMM);
1170 AssertRC(rc);
1171
1172 /*
1173 * This is a tad slow but a doubly linked list is too much hassle.
1174 */
1175 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1176 {
1177 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1178 gvmmR0UsedUnlock(pGVMM);
1179 gvmmR0CreateDestroyUnlock(pGVMM);
1180 return;
1181 }
1182
1183 if (pGVMM->iUsedHead == iHandle)
1184 pGVMM->iUsedHead = pHandle->iNext;
1185 else
1186 {
1187 uint16_t iPrev = pGVMM->iUsedHead;
1188 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1189 while (iPrev)
1190 {
1191 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1192 {
1193 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1194 gvmmR0UsedUnlock(pGVMM);
1195 gvmmR0CreateDestroyUnlock(pGVMM);
1196 return;
1197 }
1198 if (RT_UNLIKELY(c-- <= 0))
1199 {
1200 iPrev = 0;
1201 break;
1202 }
1203
1204 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1205 break;
1206 iPrev = pGVMM->aHandles[iPrev].iNext;
1207 }
1208 if (!iPrev)
1209 {
1210 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1211 gvmmR0UsedUnlock(pGVMM);
1212 gvmmR0CreateDestroyUnlock(pGVMM);
1213 return;
1214 }
1215
1216 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1217 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1218 }
1219 pHandle->iNext = 0;
1220 pGVMM->cVMs--;
1221
1222 /*
1223 * Do the global cleanup round.
1224 */
1225 PGVM pGVM = pHandle->pGVM;
1226 if ( VALID_PTR(pGVM)
1227 && pGVM->u32Magic == GVM_MAGIC)
1228 {
1229 pGVMM->cEMTs -= pGVM->cCpus;
1230 gvmmR0UsedUnlock(pGVMM);
1231
1232 gvmmR0CleanupVM(pGVM);
1233
1234 /*
1235 * Do the GVMM cleanup - must be done last.
1236 */
1237 /* The VM and VM pages mappings/allocations. */
1238 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1239 {
1240 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1241 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1242 }
1243
1244 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1245 {
1246 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1247 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1248 }
1249
1250 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1251 {
1252 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1253 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1254 }
1255
1256 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1257 {
1258 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1259 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1260 }
1261
1262 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1263 {
1264 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1265 {
1266 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1267 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1268 }
1269 }
1270
1271 /* the GVM structure itself. */
1272 pGVM->u32Magic |= UINT32_C(0x80000000);
1273 RTMemFree(pGVM);
1274
1275 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1276 rc = gvmmR0UsedLock(pGVMM);
1277 AssertRC(rc);
1278 }
1279 /* else: GVMMR0CreateVM cleanup. */
1280
1281 /*
1282 * Free the handle.
1283 */
1284 pHandle->iNext = pGVMM->iFreeHead;
1285 pGVMM->iFreeHead = iHandle;
1286 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1287 ASMAtomicWriteNullPtr(&pHandle->pVM);
1288 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1289 ASMAtomicWriteNullPtr(&pHandle->pSession);
1290 ASMAtomicWriteSize(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1291 ASMAtomicWriteSize(&pHandle->ProcId, NIL_RTPROCESS);
1292
1293 gvmmR0UsedUnlock(pGVMM);
1294 gvmmR0CreateDestroyUnlock(pGVMM);
1295 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1296}
1297
1298
1299/**
1300 * Registers the calling thread as the EMT of a Virtual CPU.
1301 *
1302 * Note that VCPU 0 is automatically registered during VM creation.
1303 *
1304 * @returns VBox status code
1305 * @param pVM The shared VM structure (the ring-0 mapping).
1306 * @param idCpu VCPU id.
1307 */
1308GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu)
1309{
1310 AssertReturn(idCpu != 0, VERR_NOT_OWNER);
1311
1312 /*
1313 * Validate the VM structure, state and handle.
1314 */
1315 PGVM pGVM;
1316 PGVMM pGVMM;
1317 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /* fTakeUsedLock */);
1318 if (RT_FAILURE(rc))
1319 return rc;
1320
1321 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1322 AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED);
1323 Assert(pGVM->cCpus == pVM->cCpus);
1324 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1325
1326 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1327
1328 return VINF_SUCCESS;
1329}
1330
1331
1332/**
1333 * Lookup a GVM structure by its handle.
1334 *
1335 * @returns The GVM pointer on success, NULL on failure.
1336 * @param hGVM The global VM handle. Asserts on bad handle.
1337 */
1338GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1339{
1340 PGVMM pGVMM;
1341 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1342
1343 /*
1344 * Validate.
1345 */
1346 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1347 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1348
1349 /*
1350 * Look it up.
1351 */
1352 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1353 AssertPtrReturn(pHandle->pVM, NULL);
1354 AssertPtrReturn(pHandle->pvObj, NULL);
1355 PGVM pGVM = pHandle->pGVM;
1356 AssertPtrReturn(pGVM, NULL);
1357 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1358
1359 return pHandle->pGVM;
1360}
1361
1362
1363/**
1364 * Lookup a GVM structure by the shared VM structure.
1365 *
1366 * The calling thread must be in the same process as the VM. All current lookups
1367 * are by threads inside the same process, so this will not be an issue.
1368 *
1369 * @returns VBox status code.
1370 * @param pVM The shared VM structure (the ring-0 mapping).
1371 * @param ppGVM Where to store the GVM pointer.
1372 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1373 * @param fTakeUsedLock Whether to take the used lock or not.
1374 * Be very careful if not taking the lock as it's possible that
1375 * the VM will disappear then.
1376 *
1377 * @remark This will not assert on an invalid pVM but try to return silently.
1378 */
1379static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1380{
1381 RTPROCESS ProcId = RTProcSelf();
1382 PGVMM pGVMM;
1383 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1384
1385 /*
1386 * Validate.
1387 */
1388 if (RT_UNLIKELY( !VALID_PTR(pVM)
1389 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1390 return VERR_INVALID_POINTER;
1391 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1392 || pVM->enmVMState >= VMSTATE_TERMINATED))
1393 return VERR_INVALID_POINTER;
1394
1395 uint16_t hGVM = pVM->hSelf;
1396 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1397 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1398 return VERR_INVALID_HANDLE;
1399
1400 /*
1401 * Look it up.
1402 */
1403 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1404 PGVM pGVM;
1405 if (fTakeUsedLock)
1406 {
1407 int rc = gvmmR0UsedLock(pGVMM);
1408 AssertRCReturn(rc, rc);
1409
1410 pGVM = pHandle->pGVM;
1411 if (RT_UNLIKELY( pHandle->pVM != pVM
1412 || pHandle->ProcId != ProcId
1413 || !VALID_PTR(pHandle->pvObj)
1414 || !VALID_PTR(pGVM)
1415 || pGVM->pVM != pVM))
1416 {
1417 gvmmR0UsedUnlock(pGVMM);
1418 return VERR_INVALID_HANDLE;
1419 }
1420 }
1421 else
1422 {
1423 if (RT_UNLIKELY(pHandle->pVM != pVM))
1424 return VERR_INVALID_HANDLE;
1425 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1426 return VERR_INVALID_HANDLE;
1427 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1428 return VERR_INVALID_HANDLE;
1429
1430 pGVM = pHandle->pGVM;
1431 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1432 return VERR_INVALID_HANDLE;
1433 if (RT_UNLIKELY(pGVM->pVM != pVM))
1434 return VERR_INVALID_HANDLE;
1435 }
1436
1437 *ppGVM = pGVM;
1438 *ppGVMM = pGVMM;
1439 return VINF_SUCCESS;
1440}
1441
1442
1443/**
1444 * Lookup a GVM structure by the shared VM structure.
1445 *
1446 * @returns VBox status code.
1447 * @param pVM The shared VM structure (the ring-0 mapping).
1448 * @param ppGVM Where to store the GVM pointer.
1449 *
1450 * @remark This will not take the 'used' lock because it isn't recursive
1451 * and this function may be called while the lock is already held.
1452 */
1453GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1454{
1455 PGVMM pGVMM;
1456 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1457}
1458
1459
1460/**
1461 * Lookup a GVM structure by the shared VM structure, ensuring that the
1462 * caller is an EMT thread.
1463 *
1464 * @returns VBox status code.
1465 * @param pVM The shared VM structure (the ring-0 mapping).
1466 * @param idCpu The Virtual CPU ID of the calling EMT.
1467 * @param ppGVM Where to store the GVM pointer.
1468 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1469 * @thread EMT
1470 *
1471 * @remark This will assert in all failure paths.
1472 */
1473static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1474{
1475 PGVMM pGVMM;
1476 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1477
1478 /*
1479 * Validate.
1480 */
1481 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1482 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1483
1484 uint16_t hGVM = pVM->hSelf;
1485 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1486 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1487
1488 /*
1489 * Look it up.
1490 */
1491 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1492 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1493 RTPROCESS ProcId = RTProcSelf();
1494 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1495 AssertPtrReturn(pHandle->pvObj, VERR_INTERNAL_ERROR);
1496
1497 PGVM pGVM = pHandle->pGVM;
1498 AssertPtrReturn(pGVM, VERR_INTERNAL_ERROR);
1499 AssertReturn(pGVM->pVM == pVM, VERR_INTERNAL_ERROR);
1500 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1501 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1502 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_INTERNAL_ERROR);
1503
1504 *ppGVM = pGVM;
1505 *ppGVMM = pGVMM;
1506 return VINF_SUCCESS;
1507}
1508
1509
1510/**
1511 * Lookup a GVM structure by the shared VM structure
1512 * and ensure that the caller is the EMT thread.
1513 *
1514 * @returns VBox status code.
1515 * @param pVM The shared VM structure (the ring-0 mapping).
1516 * @param idCpu The Virtual CPU ID of the calling EMT.
1517 * @param ppGVM Where to store the GVM pointer.
1518 * @thread EMT
1519 */
1520GVMMR0DECL(int) GVMMR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM)
1521{
1522 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
1523 PGVMM pGVMM;
1524 return gvmmR0ByVMAndEMT(pVM, idCpu, ppGVM, &pGVMM);
1525}
1526
1527
1528/**
1529 * Lookup a VM by its global handle.
1530 *
1531 * @returns The VM handle on success, NULL on failure.
1532 * @param hGVM The global VM handle. Asserts on bad handle.
1533 */
1534GVMMR0DECL(PVM) GVMMR0GetVMByHandle(uint32_t hGVM)
1535{
1536 PGVM pGVM = GVMMR0ByHandle(hGVM);
1537 return pGVM ? pGVM->pVM : NULL;
1538}
1539
1540
1541/**
1542 * Looks up the VM belonging to the specified EMT thread.
1543 *
1544 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1545 * unnecessary kernel panics when the EMT thread hits an assertion. The
1546 * caller may or may not be an EMT thread.
1547 *
1548 * @returns The VM handle on success, NULL on failure.
1549 * @param hEMT The native thread handle of the EMT.
1550 * NIL_RTNATIVETHREAD means the current thread
1551 */
1552GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1553{
1554 /*
1555 * No Assertions here as we're usually called in an AssertMsgN or
1556 * RTAssert* context.
1557 */
1558 PGVMM pGVMM = g_pGVMM;
1559 if ( !VALID_PTR(pGVMM)
1560 || pGVMM->u32Magic != GVMM_MAGIC)
1561 return NULL;
1562
1563 if (hEMT == NIL_RTNATIVETHREAD)
1564 hEMT = RTThreadNativeSelf();
1565 RTPROCESS ProcId = RTProcSelf();
1566
1567 /*
1568 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1569 */
1570 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1571 {
1572 if ( pGVMM->aHandles[i].iSelf == i
1573 && pGVMM->aHandles[i].ProcId == ProcId
1574 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1575 && VALID_PTR(pGVMM->aHandles[i].pVM)
1576 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1577 {
1578 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1579 return pGVMM->aHandles[i].pVM;
1580
1581 /* This is fairly safe with the current process per VM approach. */
1582 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1583 VMCPUID const cCpus = pGVM->cCpus;
1584 if ( cCpus < 1
1585 || cCpus > VMM_MAX_CPU_COUNT)
1586 continue;
1587 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1588 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1589 return pGVMM->aHandles[i].pVM;
1590 }
1591 }
1592 return NULL;
1593}
1594
1595
1596/**
1597 * This will wake up expired and soon-to-be expired VMs.
1598 *
1599 * @returns The number of VMs that have been woken up.
1600 * @param pGVMM Pointer to the GVMM instance data.
1601 * @param u64Now The current time.
1602 */
1603static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1604{
1605 /*
1606 * Skip this if it has been disabled because of high resolution wakeups or by
1607 * the user.
1608 */
1609 if ( !pGVMM->nsEarlyWakeUp1
1610 && !pGVMM->nsEarlyWakeUp2)
1611 return 0;
1612
1613/** @todo Rewrite this algorithm. See performance defect XYZ. */
1614
1615 /*
1616 * A cheap optimization to stop wasting so much time here on big setups.
1617 */
1618 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1619 if ( pGVMM->cHaltedEMTs == 0
1620 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1621 return 0;
1622
1623 /*
1624 * The first pass will wake up VMs which have actually expired
1625 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1626 */
1627 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1628 uint64_t u64Min = UINT64_MAX;
1629 unsigned cWoken = 0;
1630 unsigned cHalted = 0;
1631 unsigned cTodo2nd = 0;
1632 unsigned cTodo3rd = 0;
1633 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1634 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1635 i = pGVMM->aHandles[i].iNext)
1636 {
1637 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1638 if ( VALID_PTR(pCurGVM)
1639 && pCurGVM->u32Magic == GVM_MAGIC)
1640 {
1641 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1642 {
1643 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1644 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1645 if (u64)
1646 {
1647 if (u64 <= u64Now)
1648 {
1649 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1650 {
1651 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1652 AssertRC(rc);
1653 cWoken++;
1654 }
1655 }
1656 else
1657 {
1658 cHalted++;
1659 if (u64 <= uNsEarlyWakeUp1)
1660 cTodo2nd++;
1661 else if (u64 <= uNsEarlyWakeUp2)
1662 cTodo3rd++;
1663 else if (u64 < u64Min)
1664 u64Min = u64;
1665 }
1666 }
1667 }
1668 }
1669 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1670 }
1671
1672 if (cTodo2nd)
1673 {
1674 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1675 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1676 i = pGVMM->aHandles[i].iNext)
1677 {
1678 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1679 if ( VALID_PTR(pCurGVM)
1680 && pCurGVM->u32Magic == GVM_MAGIC)
1681 {
1682 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1683 {
1684 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1685 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1686 if ( u64
1687 && u64 <= uNsEarlyWakeUp1)
1688 {
1689 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1690 {
1691 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1692 AssertRC(rc);
1693 cWoken++;
1694 }
1695 }
1696 }
1697 }
1698 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1699 }
1700 }
1701
1702 if (cTodo3rd)
1703 {
1704 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1705 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1706 i = pGVMM->aHandles[i].iNext)
1707 {
1708 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1709 if ( VALID_PTR(pCurGVM)
1710 && pCurGVM->u32Magic == GVM_MAGIC)
1711 {
1712 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1713 {
1714 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1715 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1716 if ( u64
1717 && u64 <= uNsEarlyWakeUp2)
1718 {
1719 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1720 {
1721 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1722 AssertRC(rc);
1723 cWoken++;
1724 }
1725 }
1726 }
1727 }
1728 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1729 }
1730 }
1731
1732 /*
1733 * Set the minimum value.
1734 */
1735 pGVMM->uNsNextEmtWakeup = u64Min;
1736
1737 return cWoken;
1738}
1739
1740
1741/**
1742 * Halt the EMT thread.
1743 *
1744 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
1745 * VERR_INTERRUPTED if a signal was scheduled for the thread.
1746 * @param pVM Pointer to the shared VM structure.
1747 * @param idCpu The Virtual CPU ID of the calling EMT.
1748 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
1749 * @thread EMT(idCpu).
1750 */
1751GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
1752{
1753 LogFlow(("GVMMR0SchedHalt: pVM=%p\n", pVM));
1754
1755 /*
1756 * Validate the VM structure, state and handle.
1757 */
1758 PGVM pGVM;
1759 PGVMM pGVMM;
1760 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
1761 if (RT_FAILURE(rc))
1762 return rc;
1763 pGVM->gvmm.s.StatsSched.cHaltCalls++;
1764
1765 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
1766 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
1767
1768 /*
1769 * Take the UsedList semaphore, get the current time
1770 * and check if anyone needs waking up.
1771 * Interrupts must NOT be disabled at this point because we ask for GIP time!
1772 */
1773 rc = gvmmR0UsedLock(pGVMM);
1774 AssertRC(rc);
1775
1776 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
1777
1778 /* GIP hack: We might frequently be sleeping for short intervals where the
1779 difference between GIP and system time matters on systems with high resolution
1780 system time. So, convert the input from GIP to System time in that case. */
1781 Assert(ASMGetFlags() & X86_EFL_IF);
1782 const uint64_t u64NowSys = RTTimeSystemNanoTS();
1783 const uint64_t u64NowGip = RTTimeNanoTS();
1784 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
1785
1786 /*
1787 * Go to sleep if we must...
1788 * Cap the sleep time to 1 second to be on the safe side.
1789 */
1790 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
1791 if ( u64NowGip < u64ExpireGipTime
1792 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
1793 ? pGVMM->nsMinSleepCompany
1794 : pGVMM->nsMinSleepAlone))
1795 {
1796 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
1797 if (cNsInterval > RT_NS_1SEC)
1798 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
1799 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
1800 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
1801 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
1802 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
1803 gvmmR0UsedUnlock(pGVMM);
1804
1805 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
1806 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
1807 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
1808
1809 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
1810 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
1811
1812 /* Reset the semaphore to try to prevent a few false wake-ups. */
1813 if (rc == VINF_SUCCESS)
1814 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1815 else if (rc == VERR_TIMEOUT)
1816 {
1817 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
1818 rc = VINF_SUCCESS;
1819 }
1820 }
1821 else
1822 {
1823 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
1824 gvmmR0UsedUnlock(pGVMM);
1825 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1826 }
1827
1828 return rc;
1829}
1830
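/*
 * A minimal halt/wake-up sketch (hypothetical ring-0 caller, not taken from the
 * VirtualBox sources): the EMT computes an absolute GIP-time deadline with
 * RTTimeNanoTS() and blocks in GVMMR0SchedHalt(); another thread can release it
 * early via GVMMR0SchedWakeUp().
 *
 *     uint64_t const u64Deadline = RTTimeNanoTS() + 1000000;   // sleep at most 1 ms
 *     int rc = GVMMR0SchedHalt(pVM, idCpu, u64Deadline);
 *     if (rc == VERR_INTERRUPTED)
 *         return rc;      // a signal is pending for this thread
 *     AssertRC(rc);       // VINF_SUCCESS covers both timeout and explicit wake-up
 */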
1831
1832/**
1833 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
1834 * a sleeping EMT.
1835 *
1836 * @retval VINF_SUCCESS if successfully woken up.
1837 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1838 *
1839 * @param pGVM The global (ring-0) VM structure.
1840 * @param pGVCpu The global (ring-0) VCPU structure.
1841 */
1842DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
1843{
1844 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
1845
1846 /*
1847 * Signal the semaphore regardless of whether the EMT is currently blocked on it.
1848 *
1849 * The reason for this is that there is absolutely no way we can be 100%
1850 * certain that it isn't *about* to go to sleep on it and just got
1851 * delayed a bit en route. So, we will always signal the semaphore when
1852 * it is flagged as halted in the VMM.
1853 */
1854/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
1855 int rc;
1856 if (pGVCpu->gvmm.s.u64HaltExpire)
1857 {
1858 rc = VINF_SUCCESS;
1859 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
1860 }
1861 else
1862 {
1863 rc = VINF_GVM_NOT_BLOCKED;
1864 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
1865 }
1866
1867 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
1868 AssertRC(rc2);
1869
1870 return rc;
1871}
1872
1873
1874/**
1875 * Wakes up the halted EMT thread so it can service a pending request.
1876 *
1877 * @returns VBox status code.
1878 * @retval VINF_SUCCESS if successfully woken up.
1879 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1880 *
1881 * @param pVM Pointer to the shared VM structure.
1882 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1883 * @param fTakeUsedLock Whether to take the used lock.
1884 * @thread Any but EMT.
1885 */
1886GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1887{
1888 /*
1889 * Validate input and take the UsedLock.
1890 */
1891 PGVM pGVM;
1892 PGVMM pGVMM;
1893 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1894 if (RT_SUCCESS(rc))
1895 {
1896 if (idCpu < pGVM->cCpus)
1897 {
1898 /*
1899 * Do the actual job.
1900 */
1901 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
1902
1903 if (fTakeUsedLock)
1904 {
1905 /*
1906 * While we're here, do a round of scheduling.
1907 */
1908 Assert(ASMGetFlags() & X86_EFL_IF);
1909 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
1910 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
1911 }
1912 }
1913 else
1914 rc = VERR_INVALID_CPU_ID;
1915
1916 if (fTakeUsedLock)
1917 {
1918 int rc2 = gvmmR0UsedUnlock(pGVMM);
1919 AssertRC(rc2);
1920 }
1921 }
1922
1923 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
1924 return rc;
1925}
1926
1927
1928/**
1929 * Wakes up the halted EMT thread so it can service a pending request.
1930 *
1931 * @returns VBox status code.
1932 * @retval VINF_SUCCESS if successfully woken up.
1933 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1934 *
1935 * @param pVM Pointer to the shared VM structure.
1936 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1937 * @thread Any but EMT.
1938 */
1939GVMMR0DECL(int) GVMMR0SchedWakeUp(PVM pVM, VMCPUID idCpu)
1940{
1941 return GVMMR0SchedWakeUpEx(pVM, idCpu, true /* fTakeUsedLock */);
1942}
1943
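/*
 * A minimal wake-up sketch (hypothetical non-EMT caller, not taken from the
 * VirtualBox sources): after queueing work for the target EMT, kick it out of
 * GVMMR0SchedHalt(). VINF_GVM_NOT_BLOCKED merely means the EMT was not halted.
 *
 *     int rc = GVMMR0SchedWakeUp(pVM, idTargetCpu);   // idTargetCpu: the EMT to wake
 *     Assert(rc == VINF_SUCCESS || rc == VINF_GVM_NOT_BLOCKED);
 */
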
1944/**
1945 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
1946 * the Virtual CPU if it's still busy executing guest code.
1947 *
1948 * @returns VBox status code.
1949 * @retval VINF_SUCCESS if poked successfully.
1950 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1951 *
1952 * @param pGVM The global (ring-0) VM structure.
1953 * @param pVCpu The Virtual CPU handle.
1954 */
1955DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
1956{
1957 pGVM->gvmm.s.StatsSched.cPokeCalls++;
1958
1959 RTCPUID idHostCpu = pVCpu->idHostCpu;
1960 if ( idHostCpu == NIL_RTCPUID
1961 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
1962 {
1963 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
1964 return VINF_GVM_NOT_BUSY_IN_GC;
1965 }
1966
1967 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
1968 RTMpPokeCpu(idHostCpu);
1969 return VINF_SUCCESS;
1970}
1971
1972/**
1973 * Pokes an EMT if it's still busy running guest code.
1974 *
1975 * @returns VBox status code.
1976 * @retval VINF_SUCCESS if poked successfully.
1977 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1978 *
1979 * @param pVM Pointer to the shared VM structure.
1980 * @param idCpu The ID of the virtual CPU to poke.
1981 * @param fTakeUsedLock Whether to take the used lock.
1982 */
1983GVMMR0DECL(int) GVMMR0SchedPokeEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1984{
1985 /*
1986 * Validate input and take the UsedLock.
1987 */
1988 PGVM pGVM;
1989 PGVMM pGVMM;
1990 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1991 if (RT_SUCCESS(rc))
1992 {
1993 if (idCpu < pGVM->cCpus)
1994 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
1995 else
1996 rc = VERR_INVALID_CPU_ID;
1997
1998 if (fTakeUsedLock)
1999 {
2000 int rc2 = gvmmR0UsedUnlock(pGVMM);
2001 AssertRC(rc2);
2002 }
2003 }
2004
2005 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2006 return rc;
2007}
2008
2009
2010/**
2011 * Pokes an EMT if it's still busy running guest code.
2012 *
2013 * @returns VBox status code.
2014 * @retval VINF_SUCCESS if poked successfully.
2015 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2016 *
2017 * @param pVM Pointer to the shared VM structure.
2018 * @param idCpu The ID of the virtual CPU to poke.
2019 */
2020GVMMR0DECL(int) GVMMR0SchedPoke(PVM pVM, VMCPUID idCpu)
2021{
2022 return GVMMR0SchedPokeEx(pVM, idCpu, true /* fTakeUsedLock */);
2023}
2024
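/*
 * A minimal poke sketch (hypothetical caller, not taken from the VirtualBox
 * sources): GVMMR0SchedWakeUp() releases an EMT halted in ring-0, while
 * GVMMR0SchedPoke() interrupts the host CPU of an EMT that is busy executing
 * guest code so it takes a VM exit. A caller that does not know the EMT state
 * can try both:
 *
 *     int rc = GVMMR0SchedWakeUp(pVM, idCpu);
 *     if (rc == VINF_GVM_NOT_BLOCKED)
 *         rc = GVMMR0SchedPoke(pVM, idCpu);   // VINF_GVM_NOT_BUSY_IN_GC if it already left GC
 */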
2025
2026/**
2027 * Wakes up a set of halted EMT threads so they can service pending requests.
2028 *
2029 * @returns VBox status code, no informational stuff.
2030 *
2031 * @param pVM Pointer to the shared VM structure.
2032 * @param pSleepSet The set of sleepers to wake up.
2033 * @param pPokeSet The set of CPUs to poke.
2034 */
2035GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2036{
2037 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2038 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2039 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2040
2041 /*
2042 * Validate input and take the UsedLock.
2043 */
2044 PGVM pGVM;
2045 PGVMM pGVMM;
2046 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /* fTakeUsedLock */);
2047 if (RT_SUCCESS(rc))
2048 {
2049 rc = VINF_SUCCESS;
2050 VMCPUID idCpu = pGVM->cCpus;
2051 while (idCpu-- > 0)
2052 {
2053 /* Don't try to poke or wake up ourselves. */
2054 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2055 continue;
2056
2057 /* just ignore errors for now. */
2058 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2059 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2060 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2061 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2062 }
2063
2064 int rc2 = gvmmR0UsedUnlock(pGVMM);
2065 AssertRC(rc2);
2066 }
2067
2068 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2069 return rc;
2070}
2071
2072
2073/**
2074 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2075 *
2076 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2077 * @param pVM Pointer to the shared VM structure.
2078 * @param pReq The request packet.
2079 */
2080GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2081{
2082 /*
2083 * Validate input and pass it on.
2084 */
2085 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2086 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2087
2088 return GVMMR0SchedWakeUpAndPokeCpus(pVM, &pReq->SleepSet, &pReq->PokeSet);
2089}
2090
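/*
 * A minimal request sketch (hypothetical dispatcher-side caller, not taken from
 * the VirtualBox sources), assuming the usual VMCPUSET_EMPTY/VMCPUSET_ADD
 * helpers from VBox/vm.h and a standard SUPVMMR0REQHDR header: wake up VCPU 0
 * if it is sleeping and poke VCPU 1 if it is busy in guest context.
 *
 *     GVMMSCHEDWAKEUPANDPOKECPUSREQ Req;
 *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *     Req.Hdr.cbReq    = sizeof(Req);
 *     VMCPUSET_EMPTY(&Req.SleepSet);
 *     VMCPUSET_EMPTY(&Req.PokeSet);
 *     VMCPUSET_ADD(&Req.SleepSet, 0);     // VCPU 0: wake if halted
 *     VMCPUSET_ADD(&Req.PokeSet, 1);      // VCPU 1: poke if executing guest code
 *     int rc = GVMMR0SchedWakeUpAndPokeCpusReq(pVM, &Req);
 */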
2091
2092
2093/**
2094 * Poll the schedule to see if someone else should get a chance to run.
2095 *
2096 * This is a bit hackish and will not work too well if the machine is
2097 * under heavy load from non-VM processes.
2098 *
2099 * @returns VINF_SUCCESS if not yielded.
2100 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2101 * @param pVM Pointer to the shared VM structure.
2102 * @param idCpu The Virtual CPU ID of the calling EMT.
2104 * @param fYield Whether to yield or not.
2105 * This is for when we're spinning in the halt loop.
2106 * @thread EMT(idCpu).
2107 */
2108GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield)
2109{
2110 /*
2111 * Validate input.
2112 */
2113 PGVM pGVM;
2114 PGVMM pGVMM;
2115 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
2116 if (RT_SUCCESS(rc))
2117 {
2118 rc = gvmmR0UsedLock(pGVMM);
2119 AssertRC(rc);
2120 pGVM->gvmm.s.StatsSched.cPollCalls++;
2121
2122 Assert(ASMGetFlags() & X86_EFL_IF);
2123 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2124
2125 if (!fYield)
2126 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2127 else
2128 {
2129 /** @todo implement this... */
2130 rc = VERR_NOT_IMPLEMENTED;
2131 }
2132
2133 gvmmR0UsedUnlock(pGVMM);
2134 }
2135
2136 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2137 return rc;
2138}
2139
2140
2141#ifdef GVMM_SCHED_WITH_PPT
2142/**
2143 * Timer callback for the periodic preemption timer.
2144 *
2145 * @param pTimer The timer handle.
2146 * @param pvUser Pointer to the per cpu structure.
2147 * @param iTick The current tick.
2148 */
2149static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2150{
2151 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2152
2153 /*
2154 * Termination check
2155 */
2156 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2157 return;
2158
2159 /*
2160 * Do the housekeeping.
2161 */
2162 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2163 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2164
2165 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2166 {
2167 /*
2168 * Historicize the max frequency.
2169 */
2170 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2171 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2172 pCpu->Ppt.iTickHistorization = 0;
2173 pCpu->Ppt.uDesiredHz = 0;
2174
2175 /*
2176 * Check if the timer frequency needs changing.
2177 */
2178 uint32_t uHistMaxHz = 0;
2179 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2180 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2181 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2182 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2183 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2184 else if (uHistMaxHz)
2185 {
2186 /*
2187 * Reprogram it.
2188 */
2189 pCpu->Ppt.cChanges++;
2190 pCpu->Ppt.iTickHistorization = 0;
2191 pCpu->Ppt.uTimerHz = uHistMaxHz;
2192 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2193 pCpu->Ppt.cNsInterval = cNsInterval;
2194 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2195 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2196 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2197 / cNsInterval;
2198 else
2199 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2200 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2201
2202 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2203 RTTimerChangeInterval(pTimer, cNsInterval);
2204 }
2205 else
2206 {
2207 /*
2208 * Stop it.
2209 */
2210 pCpu->Ppt.fStarted = false;
2211 pCpu->Ppt.uTimerHz = 0;
2212 pCpu->Ppt.cNsInterval = 0;
2213 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2214
2215 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2216 RTTimerStop(pTimer);
2217 }
2218 }
2219 else
2220 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2221}
2222#endif /* GVMM_SCHED_WITH_PPT */
2223
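/*
 * Worked example for the reprogramming arithmetic in the callback above,
 * assuming GVMMHOSTCPU_PPT_HIST_INTERVAL_NS is 20000000 (20 ms; see
 * GVMMR0Internal.h for the authoritative value): with uHistMaxHz = 1000 the
 * new timer interval is cNsInterval = RT_NS_1SEC / 1000 = 1000000 ns, and
 *
 *     cTicksHistoriziationInterval = (20000000 + 20000000 / 2 - 1) / 1000000 = 29
 *
 * so the frequency history advances roughly every 29 ticks (about 1.5 history
 * intervals) instead of on every single timer tick.
 */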
2224
2225/**
2226 * Updates the periodic preemption timer for the calling CPU.
2227 *
2228 * The caller must have disabled preemption!
2229 * The caller must check that the host can do high resolution timers.
2230 *
2231 * @param pVM The VM handle.
2232 * @param idHostCpu The current host CPU id.
2233 * @param uHz The desired frequency.
2234 */
2235GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2236{
2237#ifdef GVMM_SCHED_WITH_PPT
2238 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2239 Assert(RTTimerCanDoHighResolution());
2240
2241 /*
2242 * Resolve the per CPU data.
2243 */
2244 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2245 PGVMM pGVMM = g_pGVMM;
2246 if ( !VALID_PTR(pGVMM)
2247 || pGVMM->u32Magic != GVMM_MAGIC)
2248 return;
2249 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2250 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2251 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2252 && pCpu->idCpu == idHostCpu,
2253 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2254
2255 /*
2256 * Check whether we need to do anything about the timer.
2257 * We have to be a little bit careful since we might be racing the timer
2258 * callback here.
2259 */
2260 if (uHz > 16384)
2261 uHz = 16384; /** @todo add a query method for this! */
2262 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2263 && uHz >= pCpu->Ppt.uMinHz
2264 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2265 {
2266 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2267 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2268
2269 pCpu->Ppt.uDesiredHz = uHz;
2270 uint32_t cNsInterval = 0;
2271 if (!pCpu->Ppt.fStarted)
2272 {
2273 pCpu->Ppt.cStarts++;
2274 pCpu->Ppt.fStarted = true;
2275 pCpu->Ppt.fStarting = true;
2276 pCpu->Ppt.iTickHistorization = 0;
2277 pCpu->Ppt.uTimerHz = uHz;
2278 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2279 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2280 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2281 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2282 / cNsInterval;
2283 else
2284 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2285 }
2286
2287 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2288
2289 if (cNsInterval)
2290 {
2291 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2292 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2293 AssertRC(rc);
2294
2295 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2296 if (RT_FAILURE(rc))
2297 pCpu->Ppt.fStarted = false;
2298 pCpu->Ppt.fStarting = false;
2299 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2300 }
2301 }
2302#endif /* GVMM_SCHED_WITH_PPT */
2303}
2304
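/*
 * A minimal caller sketch (hypothetical, not the actual VMMR0.cpp code): the
 * function requires preemption to be disabled and a host with high resolution
 * timers, so a caller typically brackets it like this (uHz being the desired
 * frequency hint reported by the VMM):
 *
 *     RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
 *     RTThreadPreemptDisable(&PreemptState);
 *     if (RTTimerCanDoHighResolution())
 *         GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, RTMpCpuId(), uHz);
 *     // ... switch to VT-x/AMD-V/raw-mode execution ...
 *     RTThreadPreemptRestore(&PreemptState);
 */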
2305
2306/**
2307 * Retrieves the GVMM statistics visible to the caller.
2308 *
2309 * @returns VBox status code.
2310 *
2311 * @param pStats Where to put the statistics.
2312 * @param pSession The current session.
2313 * @param pVM The VM to obtain statistics for. Optional.
2314 */
2315GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2316{
2317 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2318
2319 /*
2320 * Validate input.
2321 */
2322 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2323 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2324 pStats->cVMs = 0; /* (crash before taking the sem...) */
2325
2326 /*
2327 * Take the lock and get the VM statistics.
2328 */
2329 PGVMM pGVMM;
2330 if (pVM)
2331 {
2332 PGVM pGVM;
2333 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2334 if (RT_FAILURE(rc))
2335 return rc;
2336 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2337 }
2338 else
2339 {
2340 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
2341 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2342
2343 int rc = gvmmR0UsedLock(pGVMM);
2344 AssertRCReturn(rc, rc);
2345 }
2346
2347 /*
2348 * Enumerate the VMs and add the ones visible to the caller to the statistics.
2349 */
2350 pStats->cVMs = 0;
2351 pStats->cEMTs = 0;
2352 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2353
2354 for (unsigned i = pGVMM->iUsedHead;
2355 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2356 i = pGVMM->aHandles[i].iNext)
2357 {
2358 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2359 void *pvObj = pGVMM->aHandles[i].pvObj;
2360 if ( VALID_PTR(pvObj)
2361 && VALID_PTR(pGVM)
2362 && pGVM->u32Magic == GVM_MAGIC
2363 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2364 {
2365 pStats->cVMs++;
2366 pStats->cEMTs += pGVM->cCpus;
2367
2368 pStats->SchedSum.cHaltCalls += pGVM->gvmm.s.StatsSched.cHaltCalls;
2369 pStats->SchedSum.cHaltBlocking += pGVM->gvmm.s.StatsSched.cHaltBlocking;
2370 pStats->SchedSum.cHaltTimeouts += pGVM->gvmm.s.StatsSched.cHaltTimeouts;
2371 pStats->SchedSum.cHaltNotBlocking += pGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2372 pStats->SchedSum.cHaltWakeUps += pGVM->gvmm.s.StatsSched.cHaltWakeUps;
2373
2374 pStats->SchedSum.cWakeUpCalls += pGVM->gvmm.s.StatsSched.cWakeUpCalls;
2375 pStats->SchedSum.cWakeUpNotHalted += pGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2376 pStats->SchedSum.cWakeUpWakeUps += pGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2377
2378 pStats->SchedSum.cPokeCalls += pGVM->gvmm.s.StatsSched.cPokeCalls;
2379 pStats->SchedSum.cPokeNotBusy += pGVM->gvmm.s.StatsSched.cPokeNotBusy;
2380
2381 pStats->SchedSum.cPollCalls += pGVM->gvmm.s.StatsSched.cPollCalls;
2382 pStats->SchedSum.cPollHalts += pGVM->gvmm.s.StatsSched.cPollHalts;
2383 pStats->SchedSum.cPollWakeUps += pGVM->gvmm.s.StatsSched.cPollWakeUps;
2384 }
2385 }
2386
2387 /*
2388 * Copy out the per host CPU statistics.
2389 */
2390 uint32_t iDstCpu = 0;
2391 uint32_t cSrcCpus = pGVMM->cHostCpus;
2392 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2393 {
2394 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2395 {
2396 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2397 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2398#ifdef GVMM_SCHED_WITH_PPT
2399 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2400 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2401 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2402 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2403#else
2404 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2405 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2406 pStats->aHostCpus[iDstCpu].cChanges = 0;
2407 pStats->aHostCpus[iDstCpu].cStarts = 0;
2408#endif
2409 iDstCpu++;
2410 }
2411 }
2412 pStats->cHostCpus = iDstCpu;
2413
2414 gvmmR0UsedUnlock(pGVMM);
2415
2416 return VINF_SUCCESS;
2417}
2418
2419
2420/**
2421 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2422 *
2423 * @returns see GVMMR0QueryStatistics.
2424 * @param pVM Pointer to the shared VM structure. Optional.
2425 * @param pReq The request packet.
2426 */
2427GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq)
2428{
2429 /*
2430 * Validate input and pass it on.
2431 */
2432 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2433 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2434
2435 return GVMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pVM);
2436}
2437
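/*
 * A minimal request sketch (hypothetical caller, not taken from the VirtualBox
 * sources, assuming a standard SUPVMMR0REQHDR header): querying the scheduler
 * statistics through the request wrapper. pSession is the caller's support
 * driver session; passing a pVM additionally fills SchedVM with that VM's own
 * counters.
 *
 *     GVMMQUERYSTATISTICSSREQ Req;
 *     RT_ZERO(Req);
 *     Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *     Req.Hdr.cbReq    = sizeof(Req);
 *     Req.pSession     = pSession;
 *     int rc = GVMMR0QueryStatisticsReq(pVM, &Req);
 *     if (RT_SUCCESS(rc))
 *         LogRel(("GVMM: %u VMs, %u EMTs\n", Req.Stats.cVMs, Req.Stats.cEMTs));
 */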
2438
2439/**
2440 * Resets the specified GVMM statistics.
2441 *
2442 * @returns VBox status code.
2443 *
2444 * @param pStats Which statistics to reset; non-zero fields indicate which to reset.
2445 * @param pSession The current session.
2446 * @param pVM The VM to reset statistics for. Optional.
2447 */
2448GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2449{
2450 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2451
2452 /*
2453 * Validate input.
2454 */
2455 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2456 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2457
2458 /*
2459 * Take the lock and get the VM statistics.
2460 */
2461 PGVMM pGVMM;
2462 if (pVM)
2463 {
2464 PGVM pGVM;
2465 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2466 if (RT_FAILURE(rc))
2467 return rc;
2468# define MAYBE_RESET_FIELD(field) \
2469 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2470 MAYBE_RESET_FIELD(cHaltCalls);
2471 MAYBE_RESET_FIELD(cHaltBlocking);
2472 MAYBE_RESET_FIELD(cHaltTimeouts);
2473 MAYBE_RESET_FIELD(cHaltNotBlocking);
2474 MAYBE_RESET_FIELD(cHaltWakeUps);
2475 MAYBE_RESET_FIELD(cWakeUpCalls);
2476 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2477 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2478 MAYBE_RESET_FIELD(cPokeCalls);
2479 MAYBE_RESET_FIELD(cPokeNotBusy);
2480 MAYBE_RESET_FIELD(cPollCalls);
2481 MAYBE_RESET_FIELD(cPollHalts);
2482 MAYBE_RESET_FIELD(cPollWakeUps);
2483# undef MAYBE_RESET_FIELD
2484 }
2485 else
2486 {
2487 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
2488
2489 int rc = gvmmR0UsedLock(pGVMM);
2490 AssertRCReturn(rc, rc);
2491 }
2492
2493 /*
2494 * Enumerate the VMs and reset the requested statistics for the ones visible to the caller.
2495 */
2496 if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0))
2497 {
2498 for (unsigned i = pGVMM->iUsedHead;
2499 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2500 i = pGVMM->aHandles[i].iNext)
2501 {
2502 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2503 void *pvObj = pGVMM->aHandles[i].pvObj;
2504 if ( VALID_PTR(pvObj)
2505 && VALID_PTR(pGVM)
2506 && pGVM->u32Magic == GVM_MAGIC
2507 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2508 {
2509# define MAYBE_RESET_FIELD(field) \
2510 do { if (pStats->SchedSum. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2511 MAYBE_RESET_FIELD(cHaltCalls);
2512 MAYBE_RESET_FIELD(cHaltBlocking);
2513 MAYBE_RESET_FIELD(cHaltTimeouts);
2514 MAYBE_RESET_FIELD(cHaltNotBlocking);
2515 MAYBE_RESET_FIELD(cHaltWakeUps);
2516 MAYBE_RESET_FIELD(cWakeUpCalls);
2517 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2518 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2519 MAYBE_RESET_FIELD(cPokeCalls);
2520 MAYBE_RESET_FIELD(cPokeNotBusy);
2521 MAYBE_RESET_FIELD(cPollCalls);
2522 MAYBE_RESET_FIELD(cPollHalts);
2523 MAYBE_RESET_FIELD(cPollWakeUps);
2524# undef MAYBE_RESET_FIELD
2525 }
2526 }
2527 }
2528
2529 gvmmR0UsedUnlock(pGVMM);
2530
2531 return VINF_SUCCESS;
2532}
2533
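/*
 * A minimal reset sketch (hypothetical caller, not taken from the VirtualBox
 * sources): the input statistics act as a mask, so any non-zero field requests
 * that the corresponding counter be cleared. Here only the halt counters of
 * one VM are reset:
 *
 *     GVMMSTATS Stats;
 *     RT_ZERO(Stats);
 *     Stats.SchedVM.cHaltCalls    = 1;    // non-zero == reset this counter
 *     Stats.SchedVM.cHaltBlocking = 1;
 *     Stats.SchedVM.cHaltTimeouts = 1;
 *     int rc = GVMMR0ResetStatistics(&Stats, pSession, pVM);
 */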
2534
2535/**
2536 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2537 *
2538 * @returns see GVMMR0ResetStatistics.
2539 * @param pVM Pointer to the shared VM structure. Optional.
2540 * @param pReq The request packet.
2541 */
2542GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PVM pVM, PGVMMRESETSTATISTICSSREQ pReq)
2543{
2544 /*
2545 * Validate input and pass it on.
2546 */
2547 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2548 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2549
2550 return GVMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pVM);
2551}
2552