VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 39913

Last change on this file since 39913 was 39402, checked in by vboxsync, 13 years ago

VMM: don't use generic IPE status codes, use specific ones. Part 1.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 87.1 KB
1/* $Id: GVMMR0.cpp 39402 2011-11-23 16:25:04Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-CPU
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS(((PGVMMHOSTCPU)0)->Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * TMCalcHostTimerFrequency() takes the max TMTimerSetFrequencyHint() value and
42 * adjusts it by the current catch-up percent, warp drive percent and some fudge
43 * factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
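 *
 * A minimal usage sketch (illustrative only; the exact signatures here are
 * assumptions, the real call site lives in VMMR0.cpp):
 * @code
 *      uint32_t uHz = TMCalcHostTimerFrequency(pVM, pVCpu);
 *      GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, pVCpu->idHostCpu, uHz);
 *      // ... switch to the VT-x / AMD-V / raw-mode execution environment ...
 * @endcode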
46 */
47
48
49/*******************************************************************************
50* Header Files *
51*******************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#include <VBox/param.h>
61#include <VBox/err.h>
62#include <iprt/asm.h>
63#include <iprt/asm-amd64-x86.h>
64#include <iprt/mem.h>
65#include <iprt/semaphore.h>
66#include <iprt/time.h>
67#include <VBox/log.h>
68#include <iprt/thread.h>
69#include <iprt/process.h>
70#include <iprt/param.h>
71#include <iprt/string.h>
72#include <iprt/assert.h>
73#include <iprt/mem.h>
74#include <iprt/memobj.h>
75#include <iprt/mp.h>
76#include <iprt/cpuset.h>
77#include <iprt/spinlock.h>
78#include <iprt/timer.h>
79
80
81/*******************************************************************************
82* Defined Constants And Macros *
83*******************************************************************************/
84#if defined(RT_OS_LINUX) || defined(DOXYGEN_RUNNING)
85/** Define this to enable the periodic preemption timer. */
86# define GVMM_SCHED_WITH_PPT
87#endif
88
89
90/*******************************************************************************
91* Structures and Typedefs *
92*******************************************************************************/
93
94/**
95 * Global VM handle.
96 */
97typedef struct GVMHANDLE
98{
99 /** The index of the next handle in the list (free or used). (0 is nil.) */
100 uint16_t volatile iNext;
101 /** Our own index / handle value. */
102 uint16_t iSelf;
103 /** The process ID of the handle owner.
104 * This is used for access checks. */
105 RTPROCESS ProcId;
106 /** The pointer to the ring-0 only (aka global) VM structure. */
107 PGVM pGVM;
108 /** The ring-0 mapping of the shared VM instance data. */
109 PVM pVM;
110 /** The virtual machine object. */
111 void *pvObj;
112 /** The session this VM is associated with. */
113 PSUPDRVSESSION pSession;
114 /** The ring-0 handle of the EMT0 thread.
115 * This is used for ownership checks as well as looking up a VM handle by thread
116 * at times like assertions. */
117 RTNATIVETHREAD hEMT0;
118} GVMHANDLE;
119/** Pointer to a global VM handle. */
120typedef GVMHANDLE *PGVMHANDLE;
121
122/** Number of GVM handles (including the NIL handle). */
123#if HC_ARCH_BITS == 64
124# define GVMM_MAX_HANDLES 8192
125#else
126# define GVMM_MAX_HANDLES 128
127#endif
128
129/**
130 * Per host CPU GVMM data.
131 */
132typedef struct GVMMHOSTCPU
133{
134 /** Magic number (GVMMHOSTCPU_MAGIC). */
135 uint32_t volatile u32Magic;
136 /** The CPU ID. */
137 RTCPUID idCpu;
138 /** The CPU set index. */
139 uint32_t idxCpuSet;
140
141#ifdef GVMM_SCHED_WITH_PPT
142 /** Periodic preemption timer data. */
143 struct
144 {
145 /** The handle to the periodic preemption timer. */
146 PRTTIMER pTimer;
147 /** Spinlock protecting the data below. */
148 RTSPINLOCK hSpinlock;
149 /** The smallest Hz that we need to care about. (static) */
150 uint32_t uMinHz;
151 /** The number of ticks between each historization. */
152 uint32_t cTicksHistoriziationInterval;
153 /** The current historization tick (counting up to
154 * cTicksHistoriziationInterval and then resetting). */
155 uint32_t iTickHistorization;
156 /** The current timer interval. This is set to 0 when inactive. */
157 uint32_t cNsInterval;
158 /** The current timer frequency. This is set to 0 when inactive. */
159 uint32_t uTimerHz;
160 /** The current max frequency reported by the EMTs.
161 * This gets historicized and reset by the timer callback. This is
162 * read without holding the spinlock, so needs atomic updating. */
163 uint32_t volatile uDesiredHz;
164 /** Whether the timer was started or not. */
165 bool volatile fStarted;
166 /** Set if we're starting the timer. */
167 bool volatile fStarting;
168 /** The index of the next history entry (mod it). */
169 uint32_t iHzHistory;
170 /** Historicized uDesiredHz values. The array wraps around, new entries
171 * are added at iHzHistory. This is updated approximately every
172 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
173 uint32_t aHzHistory[8];
174 /** Statistics counter for recording the number of interval changes. */
175 uint32_t cChanges;
176 /** Statistics counter for recording the number of timer starts. */
177 uint32_t cStarts;
178 } Ppt;
179#endif /* GVMM_SCHED_WITH_PPT */
180
181} GVMMHOSTCPU;
182/** Pointer to the per host CPU GVMM data. */
183typedef GVMMHOSTCPU *PGVMMHOSTCPU;
184/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
185#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
186/** The interval one history entry should cover (approximately), given in
187 * nanoseconds. */
188#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
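/* With the 8-entry aHzHistory array above, one full history covers
   8 * 20 ms = ~160 ms, matching the @page pg_gvmm description at the top of the file. */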
189
190
191/**
192 * The GVMM instance data.
193 */
194typedef struct GVMM
195{
196 /** Eyecatcher / magic. */
197 uint32_t u32Magic;
198 /** The index of the head of the free handle chain. (0 is nil.) */
199 uint16_t volatile iFreeHead;
200 /** The index of the head of the active handle chain. (0 is nil.) */
201 uint16_t volatile iUsedHead;
202 /** The number of VMs. */
203 uint16_t volatile cVMs;
204 /** Alignment padding. */
205 uint16_t u16Reserved;
206 /** The number of EMTs. */
207 uint32_t volatile cEMTs;
208 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
209 uint32_t volatile cHaltedEMTs;
210 /** Alignment padding. */
211 uint32_t u32Alignment;
212 /** When the next halted or sleeping EMT will wake up.
213 * This is set to 0 when it needs recalculating and to UINT64_MAX when
214 * there are no halted or sleeping EMTs in the GVMM. */
215 uint64_t uNsNextEmtWakeup;
216 /** The lock used to serialize VM creation, destruction and associated events that
217 * aren't performance critical. Owners may acquire the list lock. */
218 RTSEMFASTMUTEX CreateDestroyLock;
219 /** The lock used to serialize used list updates and accesses.
220 * This indirectly includes scheduling since the scheduler will have to walk the
221 * used list to examine running VMs. Owners may not acquire any other locks. */
222 RTSEMFASTMUTEX UsedLock;
223 /** The handle array.
224 * The size of this array defines the maximum number of currently running VMs.
225 * The first entry is unused as it represents the NIL handle. */
226 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
227
228 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
229 * The number of EMTs that means we no longer consider ourselves alone on a
230 * CPU/Core.
231 */
232 uint32_t cEMTsMeansCompany;
233 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
234 * The minimum sleep time for when we're alone, in nanoseconds.
235 */
236 uint32_t nsMinSleepAlone;
237 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
238 * The minimum sleep time for when we've got company, in nanoseconds.
239 */
240 uint32_t nsMinSleepCompany;
241 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
242 * The limit for the first round of early wakeups, given in nanoseconds.
243 */
244 uint32_t nsEarlyWakeUp1;
245 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
246 * The limit for the second round of early wakeups, given in nanoseconds.
247 */
248 uint32_t nsEarlyWakeUp2;
249
250 /** The number of entries in the host CPU array (aHostCpus). */
251 uint32_t cHostCpus;
252 /** Per host CPU data (variable length). */
253 GVMMHOSTCPU aHostCpus[1];
254} GVMM;
255/** Pointer to the GVMM instance data. */
256typedef GVMM *PGVMM;
257
258/** The GVMM::u32Magic value (Charlie Haden). */
259#define GVMM_MAGIC UINT32_C(0x19370806)
260
261
262
263/*******************************************************************************
264* Global Variables *
265*******************************************************************************/
266/** Pointer to the GVMM instance data.
267 * (Just my general dislike for global variables.) */
268static PGVMM g_pGVMM = NULL;
269
270/** Macro for obtaining and validating the g_pGVMM pointer.
271 * On failure it will return from the invoking function with the specified return value.
272 *
273 * @param pGVMM The name of the pGVMM variable.
274 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
275 * status codes.
276 */
277#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
278 do { \
279 (pGVMM) = g_pGVMM;\
280 AssertPtrReturn((pGVMM), (rc)); \
281 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
282 } while (0)
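
/* Usage sketch (illustrative only; this mirrors the pattern of GVMMR0SetConfig and
 * the other exported APIs below, "GVMMR0SomeApi" is a placeholder name):
 *
 *      GVMMR0DECL(int) GVMMR0SomeApi(...)
 *      {
 *          PGVMM pGVMM;
 *          GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
 *          ...
 *      }
 */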
283
284/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
285 * On failure it will return from the invoking function.
286 *
287 * @param pGVMM The name of the pGVMM variable.
288 */
289#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
290 do { \
291 (pGVMM) = g_pGVMM;\
292 AssertPtrReturnVoid((pGVMM)); \
293 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
294 } while (0)
295
296
297/*******************************************************************************
298* Internal Functions *
299*******************************************************************************/
300static void gvmmR0InitPerVMData(PGVM pGVM);
301static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
302static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
303static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
304#ifdef GVMM_SCHED_WITH_PPT
305static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
306#endif
307
308
309/**
310 * Initializes the GVMM.
311 *
312 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
313 *
314 * @returns VBox status code.
315 */
316GVMMR0DECL(int) GVMMR0Init(void)
317{
318 LogFlow(("GVMMR0Init:\n"));
319
320 /*
321 * Allocate and initialize the instance data.
322 */
323 uint32_t cHostCpus = RTMpGetArraySize();
324 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
325
326 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
327 if (!pGVMM)
328 return VERR_NO_MEMORY;
329 int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock);
330 if (RT_SUCCESS(rc))
331 {
332 rc = RTSemFastMutexCreate(&pGVMM->UsedLock);
333 if (RT_SUCCESS(rc))
334 {
335 pGVMM->u32Magic = GVMM_MAGIC;
336 pGVMM->iUsedHead = 0;
337 pGVMM->iFreeHead = 1;
338
339 /* the nil handle */
340 pGVMM->aHandles[0].iSelf = 0;
341 pGVMM->aHandles[0].iNext = 0;
342
343 /* the tail */
344 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
345 pGVMM->aHandles[i].iSelf = i;
346 pGVMM->aHandles[i].iNext = 0; /* nil */
347
348 /* the rest */
349 while (i-- > 1)
350 {
351 pGVMM->aHandles[i].iSelf = i;
352 pGVMM->aHandles[i].iNext = i + 1;
353 }
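 /* Illustrative summary of the list layout at this point: aHandles[0] is the nil
    entry, iFreeHead is 1, and the free chain runs
    1 -> 2 -> ... -> GVMM_MAX_HANDLES - 1 -> 0 (nil). */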
354
355 /* The default configuration values. */
356 uint32_t cNsResolution = RTSemEventMultiGetResolution();
357 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
358 if (cNsResolution >= 5*RT_NS_100US)
359 {
360 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
361 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
362 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
363 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
364 }
365 else if (cNsResolution > RT_NS_100US)
366 {
367 pGVMM->nsMinSleepAlone = cNsResolution / 2;
368 pGVMM->nsMinSleepCompany = cNsResolution / 4;
369 pGVMM->nsEarlyWakeUp1 = 0;
370 pGVMM->nsEarlyWakeUp2 = 0;
371 }
372 else
373 {
374 pGVMM->nsMinSleepAlone = 2000;
375 pGVMM->nsMinSleepCompany = 2000;
376 pGVMM->nsEarlyWakeUp1 = 0;
377 pGVMM->nsEarlyWakeUp2 = 0;
378 }
379
380 /* The host CPU data. */
381 pGVMM->cHostCpus = cHostCpus;
382 uint32_t iCpu = cHostCpus;
383 RTCPUSET PossibleSet;
384 RTMpGetSet(&PossibleSet);
385 while (iCpu-- > 0)
386 {
387 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
388#ifdef GVMM_SCHED_WITH_PPT
389 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
390 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
391 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
392 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
393 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
394 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
395 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
396 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
397 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
398 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
399 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
400 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
401#endif
402
403 if (RTCpuSetIsMember(&PossibleSet, iCpu))
404 {
405 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
406 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
407
408#ifdef GVMM_SCHED_WITH_PPT
409 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
410 50*1000*1000 /* whatever */,
411 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
412 gvmmR0SchedPeriodicPreemptionTimerCallback,
413 &pGVMM->aHostCpus[iCpu]);
414 if (RT_SUCCESS(rc))
415 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
416 if (RT_FAILURE(rc))
417 {
418 while (iCpu < cHostCpus)
419 {
420 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
421 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
422 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
423 iCpu++;
424 }
425 break;
426 }
427#endif
428 }
429 else
430 {
431 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
432 pGVMM->aHostCpus[iCpu].u32Magic = 0;
433 }
434 }
435 if (RT_SUCCESS(rc))
436 {
437 g_pGVMM = pGVMM;
438 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
439 return VINF_SUCCESS;
440 }
441
442 /* bail out. */
443 RTSemFastMutexDestroy(pGVMM->UsedLock);
444 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
445 }
446 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
447 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
448 }
449
450 RTMemFree(pGVMM);
451 return rc;
452}
453
454
455/**
456 * Terminates the GVMM.
457 *
458 * This is called while owning the loader semaphore (see supdrvLdrFree()).
459 * And unless something is wrong, there should be absolutely no VMs
460 * registered at this point.
461 */
462GVMMR0DECL(void) GVMMR0Term(void)
463{
464 LogFlow(("GVMMR0Term:\n"));
465
466 PGVMM pGVMM = g_pGVMM;
467 g_pGVMM = NULL;
468 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
469 {
470 SUPR0Printf("GVMMR0Term: pGVMM=%p\n", pGVMM);
471 return;
472 }
473
474 /*
475 * First of all, stop all active timers.
476 */
477 uint32_t cActiveTimers = 0;
478 uint32_t iCpu = pGVMM->cHostCpus;
479 while (iCpu-- > 0)
480 {
481 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
482#ifdef GVMM_SCHED_WITH_PPT
483 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
484 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
485 cActiveTimers++;
486#endif
487 }
488 if (cActiveTimers)
489 RTThreadSleep(1); /* fudge */
490
491 /*
492 * Invalidate the instance data and free the resources.
493 */
494 pGVMM->u32Magic = ~GVMM_MAGIC;
495 RTSemFastMutexDestroy(pGVMM->UsedLock);
496 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
497 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
498 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
499
500 pGVMM->iFreeHead = 0;
501 if (pGVMM->iUsedHead)
502 {
503 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
504 pGVMM->iUsedHead = 0;
505 }
506
507#ifdef GVMM_SCHED_WITH_PPT
508 iCpu = pGVMM->cHostCpus;
509 while (iCpu-- > 0)
510 {
511 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
512 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
513 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
514 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
515 }
516#endif
517
518 RTMemFree(pGVMM);
519}
520
521
522/**
523 * A quick hack for setting global config values.
524 *
525 * @returns VBox status code.
526 *
527 * @param pSession The session handle. Used for authentication.
528 * @param pszName The variable name.
529 * @param u64Value The new value.
530 */
531GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
532{
533 /*
534 * Validate input.
535 */
536 PGVMM pGVMM;
537 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
538 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
539 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
540
541 /*
542 * String switch time!
543 */
544 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
545 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
546 int rc = VINF_SUCCESS;
547 pszName += sizeof("/GVMM/") - 1;
548 if (!strcmp(pszName, "cEMTsMeansCompany"))
549 {
550 if (u64Value <= UINT32_MAX)
551 pGVMM->cEMTsMeansCompany = u64Value;
552 else
553 rc = VERR_OUT_OF_RANGE;
554 }
555 else if (!strcmp(pszName, "MinSleepAlone"))
556 {
557 if (u64Value <= RT_NS_100MS)
558 pGVMM->nsMinSleepAlone = u64Value;
559 else
560 rc = VERR_OUT_OF_RANGE;
561 }
562 else if (!strcmp(pszName, "MinSleepCompany"))
563 {
564 if (u64Value <= RT_NS_100MS)
565 pGVMM->nsMinSleepCompany = u64Value;
566 else
567 rc = VERR_OUT_OF_RANGE;
568 }
569 else if (!strcmp(pszName, "EarlyWakeUp1"))
570 {
571 if (u64Value <= RT_NS_100MS)
572 pGVMM->nsEarlyWakeUp1 = u64Value;
573 else
574 rc = VERR_OUT_OF_RANGE;
575 }
576 else if (!strcmp(pszName, "EarlyWakeUp2"))
577 {
578 if (u64Value <= RT_NS_100MS)
579 pGVMM->nsEarlyWakeUp2 = u64Value;
580 else
581 rc = VERR_OUT_OF_RANGE;
582 }
583 else
584 rc = VERR_CFGM_VALUE_NOT_FOUND;
585 return rc;
586}
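
/* Illustrative only: a ring-0 caller could tweak one of the @gcfgm values above
 * like this (the name keeps its "/GVMM/" prefix and the value is range checked):
 *
 *      int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000); // 0.5 ms
 *      AssertRC(rc);
 */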
587
588
589/**
590 * A quick hack for getting global config values.
591 *
592 * @returns VBox status code.
593 *
594 * @param pSession The session handle. Used for authentication.
595 * @param pszName The variable name.
596 * @param pu64Value Where to store the value.
597 */
598GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
599{
600 /*
601 * Validate input.
602 */
603 PGVMM pGVMM;
604 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
605 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
606 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
607 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
608
609 /*
610 * String switch time!
611 */
612 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
613 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
614 int rc = VINF_SUCCESS;
615 pszName += sizeof("/GVMM/") - 1;
616 if (!strcmp(pszName, "cEMTsMeansCompany"))
617 *pu64Value = pGVMM->cEMTsMeansCompany;
618 else if (!strcmp(pszName, "MinSleepAlone"))
619 *pu64Value = pGVMM->nsMinSleepAlone;
620 else if (!strcmp(pszName, "MinSleepCompany"))
621 *pu64Value = pGVMM->nsMinSleepCompany;
622 else if (!strcmp(pszName, "EarlyWakeUp1"))
623 *pu64Value = pGVMM->nsEarlyWakeUp1;
624 else if (!strcmp(pszName, "EarlyWakeUp2"))
625 *pu64Value = pGVMM->nsEarlyWakeUp2;
626 else
627 rc = VERR_CFGM_VALUE_NOT_FOUND;
628 return rc;
629}
630
631
632/**
633 * Try to acquire the 'used' lock.
634 *
635 * @returns IPRT status code, see RTSemFastMutexRequest.
636 * @param pGVMM The GVMM instance data.
637 */
638DECLINLINE(int) gvmmR0UsedLock(PGVMM pGVMM)
639{
640 LogFlow(("++gvmmR0UsedLock(%p)\n", pGVMM));
641 int rc = RTSemFastMutexRequest(pGVMM->UsedLock);
642 LogFlow(("gvmmR0UsedLock(%p)->%Rrc\n", pGVMM, rc));
643 return rc;
644}
645
646
647/**
648 * Release the 'used' lock.
649 *
650 * @returns IPRT status code, see RTSemFastMutexRelease.
651 * @param pGVMM The GVMM instance data.
652 */
653DECLINLINE(int) gvmmR0UsedUnlock(PGVMM pGVMM)
654{
655 LogFlow(("--gvmmR0UsedUnlock(%p)\n", pGVMM));
656 int rc = RTSemFastMutexRelease(pGVMM->UsedLock);
657 AssertRC(rc);
658 return rc;
659}
660
661
662/**
663 * Try to acquire the 'create & destroy' lock.
664 *
665 * @returns IPRT status code, see RTSemFastMutexRequest.
666 * @param pGVMM The GVMM instance data.
667 */
668DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
669{
670 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
671 int rc = RTSemFastMutexRequest(pGVMM->CreateDestroyLock);
672 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
673 return rc;
674}
675
676
677/**
678 * Release the 'create & destroy' lock.
679 *
680 * @returns IPRT status code, see RTSemFastMutexRelease.
681 * @param pGVMM The GVMM instance data.
682 */
683DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
684{
685 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
686 int rc = RTSemFastMutexRelease(pGVMM->CreateDestroyLock);
687 AssertRC(rc);
688 return rc;
689}
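
/* Illustrative lock order (see the GVMM::CreateDestroyLock and GVMM::UsedLock notes
 * above): paths that need both locks, like VM creation below, always take them in
 * this order:
 *
 *      gvmmR0CreateDestroyLock(pGVMM);
 *          gvmmR0UsedLock(pGVMM);        // allowed while holding CreateDestroyLock
 *          ... update the used/free handle lists ...
 *          gvmmR0UsedUnlock(pGVMM);
 *      gvmmR0CreateDestroyUnlock(pGVMM);
 */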
690
691
692/**
693 * Request wrapper for the GVMMR0CreateVM API.
694 *
695 * @returns VBox status code.
696 * @param pReq The request buffer.
697 */
698GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq)
699{
700 /*
701 * Validate the request.
702 */
703 if (!VALID_PTR(pReq))
704 return VERR_INVALID_POINTER;
705 if (pReq->Hdr.cbReq != sizeof(*pReq))
706 return VERR_INVALID_PARAMETER;
707 if (!VALID_PTR(pReq->pSession))
708 return VERR_INVALID_POINTER;
709
710 /*
711 * Execute it.
712 */
713 PVM pVM;
714 pReq->pVMR0 = NULL;
715 pReq->pVMR3 = NIL_RTR3PTR;
716 int rc = GVMMR0CreateVM(pReq->pSession, pReq->cCpus, &pVM);
717 if (RT_SUCCESS(rc))
718 {
719 pReq->pVMR0 = pVM;
720 pReq->pVMR3 = pVM->pVMR3;
721 }
722 return rc;
723}
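
/* Illustrative only (the real code lives in ring-3, see VMR3Create): the request
 * buffer is filled in ring-3 and dispatched to this wrapper via the
 * VMMR0_DO_GVMM_CREATE_VM operation, roughly like this:
 *
 *      GVMMCREATEVMREQ CreateVMReq;
 *      CreateVMReq.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;
 *      CreateVMReq.Hdr.cbReq    = sizeof(CreateVMReq);
 *      CreateVMReq.pSession     = pSession;
 *      CreateVMReq.cCpus        = cCpus;
 *      CreateVMReq.pVMR0        = NIL_RTR0PTR;
 *      CreateVMReq.pVMR3        = NIL_RTR3PTR;
 *      int rc = SUPR3CallVMMR0Ex(NIL_RTR0PTR, NIL_VMCPUID, VMMR0_DO_GVMM_CREATE_VM, 0, &CreateVMReq.Hdr);
 *      // on success CreateVMReq.pVMR3 / pVMR0 point at the new shared VM structure
 */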
724
725
726/**
727 * Allocates the VM structure and registers it with GVM.
728 *
729 * The caller will become the VM owner and thereby the EMT.
730 *
731 * @returns VBox status code.
732 * @param pSession The support driver session.
733 * @param cCpus Number of virtual CPUs for the new VM.
734 * @param ppVM Where to store the pointer to the VM structure.
735 *
736 * @thread EMT.
737 */
738GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
739{
740 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
741 PGVMM pGVMM;
742 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
743
744 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
745 *ppVM = NULL;
746
747 if ( cCpus == 0
748 || cCpus > VMM_MAX_CPU_COUNT)
749 return VERR_INVALID_PARAMETER;
750
751 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
752 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
753 RTPROCESS ProcId = RTProcSelf();
754 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
755
756 /*
757 * The whole allocation process is protected by the lock.
758 */
759 int rc = gvmmR0CreateDestroyLock(pGVMM);
760 AssertRCReturn(rc, rc);
761
762 /*
763 * Allocate a handle first so we don't waste resources unnecessarily.
764 */
765 uint16_t iHandle = pGVMM->iFreeHead;
766 if (iHandle)
767 {
768 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
769
770 /* consistency checks, a bit paranoid as always. */
771 if ( !pHandle->pVM
772 && !pHandle->pGVM
773 && !pHandle->pvObj
774 && pHandle->iSelf == iHandle)
775 {
776 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
777 if (pHandle->pvObj)
778 {
779 /*
780 * Move the handle from the free to used list and perform permission checks.
781 */
782 rc = gvmmR0UsedLock(pGVMM);
783 AssertRC(rc);
784
785 pGVMM->iFreeHead = pHandle->iNext;
786 pHandle->iNext = pGVMM->iUsedHead;
787 pGVMM->iUsedHead = iHandle;
788 pGVMM->cVMs++;
789
790 pHandle->pVM = NULL;
791 pHandle->pGVM = NULL;
792 pHandle->pSession = pSession;
793 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
794 pHandle->ProcId = NIL_RTPROCESS;
795
796 gvmmR0UsedUnlock(pGVMM);
797
798 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
799 if (RT_SUCCESS(rc))
800 {
801 /*
802 * Allocate the global VM structure (GVM) and initialize it.
803 */
804 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
805 if (pGVM)
806 {
807 pGVM->u32Magic = GVM_MAGIC;
808 pGVM->hSelf = iHandle;
809 pGVM->pVM = NULL;
810 pGVM->cCpus = cCpus;
811
812 gvmmR0InitPerVMData(pGVM);
813 GMMR0InitPerVMData(pGVM);
814
815 /*
816 * Allocate the shared VM structure and associated page array.
817 */
818 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
819 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
820#ifdef RT_OS_DARWIN /** @todo Figure out why this is broken. Is it only on snow leopard? */
821 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, (cPages + 1) << PAGE_SHIFT, false /* fExecutable */);
822#else
823 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
824#endif
825 if (RT_SUCCESS(rc))
826 {
827 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
828 memset(pVM, 0, cPages << PAGE_SHIFT);
829 pVM->enmVMState = VMSTATE_CREATING;
830 pVM->pVMR0 = pVM;
831 pVM->pSession = pSession;
832 pVM->hSelf = iHandle;
833 pVM->cbSelf = cbVM;
834 pVM->cCpus = cCpus;
835 pVM->uCpuExecutionCap = 100; /* default is no cap. */
836 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
837 AssertCompileMemberAlignment(VM, cpum, 64);
838 AssertCompileMemberAlignment(VM, tm, 64);
839 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
840
841 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
842 if (RT_SUCCESS(rc))
843 {
844 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
845 for (uint32_t iPage = 0; iPage < cPages; iPage++)
846 {
847 paPages[iPage].uReserved = 0;
848 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
849 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
850 }
851
852 /*
853 * Map them into ring-3.
854 */
855 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
856 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
857 if (RT_SUCCESS(rc))
858 {
859 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
860 AssertPtr((void *)pVM->pVMR3);
861
862 /* Initialize all the VM pointers. */
863 for (uint32_t i = 0; i < cCpus; i++)
864 {
865 pVM->aCpus[i].pVMR0 = pVM;
866 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
867 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
868 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
869 }
870
871 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, 0,
872 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
873 if (RT_SUCCESS(rc))
874 {
875 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
876 AssertPtr((void *)pVM->paVMPagesR3);
877
878 /* complete the handle - take the UsedLock sem just to be careful. */
879 rc = gvmmR0UsedLock(pGVMM);
880 AssertRC(rc);
881
882 pHandle->pVM = pVM;
883 pHandle->pGVM = pGVM;
884 pHandle->hEMT0 = hEMT0;
885 pHandle->ProcId = ProcId;
886 pGVM->pVM = pVM;
887 pGVM->aCpus[0].hEMT = hEMT0;
888 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
889 pGVMM->cEMTs += cCpus;
890
891 gvmmR0UsedUnlock(pGVMM);
892 gvmmR0CreateDestroyUnlock(pGVMM);
893
894 *ppVM = pVM;
895 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
896 return VINF_SUCCESS;
897 }
898
899 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
900 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
901 }
902 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
903 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
904 }
905 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
906 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
907 }
908 }
909 }
910 /* else: The user wasn't permitted to create this VM. */
911
912 /*
913 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
914 * object reference here. A little extra mess because of non-recursive lock.
915 */
916 void *pvObj = pHandle->pvObj;
917 pHandle->pvObj = NULL;
918 gvmmR0CreateDestroyUnlock(pGVMM);
919
920 SUPR0ObjRelease(pvObj, pSession);
921
922 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
923 return rc;
924 }
925
926 rc = VERR_NO_MEMORY;
927 }
928 else
929 rc = VERR_GVMM_IPE_1;
930 }
931 else
932 rc = VERR_GVM_TOO_MANY_VMS;
933
934 gvmmR0CreateDestroyUnlock(pGVMM);
935 return rc;
936}
937
938
939/**
940 * Initializes the per VM data belonging to GVMM.
941 *
942 * @param pGVM Pointer to the global VM structure.
943 */
944static void gvmmR0InitPerVMData(PGVM pGVM)
945{
946 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
947 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
948 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
949 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
950 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
951 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
952 pGVM->gvmm.s.fDoneVMMR0Init = false;
953 pGVM->gvmm.s.fDoneVMMR0Term = false;
954
955 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
956 {
957 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
958 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
959 }
960}
961
962
963/**
964 * Does the VM initialization.
965 *
966 * @returns VBox status code.
967 * @param pVM Pointer to the shared VM structure.
968 */
969GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
970{
971 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
972
973 /*
974 * Validate the VM structure, state and handle.
975 */
976 PGVM pGVM;
977 PGVMM pGVMM;
978 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
979 if (RT_SUCCESS(rc))
980 {
981 if ( !pGVM->gvmm.s.fDoneVMMR0Init
982 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
983 {
984 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
985 {
986 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
987 if (RT_FAILURE(rc))
988 {
989 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
990 break;
991 }
992 }
993 }
994 else
995 rc = VERR_WRONG_ORDER;
996 }
997
998 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
999 return rc;
1000}
1001
1002
1003/**
1004 * Indicates that we're done with the ring-0 initialization
1005 * of the VM.
1006 *
1007 * @param pVM Pointer to the shared VM structure.
1008 * @thread EMT(0)
1009 */
1010GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1011{
1012 /* Validate the VM structure, state and handle. */
1013 PGVM pGVM;
1014 PGVMM pGVMM;
1015 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1016 AssertRCReturnVoid(rc);
1017
1018 /* Set the indicator. */
1019 pGVM->gvmm.s.fDoneVMMR0Init = true;
1020}
1021
1022
1023/**
1024 * Indicates that we're doing the ring-0 termination of the VM.
1025 *
1026 * @returns true if termination hasn't been done already, false if it has.
1027 * @param pVM Pointer to the shared VM structure.
1028 * @param pGVM Pointer to the global VM structure. Optional.
1029 * @thread EMT(0)
1030 */
1031GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1032{
1033 /* Validate the VM structure, state and handle. */
1034 AssertPtrNullReturn(pGVM, false);
1035 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1036 if (!pGVM)
1037 {
1038 PGVMM pGVMM;
1039 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1040 AssertRCReturn(rc, false);
1041 }
1042
1043 /* Set the indicator. */
1044 if (pGVM->gvmm.s.fDoneVMMR0Term)
1045 return false;
1046 pGVM->gvmm.s.fDoneVMMR0Term = true;
1047 return true;
1048}
1049
1050
1051/**
1052 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1053 *
1054 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1055 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1056 * would've been nice if the caller was actually the EMT thread or that we somehow
1057 * could've associated the calling thread with the VM up front.
1058 *
1059 * @returns VBox status code.
1060 * @param pVM Pointer to the shared VM structure.
1061 *
1062 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1063 */
1064GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM)
1065{
1066 LogFlow(("GVMMR0DestroyVM: pVM=%p\n", pVM));
1067 PGVMM pGVMM;
1068 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1069
1070
1071 /*
1072 * Validate the VM structure, state and caller.
1073 */
1074 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1075 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1076 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), VERR_WRONG_ORDER);
1077
1078 uint32_t hGVM = pVM->hSelf;
1079 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1080 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1081
1082 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1083 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1084
1085 RTPROCESS ProcId = RTProcSelf();
1086 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1087 AssertReturn( ( pHandle->hEMT0 == hSelf
1088 && pHandle->ProcId == ProcId)
1089 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1090
1091 /*
1092 * Lookup the handle and destroy the object.
1093 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1094 * object, we take some precautions against racing callers just in case...
1095 */
1096 int rc = gvmmR0CreateDestroyLock(pGVMM);
1097 AssertRC(rc);
1098
1099 /* be careful here because we might theoretically be racing someone else cleaning up. */
1100 if ( pHandle->pVM == pVM
1101 && ( ( pHandle->hEMT0 == hSelf
1102 && pHandle->ProcId == ProcId)
1103 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1104 && VALID_PTR(pHandle->pvObj)
1105 && VALID_PTR(pHandle->pSession)
1106 && VALID_PTR(pHandle->pGVM)
1107 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1108 {
1109 void *pvObj = pHandle->pvObj;
1110 pHandle->pvObj = NULL;
1111 gvmmR0CreateDestroyUnlock(pGVMM);
1112
1113 SUPR0ObjRelease(pvObj, pHandle->pSession);
1114 }
1115 else
1116 {
1117 SUPR0Printf("GVMMR0DestroyVM: pHandle=%p:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1118 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1119 gvmmR0CreateDestroyUnlock(pGVMM);
1120 rc = VERR_GVMM_IPE_2;
1121 }
1122
1123 return rc;
1124}
1125
1126
1127/**
1128 * Performs VM cleanup task as part of object destruction.
1129 *
1130 * @param pGVM The GVM pointer.
1131 */
1132static void gvmmR0CleanupVM(PGVM pGVM)
1133{
1134 if ( pGVM->gvmm.s.fDoneVMMR0Init
1135 && !pGVM->gvmm.s.fDoneVMMR0Term)
1136 {
1137 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1138 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1139 {
1140 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1141 VMMR0TermVM(pGVM->pVM, pGVM);
1142 }
1143 else
1144 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1145 }
1146
1147 GMMR0CleanupVM(pGVM);
1148}
1149
1150
1151/**
1152 * Handle destructor.
1153 *
 * @param pvObj The object being destroyed.
1154 * @param pvGVMM The GVMM instance pointer.
1155 * @param pvHandle The handle pointer.
1156 */
1157static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle)
1158{
1159 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvGVMM, pvHandle));
1160
1161 /*
1162 * Some quick, paranoid, input validation.
1163 */
1164 PGVMHANDLE pHandle = (PGVMHANDLE)pvHandle;
1165 AssertPtr(pHandle);
1166 PGVMM pGVMM = (PGVMM)pvGVMM;
1167 Assert(pGVMM == g_pGVMM);
1168 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1169 if ( !iHandle
1170 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1171 || iHandle != pHandle->iSelf)
1172 {
1173 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1174 return;
1175 }
1176
1177 int rc = gvmmR0CreateDestroyLock(pGVMM);
1178 AssertRC(rc);
1179 rc = gvmmR0UsedLock(pGVMM);
1180 AssertRC(rc);
1181
1182 /*
1183 * This is a tad slow but a doubly linked list is too much hassle.
1184 */
1185 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1186 {
1187 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1188 gvmmR0UsedUnlock(pGVMM);
1189 gvmmR0CreateDestroyUnlock(pGVMM);
1190 return;
1191 }
1192
1193 if (pGVMM->iUsedHead == iHandle)
1194 pGVMM->iUsedHead = pHandle->iNext;
1195 else
1196 {
1197 uint16_t iPrev = pGVMM->iUsedHead;
1198 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1199 while (iPrev)
1200 {
1201 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1202 {
1203 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1204 gvmmR0UsedUnlock(pGVMM);
1205 gvmmR0CreateDestroyUnlock(pGVMM);
1206 return;
1207 }
1208 if (RT_UNLIKELY(c-- <= 0))
1209 {
1210 iPrev = 0;
1211 break;
1212 }
1213
1214 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1215 break;
1216 iPrev = pGVMM->aHandles[iPrev].iNext;
1217 }
1218 if (!iPrev)
1219 {
1220 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1221 gvmmR0UsedUnlock(pGVMM);
1222 gvmmR0CreateDestroyUnlock(pGVMM);
1223 return;
1224 }
1225
1226 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1227 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1228 }
1229 pHandle->iNext = 0;
1230 pGVMM->cVMs--;
1231
1232 /*
1233 * Do the global cleanup round.
1234 */
1235 PGVM pGVM = pHandle->pGVM;
1236 if ( VALID_PTR(pGVM)
1237 && pGVM->u32Magic == GVM_MAGIC)
1238 {
1239 pGVMM->cEMTs -= pGVM->cCpus;
1240 gvmmR0UsedUnlock(pGVMM);
1241
1242 gvmmR0CleanupVM(pGVM);
1243
1244 /*
1245 * Do the GVMM cleanup - must be done last.
1246 */
1247 /* The VM and VM pages mappings/allocations. */
1248 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1249 {
1250 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1251 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1252 }
1253
1254 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1255 {
1256 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1257 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1258 }
1259
1260 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1261 {
1262 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1263 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1264 }
1265
1266 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1267 {
1268 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1269 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1270 }
1271
1272 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1273 {
1274 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1275 {
1276 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1277 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1278 }
1279 }
1280
1281 /* the GVM structure itself. */
1282 pGVM->u32Magic |= UINT32_C(0x80000000);
1283 RTMemFree(pGVM);
1284
1285 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1286 rc = gvmmR0UsedLock(pGVMM);
1287 AssertRC(rc);
1288 }
1289 /* else: GVMMR0CreateVM cleanup. */
1290
1291 /*
1292 * Free the handle.
1293 */
1294 pHandle->iNext = pGVMM->iFreeHead;
1295 pGVMM->iFreeHead = iHandle;
1296 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1297 ASMAtomicWriteNullPtr(&pHandle->pVM);
1298 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1299 ASMAtomicWriteNullPtr(&pHandle->pSession);
1300 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1301 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1302
1303 gvmmR0UsedUnlock(pGVMM);
1304 gvmmR0CreateDestroyUnlock(pGVMM);
1305 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1306}
1307
1308
1309/**
1310 * Registers the calling thread as the EMT of a Virtual CPU.
1311 *
1312 * Note that VCPU 0 is automatically registered during VM creation.
1313 *
1314 * @returns VBox status code
1315 * @param pVM The shared VM structure (the ring-0 mapping).
1316 * @param idCpu VCPU id.
1317 */
1318GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu)
1319{
1320 AssertReturn(idCpu != 0, VERR_NOT_OWNER);
1321
1322 /*
1323 * Validate the VM structure, state and handle.
1324 */
1325 PGVM pGVM;
1326 PGVMM pGVMM;
1327 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /* fTakeUsedLock */);
1328 if (RT_FAILURE(rc))
1329 return rc;
1330
1331 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1332 AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED);
1333 Assert(pGVM->cCpus == pVM->cCpus);
1334 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1335
1336 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1337
1338 return VINF_SUCCESS;
1339}
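
/* Illustrative only: each additional EMT calls this once on its own thread before
 * doing any other ring-0 work for its VCPU (EMT(0) is registered by GVMMR0CreateVM):
 *
 *      int rc = GVMMR0RegisterVCpu(pVM, idCpu);    // idCpu != 0
 *      AssertRCReturn(rc, rc);
 */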
1340
1341
1342/**
1343 * Lookup a GVM structure by its handle.
1344 *
1345 * @returns The GVM pointer on success, NULL on failure.
1346 * @param hGVM The global VM handle. Asserts on bad handle.
1347 */
1348GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1349{
1350 PGVMM pGVMM;
1351 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1352
1353 /*
1354 * Validate.
1355 */
1356 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1357 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1358
1359 /*
1360 * Look it up.
1361 */
1362 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1363 AssertPtrReturn(pHandle->pVM, NULL);
1364 AssertPtrReturn(pHandle->pvObj, NULL);
1365 PGVM pGVM = pHandle->pGVM;
1366 AssertPtrReturn(pGVM, NULL);
1367 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1368
1369 return pHandle->pGVM;
1370}
1371
1372
1373/**
1374 * Lookup a GVM structure by the shared VM structure.
1375 *
1376 * The calling thread must be in the same process as the VM. All current lookups
1377 * are by threads inside the same process, so this will not be an issue.
1378 *
1379 * @returns VBox status code.
1380 * @param pVM The shared VM structure (the ring-0 mapping).
1381 * @param ppGVM Where to store the GVM pointer.
1382 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1383 * @param fTakeUsedLock Whether to take the used lock or not.
1384 * Be very careful if not taking the lock as it's possible that
1385 * the VM will disappear then.
1386 *
1387 * @remark This will not assert on an invalid pVM but try to return silently.
1388 */
1389static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1390{
1391 RTPROCESS ProcId = RTProcSelf();
1392 PGVMM pGVMM;
1393 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1394
1395 /*
1396 * Validate.
1397 */
1398 if (RT_UNLIKELY( !VALID_PTR(pVM)
1399 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1400 return VERR_INVALID_POINTER;
1401 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1402 || pVM->enmVMState >= VMSTATE_TERMINATED))
1403 return VERR_INVALID_POINTER;
1404
1405 uint16_t hGVM = pVM->hSelf;
1406 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1407 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1408 return VERR_INVALID_HANDLE;
1409
1410 /*
1411 * Look it up.
1412 */
1413 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1414 PGVM pGVM;
1415 if (fTakeUsedLock)
1416 {
1417 int rc = gvmmR0UsedLock(pGVMM);
1418 AssertRCReturn(rc, rc);
1419
1420 pGVM = pHandle->pGVM;
1421 if (RT_UNLIKELY( pHandle->pVM != pVM
1422 || pHandle->ProcId != ProcId
1423 || !VALID_PTR(pHandle->pvObj)
1424 || !VALID_PTR(pGVM)
1425 || pGVM->pVM != pVM))
1426 {
1427 gvmmR0UsedUnlock(pGVMM);
1428 return VERR_INVALID_HANDLE;
1429 }
1430 }
1431 else
1432 {
1433 if (RT_UNLIKELY(pHandle->pVM != pVM))
1434 return VERR_INVALID_HANDLE;
1435 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1436 return VERR_INVALID_HANDLE;
1437 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1438 return VERR_INVALID_HANDLE;
1439
1440 pGVM = pHandle->pGVM;
1441 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1442 return VERR_INVALID_HANDLE;
1443 if (RT_UNLIKELY(pGVM->pVM != pVM))
1444 return VERR_INVALID_HANDLE;
1445 }
1446
1447 *ppGVM = pGVM;
1448 *ppGVMM = pGVMM;
1449 return VINF_SUCCESS;
1450}
1451
1452
1453/**
1454 * Lookup a GVM structure by the shared VM structure.
1455 *
1456 * @returns VBox status code.
1457 * @param pVM The shared VM structure (the ring-0 mapping).
1458 * @param ppGVM Where to store the GVM pointer.
1459 *
1460 * @remark This will not take the 'used'-lock because it doesn't do
1461 * nesting and this function will be used from under the lock.
1462 */
1463GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1464{
1465 PGVMM pGVMM;
1466 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1467}
1468
1469
1470/**
1471 * Lookup a GVM structure by the shared VM structure and ensuring that the
1472 * caller is an EMT thread.
1473 *
1474 * @returns VBox status code.
1475 * @param pVM The shared VM structure (the ring-0 mapping).
1476 * @param idCpu The Virtual CPU ID of the calling EMT.
1477 * @param ppGVM Where to store the GVM pointer.
1478 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1479 * @thread EMT
1480 *
1481 * @remark This will assert in all failure paths.
1482 */
1483static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1484{
1485 PGVMM pGVMM;
1486 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1487
1488 /*
1489 * Validate.
1490 */
1491 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1492 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1493
1494 uint16_t hGVM = pVM->hSelf;
1495 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1496 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1497
1498 /*
1499 * Look it up.
1500 */
1501 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1502 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1503 RTPROCESS ProcId = RTProcSelf();
1504 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1505 AssertPtrReturn(pHandle->pvObj, VERR_NOT_OWNER);
1506
1507 PGVM pGVM = pHandle->pGVM;
1508 AssertPtrReturn(pGVM, VERR_NOT_OWNER);
1509 AssertReturn(pGVM->pVM == pVM, VERR_NOT_OWNER);
1510 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1511 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1512 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1513
1514 *ppGVM = pGVM;
1515 *ppGVMM = pGVMM;
1516 return VINF_SUCCESS;
1517}
1518
1519
1520/**
1521 * Lookup a GVM structure by the shared VM structure
1522 * and ensuring that the caller is the EMT thread.
1523 *
1524 * @returns VBox status code.
1525 * @param pVM The shared VM structure (the ring-0 mapping).
1526 * @param idCpu The Virtual CPU ID of the calling EMT.
1527 * @param ppGVM Where to store the GVM pointer.
1528 * @thread EMT
1529 */
1530GVMMR0DECL(int) GVMMR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM)
1531{
1532 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
1533 PGVMM pGVMM;
1534 return gvmmR0ByVMAndEMT(pVM, idCpu, ppGVM, &pGVMM);
1535}
1536
1537
1538/**
1539 * Lookup a VM by its global handle.
1540 *
1541 * @returns The VM handle on success, NULL on failure.
1542 * @param hGVM The global VM handle. Asserts on bad handle.
1543 */
1544GVMMR0DECL(PVM) GVMMR0GetVMByHandle(uint32_t hGVM)
1545{
1546 PGVM pGVM = GVMMR0ByHandle(hGVM);
1547 return pGVM ? pGVM->pVM : NULL;
1548}
1549
1550
1551/**
1552 * Looks up the VM belonging to the specified EMT thread.
1553 *
1554 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1555 * unnecessary kernel panics when the EMT thread hits an assertion. The
1556 * caller may or may not be an EMT thread.
1557 *
1558 * @returns The VM handle on success, NULL on failure.
1559 * @param hEMT The native thread handle of the EMT.
1560 * NIL_RTNATIVETHREAD means the current thread
1561 */
1562GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1563{
1564 /*
1565 * No Assertions here as we're usually called in a AssertMsgN or
1566 * RTAssert* context.
1567 */
1568 PGVMM pGVMM = g_pGVMM;
1569 if ( !VALID_PTR(pGVMM)
1570 || pGVMM->u32Magic != GVMM_MAGIC)
1571 return NULL;
1572
1573 if (hEMT == NIL_RTNATIVETHREAD)
1574 hEMT = RTThreadNativeSelf();
1575 RTPROCESS ProcId = RTProcSelf();
1576
1577 /*
1578 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1579 */
1580 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1581 {
1582 if ( pGVMM->aHandles[i].iSelf == i
1583 && pGVMM->aHandles[i].ProcId == ProcId
1584 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1585 && VALID_PTR(pGVMM->aHandles[i].pVM)
1586 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1587 {
1588 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1589 return pGVMM->aHandles[i].pVM;
1590
1591 /* This is fairly safe with the current process per VM approach. */
1592 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1593 VMCPUID const cCpus = pGVM->cCpus;
1594 if ( cCpus < 1
1595 || cCpus > VMM_MAX_CPU_COUNT)
1596 continue;
1597 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1598 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1599 return pGVMM->aHandles[i].pVM;
1600 }
1601 }
1602 return NULL;
1603}
1604
1605
1606/**
1607 * This will wake up expired and soon-to-be expired VMs.
1608 *
1609 * @returns The number of EMTs that have been woken up.
1610 * @param pGVMM Pointer to the GVMM instance data.
1611 * @param u64Now The current time.
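 *
 * A worked example with the default configuration (nsEarlyWakeUp1 = 25000 ns,
 * nsEarlyWakeUp2 = 50000 ns): an EMT whose u64HaltExpire is already <= u64Now is
 * woken in the first pass, one expiring within 25 us of now is woken in the second
 * pass, one expiring within 50 us in the third pass, and anything later is left
 * sleeping and only contributes to the next-wakeup minimum.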
1612 */
1613static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1614{
1615 /*
1616 * Skip this if we've been disabled because of high resolution wakeups or by
1617 * the user.
1618 */
1619 if ( !pGVMM->nsEarlyWakeUp1
1620 && !pGVMM->nsEarlyWakeUp2)
1621 return 0;
1622
1623/** @todo Rewrite this algorithm. See performance defect XYZ. */
1624
1625 /*
1626 * A cheap optimization to stop wasting so much time here on big setups.
1627 */
1628 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1629 if ( pGVMM->cHaltedEMTs == 0
1630 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1631 return 0;
1632
1633 /*
1634 * The first pass will wake up VMs which have actually expired
1635 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1636 */
1637 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1638 uint64_t u64Min = UINT64_MAX;
1639 unsigned cWoken = 0;
1640 unsigned cHalted = 0;
1641 unsigned cTodo2nd = 0;
1642 unsigned cTodo3rd = 0;
1643 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1644 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1645 i = pGVMM->aHandles[i].iNext)
1646 {
1647 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1648 if ( VALID_PTR(pCurGVM)
1649 && pCurGVM->u32Magic == GVM_MAGIC)
1650 {
1651 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1652 {
1653 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1654 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1655 if (u64)
1656 {
1657 if (u64 <= u64Now)
1658 {
1659 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1660 {
1661 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1662 AssertRC(rc);
1663 cWoken++;
1664 }
1665 }
1666 else
1667 {
1668 cHalted++;
1669 if (u64 <= uNsEarlyWakeUp1)
1670 cTodo2nd++;
1671 else if (u64 <= uNsEarlyWakeUp2)
1672 cTodo3rd++;
1673 else if (u64 < u64Min)
1674 u64Min = u64;
1675 }
1676 }
1677 }
1678 }
1679 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1680 }
1681
1682 if (cTodo2nd)
1683 {
1684 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1685 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1686 i = pGVMM->aHandles[i].iNext)
1687 {
1688 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1689 if ( VALID_PTR(pCurGVM)
1690 && pCurGVM->u32Magic == GVM_MAGIC)
1691 {
1692 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1693 {
1694 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1695 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1696 if ( u64
1697 && u64 <= uNsEarlyWakeUp1)
1698 {
1699 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1700 {
1701 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1702 AssertRC(rc);
1703 cWoken++;
1704 }
1705 }
1706 }
1707 }
1708 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1709 }
1710 }
1711
1712 if (cTodo3rd)
1713 {
1714 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1715 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1716 i = pGVMM->aHandles[i].iNext)
1717 {
1718 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1719 if ( VALID_PTR(pCurGVM)
1720 && pCurGVM->u32Magic == GVM_MAGIC)
1721 {
1722 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1723 {
1724 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1725 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1726 if ( u64
1727 && u64 <= uNsEarlyWakeUp2)
1728 {
1729 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1730 {
1731 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1732 AssertRC(rc);
1733 cWoken++;
1734 }
1735 }
1736 }
1737 }
1738 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1739 }
1740 }
1741
1742 /*
1743 * Set the minimum value.
1744 */
1745 pGVMM->uNsNextEmtWakeup = u64Min;
1746
1747 return cWoken;
1748}
1749
1750
1751/**
1752 * Halt the EMT thread.
1753 *
1754 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
1755 * VERR_INTERRUPTED if a signal was scheduled for the thread.
1756 * @param pVM Pointer to the shared VM structure.
1757 * @param idCpu The Virtual CPU ID of the calling EMT.
1758 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
1759 * @thread EMT(idCpu).
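 *
 * A worked example with the defaults: with no more than cEMTsMeansCompany EMTs
 * (default 1) on the host, the call only blocks if the requested sleep is at least
 * nsMinSleepAlone (750 us by default); with company the threshold drops to
 * nsMinSleepCompany (15 us). Shorter requests just reset the halt semaphore and
 * return without blocking.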
1760 */
1761GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
1762{
1763 LogFlow(("GVMMR0SchedHalt: pVM=%p\n", pVM));
1764
1765 /*
1766 * Validate the VM structure, state and handle.
1767 */
1768 PGVM pGVM;
1769 PGVMM pGVMM;
1770 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
1771 if (RT_FAILURE(rc))
1772 return rc;
1773 pGVM->gvmm.s.StatsSched.cHaltCalls++;
1774
1775 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
1776 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
1777
1778 /*
1779 * Take the UsedList semaphore, get the current time
1780 * and check if anyone needs waking up.
1781 * Interrupts must NOT be disabled at this point because we ask for GIP time!
1782 */
1783 rc = gvmmR0UsedLock(pGVMM);
1784 AssertRC(rc);
1785
1786 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
1787
1788 /* GIP hack: We might frequently be sleeping for short intervals where the
1789 difference between GIP and system time matters on systems with high resolution
1790 system time. So, convert the input from GIP to System time in that case. */
1791 Assert(ASMGetFlags() & X86_EFL_IF);
1792 const uint64_t u64NowSys = RTTimeSystemNanoTS();
1793 const uint64_t u64NowGip = RTTimeNanoTS();
1794 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
1795
1796 /*
1797 * Go to sleep if we must...
1798 * Cap the sleep time to 1 second to be on the safe side.
1799 */
1800 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
1801 if ( u64NowGip < u64ExpireGipTime
1802 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
1803 ? pGVMM->nsMinSleepCompany
1804 : pGVMM->nsMinSleepAlone))
1805 {
1806 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
1807 if (cNsInterval > RT_NS_1SEC)
1808 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
1809 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
1810 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
1811 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
1812 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
1813 gvmmR0UsedUnlock(pGVMM);
1814
1815 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
1816 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
1817 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
1818
1819 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
1820 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
1821
1822 /* Reset the semaphore to try to prevent a few false wake-ups. */
1823 if (rc == VINF_SUCCESS)
1824 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1825 else if (rc == VERR_TIMEOUT)
1826 {
1827 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
1828 rc = VINF_SUCCESS;
1829 }
1830 }
1831 else
1832 {
1833 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
1834 gvmmR0UsedUnlock(pGVMM);
1835 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1836 }
1837
1838 return rc;
1839}
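
/*
 * Illustrative sketch, not part of the original source: how a caller might
 * compute an absolute GIP expiry and invoke GVMMR0SchedHalt. The helper name
 * and the ~1 ms timeout are made up; real callers dispatch to this via VMMR0.
 */
#if 0 /* example only */
static int exampleSchedHaltBriefly(PVM pVM, VMCPUID idCpu)
{
    /* The expiry is absolute GIP time; GVMMR0SchedHalt caps it at now + 1 sec. */
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + UINT64_C(1000000); /* ~1 ms */
    int rc = GVMMR0SchedHalt(pVM, idCpu, u64ExpireGipTime);
    /* VINF_SUCCESS covers both a timeout and being woken up by another thread. */
    return rc;
}
#endif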
1840
1841
1842/**
1843 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
1844 * a sleeping EMT.
1845 *
1846 * @retval VINF_SUCCESS if successfully woken up.
1847 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1848 *
1849 * @param pGVM The global (ring-0) VM structure.
1850 * @param pGVCpu The global (ring-0) VCPU structure.
1851 */
1852DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
1853{
1854 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
1855
1856 /*
1857 * Signal the semaphore regardless of whether the EMT is currently blocked on it.
1858 *
1859 * The reason for this is that there is absolutely no way we can be 100%
1860 * certain that it isn't *about* to go to sleep on it and just got
1861 * delayed a bit en route. So, we will always signal the semaphore when
1862 * the EMT is flagged as halted in the VMM.
1863 */
1864/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
1865 int rc;
1866 if (pGVCpu->gvmm.s.u64HaltExpire)
1867 {
1868 rc = VINF_SUCCESS;
1869 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
1870 }
1871 else
1872 {
1873 rc = VINF_GVM_NOT_BLOCKED;
1874 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
1875 }
1876
1877 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
1878 AssertRC(rc2);
1879
1880 return rc;
1881}
1882
1883
1884/**
1885 * Wakes up the halted EMT thread so it can service a pending request.
1886 *
1887 * @returns VBox status code.
1888 * @retval VINF_SUCCESS if successfully woken up.
1889 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1890 *
1891 * @param pVM Pointer to the shared VM structure.
1892 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1893 * @param fTakeUsedLock Take the used lock or not
1894 * @thread Any but EMT.
1895 */
1896GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1897{
1898 /*
1899 * Validate input and take the UsedLock.
1900 */
1901 PGVM pGVM;
1902 PGVMM pGVMM;
1903 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1904 if (RT_SUCCESS(rc))
1905 {
1906 if (idCpu < pGVM->cCpus)
1907 {
1908 /*
1909 * Do the actual job.
1910 */
1911 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
1912
1913 if (fTakeUsedLock)
1914 {
1915 /*
1916 * While we're here, do a round of scheduling.
1917 */
1918 Assert(ASMGetFlags() & X86_EFL_IF);
1919 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
1920 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
1921 }
1922 }
1923 else
1924 rc = VERR_INVALID_CPU_ID;
1925
1926 if (fTakeUsedLock)
1927 {
1928 int rc2 = gvmmR0UsedUnlock(pGVMM);
1929 AssertRC(rc2);
1930 }
1931 }
1932
1933 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
1934 return rc;
1935}
1936
1937
1938/**
1939 * Wakes up the halted EMT thread so it can service a pending request.
1940 *
1941 * @returns VBox status code.
1942 * @retval VINF_SUCCESS if successfully woken up.
1943 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1944 *
1945 * @param pVM Pointer to the shared VM structure.
1946 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1947 * @thread Any but EMT.
1948 */
1949GVMMR0DECL(int) GVMMR0SchedWakeUp(PVM pVM, VMCPUID idCpu)
1950{
1951 return GVMMR0SchedWakeUpEx(pVM, idCpu, true /* fTakeUsedLock */);
1952}
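
/*
 * Illustrative sketch, not part of the original source: waking up a halted
 * EMT from another thread. The helper and the hard-coded VCPU id are made up.
 */
#if 0 /* example only */
static int exampleWakeUpVCpu0(PVM pVM)
{
    /* VINF_GVM_NOT_BLOCKED just means the EMT wasn't halted; it is not an error. */
    return GVMMR0SchedWakeUp(pVM, 0 /* idCpu */);
}
#endif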
1953
1954/**
1955 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
1956 * the Virtual CPU if it's still busy executing guest code.
1957 *
1958 * @returns VBox status code.
1959 * @retval VINF_SUCCESS if poked successfully.
1960 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1961 *
1962 * @param pGVM The global (ring-0) VM structure.
1963 * @param pVCpu The Virtual CPU handle.
1964 */
1965DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
1966{
1967 pGVM->gvmm.s.StatsSched.cPokeCalls++;
1968
1969 RTCPUID idHostCpu = pVCpu->idHostCpu;
1970 if ( idHostCpu == NIL_RTCPUID
1971 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
1972 {
1973 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
1974 return VINF_GVM_NOT_BUSY_IN_GC;
1975 }
1976
1977 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
1978 RTMpPokeCpu(idHostCpu);
1979 return VINF_SUCCESS;
1980}
1981
1982/**
1983 * Pokes an EMT if it's still busy running guest code.
1984 *
1985 * @returns VBox status code.
1986 * @retval VINF_SUCCESS if poked successfully.
1987 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1988 *
1989 * @param pVM Pointer to the shared VM structure.
1990 * @param idCpu The ID of the virtual CPU to poke.
1991 * @param fTakeUsedLock Take the used lock or not
1992 */
1993GVMMR0DECL(int) GVMMR0SchedPokeEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1994{
1995 /*
1996 * Validate input and take the UsedLock.
1997 */
1998 PGVM pGVM;
1999 PGVMM pGVMM;
2000 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
2001 if (RT_SUCCESS(rc))
2002 {
2003 if (idCpu < pGVM->cCpus)
2004 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2005 else
2006 rc = VERR_INVALID_CPU_ID;
2007
2008 if (fTakeUsedLock)
2009 {
2010 int rc2 = gvmmR0UsedUnlock(pGVMM);
2011 AssertRC(rc2);
2012 }
2013 }
2014
2015 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2016 return rc;
2017}
2018
2019
2020/**
2021 * Pokes an EMT if it's still busy running guest code.
2022 *
2023 * @returns VBox status code.
2024 * @retval VINF_SUCCESS if poked successfully.
2025 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2026 *
2027 * @param pVM Pointer to the shared VM structure.
2028 * @param idCpu The ID of the virtual CPU to poke.
2029 */
2030GVMMR0DECL(int) GVMMR0SchedPoke(PVM pVM, VMCPUID idCpu)
2031{
2032 return GVMMR0SchedPokeEx(pVM, idCpu, true /* fTakeUsedLock */);
2033}
2034
2035
2036/**
2037 * Wakes up a set of halted EMT threads so they can service pending requests.
2038 *
2039 * @returns VBox status code; no informational status codes are returned.
2040 *
2041 * @param pVM Pointer to the shared VM structure.
2042 * @param pSleepSet The set of sleepers to wake up.
2043 * @param pPokeSet The set of CPUs to poke.
2044 */
2045GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2046{
2047 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2048 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2049 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2050
2051 /*
2052 * Validate input and take the UsedLock.
2053 */
2054 PGVM pGVM;
2055 PGVMM pGVMM;
2056 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /* fTakeUsedLock */);
2057 if (RT_SUCCESS(rc))
2058 {
2059 rc = VINF_SUCCESS;
2060 VMCPUID idCpu = pGVM->cCpus;
2061 while (idCpu-- > 0)
2062 {
2063 /* Don't try to poke or wake up ourselves. */
2064 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2065 continue;
2066
2067 /* just ignore errors for now. */
2068 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2069 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2070 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2071 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2072 }
2073
2074 int rc2 = gvmmR0UsedUnlock(pGVMM);
2075 AssertRC(rc2);
2076 }
2077
2078 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2079 return rc;
2080}
2081
2082
2083/**
2084 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2085 *
2086 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2087 * @param pVM Pointer to the shared VM structure.
2088 * @param pReq The request packet.
2089 */
2090GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2091{
2092 /*
2093 * Validate input and pass it on.
2094 */
2095 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2096 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2097
2098 return GVMMR0SchedWakeUpAndPokeCpus(pVM, &pReq->SleepSet, &pReq->PokeSet);
2099}
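
/*
 * Illustrative sketch, not part of the original source: partitioning the
 * VCPUs into sleep and poke sets for GVMMR0SchedWakeUpAndPokeCpus. The helper
 * is made up; VMCPUSET_EMPTY and VMCPUSET_ADD are assumed to be the set
 * helpers from VBox/vmm/vmcpuset.h.
 */
#if 0 /* example only */
static int exampleKickAllOtherCpus(PVM pVM, VMCPUID idSelf)
{
    VMCPUSET SleepSet;
    VMCPUSET PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
        if (idCpu != idSelf)
        {
            if (VMCPU_GET_STATE(&pVM->aCpus[idCpu]) == VMCPUSTATE_STARTED_EXEC)
                VMCPUSET_ADD(&PokeSet, idCpu);   /* busy in guest code -> IPI poke */
            else
                VMCPUSET_ADD(&SleepSet, idCpu);  /* possibly halted -> semaphore signal */
        }
    return GVMMR0SchedWakeUpAndPokeCpus(pVM, &SleepSet, &PokeSet);
}
#endif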
2100
2101
2102
2103/**
2104 * Poll the schedule to see if someone else should get a chance to run.
2105 *
2106 * This is a bit hackish and will not work too well if the machine is
2107 * under heavy load from non-VM processes.
2108 *
2109 * @returns VINF_SUCCESS if not yielded.
2110 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2111 * @param pVM Pointer to the shared VM structure.
2112 * @param idCpu The Virtual CPU ID of the calling EMT.
2114 * @param fYield Whether to yield or not.
2115 * This is for when we're spinning in the halt loop.
2116 * @thread EMT(idCpu).
2117 */
2118GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield)
2119{
2120 /*
2121 * Validate input.
2122 */
2123 PGVM pGVM;
2124 PGVMM pGVMM;
2125 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
2126 if (RT_SUCCESS(rc))
2127 {
2128 rc = gvmmR0UsedLock(pGVMM);
2129 AssertRC(rc);
2130 pGVM->gvmm.s.StatsSched.cPollCalls++;
2131
2132 Assert(ASMGetFlags() & X86_EFL_IF);
2133 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2134
2135 if (!fYield)
2136 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2137 else
2138 {
2139 /** @todo implement this... */
2140 rc = VERR_NOT_IMPLEMENTED;
2141 }
2142
2143 gvmmR0UsedUnlock(pGVMM);
2144 }
2145
2146 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2147 return rc;
2148}
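
/*
 * Illustrative sketch, not part of the original source: an EMT spinning
 * briefly before halting can poll the scheduler so overdue EMTs still get
 * woken up. The helper and the iteration count are made up.
 */
#if 0 /* example only */
static void exampleSpinAndPoll(PVM pVM, VMCPUID idCpu)
{
    for (uint32_t i = 0; i < 100; i++)
    {
        GVMMR0SchedPoll(pVM, idCpu, false /* fYield */);
        ASMNopPause();
    }
}
#endif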
2149
2150
2151#ifdef GVMM_SCHED_WITH_PPT
2152/**
2153 * Timer callback for the periodic preemption timer.
2154 *
2155 * @param pTimer The timer handle.
2156 * @param pvUser Pointer to the per cpu structure.
2157 * @param iTick The current tick.
2158 */
2159static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2160{
2161 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2162 NOREF(pTimer); NOREF(iTick);
2163
2164 /*
2165 * Termination check
2166 */
2167 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2168 return;
2169
2170 /*
2171 * Do the housekeeping.
2172 */
2173 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2174 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2175
2176 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2177 {
2178 /*
2179 * Historicize the max frequency.
2180 */
2181 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2182 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2183 pCpu->Ppt.iTickHistorization = 0;
2184 pCpu->Ppt.uDesiredHz = 0;
2185
2186 /*
2187 * Check whether the current timer frequency needs changing.
2188 */
2189 uint32_t uHistMaxHz = 0;
2190 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2191 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2192 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2193 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2194 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2195 else if (uHistMaxHz)
2196 {
2197 /*
2198 * Reprogram it.
2199 */
2200 pCpu->Ppt.cChanges++;
2201 pCpu->Ppt.iTickHistorization = 0;
2202 pCpu->Ppt.uTimerHz = uHistMaxHz;
2203 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2204 pCpu->Ppt.cNsInterval = cNsInterval;
2205 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2206 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2207 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2208 / cNsInterval;
2209 else
2210 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2211 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2212
2213 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2214 RTTimerChangeInterval(pTimer, cNsInterval);
2215 }
2216 else
2217 {
2218 /*
2219 * Stop it.
2220 */
2221 pCpu->Ppt.fStarted = false;
2222 pCpu->Ppt.uTimerHz = 0;
2223 pCpu->Ppt.cNsInterval = 0;
2224 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2225
2226 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2227 RTTimerStop(pTimer);
2228 }
2229 }
2230 else
2231 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2232}
2233#endif /* GVMM_SCHED_WITH_PPT */
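
/*
 * Illustrative sketch, not part of the original source: the rounding used by
 * the timer callback (and by GVMMR0SchedUpdatePeriodicPreemptionTimer below)
 * when converting a timer interval into a historization tick count. The
 * helper is made up; the real computation is inlined.
 */
#if 0 /* example only */
static uint32_t exampleCalcHistorizationTicks(uint32_t cNsInterval, uint32_t cNsHistInterval)
{
    /* Number of ticks adding up to roughly 1.5 x the history interval, min 1. */
    if (cNsInterval < cNsHistInterval)
        return (cNsHistInterval + cNsHistInterval / 2 - 1) / cNsInterval;
    return 1;
}
#endif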
2234
2235
2236/**
2237 * Updates the periodic preemption timer for the calling CPU.
2238 *
2239 * The caller must have disabled preemption!
2240 * The caller must check that the host can do high resolution timers.
2241 *
2242 * @param pVM The VM handle.
2243 * @param idHostCpu The current host CPU id.
2244 * @param uHz The desired frequency.
2245 */
2246GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2247{
2248 NOREF(pVM);
2249#ifdef GVMM_SCHED_WITH_PPT
2250 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2251 Assert(RTTimerCanDoHighResolution());
2252
2253 /*
2254 * Resolve the per CPU data.
2255 */
2256 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2257 PGVMM pGVMM = g_pGVMM;
2258 if ( !VALID_PTR(pGVMM)
2259 || pGVMM->u32Magic != GVMM_MAGIC)
2260 return;
2261 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2262 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2263 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2264 && pCpu->idCpu == idHostCpu,
2265 ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2266
2267 /*
2268 * Check whether we need to do anything about the timer.
2269 * We have to be a little bit careful since we might be racing the timer
2270 * callback here.
2271 */
2272 if (uHz > 16384)
2273 uHz = 16384; /** @todo add a query method for this! */
2274 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2275 && uHz >= pCpu->Ppt.uMinHz
2276 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2277 {
2278 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2279 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2280
2281 pCpu->Ppt.uDesiredHz = uHz;
2282 uint32_t cNsInterval = 0;
2283 if (!pCpu->Ppt.fStarted)
2284 {
2285 pCpu->Ppt.cStarts++;
2286 pCpu->Ppt.fStarted = true;
2287 pCpu->Ppt.fStarting = true;
2288 pCpu->Ppt.iTickHistorization = 0;
2289 pCpu->Ppt.uTimerHz = uHz;
2290 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2291 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2292 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2293 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2294 / cNsInterval;
2295 else
2296 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2297 }
2298
2299 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2300
2301 if (cNsInterval)
2302 {
2303 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2304 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2305 AssertRC(rc);
2306
2307 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2308 if (RT_FAILURE(rc))
2309 pCpu->Ppt.fStarted = false;
2310 pCpu->Ppt.fStarting = false;
2311 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2312 }
2313 }
2314#else /* !GVMM_SCHED_WITH_PPT */
2315 NOREF(idHostCpu); NOREF(uHz);
2316#endif /* !GVMM_SCHED_WITH_PPT */
2317}
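
/*
 * Illustrative sketch, not part of the original source: the calling pattern
 * GVMMR0SchedUpdatePeriodicPreemptionTimer expects - preemption disabled and
 * high resolution timer support verified. The helper and uHz value are made up.
 */
#if 0 /* example only */
static void exampleUpdatePpt(PVM pVM, uint32_t uHz)
{
    if (RTTimerCanDoHighResolution())
    {
        RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
        RTThreadPreemptDisable(&PreemptState);
        GVMMR0SchedUpdatePeriodicPreemptionTimer(pVM, RTMpCpuId(), uHz);
        RTThreadPreemptRestore(&PreemptState);
    }
}
#endif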
2318
2319
2320/**
2321 * Retrieves the GVMM statistics visible to the caller.
2322 *
2323 * @returns VBox status code.
2324 *
2325 * @param pStats Where to put the statistics.
2326 * @param pSession The current session.
2327 * @param pVM The VM to obtain statistics for. Optional.
2328 */
2329GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2330{
2331 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2332
2333 /*
2334 * Validate input.
2335 */
2336 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2337 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2338 pStats->cVMs = 0; /* (crash before taking the sem...) */
2339
2340 /*
2341 * Take the lock and get the VM statistics.
2342 */
2343 PGVMM pGVMM;
2344 if (pVM)
2345 {
2346 PGVM pGVM;
2347 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2348 if (RT_FAILURE(rc))
2349 return rc;
2350 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2351 }
2352 else
2353 {
2354 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2355 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2356
2357 int rc = gvmmR0UsedLock(pGVMM);
2358 AssertRCReturn(rc, rc);
2359 }
2360
2361 /*
2362 * Enumerate the VMs and add those visible to the caller to the statistics.
2363 */
2364 pStats->cVMs = 0;
2365 pStats->cEMTs = 0;
2366 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2367
2368 for (unsigned i = pGVMM->iUsedHead;
2369 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2370 i = pGVMM->aHandles[i].iNext)
2371 {
2372 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2373 void *pvObj = pGVMM->aHandles[i].pvObj;
2374 if ( VALID_PTR(pvObj)
2375 && VALID_PTR(pGVM)
2376 && pGVM->u32Magic == GVM_MAGIC
2377 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2378 {
2379 pStats->cVMs++;
2380 pStats->cEMTs += pGVM->cCpus;
2381
2382 pStats->SchedSum.cHaltCalls += pGVM->gvmm.s.StatsSched.cHaltCalls;
2383 pStats->SchedSum.cHaltBlocking += pGVM->gvmm.s.StatsSched.cHaltBlocking;
2384 pStats->SchedSum.cHaltTimeouts += pGVM->gvmm.s.StatsSched.cHaltTimeouts;
2385 pStats->SchedSum.cHaltNotBlocking += pGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2386 pStats->SchedSum.cHaltWakeUps += pGVM->gvmm.s.StatsSched.cHaltWakeUps;
2387
2388 pStats->SchedSum.cWakeUpCalls += pGVM->gvmm.s.StatsSched.cWakeUpCalls;
2389 pStats->SchedSum.cWakeUpNotHalted += pGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2390 pStats->SchedSum.cWakeUpWakeUps += pGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2391
2392 pStats->SchedSum.cPokeCalls += pGVM->gvmm.s.StatsSched.cPokeCalls;
2393 pStats->SchedSum.cPokeNotBusy += pGVM->gvmm.s.StatsSched.cPokeNotBusy;
2394
2395 pStats->SchedSum.cPollCalls += pGVM->gvmm.s.StatsSched.cPollCalls;
2396 pStats->SchedSum.cPollHalts += pGVM->gvmm.s.StatsSched.cPollHalts;
2397 pStats->SchedSum.cPollWakeUps += pGVM->gvmm.s.StatsSched.cPollWakeUps;
2398 }
2399 }
2400
2401 /*
2402 * Copy out the per host CPU statistics.
2403 */
2404 uint32_t iDstCpu = 0;
2405 uint32_t cSrcCpus = pGVMM->cHostCpus;
2406 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2407 {
2408 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2409 {
2410 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2411 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2412#ifdef GVMM_SCHED_WITH_PPT
2413 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2414 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2415 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2416 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2417#else
2418 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2419 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2420 pStats->aHostCpus[iDstCpu].cChanges = 0;
2421 pStats->aHostCpus[iDstCpu].cStarts = 0;
2422#endif
2423 iDstCpu++;
2424 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2425 break;
2426 }
2427 }
2428 pStats->cHostCpus = iDstCpu;
2429
2430 gvmmR0UsedUnlock(pGVMM);
2431
2432 return VINF_SUCCESS;
2433}
2434
2435
2436/**
2437 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2438 *
2439 * @returns see GVMMR0QueryStatistics.
2440 * @param pVM Pointer to the shared VM structure. Optional.
2441 * @param pReq The request packet.
2442 */
2443GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq)
2444{
2445 /*
2446 * Validate input and pass it on.
2447 */
2448 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2449 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2450
2451 return GVMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pVM);
2452}
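
/*
 * Illustrative sketch, not part of the original source: preparing a request
 * packet for GVMMR0QueryStatisticsReq. The helper is made up; the Hdr,
 * pSession and Stats fields follow the checks above, and SUPVMMR0REQHDR_MAGIC
 * is assumed to be the usual request header magic from VBox/sup.h.
 */
#if 0 /* example only */
static int exampleQueryStats(PVM pVM, PSUPDRVSESSION pSession, PGVMMSTATS pStatsOut)
{
    GVMMQUERYSTATISTICSSREQ Req;
    RT_ZERO(Req);
    Req.Hdr.u32Magic = SUPVMMR0REQHDR_MAGIC;    /* assumed header magic */
    Req.Hdr.cbReq    = sizeof(Req);
    Req.pSession     = pSession;
    int rc = GVMMR0QueryStatisticsReq(pVM, &Req);
    if (RT_SUCCESS(rc))
        *pStatsOut = Req.Stats;
    return rc;
}
#endif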
2453
2454
2455/**
2456 * Resets the specified GVMM statistics.
2457 *
2458 * @returns VBox status code.
2459 *
2460 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
2461 * @param pSession The current session.
2462 * @param pVM The VM to reset statistics for. Optional.
2463 */
2464GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2465{
2466 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2467
2468 /*
2469 * Validate input.
2470 */
2471 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2472 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2473
2474 /*
2475 * Take the lock and get the VM statistics.
2476 */
2477 PGVMM pGVMM;
2478 if (pVM)
2479 {
2480 PGVM pGVM;
2481 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2482 if (RT_FAILURE(rc))
2483 return rc;
2484# define MAYBE_RESET_FIELD(field) \
2485 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2486 MAYBE_RESET_FIELD(cHaltCalls);
2487 MAYBE_RESET_FIELD(cHaltBlocking);
2488 MAYBE_RESET_FIELD(cHaltTimeouts);
2489 MAYBE_RESET_FIELD(cHaltNotBlocking);
2490 MAYBE_RESET_FIELD(cHaltWakeUps);
2491 MAYBE_RESET_FIELD(cWakeUpCalls);
2492 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2493 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2494 MAYBE_RESET_FIELD(cPokeCalls);
2495 MAYBE_RESET_FIELD(cPokeNotBusy);
2496 MAYBE_RESET_FIELD(cPollCalls);
2497 MAYBE_RESET_FIELD(cPollHalts);
2498 MAYBE_RESET_FIELD(cPollWakeUps);
2499# undef MAYBE_RESET_FIELD
2500 }
2501 else
2502 {
2503 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2504
2505 int rc = gvmmR0UsedLock(pGVMM);
2506 AssertRCReturn(rc, rc);
2507 }
2508
2509 /*
2510 * Enumerate the VMs and reset the statistics of those visible to the caller.
2511 */
2512 if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0))
2513 {
2514 for (unsigned i = pGVMM->iUsedHead;
2515 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2516 i = pGVMM->aHandles[i].iNext)
2517 {
2518 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2519 void *pvObj = pGVMM->aHandles[i].pvObj;
2520 if ( VALID_PTR(pvObj)
2521 && VALID_PTR(pGVM)
2522 && pGVM->u32Magic == GVM_MAGIC
2523 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2524 {
2525# define MAYBE_RESET_FIELD(field) \
2526 do { if (pStats->SchedSum. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2527 MAYBE_RESET_FIELD(cHaltCalls);
2528 MAYBE_RESET_FIELD(cHaltBlocking);
2529 MAYBE_RESET_FIELD(cHaltTimeouts);
2530 MAYBE_RESET_FIELD(cHaltNotBlocking);
2531 MAYBE_RESET_FIELD(cHaltWakeUps);
2532 MAYBE_RESET_FIELD(cWakeUpCalls);
2533 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2534 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2535 MAYBE_RESET_FIELD(cPokeCalls);
2536 MAYBE_RESET_FIELD(cPokeNotBusy);
2537 MAYBE_RESET_FIELD(cPollCalls);
2538 MAYBE_RESET_FIELD(cPollHalts);
2539 MAYBE_RESET_FIELD(cPollWakeUps);
2540# undef MAYBE_RESET_FIELD
2541 }
2542 }
2543 }
2544
2545 gvmmR0UsedUnlock(pGVMM);
2546
2547 return VINF_SUCCESS;
2548}
2549
2550
2551/**
2552 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2553 *
2554 * @returns see GVMMR0ResetStatistics.
2555 * @param pVM Pointer to the shared VM structure. Optional.
2556 * @param pReq The request packet.
2557 */
2558GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PVM pVM, PGVMMRESETSTATISTICSSREQ pReq)
2559{
2560 /*
2561 * Validate input and pass it on.
2562 */
2563 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2564 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2565
2566 return GVMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pVM);
2567}
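
/*
 * Illustrative sketch, not part of the original source: resetting only the
 * halt counters of one VM. Per the MAYBE_RESET_FIELD logic above, a non-zero
 * field in the input marks that counter for resetting. The helper is made up.
 */
#if 0 /* example only */
static int exampleResetHaltStats(PVM pVM, PSUPDRVSESSION pSession)
{
    GVMMSTATS Stats;
    RT_ZERO(Stats);
    Stats.SchedVM.cHaltCalls    = 1;
    Stats.SchedVM.cHaltBlocking = 1;
    Stats.SchedVM.cHaltTimeouts = 1;
    return GVMMR0ResetStatistics(&Stats, pSession, pVM);
}
#endif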
2568