VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 39078

Last change on this file since 39078 was 39078, checked in by vboxsync, 13 years ago

VMM: -Wunused-parameter

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 87.1 KB
Line 
1/* $Id: GVMMR0.cpp 39078 2011-10-21 14:18:22Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempts VT-x, AMD-V and raw-mode guest
 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*******************************************************************************
50* Header Files *
51*******************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmm.h>
59#include <VBox/param.h>
60#include <VBox/err.h>
61#include <iprt/asm.h>
62#include <iprt/asm-amd64-x86.h>
63#include <iprt/mem.h>
64#include <iprt/semaphore.h>
65#include <iprt/time.h>
66#include <VBox/log.h>
67#include <iprt/thread.h>
68#include <iprt/process.h>
69#include <iprt/param.h>
70#include <iprt/string.h>
71#include <iprt/assert.h>
72#include <iprt/mem.h>
73#include <iprt/memobj.h>
74#include <iprt/mp.h>
75#include <iprt/cpuset.h>
76#include <iprt/spinlock.h>
77#include <iprt/timer.h>
78
79
80/*******************************************************************************
81* Defined Constants And Macros *
82*******************************************************************************/
83#if defined(RT_OS_LINUX) || defined(DOXYGEN_RUNNING)
84/** Define this to enable the periodic preemption timer. */
85# define GVMM_SCHED_WITH_PPT
86#endif
87
88
89/*******************************************************************************
90* Structures and Typedefs *
91*******************************************************************************/
92
93/**
94 * Global VM handle.
95 */
96typedef struct GVMHANDLE
97{
98 /** The index of the next handle in the list (free or used). (0 is nil.) */
99 uint16_t volatile iNext;
100 /** Our own index / handle value. */
101 uint16_t iSelf;
102 /** The process ID of the handle owner.
103 * This is used for access checks. */
104 RTPROCESS ProcId;
105 /** The pointer to the ring-0 only (aka global) VM structure. */
106 PGVM pGVM;
107 /** The ring-0 mapping of the shared VM instance data. */
108 PVM pVM;
109 /** The virtual machine object. */
110 void *pvObj;
111 /** The session this VM is associated with. */
112 PSUPDRVSESSION pSession;
113 /** The ring-0 handle of the EMT0 thread.
114 * This is used for ownership checks as well as looking up a VM handle by thread
115 * at times like assertions. */
116 RTNATIVETHREAD hEMT0;
117} GVMHANDLE;
118/** Pointer to a global VM handle. */
119typedef GVMHANDLE *PGVMHANDLE;
120
121/** Number of GVM handles (including the NIL handle). */
122#if HC_ARCH_BITS == 64
123# define GVMM_MAX_HANDLES 8192
124#else
125# define GVMM_MAX_HANDLES 128
126#endif
127
128/**
129 * Per host CPU GVMM data.
130 */
131typedef struct GVMMHOSTCPU
132{
133 /** Magic number (GVMMHOSTCPU_MAGIC). */
134 uint32_t volatile u32Magic;
135 /** The CPU ID. */
136 RTCPUID idCpu;
137 /** The CPU set index. */
138 uint32_t idxCpuSet;
139
140#ifdef GVMM_SCHED_WITH_PPT
141 /** Periodic preemption timer data. */
142 struct
143 {
144 /** The handle to the periodic preemption timer. */
145 PRTTIMER pTimer;
146 /** Spinlock protecting the data below. */
147 RTSPINLOCK hSpinlock;
148 /** The smalles Hz that we need to care about. (static) */
149 uint32_t uMinHz;
150 /** The number of ticks between each historization. */
151 uint32_t cTicksHistoriziationInterval;
152 /** The current historization tick (counting up to
153 * cTicksHistoriziationInterval and then resetting). */
154 uint32_t iTickHistorization;
155 /** The current timer interval. This is set to 0 when inactive. */
156 uint32_t cNsInterval;
157 /** The current timer frequency. This is set to 0 when inactive. */
158 uint32_t uTimerHz;
159 /** The current max frequency reported by the EMTs.
160 * This gets historicize and reset by the timer callback. This is
161 * read without holding the spinlock, so needs atomic updating. */
162 uint32_t volatile uDesiredHz;
163 /** Whether the timer was started or not. */
164 bool volatile fStarted;
165 /** Set if we're starting timer. */
166 bool volatile fStarting;
167 /** The index of the next history entry (mod it). */
168 uint32_t iHzHistory;
169 /** Historicized uDesiredHz values. The array wraps around, new entries
170 * are added at iHzHistory. This is updated approximately every
171 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
172 uint32_t aHzHistory[8];
173 /** Statistics counter for recording the number of interval changes. */
174 uint32_t cChanges;
175 /** Statistics counter for recording the number of timer starts. */
176 uint32_t cStarts;
177 } Ppt;
178#endif /* GVMM_SCHED_WITH_PPT */
179
180} GVMMHOSTCPU;
181/** Pointer to the per host CPU GVMM data. */
182typedef GVMMHOSTCPU *PGVMMHOSTCPU;
183/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
184#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
185/** The interval on history entry should cover (approximately) give in
186 * nanoseconds. */
187#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
188
189
190/**
191 * The GVMM instance data.
192 */
193typedef struct GVMM
194{
195 /** Eyecatcher / magic. */
196 uint32_t u32Magic;
197 /** The index of the head of the free handle chain. (0 is nil.) */
198 uint16_t volatile iFreeHead;
199 /** The index of the head of the active handle chain. (0 is nil.) */
200 uint16_t volatile iUsedHead;
201 /** The number of VMs. */
202 uint16_t volatile cVMs;
203 /** Alignment padding. */
204 uint16_t u16Reserved;
205 /** The number of EMTs. */
206 uint32_t volatile cEMTs;
207 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
208 uint32_t volatile cHaltedEMTs;
209 /** Alignment padding. */
210 uint32_t u32Alignment;
211 /** When the next halted or sleeping EMT will wake up.
212 * This is set to 0 when it needs recalculating and to UINT64_MAX when
213 * there are no halted or sleeping EMTs in the GVMM. */
214 uint64_t uNsNextEmtWakeup;
215 /** The lock used to serialize VM creation, destruction and associated events that
216 * isn't performance critical. Owners may acquire the list lock. */
217 RTSEMFASTMUTEX CreateDestroyLock;
218 /** The lock used to serialize used list updates and accesses.
219 * This indirectly includes scheduling since the scheduler will have to walk the
220 * used list to examin running VMs. Owners may not acquire any other locks. */
221 RTSEMFASTMUTEX UsedLock;
222 /** The handle array.
223 * The size of this array defines the maximum number of currently running VMs.
224 * The first entry is unused as it represents the NIL handle. */
225 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
226
227 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
228 * The number of EMTs that means we no longer consider ourselves alone on a
229 * CPU/Core.
230 */
231 uint32_t cEMTsMeansCompany;
232 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
233 * The minimum sleep time for when we're alone, in nano seconds.
234 */
235 uint32_t nsMinSleepAlone;
236 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
237 * The minimum sleep time for when we've got company, in nano seconds.
238 */
239 uint32_t nsMinSleepCompany;
240 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
241 * The limit for the first round of early wakeups, given in nano seconds.
242 */
243 uint32_t nsEarlyWakeUp1;
244 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
245 * The limit for the second round of early wakeups, given in nano seconds.
246 */
247 uint32_t nsEarlyWakeUp2;
248
249 /** The number of entries in the host CPU array (aHostCpus). */
250 uint32_t cHostCpus;
251 /** Per host CPU data (variable length). */
252 GVMMHOSTCPU aHostCpus[1];
253} GVMM;
254/** Pointer to the GVMM instance data. */
255typedef GVMM *PGVMM;
256
257/** The GVMM::u32Magic value (Charlie Haden). */
258#define GVMM_MAGIC UINT32_C(0x19370806)
259
260
261
262/*******************************************************************************
263* Global Variables *
264*******************************************************************************/
265/** Pointer to the GVMM instance data.
266 * (Just my general dislike for global variables.) */
267static PGVMM g_pGVMM = NULL;
268
269/** Macro for obtaining and validating the g_pGVMM pointer.
270 * On failure it will return from the invoking function with the specified return value.
271 *
272 * @param pGVMM The name of the pGVMM variable.
273 * @param rc The return value on failure. Use VERR_INTERNAL_ERROR for
274 * VBox status codes.
275 */
276#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
277 do { \
278 (pGVMM) = g_pGVMM;\
279 AssertPtrReturn((pGVMM), (rc)); \
280 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
281 } while (0)
282
283/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
284 * On failure it will return from the invoking function.
285 *
286 * @param pGVMM The name of the pGVMM variable.
287 */
288#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
289 do { \
290 (pGVMM) = g_pGVMM;\
291 AssertPtrReturnVoid((pGVMM)); \
292 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
293 } while (0)
294
295
296/*******************************************************************************
297* Internal Functions *
298*******************************************************************************/
299static void gvmmR0InitPerVMData(PGVM pGVM);
300static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
301static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
302static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
303#ifdef GVMM_SCHED_WITH_PPT
304static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
305#endif
306
307
308/**
309 * Initializes the GVMM.
310 *
311 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
312 *
313 * @returns VBox status code.
314 */
315GVMMR0DECL(int) GVMMR0Init(void)
316{
317 LogFlow(("GVMMR0Init:\n"));
318
319 /*
320 * Allocate and initialize the instance data.
321 */
322 uint32_t cHostCpus = RTMpGetArraySize();
323 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_INTERNAL_ERROR_2);
324
325 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
326 if (!pGVMM)
327 return VERR_NO_MEMORY;
328 int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock);
329 if (RT_SUCCESS(rc))
330 {
331 rc = RTSemFastMutexCreate(&pGVMM->UsedLock);
332 if (RT_SUCCESS(rc))
333 {
334 pGVMM->u32Magic = GVMM_MAGIC;
335 pGVMM->iUsedHead = 0;
336 pGVMM->iFreeHead = 1;
337
338 /* the nil handle */
339 pGVMM->aHandles[0].iSelf = 0;
340 pGVMM->aHandles[0].iNext = 0;
341
342 /* the tail */
343 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
344 pGVMM->aHandles[i].iSelf = i;
345 pGVMM->aHandles[i].iNext = 0; /* nil */
346
347 /* the rest */
348 while (i-- > 1)
349 {
350 pGVMM->aHandles[i].iSelf = i;
351 pGVMM->aHandles[i].iNext = i + 1;
352 }
353
354 /* The default configuration values. */
355 uint32_t cNsResolution = RTSemEventMultiGetResolution();
356 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
357 if (cNsResolution >= 5*RT_NS_100US)
358 {
359 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
360 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
361 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
362 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
363 }
364 else if (cNsResolution > RT_NS_100US)
365 {
366 pGVMM->nsMinSleepAlone = cNsResolution / 2;
367 pGVMM->nsMinSleepCompany = cNsResolution / 4;
368 pGVMM->nsEarlyWakeUp1 = 0;
369 pGVMM->nsEarlyWakeUp2 = 0;
370 }
371 else
372 {
373 pGVMM->nsMinSleepAlone = 2000;
374 pGVMM->nsMinSleepCompany = 2000;
375 pGVMM->nsEarlyWakeUp1 = 0;
376 pGVMM->nsEarlyWakeUp2 = 0;
377 }
378
379 /* The host CPU data. */
380 pGVMM->cHostCpus = cHostCpus;
381 uint32_t iCpu = cHostCpus;
382 RTCPUSET PossibleSet;
383 RTMpGetSet(&PossibleSet);
384 while (iCpu-- > 0)
385 {
386 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
387#ifdef GVMM_SCHED_WITH_PPT
388 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
389 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
390 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
391 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
392 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
393 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
394 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
395 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
396 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
397 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
398 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
399 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
400#endif
401
402 if (RTCpuSetIsMember(&PossibleSet, iCpu))
403 {
404 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
405 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
406
407#ifdef GVMM_SCHED_WITH_PPT
408 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
409 50*1000*1000 /* whatever */,
410 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
411 gvmmR0SchedPeriodicPreemptionTimerCallback,
412 &pGVMM->aHostCpus[iCpu]);
413 if (RT_SUCCESS(rc))
414 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
415 if (RT_FAILURE(rc))
416 {
417 while (iCpu < cHostCpus)
418 {
419 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
420 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
421 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
422 iCpu++;
423 }
424 break;
425 }
426#endif
427 }
428 else
429 {
430 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
431 pGVMM->aHostCpus[iCpu].u32Magic = 0;
432 }
433 }
434 if (RT_SUCCESS(rc))
435 {
436 g_pGVMM = pGVMM;
437 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
438 return VINF_SUCCESS;
439 }
440
441 /* bail out. */
442 RTSemFastMutexDestroy(pGVMM->UsedLock);
443 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
444 }
445 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
446 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
447 }
448
449 RTMemFree(pGVMM);
450 return rc;
451}
452
453
454/**
455 * Terminates the GVM.
456 *
457 * This is called while owning the loader semaphore (see supdrvLdrFree()).
458 * And unless something is wrong, there should be absolutely no VMs
459 * registered at this point.
460 */
461GVMMR0DECL(void) GVMMR0Term(void)
462{
463 LogFlow(("GVMMR0Term:\n"));
464
465 PGVMM pGVMM = g_pGVMM;
466 g_pGVMM = NULL;
467 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
468 {
469 SUPR0Printf("GVMMR0Term: pGVMM=%p\n", pGVMM);
470 return;
471 }
472
473 /*
474 * First of all, stop all active timers.
475 */
476 uint32_t cActiveTimers = 0;
477 uint32_t iCpu = pGVMM->cHostCpus;
478 while (iCpu-- > 0)
479 {
480 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
481#ifdef GVMM_SCHED_WITH_PPT
482 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
483 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
484 cActiveTimers++;
485#endif
486 }
487 if (cActiveTimers)
488 RTThreadSleep(1); /* fudge */
489
490 /*
491 * Invalidate the and free resources.
492 */
493 pGVMM->u32Magic = ~GVMM_MAGIC;
494 RTSemFastMutexDestroy(pGVMM->UsedLock);
495 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
496 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
497 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
498
499 pGVMM->iFreeHead = 0;
500 if (pGVMM->iUsedHead)
501 {
502 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
503 pGVMM->iUsedHead = 0;
504 }
505
506#ifdef GVMM_SCHED_WITH_PPT
507 iCpu = pGVMM->cHostCpus;
508 while (iCpu-- > 0)
509 {
510 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
511 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
512 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
513 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
514 }
515#endif
516
517 RTMemFree(pGVMM);
518}
519
520
521/**
522 * A quick hack for setting global config values.
523 *
524 * @returns VBox status code.
525 *
526 * @param pSession The session handle. Used for authentication.
527 * @param pszName The variable name.
528 * @param u64Value The new value.
529 */
530GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
531{
532 /*
533 * Validate input.
534 */
535 PGVMM pGVMM;
536 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
537 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
538 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
539
540 /*
541 * String switch time!
542 */
543 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
544 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
545 int rc = VINF_SUCCESS;
546 pszName += sizeof("/GVMM/") - 1;
547 if (!strcmp(pszName, "cEMTsMeansCompany"))
548 {
549 if (u64Value <= UINT32_MAX)
550 pGVMM->cEMTsMeansCompany = u64Value;
551 else
552 rc = VERR_OUT_OF_RANGE;
553 }
554 else if (!strcmp(pszName, "MinSleepAlone"))
555 {
556 if (u64Value <= RT_NS_100MS)
557 pGVMM->nsMinSleepAlone = u64Value;
558 else
559 rc = VERR_OUT_OF_RANGE;
560 }
561 else if (!strcmp(pszName, "MinSleepCompany"))
562 {
563 if (u64Value <= RT_NS_100MS)
564 pGVMM->nsMinSleepCompany = u64Value;
565 else
566 rc = VERR_OUT_OF_RANGE;
567 }
568 else if (!strcmp(pszName, "EarlyWakeUp1"))
569 {
570 if (u64Value <= RT_NS_100MS)
571 pGVMM->nsEarlyWakeUp1 = u64Value;
572 else
573 rc = VERR_OUT_OF_RANGE;
574 }
575 else if (!strcmp(pszName, "EarlyWakeUp2"))
576 {
577 if (u64Value <= RT_NS_100MS)
578 pGVMM->nsEarlyWakeUp2 = u64Value;
579 else
580 rc = VERR_OUT_OF_RANGE;
581 }
582 else
583 rc = VERR_CFGM_VALUE_NOT_FOUND;
584 return rc;
585}
586
587
588/**
589 * A quick hack for getting global config values.
590 *
591 * @returns VBox status code.
592 *
593 * @param pSession The session handle. Used for authentication.
594 * @param pszName The variable name.
595 * @param u64Value The new value.
596 */
597GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
598{
599 /*
600 * Validate input.
601 */
602 PGVMM pGVMM;
603 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
604 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
605 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
606 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
607
608 /*
609 * String switch time!
610 */
611 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
612 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
613 int rc = VINF_SUCCESS;
614 pszName += sizeof("/GVMM/") - 1;
615 if (!strcmp(pszName, "cEMTsMeansCompany"))
616 *pu64Value = pGVMM->cEMTsMeansCompany;
617 else if (!strcmp(pszName, "MinSleepAlone"))
618 *pu64Value = pGVMM->nsMinSleepAlone;
619 else if (!strcmp(pszName, "MinSleepCompany"))
620 *pu64Value = pGVMM->nsMinSleepCompany;
621 else if (!strcmp(pszName, "EarlyWakeUp1"))
622 *pu64Value = pGVMM->nsEarlyWakeUp1;
623 else if (!strcmp(pszName, "EarlyWakeUp2"))
624 *pu64Value = pGVMM->nsEarlyWakeUp2;
625 else
626 rc = VERR_CFGM_VALUE_NOT_FOUND;
627 return rc;
628}
629
630
631/**
632 * Try acquire the 'used' lock.
633 *
634 * @returns IPRT status code, see RTSemFastMutexRequest.
635 * @param pGVMM The GVMM instance data.
636 */
637DECLINLINE(int) gvmmR0UsedLock(PGVMM pGVMM)
638{
639 LogFlow(("++gvmmR0UsedLock(%p)\n", pGVMM));
640 int rc = RTSemFastMutexRequest(pGVMM->UsedLock);
641 LogFlow(("gvmmR0UsedLock(%p)->%Rrc\n", pGVMM, rc));
642 return rc;
643}
644
645
646/**
647 * Release the 'used' lock.
648 *
649 * @returns IPRT status code, see RTSemFastMutexRelease.
650 * @param pGVMM The GVMM instance data.
651 */
652DECLINLINE(int) gvmmR0UsedUnlock(PGVMM pGVMM)
653{
654 LogFlow(("--gvmmR0UsedUnlock(%p)\n", pGVMM));
655 int rc = RTSemFastMutexRelease(pGVMM->UsedLock);
656 AssertRC(rc);
657 return rc;
658}
659
660
661/**
662 * Try acquire the 'create & destroy' lock.
663 *
664 * @returns IPRT status code, see RTSemFastMutexRequest.
665 * @param pGVMM The GVMM instance data.
666 */
667DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
668{
669 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
670 int rc = RTSemFastMutexRequest(pGVMM->CreateDestroyLock);
671 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
672 return rc;
673}
674
675
676/**
677 * Release the 'create & destroy' lock.
678 *
679 * @returns IPRT status code, see RTSemFastMutexRequest.
680 * @param pGVMM The GVMM instance data.
681 */
682DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
683{
684 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
685 int rc = RTSemFastMutexRelease(pGVMM->CreateDestroyLock);
686 AssertRC(rc);
687 return rc;
688}
689
690
691/**
692 * Request wrapper for the GVMMR0CreateVM API.
693 *
694 * @returns VBox status code.
695 * @param pReq The request buffer.
696 */
697GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq)
698{
699 /*
700 * Validate the request.
701 */
702 if (!VALID_PTR(pReq))
703 return VERR_INVALID_POINTER;
704 if (pReq->Hdr.cbReq != sizeof(*pReq))
705 return VERR_INVALID_PARAMETER;
706 if (!VALID_PTR(pReq->pSession))
707 return VERR_INVALID_POINTER;
708
709 /*
710 * Execute it.
711 */
712 PVM pVM;
713 pReq->pVMR0 = NULL;
714 pReq->pVMR3 = NIL_RTR3PTR;
715 int rc = GVMMR0CreateVM(pReq->pSession, pReq->cCpus, &pVM);
716 if (RT_SUCCESS(rc))
717 {
718 pReq->pVMR0 = pVM;
719 pReq->pVMR3 = pVM->pVMR3;
720 }
721 return rc;
722}
723
724
725/**
726 * Allocates the VM structure and registers it with GVM.
727 *
728 * The caller will become the VM owner and there by the EMT.
729 *
730 * @returns VBox status code.
731 * @param pSession The support driver session.
732 * @param cCpus Number of virtual CPUs for the new VM.
733 * @param ppVM Where to store the pointer to the VM structure.
734 *
735 * @thread EMT.
736 */
737GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
738{
739 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
740 PGVMM pGVMM;
741 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
742
743 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
744 *ppVM = NULL;
745
746 if ( cCpus == 0
747 || cCpus > VMM_MAX_CPU_COUNT)
748 return VERR_INVALID_PARAMETER;
749
750 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
751 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR);
752 RTNATIVETHREAD ProcId = RTProcSelf();
753 AssertReturn(ProcId != NIL_RTPROCESS, VERR_INTERNAL_ERROR);
754
755 /*
756 * The whole allocation process is protected by the lock.
757 */
758 int rc = gvmmR0CreateDestroyLock(pGVMM);
759 AssertRCReturn(rc, rc);
760
761 /*
762 * Allocate a handle first so we don't waste resources unnecessarily.
763 */
764 uint16_t iHandle = pGVMM->iFreeHead;
765 if (iHandle)
766 {
767 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
768
769 /* consistency checks, a bit paranoid as always. */
770 if ( !pHandle->pVM
771 && !pHandle->pGVM
772 && !pHandle->pvObj
773 && pHandle->iSelf == iHandle)
774 {
775 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
776 if (pHandle->pvObj)
777 {
778 /*
779 * Move the handle from the free to used list and perform permission checks.
780 */
781 rc = gvmmR0UsedLock(pGVMM);
782 AssertRC(rc);
783
784 pGVMM->iFreeHead = pHandle->iNext;
785 pHandle->iNext = pGVMM->iUsedHead;
786 pGVMM->iUsedHead = iHandle;
787 pGVMM->cVMs++;
788
789 pHandle->pVM = NULL;
790 pHandle->pGVM = NULL;
791 pHandle->pSession = pSession;
792 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
793 pHandle->ProcId = NIL_RTPROCESS;
794
795 gvmmR0UsedUnlock(pGVMM);
796
797 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
798 if (RT_SUCCESS(rc))
799 {
800 /*
801 * Allocate the global VM structure (GVM) and initialize it.
802 */
803 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
804 if (pGVM)
805 {
806 pGVM->u32Magic = GVM_MAGIC;
807 pGVM->hSelf = iHandle;
808 pGVM->pVM = NULL;
809 pGVM->cCpus = cCpus;
810
811 gvmmR0InitPerVMData(pGVM);
812 GMMR0InitPerVMData(pGVM);
813
814 /*
815 * Allocate the shared VM structure and associated page array.
816 */
817 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
818 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
819#ifdef RT_OS_DARWIN /** @todo Figure out why this is broken. Is it only on snow leopard? */
820 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, (cPages + 1) << PAGE_SHIFT, false /* fExecutable */);
821#else
822 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
823#endif
824 if (RT_SUCCESS(rc))
825 {
826 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
827 memset(pVM, 0, cPages << PAGE_SHIFT);
828 pVM->enmVMState = VMSTATE_CREATING;
829 pVM->pVMR0 = pVM;
830 pVM->pSession = pSession;
831 pVM->hSelf = iHandle;
832 pVM->cbSelf = cbVM;
833 pVM->cCpus = cCpus;
834 pVM->uCpuExecutionCap = 100; /* default is no cap. */
835 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
836 AssertCompileMemberAlignment(VM, cpum, 64);
837 AssertCompileMemberAlignment(VM, tm, 64);
838 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
839
840 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
841 if (RT_SUCCESS(rc))
842 {
843 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
844 for (uint32_t iPage = 0; iPage < cPages; iPage++)
845 {
846 paPages[iPage].uReserved = 0;
847 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
848 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
849 }
850
851 /*
852 * Map them into ring-3.
853 */
854 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
855 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
856 if (RT_SUCCESS(rc))
857 {
858 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
859 AssertPtr((void *)pVM->pVMR3);
860
861 /* Initialize all the VM pointers. */
862 for (uint32_t i = 0; i < cCpus; i++)
863 {
864 pVM->aCpus[i].pVMR0 = pVM;
865 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
866 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
867 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
868 }
869
870 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, 0,
871 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
872 if (RT_SUCCESS(rc))
873 {
874 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
875 AssertPtr((void *)pVM->paVMPagesR3);
876
877 /* complete the handle - take the UsedLock sem just to be careful. */
878 rc = gvmmR0UsedLock(pGVMM);
879 AssertRC(rc);
880
881 pHandle->pVM = pVM;
882 pHandle->pGVM = pGVM;
883 pHandle->hEMT0 = hEMT0;
884 pHandle->ProcId = ProcId;
885 pGVM->pVM = pVM;
886 pGVM->aCpus[0].hEMT = hEMT0;
887 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
888 pGVMM->cEMTs += cCpus;
889
890 gvmmR0UsedUnlock(pGVMM);
891 gvmmR0CreateDestroyUnlock(pGVMM);
892
893 *ppVM = pVM;
894 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
895 return VINF_SUCCESS;
896 }
897
898 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
899 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
900 }
901 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
902 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
903 }
904 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
905 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
906 }
907 }
908 }
909 /* else: The user wasn't permitted to create this VM. */
910
911 /*
912 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
913 * object reference here. A little extra mess because of non-recursive lock.
914 */
915 void *pvObj = pHandle->pvObj;
916 pHandle->pvObj = NULL;
917 gvmmR0CreateDestroyUnlock(pGVMM);
918
919 SUPR0ObjRelease(pvObj, pSession);
920
921 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
922 return rc;
923 }
924
925 rc = VERR_NO_MEMORY;
926 }
927 else
928 rc = VERR_INTERNAL_ERROR;
929 }
930 else
931 rc = VERR_GVM_TOO_MANY_VMS;
932
933 gvmmR0CreateDestroyUnlock(pGVMM);
934 return rc;
935}
936
937
938/**
939 * Initializes the per VM data belonging to GVMM.
940 *
941 * @param pGVM Pointer to the global VM structure.
942 */
943static void gvmmR0InitPerVMData(PGVM pGVM)
944{
945 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
946 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
947 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
948 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
949 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
950 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
951 pGVM->gvmm.s.fDoneVMMR0Init = false;
952 pGVM->gvmm.s.fDoneVMMR0Term = false;
953
954 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
955 {
956 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
957 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
958 }
959}
960
961
962/**
963 * Does the VM initialization.
964 *
965 * @returns VBox status code.
966 * @param pVM Pointer to the shared VM structure.
967 */
968GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
969{
970 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
971
972 /*
973 * Validate the VM structure, state and handle.
974 */
975 PGVM pGVM;
976 PGVMM pGVMM;
977 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
978 if (RT_SUCCESS(rc))
979 {
980 if ( !pGVM->gvmm.s.fDoneVMMR0Init
981 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
982 {
983 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
984 {
985 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
986 if (RT_FAILURE(rc))
987 {
988 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
989 break;
990 }
991 }
992 }
993 else
994 rc = VERR_WRONG_ORDER;
995 }
996
997 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
998 return rc;
999}
1000
1001
1002/**
1003 * Indicates that we're done with the ring-0 initialization
1004 * of the VM.
1005 *
1006 * @param pVM Pointer to the shared VM structure.
1007 * @thread EMT(0)
1008 */
1009GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1010{
1011 /* Validate the VM structure, state and handle. */
1012 PGVM pGVM;
1013 PGVMM pGVMM;
1014 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1015 AssertRCReturnVoid(rc);
1016
1017 /* Set the indicator. */
1018 pGVM->gvmm.s.fDoneVMMR0Init = true;
1019}
1020
1021
1022/**
1023 * Indicates that we're doing the ring-0 termination of the VM.
1024 *
1025 * @returns true if termination hasn't been done already, false if it has.
1026 * @param pVM Pointer to the shared VM structure.
1027 * @param pGVM Pointer to the global VM structure. Optional.
1028 * @thread EMT(0)
1029 */
1030GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1031{
1032 /* Validate the VM structure, state and handle. */
1033 AssertPtrNullReturn(pGVM, false);
1034 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1035 if (!pGVM)
1036 {
1037 PGVMM pGVMM;
1038 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1039 AssertRCReturn(rc, false);
1040 }
1041
1042 /* Set the indicator. */
1043 if (pGVM->gvmm.s.fDoneVMMR0Term)
1044 return false;
1045 pGVM->gvmm.s.fDoneVMMR0Term = true;
1046 return true;
1047}
1048
1049
1050/**
1051 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1052 *
1053 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1054 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1055 * would've been nice if the caller was actually the EMT thread or that we somehow
1056 * could've associated the calling thread with the VM up front.
1057 *
1058 * @returns VBox status code.
1059 * @param pVM Where to store the pointer to the VM structure.
1060 *
1061 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1062 */
1063GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM)
1064{
1065 LogFlow(("GVMMR0DestroyVM: pVM=%p\n", pVM));
1066 PGVMM pGVMM;
1067 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1068
1069
1070 /*
1071 * Validate the VM structure, state and caller.
1072 */
1073 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1074 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1075 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), VERR_WRONG_ORDER);
1076
1077 uint32_t hGVM = pVM->hSelf;
1078 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1079 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1080
1081 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1082 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1083
1084 RTPROCESS ProcId = RTProcSelf();
1085 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1086 AssertReturn( ( pHandle->hEMT0 == hSelf
1087 && pHandle->ProcId == ProcId)
1088 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1089
1090 /*
1091 * Lookup the handle and destroy the object.
1092 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1093 * object, we take some precautions against racing callers just in case...
1094 */
1095 int rc = gvmmR0CreateDestroyLock(pGVMM);
1096 AssertRC(rc);
1097
1098 /* be careful here because we might theoretically be racing someone else cleaning up. */
1099 if ( pHandle->pVM == pVM
1100 && ( ( pHandle->hEMT0 == hSelf
1101 && pHandle->ProcId == ProcId)
1102 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1103 && VALID_PTR(pHandle->pvObj)
1104 && VALID_PTR(pHandle->pSession)
1105 && VALID_PTR(pHandle->pGVM)
1106 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1107 {
1108 void *pvObj = pHandle->pvObj;
1109 pHandle->pvObj = NULL;
1110 gvmmR0CreateDestroyUnlock(pGVMM);
1111
1112 SUPR0ObjRelease(pvObj, pHandle->pSession);
1113 }
1114 else
1115 {
1116 SUPR0Printf("GVMMR0DestroyVM: pHandle=%p:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1117 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1118 gvmmR0CreateDestroyUnlock(pGVMM);
1119 rc = VERR_INTERNAL_ERROR;
1120 }
1121
1122 return rc;
1123}
1124
1125
1126/**
1127 * Performs VM cleanup task as part of object destruction.
1128 *
1129 * @param pGVM The GVM pointer.
1130 */
1131static void gvmmR0CleanupVM(PGVM pGVM)
1132{
1133 if ( pGVM->gvmm.s.fDoneVMMR0Init
1134 && !pGVM->gvmm.s.fDoneVMMR0Term)
1135 {
1136 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1137 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1138 {
1139 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1140 VMMR0TermVM(pGVM->pVM, pGVM);
1141 }
1142 else
1143 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1144 }
1145
1146 GMMR0CleanupVM(pGVM);
1147}
1148
1149
1150/**
1151 * Handle destructor.
1152 *
1153 * @param pvGVMM The GVM instance pointer.
1154 * @param pvHandle The handle pointer.
1155 */
1156static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle)
1157{
1158 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvGVMM, pvHandle));
1159
1160 /*
1161 * Some quick, paranoid, input validation.
1162 */
1163 PGVMHANDLE pHandle = (PGVMHANDLE)pvHandle;
1164 AssertPtr(pHandle);
1165 PGVMM pGVMM = (PGVMM)pvGVMM;
1166 Assert(pGVMM == g_pGVMM);
1167 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1168 if ( !iHandle
1169 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1170 || iHandle != pHandle->iSelf)
1171 {
1172 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1173 return;
1174 }
1175
1176 int rc = gvmmR0CreateDestroyLock(pGVMM);
1177 AssertRC(rc);
1178 rc = gvmmR0UsedLock(pGVMM);
1179 AssertRC(rc);
1180
1181 /*
1182 * This is a tad slow but a doubly linked list is too much hassle.
1183 */
1184 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1185 {
1186 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1187 gvmmR0UsedUnlock(pGVMM);
1188 gvmmR0CreateDestroyUnlock(pGVMM);
1189 return;
1190 }
1191
1192 if (pGVMM->iUsedHead == iHandle)
1193 pGVMM->iUsedHead = pHandle->iNext;
1194 else
1195 {
1196 uint16_t iPrev = pGVMM->iUsedHead;
1197 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1198 while (iPrev)
1199 {
1200 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1201 {
1202 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1203 gvmmR0UsedUnlock(pGVMM);
1204 gvmmR0CreateDestroyUnlock(pGVMM);
1205 return;
1206 }
1207 if (RT_UNLIKELY(c-- <= 0))
1208 {
1209 iPrev = 0;
1210 break;
1211 }
1212
1213 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1214 break;
1215 iPrev = pGVMM->aHandles[iPrev].iNext;
1216 }
1217 if (!iPrev)
1218 {
1219 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1220 gvmmR0UsedUnlock(pGVMM);
1221 gvmmR0CreateDestroyUnlock(pGVMM);
1222 return;
1223 }
1224
1225 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1226 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1227 }
1228 pHandle->iNext = 0;
1229 pGVMM->cVMs--;
1230
1231 /*
1232 * Do the global cleanup round.
1233 */
1234 PGVM pGVM = pHandle->pGVM;
1235 if ( VALID_PTR(pGVM)
1236 && pGVM->u32Magic == GVM_MAGIC)
1237 {
1238 pGVMM->cEMTs -= pGVM->cCpus;
1239 gvmmR0UsedUnlock(pGVMM);
1240
1241 gvmmR0CleanupVM(pGVM);
1242
1243 /*
1244 * Do the GVMM cleanup - must be done last.
1245 */
1246 /* The VM and VM pages mappings/allocations. */
1247 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1248 {
1249 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1250 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1251 }
1252
1253 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1254 {
1255 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1256 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1257 }
1258
1259 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1260 {
1261 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1262 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1263 }
1264
1265 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1266 {
1267 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1268 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1269 }
1270
1271 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1272 {
1273 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1274 {
1275 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1276 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1277 }
1278 }
1279
1280 /* the GVM structure itself. */
1281 pGVM->u32Magic |= UINT32_C(0x80000000);
1282 RTMemFree(pGVM);
1283
1284 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1285 rc = gvmmR0UsedLock(pGVMM);
1286 AssertRC(rc);
1287 }
1288 /* else: GVMMR0CreateVM cleanup. */
1289
1290 /*
1291 * Free the handle.
1292 */
1293 pHandle->iNext = pGVMM->iFreeHead;
1294 pGVMM->iFreeHead = iHandle;
1295 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1296 ASMAtomicWriteNullPtr(&pHandle->pVM);
1297 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1298 ASMAtomicWriteNullPtr(&pHandle->pSession);
1299 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1300 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1301
1302 gvmmR0UsedUnlock(pGVMM);
1303 gvmmR0CreateDestroyUnlock(pGVMM);
1304 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1305}
1306
1307
1308/**
1309 * Registers the calling thread as the EMT of a Virtual CPU.
1310 *
1311 * Note that VCPU 0 is automatically registered during VM creation.
1312 *
1313 * @returns VBox status code
1314 * @param pVM The shared VM structure (the ring-0 mapping).
1315 * @param idCpu VCPU id.
1316 */
1317GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu)
1318{
1319 AssertReturn(idCpu != 0, VERR_NOT_OWNER);
1320
1321 /*
1322 * Validate the VM structure, state and handle.
1323 */
1324 PGVM pGVM;
1325 PGVMM pGVMM;
1326 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /* fTakeUsedLock */);
1327 if (RT_FAILURE(rc))
1328 return rc;
1329
1330 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1331 AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED);
1332 Assert(pGVM->cCpus == pVM->cCpus);
1333 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1334
1335 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1336
1337 return VINF_SUCCESS;
1338}
1339
1340
1341/**
1342 * Lookup a GVM structure by its handle.
1343 *
1344 * @returns The GVM pointer on success, NULL on failure.
1345 * @param hGVM The global VM handle. Asserts on bad handle.
1346 */
1347GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1348{
1349 PGVMM pGVMM;
1350 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1351
1352 /*
1353 * Validate.
1354 */
1355 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1356 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1357
1358 /*
1359 * Look it up.
1360 */
1361 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1362 AssertPtrReturn(pHandle->pVM, NULL);
1363 AssertPtrReturn(pHandle->pvObj, NULL);
1364 PGVM pGVM = pHandle->pGVM;
1365 AssertPtrReturn(pGVM, NULL);
1366 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1367
1368 return pHandle->pGVM;
1369}
1370
1371
1372/**
1373 * Lookup a GVM structure by the shared VM structure.
1374 *
1375 * The calling thread must be in the same process as the VM. All current lookups
1376 * are by threads inside the same process, so this will not be an issue.
1377 *
1378 * @returns VBox status code.
1379 * @param pVM The shared VM structure (the ring-0 mapping).
1380 * @param ppGVM Where to store the GVM pointer.
1381 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1382 * @param fTakeUsedLock Whether to take the used lock or not.
1383 * Be very careful if not taking the lock as it's possible that
1384 * the VM will disappear then.
1385 *
1386 * @remark This will not assert on an invalid pVM but try return silently.
1387 */
1388static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1389{
1390 RTPROCESS ProcId = RTProcSelf();
1391 PGVMM pGVMM;
1392 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1393
1394 /*
1395 * Validate.
1396 */
1397 if (RT_UNLIKELY( !VALID_PTR(pVM)
1398 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1399 return VERR_INVALID_POINTER;
1400 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1401 || pVM->enmVMState >= VMSTATE_TERMINATED))
1402 return VERR_INVALID_POINTER;
1403
1404 uint16_t hGVM = pVM->hSelf;
1405 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1406 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1407 return VERR_INVALID_HANDLE;
1408
1409 /*
1410 * Look it up.
1411 */
1412 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1413 PGVM pGVM;
1414 if (fTakeUsedLock)
1415 {
1416 int rc = gvmmR0UsedLock(pGVMM);
1417 AssertRCReturn(rc, rc);
1418
1419 pGVM = pHandle->pGVM;
1420 if (RT_UNLIKELY( pHandle->pVM != pVM
1421 || pHandle->ProcId != ProcId
1422 || !VALID_PTR(pHandle->pvObj)
1423 || !VALID_PTR(pGVM)
1424 || pGVM->pVM != pVM))
1425 {
1426 gvmmR0UsedUnlock(pGVMM);
1427 return VERR_INVALID_HANDLE;
1428 }
1429 }
1430 else
1431 {
1432 if (RT_UNLIKELY(pHandle->pVM != pVM))
1433 return VERR_INVALID_HANDLE;
1434 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1435 return VERR_INVALID_HANDLE;
1436 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1437 return VERR_INVALID_HANDLE;
1438
1439 pGVM = pHandle->pGVM;
1440 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1441 return VERR_INVALID_HANDLE;
1442 if (RT_UNLIKELY(pGVM->pVM != pVM))
1443 return VERR_INVALID_HANDLE;
1444 }
1445
1446 *ppGVM = pGVM;
1447 *ppGVMM = pGVMM;
1448 return VINF_SUCCESS;
1449}
1450
1451
1452/**
1453 * Lookup a GVM structure by the shared VM structure.
1454 *
1455 * @returns VBox status code.
1456 * @param pVM The shared VM structure (the ring-0 mapping).
1457 * @param ppGVM Where to store the GVM pointer.
1458 *
1459 * @remark This will not take the 'used'-lock because it doesn't do
1460 * nesting and this function will be used from under the lock.
1461 */
1462GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1463{
1464 PGVMM pGVMM;
1465 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1466}
1467
1468
1469/**
1470 * Lookup a GVM structure by the shared VM structure and ensuring that the
1471 * caller is an EMT thread.
1472 *
1473 * @returns VBox status code.
1474 * @param pVM The shared VM structure (the ring-0 mapping).
1475 * @param idCpu The Virtual CPU ID of the calling EMT.
1476 * @param ppGVM Where to store the GVM pointer.
1477 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1478 * @thread EMT
1479 *
1480 * @remark This will assert in all failure paths.
1481 */
1482static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1483{
1484 PGVMM pGVMM;
1485 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1486
1487 /*
1488 * Validate.
1489 */
1490 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1491 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1492
1493 uint16_t hGVM = pVM->hSelf;
1494 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1495 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1496
1497 /*
1498 * Look it up.
1499 */
1500 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1501 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1502 RTPROCESS ProcId = RTProcSelf();
1503 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1504 AssertPtrReturn(pHandle->pvObj, VERR_INTERNAL_ERROR);
1505
1506 PGVM pGVM = pHandle->pGVM;
1507 AssertPtrReturn(pGVM, VERR_INTERNAL_ERROR);
1508 AssertReturn(pGVM->pVM == pVM, VERR_INTERNAL_ERROR);
1509 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1510 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1511 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_INTERNAL_ERROR);
1512
1513 *ppGVM = pGVM;
1514 *ppGVMM = pGVMM;
1515 return VINF_SUCCESS;
1516}
1517
1518
1519/**
1520 * Lookup a GVM structure by the shared VM structure
1521 * and ensuring that the caller is the EMT thread.
1522 *
1523 * @returns VBox status code.
1524 * @param pVM The shared VM structure (the ring-0 mapping).
1525 * @param idCpu The Virtual CPU ID of the calling EMT.
1526 * @param ppGVM Where to store the GVM pointer.
1527 * @thread EMT
1528 */
1529GVMMR0DECL(int) GVMMR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM)
1530{
1531 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
1532 PGVMM pGVMM;
1533 return gvmmR0ByVMAndEMT(pVM, idCpu, ppGVM, &pGVMM);
1534}
1535
1536
1537/**
1538 * Lookup a VM by its global handle.
1539 *
1540 * @returns The VM handle on success, NULL on failure.
1541 * @param hGVM The global VM handle. Asserts on bad handle.
1542 */
1543GVMMR0DECL(PVM) GVMMR0GetVMByHandle(uint32_t hGVM)
1544{
1545 PGVM pGVM = GVMMR0ByHandle(hGVM);
1546 return pGVM ? pGVM->pVM : NULL;
1547}
1548
1549
1550/**
1551 * Looks up the VM belonging to the specified EMT thread.
1552 *
1553 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1554 * unnecessary kernel panics when the EMT thread hits an assertion. The
1555 * call may or not be an EMT thread.
1556 *
1557 * @returns The VM handle on success, NULL on failure.
1558 * @param hEMT The native thread handle of the EMT.
1559 * NIL_RTNATIVETHREAD means the current thread
1560 */
1561GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1562{
1563 /*
1564 * No Assertions here as we're usually called in a AssertMsgN or
1565 * RTAssert* context.
1566 */
1567 PGVMM pGVMM = g_pGVMM;
1568 if ( !VALID_PTR(pGVMM)
1569 || pGVMM->u32Magic != GVMM_MAGIC)
1570 return NULL;
1571
1572 if (hEMT == NIL_RTNATIVETHREAD)
1573 hEMT = RTThreadNativeSelf();
1574 RTPROCESS ProcId = RTProcSelf();
1575
1576 /*
1577 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1578 */
1579 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1580 {
1581 if ( pGVMM->aHandles[i].iSelf == i
1582 && pGVMM->aHandles[i].ProcId == ProcId
1583 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1584 && VALID_PTR(pGVMM->aHandles[i].pVM)
1585 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1586 {
1587 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1588 return pGVMM->aHandles[i].pVM;
1589
1590 /* This is fearly safe with the current process per VM approach. */
1591 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1592 VMCPUID const cCpus = pGVM->cCpus;
1593 if ( cCpus < 1
1594 || cCpus > VMM_MAX_CPU_COUNT)
1595 continue;
1596 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1597 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1598 return pGVMM->aHandles[i].pVM;
1599 }
1600 }
1601 return NULL;
1602}
1603
1604
1605/**
1606 * This is will wake up expired and soon-to-be expired VMs.
1607 *
1608 * @returns Number of VMs that has been woken up.
1609 * @param pGVMM Pointer to the GVMM instance data.
1610 * @param u64Now The current time.
1611 */
1612static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1613{
1614 /*
1615 * Skip this if we've got disabled because of high resolution wakeups or by
1616 * the user.
1617 */
1618 if ( !pGVMM->nsEarlyWakeUp1
1619 && !pGVMM->nsEarlyWakeUp2)
1620 return 0;
1621
1622/** @todo Rewrite this algorithm. See performance defect XYZ. */
1623
1624 /*
1625 * A cheap optimization to stop wasting so much time here on big setups.
1626 */
1627 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1628 if ( pGVMM->cHaltedEMTs == 0
1629 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1630 return 0;
1631
1632 /*
1633 * The first pass will wake up VMs which have actually expired
1634 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1635 */
1636 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1637 uint64_t u64Min = UINT64_MAX;
1638 unsigned cWoken = 0;
1639 unsigned cHalted = 0;
1640 unsigned cTodo2nd = 0;
1641 unsigned cTodo3rd = 0;
1642 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1643 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1644 i = pGVMM->aHandles[i].iNext)
1645 {
1646 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1647 if ( VALID_PTR(pCurGVM)
1648 && pCurGVM->u32Magic == GVM_MAGIC)
1649 {
1650 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1651 {
1652 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1653 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1654 if (u64)
1655 {
1656 if (u64 <= u64Now)
1657 {
1658 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1659 {
1660 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1661 AssertRC(rc);
1662 cWoken++;
1663 }
1664 }
1665 else
1666 {
1667 cHalted++;
1668 if (u64 <= uNsEarlyWakeUp1)
1669 cTodo2nd++;
1670 else if (u64 <= uNsEarlyWakeUp2)
1671 cTodo3rd++;
1672 else if (u64 < u64Min)
1673 u64 = u64Min;
1674 }
1675 }
1676 }
1677 }
1678 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1679 }
1680
1681 if (cTodo2nd)
1682 {
1683 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1684 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1685 i = pGVMM->aHandles[i].iNext)
1686 {
1687 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1688 if ( VALID_PTR(pCurGVM)
1689 && pCurGVM->u32Magic == GVM_MAGIC)
1690 {
1691 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1692 {
1693 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1694 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1695 if ( u64
1696 && u64 <= uNsEarlyWakeUp1)
1697 {
1698 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1699 {
1700 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1701 AssertRC(rc);
1702 cWoken++;
1703 }
1704 }
1705 }
1706 }
1707 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1708 }
1709 }
1710
1711 if (cTodo3rd)
1712 {
1713 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1714 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1715 i = pGVMM->aHandles[i].iNext)
1716 {
1717 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1718 if ( VALID_PTR(pCurGVM)
1719 && pCurGVM->u32Magic == GVM_MAGIC)
1720 {
1721 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1722 {
1723 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1724 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1725 if ( u64
1726 && u64 <= uNsEarlyWakeUp2)
1727 {
1728 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1729 {
1730 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1731 AssertRC(rc);
1732 cWoken++;
1733 }
1734 }
1735 }
1736 }
1737 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1738 }
1739 }
1740
1741 /*
1742 * Set the minimum value.
1743 */
1744 pGVMM->uNsNextEmtWakeup = u64Min;
1745
1746 return cWoken;
1747}
1748
1749
1750/**
1751 * Halt the EMT thread.
1752 *
1753 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
1754 * VERR_INTERRUPTED if a signal was scheduled for the thread.
1755 * @param pVM Pointer to the shared VM structure.
1756 * @param idCpu The Virtual CPU ID of the calling EMT.
1757 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
1758 * @thread EMT(idCpu).
1759 */
1760GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
1761{
1762 LogFlow(("GVMMR0SchedHalt: pVM=%p\n", pVM));
1763
1764 /*
1765 * Validate the VM structure, state and handle.
1766 */
1767 PGVM pGVM;
1768 PGVMM pGVMM;
1769 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
1770 if (RT_FAILURE(rc))
1771 return rc;
1772 pGVM->gvmm.s.StatsSched.cHaltCalls++;
1773
1774 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
1775 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
1776
1777 /*
1778 * Take the UsedList semaphore, get the current time
1779 * and check if anyone needs waking up.
1780 * Interrupts must NOT be disabled at this point because we ask for GIP time!
1781 */
1782 rc = gvmmR0UsedLock(pGVMM);
1783 AssertRC(rc);
1784
1785 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
1786
1787 /* GIP hack: We might are frequently sleeping for short intervals where the
1788 difference between GIP and system time matters on systems with high resolution
1789 system time. So, convert the input from GIP to System time in that case. */
1790 Assert(ASMGetFlags() & X86_EFL_IF);
1791 const uint64_t u64NowSys = RTTimeSystemNanoTS();
1792 const uint64_t u64NowGip = RTTimeNanoTS();
1793 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
1794
1795 /*
1796 * Go to sleep if we must...
1797 * Cap the sleep time to 1 second to be on the safe side.
1798 */
1799 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
1800 if ( u64NowGip < u64ExpireGipTime
1801 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
1802 ? pGVMM->nsMinSleepCompany
1803 : pGVMM->nsMinSleepAlone))
1804 {
1805 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
1806 if (cNsInterval > RT_NS_1SEC)
1807 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
1808 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
1809 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
1810 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
1811 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
1812 gvmmR0UsedUnlock(pGVMM);
1813
1814 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
1815 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
1816 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
1817
1818 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
1819 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
1820
1821 /* Reset the semaphore to try prevent a few false wake-ups. */
1822 if (rc == VINF_SUCCESS)
1823 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1824 else if (rc == VERR_TIMEOUT)
1825 {
1826 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
1827 rc = VINF_SUCCESS;
1828 }
1829 }
1830 else
1831 {
1832 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
1833 gvmmR0UsedUnlock(pGVMM);
1834 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1835 }
1836
1837 return rc;
1838}
1839
1840
1841/**
1842 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
1843 * the a sleeping EMT.
1844 *
1845 * @retval VINF_SUCCESS if successfully woken up.
1846 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1847 *
1848 * @param pGVM The global (ring-0) VM structure.
1849 * @param pGVCpu The global (ring-0) VCPU structure.
1850 */
1851DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
1852{
1853 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
1854
1855 /*
1856 * Signal the semaphore regardless of whether it's current blocked on it.
1857 *
1858 * The reason for this is that there is absolutely no way we can be 100%
1859 * certain that it isn't *about* go to go to sleep on it and just got
1860 * delayed a bit en route. So, we will always signal the semaphore when
1861 * the it is flagged as halted in the VMM.
1862 */
1863/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
1864 int rc;
1865 if (pGVCpu->gvmm.s.u64HaltExpire)
1866 {
1867 rc = VINF_SUCCESS;
1868 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
1869 }
1870 else
1871 {
1872 rc = VINF_GVM_NOT_BLOCKED;
1873 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
1874 }
1875
1876 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
1877 AssertRC(rc2);
1878
1879 return rc;
1880}
1881
1882
1883/**
1884 * Wakes up the halted EMT thread so it can service a pending request.
1885 *
1886 * @returns VBox status code.
1887 * @retval VINF_SUCCESS if successfully woken up.
1888 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1889 *
1890 * @param pVM Pointer to the shared VM structure.
1891 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1892 * @param fTakeUsedLock Take the used lock or not
1893 * @thread Any but EMT.
1894 */
1895GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1896{
1897 /*
1898 * Validate input and take the UsedLock.
1899 */
1900 PGVM pGVM;
1901 PGVMM pGVMM;
1902 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1903 if (RT_SUCCESS(rc))
1904 {
1905 if (idCpu < pGVM->cCpus)
1906 {
1907 /*
1908 * Do the actual job.
1909 */
1910 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
1911
1912 if (fTakeUsedLock)
1913 {
1914 /*
1915 * While we're here, do a round of scheduling.
1916 */
1917 Assert(ASMGetFlags() & X86_EFL_IF);
1918 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
1919 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
1920 }
1921 }
1922 else
1923 rc = VERR_INVALID_CPU_ID;
1924
1925 if (fTakeUsedLock)
1926 {
1927 int rc2 = gvmmR0UsedUnlock(pGVMM);
1928 AssertRC(rc2);
1929 }
1930 }
1931
1932 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
1933 return rc;
1934}
1935
1936
1937/**
1938 * Wakes up the halted EMT thread so it can service a pending request.
1939 *
1940 * @returns VBox status code.
1941 * @retval VINF_SUCCESS if successfully woken up.
1942 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1943 *
1944 * @param pVM Pointer to the shared VM structure.
1945 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1946 * @thread Any but EMT.
1947 */
1948GVMMR0DECL(int) GVMMR0SchedWakeUp(PVM pVM, VMCPUID idCpu)
1949{
1950 return GVMMR0SchedWakeUpEx(pVM, idCpu, true /* fTakeUsedLock */);
1951}
1952
1953/**
1954 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
1955 * the Virtual CPU if it's still busy executing guest code.
1956 *
1957 * @returns VBox status code.
1958 * @retval VINF_SUCCESS if poked successfully.
1959 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1960 *
1961 * @param pGVM The global (ring-0) VM structure.
1962 * @param pVCpu The Virtual CPU handle.
1963 */
1964DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
1965{
1966 pGVM->gvmm.s.StatsSched.cPokeCalls++;
1967
1968 RTCPUID idHostCpu = pVCpu->idHostCpu;
1969 if ( idHostCpu == NIL_RTCPUID
1970 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
1971 {
1972 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
1973 return VINF_GVM_NOT_BUSY_IN_GC;
1974 }
1975
1976 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
1977 RTMpPokeCpu(idHostCpu);
1978 return VINF_SUCCESS;
1979}
1980
1981/**
1982 * Pokes an EMT if it's still busy running guest code.
1983 *
1984 * @returns VBox status code.
1985 * @retval VINF_SUCCESS if poked successfully.
1986 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1987 *
1988 * @param pVM Pointer to the shared VM structure.
1989 * @param idCpu The ID of the virtual CPU to poke.
1990 * @param fTakeUsedLock Take the used lock or not
1991 */
1992GVMMR0DECL(int) GVMMR0SchedPokeEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1993{
1994 /*
1995 * Validate input and take the UsedLock.
1996 */
1997 PGVM pGVM;
1998 PGVMM pGVMM;
1999 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
2000 if (RT_SUCCESS(rc))
2001 {
2002 if (idCpu < pGVM->cCpus)
2003 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2004 else
2005 rc = VERR_INVALID_CPU_ID;
2006
2007 if (fTakeUsedLock)
2008 {
2009 int rc2 = gvmmR0UsedUnlock(pGVMM);
2010 AssertRC(rc2);
2011 }
2012 }
2013
2014 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2015 return rc;
2016}
2017
2018
2019/**
2020 * Pokes an EMT if it's still busy running guest code.
2021 *
2022 * @returns VBox status code.
2023 * @retval VINF_SUCCESS if poked successfully.
2024 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2025 *
2026 * @param pVM Pointer to the shared VM structure.
2027 * @param idCpu The ID of the virtual CPU to poke.
2028 */
2029GVMMR0DECL(int) GVMMR0SchedPoke(PVM pVM, VMCPUID idCpu)
2030{
2031 return GVMMR0SchedPokeEx(pVM, idCpu, true /* fTakeUsedLock */);
2032}
2033
2034
2035/**
2036 * Wakes up a set of halted EMT threads so they can service pending request.
2037 *
2038 * @returns VBox status code, no informational stuff.
2039 *
2040 * @param pVM Pointer to the shared VM structure.
2041 * @param pSleepSet The set of sleepers to wake up.
2042 * @param pPokeSet The set of CPUs to poke.
2043 */
2044GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2045{
2046 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2047 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2048 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2049
2050 /*
2051 * Validate input and take the UsedLock.
2052 */
2053 PGVM pGVM;
2054 PGVMM pGVMM;
2055 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /* fTakeUsedLock */);
2056 if (RT_SUCCESS(rc))
2057 {
2058 rc = VINF_SUCCESS;
2059 VMCPUID idCpu = pGVM->cCpus;
2060 while (idCpu-- > 0)
2061 {
2062 /* Don't try poke or wake up ourselves. */
2063 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2064 continue;
2065
2066 /* just ignore errors for now. */
2067 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2068 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2069 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2070 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2071 }
2072
2073 int rc2 = gvmmR0UsedUnlock(pGVMM);
2074 AssertRC(rc2);
2075 }
2076
2077 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2078 return rc;
2079}
2080
2081
2082/**
2083 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2084 *
2085 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2086 * @param pVM Pointer to the shared VM structure.
2087 * @param pReq The request packet.
2088 */
2089GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2090{
2091 /*
2092 * Validate input and pass it on.
2093 */
2094 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2095 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2096
2097 return GVMMR0SchedWakeUpAndPokeCpus(pVM, &pReq->SleepSet, &pReq->PokeSet);
2098}
2099
2100
2101
2102/**
2103 * Poll the schedule to see if someone else should get a chance to run.
2104 *
2105 * This is a bit hackish and will not work too well if the machine is
2106 * under heavy load from non-VM processes.
2107 *
2108 * @returns VINF_SUCCESS if not yielded.
2109 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2110 * @param pVM Pointer to the shared VM structure.
2111 * @param idCpu The Virtual CPU ID of the calling EMT.
2112 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2113 * @param fYield Whether to yield or not.
2114 * This is for when we're spinning in the halt loop.
2115 * @thread EMT(idCpu).
2116 */
2117GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield)
2118{
2119 /*
2120 * Validate input.
2121 */
2122 PGVM pGVM;
2123 PGVMM pGVMM;
2124 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
2125 if (RT_SUCCESS(rc))
2126 {
2127 rc = gvmmR0UsedLock(pGVMM);
2128 AssertRC(rc);
2129 pGVM->gvmm.s.StatsSched.cPollCalls++;
2130
2131 Assert(ASMGetFlags() & X86_EFL_IF);
2132 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2133
2134 if (!fYield)
2135 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2136 else
2137 {
2138 /** @todo implement this... */
2139 rc = VERR_NOT_IMPLEMENTED;
2140 }
2141
2142 gvmmR0UsedUnlock(pGVMM);
2143 }
2144
2145 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2146 return rc;
2147}
2148
2149
#ifdef GVMM_SCHED_WITH_PPT
/**
 * Timer callback for the periodic preemption timer.
 *
 * Each tick advances the historization counter; when an interval completes,
 * the max desired frequency seen during it is pushed into the history ring
 * and the timer is reprogrammed (or stopped) to match the highest frequency
 * still present in the history window.
 *
 * @param   pTimer      The timer handle.
 * @param   pvUser      Pointer to the per cpu structure.
 * @param   iTick       The current tick.
 */
static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
    NOREF(pTimer); NOREF(iTick);

    /*
     * Termination check: the magic is cleared when the host CPU structure is
     * being torn down, so bail out without touching anything else.
     */
    if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
        return;

    /*
     * Do the house keeping.
     */
    RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
    RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);

    if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
    {
        /*
         * Historicize the max frequency: push the interval's peak into the
         * ring buffer and reset the per-interval accumulators.
         */
        uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
        pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
        pCpu->Ppt.iTickHistorization = 0;
        pCpu->Ppt.uDesiredHz = 0;

        /*
         * Check whether the current timer frequency still matches the highest
         * frequency in the history window; reprogram or stop it if not.
         */
        uint32_t uHistMaxHz = 0;
        for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
            if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
                uHistMaxHz = pCpu->Ppt.aHzHistory[i];
        if (uHistMaxHz == pCpu->Ppt.uTimerHz)
            RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
        else if (uHistMaxHz)
        {
            /*
             * Reprogram it.  The spinlock is dropped before the timer API call
             * since RTTimerChangeInterval must not be made under the lock.
             */
            pCpu->Ppt.cChanges++;
            pCpu->Ppt.iTickHistorization = 0;
            pCpu->Ppt.uTimerHz = uHistMaxHz;
            uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
            pCpu->Ppt.cNsInterval = cNsInterval;
            if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
                /* Round to nearest: how many ticks make up one historization interval. */
                pCpu->Ppt.cTicksHistoriziationInterval = (  GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                                          + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
                                                       / cNsInterval;
            else
                pCpu->Ppt.cTicksHistoriziationInterval = 1;
            RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);

            /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
            RTTimerChangeInterval(pTimer, cNsInterval);
        }
        else
        {
            /*
             * Stop it: nobody has asked for preemption ticks during the whole
             * history window.
             */
            pCpu->Ppt.fStarted    = false;
            pCpu->Ppt.uTimerHz    = 0;
            pCpu->Ppt.cNsInterval = 0;
            RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);

            /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
            RTTimerStop(pTimer);
        }
    }
    else
        RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
}
#endif /* GVMM_SCHED_WITH_PPT */
2233
2234
2235/**
2236 * Updates the periodic preemption timer for the calling CPU.
2237 *
2238 * The caller must have disabled preemption!
2239 * The caller must check that the host can do high resolution timers.
2240 *
2241 * @param pVM The VM handle.
2242 * @param idHostCpu The current host CPU id.
2243 * @param uHz The desired frequency.
2244 */
2245GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2246{
2247 NOREF(pVM);
2248#ifdef GVMM_SCHED_WITH_PPT
2249 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2250 Assert(RTTimerCanDoHighResolution());
2251
2252 /*
2253 * Resolve the per CPU data.
2254 */
2255 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2256 PGVMM pGVMM = g_pGVMM;
2257 if ( !VALID_PTR(pGVMM)
2258 || pGVMM->u32Magic != GVMM_MAGIC)
2259 return;
2260 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2261 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2262 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2263 && pCpu->idCpu == idHostCpu,
2264 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2265
2266 /*
2267 * Check whether we need to do anything about the timer.
2268 * We have to be a little bit careful since we might be race the timer
2269 * callback here.
2270 */
2271 if (uHz > 16384)
2272 uHz = 16384; /** @todo add a query method for this! */
2273 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2274 && uHz >= pCpu->Ppt.uMinHz
2275 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2276 {
2277 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2278 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2279
2280 pCpu->Ppt.uDesiredHz = uHz;
2281 uint32_t cNsInterval = 0;
2282 if (!pCpu->Ppt.fStarted)
2283 {
2284 pCpu->Ppt.cStarts++;
2285 pCpu->Ppt.fStarted = true;
2286 pCpu->Ppt.fStarting = true;
2287 pCpu->Ppt.iTickHistorization = 0;
2288 pCpu->Ppt.uTimerHz = uHz;
2289 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2290 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2291 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2292 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2293 / cNsInterval;
2294 else
2295 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2296 }
2297
2298 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2299
2300 if (cNsInterval)
2301 {
2302 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2303 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2304 AssertRC(rc);
2305
2306 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2307 if (RT_FAILURE(rc))
2308 pCpu->Ppt.fStarted = false;
2309 pCpu->Ppt.fStarting = false;
2310 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2311 }
2312 }
2313#else /* !GVMM_SCHED_WITH_PPT */
2314 NOREF(idHostCpu); NOREF(uHz);
2315#endif /* !GVMM_SCHED_WITH_PPT */
2316}
2317
2318
/**
 * Retrieves the GVMM statistics visible to the caller.
 *
 * @returns VBox status code.
 *
 * @param   pStats      Where to put the statistics.
 * @param   pSession    The current session.
 * @param   pVM         The VM to obtain statistics for. Optional.
 */
GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
{
    LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));

    /*
     * Validate input.
     */
    AssertPtrReturn(pSession, VERR_INVALID_POINTER);
    AssertPtrReturn(pStats, VERR_INVALID_POINTER);
    pStats->cVMs = 0; /* (crash before taking the sem...) */

    /*
     * Take the lock and get the VM statistics.
     * With a VM the per-VM scheduler stats are copied out; without one the
     * SchedVM member is zeroed and only the global sums below are produced.
     */
    PGVMM pGVMM;
    if (pVM)
    {
        PGVM pGVM;
        int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
        if (RT_FAILURE(rc))
            return rc;
        pStats->SchedVM = pGVM->gvmm.s.StatsSched;
    }
    else
    {
        GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
        memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));

        int rc = gvmmR0UsedLock(pGVMM);
        AssertRCReturn(rc, rc);
    }

    /*
     * Enumerate the VMs and add the ones visible to the statistics.
     * Only VMs the session may access (SUPR0ObjVerifyAccess) contribute.
     */
    pStats->cVMs = 0;
    pStats->cEMTs = 0;
    memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));

    for (unsigned i = pGVMM->iUsedHead;
         i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
         i = pGVMM->aHandles[i].iNext)
    {
        PGVM pGVM = pGVMM->aHandles[i].pGVM;
        void *pvObj = pGVMM->aHandles[i].pvObj;
        if (    VALID_PTR(pvObj)
            &&  VALID_PTR(pGVM)
            &&  pGVM->u32Magic == GVM_MAGIC
            &&  RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
        {
            pStats->cVMs++;
            pStats->cEMTs += pGVM->cCpus;

            /* Accumulate every scheduler counter into the grand total. */
            pStats->SchedSum.cHaltCalls        += pGVM->gvmm.s.StatsSched.cHaltCalls;
            pStats->SchedSum.cHaltBlocking     += pGVM->gvmm.s.StatsSched.cHaltBlocking;
            pStats->SchedSum.cHaltTimeouts     += pGVM->gvmm.s.StatsSched.cHaltTimeouts;
            pStats->SchedSum.cHaltNotBlocking  += pGVM->gvmm.s.StatsSched.cHaltNotBlocking;
            pStats->SchedSum.cHaltWakeUps      += pGVM->gvmm.s.StatsSched.cHaltWakeUps;

            pStats->SchedSum.cWakeUpCalls      += pGVM->gvmm.s.StatsSched.cWakeUpCalls;
            pStats->SchedSum.cWakeUpNotHalted  += pGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
            pStats->SchedSum.cWakeUpWakeUps    += pGVM->gvmm.s.StatsSched.cWakeUpWakeUps;

            pStats->SchedSum.cPokeCalls        += pGVM->gvmm.s.StatsSched.cPokeCalls;
            pStats->SchedSum.cPokeNotBusy      += pGVM->gvmm.s.StatsSched.cPokeNotBusy;

            pStats->SchedSum.cPollCalls        += pGVM->gvmm.s.StatsSched.cPollCalls;
            pStats->SchedSum.cPollHalts        += pGVM->gvmm.s.StatsSched.cPollHalts;
            pStats->SchedSum.cPollWakeUps      += pGVM->gvmm.s.StatsSched.cPollWakeUps;
        }
    }

    /*
     * Copy out the per host CPU statistics.
     * Unused slots (idCpu == NIL_RTCPUID) are skipped, so the output array
     * is densely packed; cHostCpus reflects the number actually copied.
     */
    uint32_t iDstCpu = 0;
    uint32_t cSrcCpus = pGVMM->cHostCpus;
    for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
    {
        if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
        {
            pStats->aHostCpus[iDstCpu].idCpu      = pGVMM->aHostCpus[iSrcCpu].idCpu;
            pStats->aHostCpus[iDstCpu].idxCpuSet  = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
#ifdef GVMM_SCHED_WITH_PPT
            pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
            pStats->aHostCpus[iDstCpu].uTimerHz   = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
            pStats->aHostCpus[iDstCpu].cChanges   = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
            pStats->aHostCpus[iDstCpu].cStarts    = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
#else
            pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
            pStats->aHostCpus[iDstCpu].uTimerHz   = 0;
            pStats->aHostCpus[iDstCpu].cChanges   = 0;
            pStats->aHostCpus[iDstCpu].cStarts    = 0;
#endif
            iDstCpu++;
            if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
                break;
        }
    }
    pStats->cHostCpus = iDstCpu;

    gvmmR0UsedUnlock(pGVMM);

    return VINF_SUCCESS;
}
2433
2434
2435/**
2436 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2437 *
2438 * @returns see GVMMR0QueryStatistics.
2439 * @param pVM Pointer to the shared VM structure. Optional.
2440 * @param pReq The request packet.
2441 */
2442GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq)
2443{
2444 /*
2445 * Validate input and pass it on.
2446 */
2447 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2448 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2449
2450 return GVMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pVM);
2451}
2452
2453
/**
 * Resets the specified GVMM statistics.
 *
 * @returns VBox status code.
 *
 * @param   pStats      Which statistics to reset, that is, non-zero fields indicates which to reset.
 * @param   pSession    The current session.
 * @param   pVM         The VM to reset statistics for. Optional.
 */
GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
{
    LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));

    /*
     * Validate input.
     */
    AssertPtrReturn(pSession, VERR_INVALID_POINTER);
    AssertPtrReturn(pStats, VERR_INVALID_POINTER);

    /*
     * Take the lock and get the VM statistics.
     * With a VM, the per-VM counters flagged in pStats->SchedVM are zeroed
     * right here; otherwise we just acquire the lock for the sum pass below.
     */
    PGVMM pGVMM;
    if (pVM)
    {
        PGVM pGVM;
        int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
        if (RT_FAILURE(rc))
            return rc;
        /* Zero the per-VM counter iff the matching request field is non-zero. */
# define MAYBE_RESET_FIELD(field) \
        do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
        MAYBE_RESET_FIELD(cHaltCalls);
        MAYBE_RESET_FIELD(cHaltBlocking);
        MAYBE_RESET_FIELD(cHaltTimeouts);
        MAYBE_RESET_FIELD(cHaltNotBlocking);
        MAYBE_RESET_FIELD(cHaltWakeUps);
        MAYBE_RESET_FIELD(cWakeUpCalls);
        MAYBE_RESET_FIELD(cWakeUpNotHalted);
        MAYBE_RESET_FIELD(cWakeUpWakeUps);
        MAYBE_RESET_FIELD(cPokeCalls);
        MAYBE_RESET_FIELD(cPokeNotBusy);
        MAYBE_RESET_FIELD(cPollCalls);
        MAYBE_RESET_FIELD(cPollHalts);
        MAYBE_RESET_FIELD(cPollWakeUps);
# undef MAYBE_RESET_FIELD
    }
    else
    {
        GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);

        int rc = gvmmR0UsedLock(pGVMM);
        AssertRCReturn(rc, rc);
    }

    /*
     * Enumerate the VMs and reset the requested sum statistics on each one
     * visible to the session.  (ASMMemIsAll8 returns non-NULL when at least
     * one byte of SchedSum differs from zero, i.e. a reset was requested.)
     */
    if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0))
    {
        for (unsigned i = pGVMM->iUsedHead;
             i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
             i = pGVMM->aHandles[i].iNext)
        {
            PGVM pGVM = pGVMM->aHandles[i].pGVM;
            void *pvObj = pGVMM->aHandles[i].pvObj;
            if (    VALID_PTR(pvObj)
                &&  VALID_PTR(pGVM)
                &&  pGVM->u32Magic == GVM_MAGIC
                &&  RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
            {
# define MAYBE_RESET_FIELD(field) \
                do { if (pStats->SchedSum. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
                MAYBE_RESET_FIELD(cHaltCalls);
                MAYBE_RESET_FIELD(cHaltBlocking);
                MAYBE_RESET_FIELD(cHaltTimeouts);
                MAYBE_RESET_FIELD(cHaltNotBlocking);
                MAYBE_RESET_FIELD(cHaltWakeUps);
                MAYBE_RESET_FIELD(cWakeUpCalls);
                MAYBE_RESET_FIELD(cWakeUpNotHalted);
                MAYBE_RESET_FIELD(cWakeUpWakeUps);
                MAYBE_RESET_FIELD(cPokeCalls);
                MAYBE_RESET_FIELD(cPokeNotBusy);
                MAYBE_RESET_FIELD(cPollCalls);
                MAYBE_RESET_FIELD(cPollHalts);
                MAYBE_RESET_FIELD(cPollWakeUps);
# undef MAYBE_RESET_FIELD
            }
        }
    }

    gvmmR0UsedUnlock(pGVMM);

    return VINF_SUCCESS;
}
2548
2549
2550/**
2551 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2552 *
2553 * @returns see GVMMR0ResetStatistics.
2554 * @param pVM Pointer to the shared VM structure. Optional.
2555 * @param pReq The request packet.
2556 */
2557GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PVM pVM, PGVMMRESETSTATISTICSSREQ pReq)
2558{
2559 /*
2560 * Validate input and pass it on.
2561 */
2562 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2563 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2564
2565 return GVMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pVM);
2566}
2567
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette