VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 49258

Last change on this file since 49258 was 48395, checked in by vboxsync, 11 years ago

VMM/GVMMR0: Todo question.

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 87.3 KB
Line 
1/* $Id: GVMMR0.cpp 48395 2013-09-09 16:37:41Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2012 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * TMCalcHostTimerFrequency() takes the max TMTimerSetFrequencyHint() value
42 * and adjusts it by the current catch-up percent, warp drive percent and some
43 * fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*******************************************************************************
50* Header Files *
51*******************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/gvm.h>
57#include <VBox/vmm/vm.h>
58#include <VBox/vmm/vmcpuset.h>
59#include <VBox/vmm/vmm.h>
60#include <VBox/param.h>
61#include <VBox/err.h>
62
63#include <iprt/asm.h>
64#include <iprt/asm-amd64-x86.h>
65#include <iprt/mem.h>
66#include <iprt/semaphore.h>
67#include <iprt/time.h>
68#include <VBox/log.h>
69#include <iprt/thread.h>
70#include <iprt/process.h>
71#include <iprt/param.h>
72#include <iprt/string.h>
73#include <iprt/assert.h>
74#include <iprt/mem.h>
75#include <iprt/memobj.h>
76#include <iprt/mp.h>
77#include <iprt/cpuset.h>
78#include <iprt/spinlock.h>
79#include <iprt/timer.h>
80
81#include "dtrace/VBoxVMM.h"
82
83
84/*******************************************************************************
85* Defined Constants And Macros *
86*******************************************************************************/
#if defined(RT_OS_LINUX) || defined(DOXYGEN_RUNNING)
/** When defined, the periodic preemption timer (PPT) code is compiled in.
 * Only set for Linux hosts and for documentation runs. */
# define GVMM_SCHED_WITH_PPT
#endif
91
92
93/*******************************************************************************
94* Structures and Typedefs *
95*******************************************************************************/
96
97/**
98 * Global VM handle.
99 */
100typedef struct GVMHANDLE
101{
102 /** The index of the next handle in the list (free or used). (0 is nil.) */
103 uint16_t volatile iNext;
104 /** Our own index / handle value. */
105 uint16_t iSelf;
106 /** The process ID of the handle owner.
107 * This is used for access checks. */
108 RTPROCESS ProcId;
109 /** The pointer to the ring-0 only (aka global) VM structure. */
110 PGVM pGVM;
111 /** The ring-0 mapping of the shared VM instance data. */
112 PVM pVM;
113 /** The virtual machine object. */
114 void *pvObj;
115 /** The session this VM is associated with. */
116 PSUPDRVSESSION pSession;
117 /** The ring-0 handle of the EMT0 thread.
118 * This is used for ownership checks as well as looking up a VM handle by thread
119 * at times like assertions. */
120 RTNATIVETHREAD hEMT0;
121} GVMHANDLE;
122/** Pointer to a global VM handle. */
123typedef GVMHANDLE *PGVMHANDLE;
124
125/** Number of GVM handles (including the NIL handle). */
126#if HC_ARCH_BITS == 64
127# define GVMM_MAX_HANDLES 8192
128#else
129# define GVMM_MAX_HANDLES 128
130#endif
131
132/**
133 * Per host CPU GVMM data.
134 */
135typedef struct GVMMHOSTCPU
136{
137 /** Magic number (GVMMHOSTCPU_MAGIC). */
138 uint32_t volatile u32Magic;
139 /** The CPU ID. */
140 RTCPUID idCpu;
141 /** The CPU set index. */
142 uint32_t idxCpuSet;
143
144#ifdef GVMM_SCHED_WITH_PPT
145 /** Periodic preemption timer data. */
146 struct
147 {
148 /** The handle to the periodic preemption timer. */
149 PRTTIMER pTimer;
150 /** Spinlock protecting the data below. */
151 RTSPINLOCK hSpinlock;
152 /** The smalles Hz that we need to care about. (static) */
153 uint32_t uMinHz;
154 /** The number of ticks between each historization. */
155 uint32_t cTicksHistoriziationInterval;
156 /** The current historization tick (counting up to
157 * cTicksHistoriziationInterval and then resetting). */
158 uint32_t iTickHistorization;
159 /** The current timer interval. This is set to 0 when inactive. */
160 uint32_t cNsInterval;
161 /** The current timer frequency. This is set to 0 when inactive. */
162 uint32_t uTimerHz;
163 /** The current max frequency reported by the EMTs.
164 * This gets historicize and reset by the timer callback. This is
165 * read without holding the spinlock, so needs atomic updating. */
166 uint32_t volatile uDesiredHz;
167 /** Whether the timer was started or not. */
168 bool volatile fStarted;
169 /** Set if we're starting timer. */
170 bool volatile fStarting;
171 /** The index of the next history entry (mod it). */
172 uint32_t iHzHistory;
173 /** Historicized uDesiredHz values. The array wraps around, new entries
174 * are added at iHzHistory. This is updated approximately every
175 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
176 uint32_t aHzHistory[8];
177 /** Statistics counter for recording the number of interval changes. */
178 uint32_t cChanges;
179 /** Statistics counter for recording the number of timer starts. */
180 uint32_t cStarts;
181 } Ppt;
182#endif /* GVMM_SCHED_WITH_PPT */
183
184} GVMMHOSTCPU;
185/** Pointer to the per host CPU GVMM data. */
186typedef GVMMHOSTCPU *PGVMMHOSTCPU;
187/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
188#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
189/** The interval on history entry should cover (approximately) give in
190 * nanoseconds. */
191#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
192
193
194/**
195 * The GVMM instance data.
196 */
197typedef struct GVMM
198{
199 /** Eyecatcher / magic. */
200 uint32_t u32Magic;
201 /** The index of the head of the free handle chain. (0 is nil.) */
202 uint16_t volatile iFreeHead;
203 /** The index of the head of the active handle chain. (0 is nil.) */
204 uint16_t volatile iUsedHead;
205 /** The number of VMs. */
206 uint16_t volatile cVMs;
207 /** Alignment padding. */
208 uint16_t u16Reserved;
209 /** The number of EMTs. */
210 uint32_t volatile cEMTs;
211 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
212 uint32_t volatile cHaltedEMTs;
213 /** Alignment padding. */
214 uint32_t u32Alignment;
215 /** When the next halted or sleeping EMT will wake up.
216 * This is set to 0 when it needs recalculating and to UINT64_MAX when
217 * there are no halted or sleeping EMTs in the GVMM. */
218 uint64_t uNsNextEmtWakeup;
219 /** The lock used to serialize VM creation, destruction and associated events that
220 * isn't performance critical. Owners may acquire the list lock. */
221 RTSEMFASTMUTEX CreateDestroyLock;
222 /** The lock used to serialize used list updates and accesses.
223 * This indirectly includes scheduling since the scheduler will have to walk the
224 * used list to examin running VMs. Owners may not acquire any other locks. */
225 RTSEMFASTMUTEX UsedLock;
226 /** The handle array.
227 * The size of this array defines the maximum number of currently running VMs.
228 * The first entry is unused as it represents the NIL handle. */
229 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
230
231 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
232 * The number of EMTs that means we no longer consider ourselves alone on a
233 * CPU/Core.
234 */
235 uint32_t cEMTsMeansCompany;
236 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
237 * The minimum sleep time for when we're alone, in nano seconds.
238 */
239 uint32_t nsMinSleepAlone;
240 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
241 * The minimum sleep time for when we've got company, in nano seconds.
242 */
243 uint32_t nsMinSleepCompany;
244 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
245 * The limit for the first round of early wakeups, given in nano seconds.
246 */
247 uint32_t nsEarlyWakeUp1;
248 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
249 * The limit for the second round of early wakeups, given in nano seconds.
250 */
251 uint32_t nsEarlyWakeUp2;
252
253 /** The number of entries in the host CPU array (aHostCpus). */
254 uint32_t cHostCpus;
255 /** Per host CPU data (variable length). */
256 GVMMHOSTCPU aHostCpus[1];
257} GVMM;
258/** Pointer to the GVMM instance data. */
259typedef GVMM *PGVMM;
260
261/** The GVMM::u32Magic value (Charlie Haden). */
262#define GVMM_MAGIC UINT32_C(0x19370806)
263
264
265
266/*******************************************************************************
267* Global Variables *
268*******************************************************************************/
269/** Pointer to the GVMM instance data.
270 * (Just my general dislike for global variables.) */
271static PGVMM g_pGVMM = NULL;
272
/** Fetches g_pGVMM into the given variable and validates it.
 * If the pointer is bad or the magic doesn't match, the invoking function
 * returns with the specified status.
 *
 * @param   pGVMM   The name of the pGVMM variable to assign.
 * @param   rc      The return value on failure.  Use VERR_GVMM_INSTANCE for
 *                  VBox status codes.
 */
#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
    do { \
        (pGVMM) = g_pGVMM; \
        AssertPtrReturn((pGVMM), (rc)); \
        AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
    } while (0)
286
/** Fetches g_pGVMM into the given variable and validates it, variant for
 * functions returning void.
 * If the pointer is bad or the magic doesn't match, the invoking function
 * simply returns.
 *
 * @param   pGVMM   The name of the pGVMM variable to assign.
 */
#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
    do { \
        (pGVMM) = g_pGVMM; \
        AssertPtrReturnVoid((pGVMM)); \
        AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
    } while (0)
298
299
300/*******************************************************************************
301* Internal Functions *
302*******************************************************************************/
303static void gvmmR0InitPerVMData(PGVM pGVM);
304static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
305static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
306static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
307#ifdef GVMM_SCHED_WITH_PPT
308static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
309#endif
310
311
312/**
313 * Initializes the GVMM.
314 *
315 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
316 *
317 * @returns VBox status code.
318 */
319GVMMR0DECL(int) GVMMR0Init(void)
320{
321 LogFlow(("GVMMR0Init:\n"));
322
323 /*
324 * Allocate and initialize the instance data.
325 */
326 uint32_t cHostCpus = RTMpGetArraySize();
327 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
328
329 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
330 if (!pGVMM)
331 return VERR_NO_MEMORY;
332 int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock);
333 if (RT_SUCCESS(rc))
334 {
335 rc = RTSemFastMutexCreate(&pGVMM->UsedLock);
336 if (RT_SUCCESS(rc))
337 {
338 pGVMM->u32Magic = GVMM_MAGIC;
339 pGVMM->iUsedHead = 0;
340 pGVMM->iFreeHead = 1;
341
342 /* the nil handle */
343 pGVMM->aHandles[0].iSelf = 0;
344 pGVMM->aHandles[0].iNext = 0;
345
346 /* the tail */
347 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
348 pGVMM->aHandles[i].iSelf = i;
349 pGVMM->aHandles[i].iNext = 0; /* nil */
350
351 /* the rest */
352 while (i-- > 1)
353 {
354 pGVMM->aHandles[i].iSelf = i;
355 pGVMM->aHandles[i].iNext = i + 1;
356 }
357
358 /* The default configuration values. */
359 uint32_t cNsResolution = RTSemEventMultiGetResolution();
360 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
361 if (cNsResolution >= 5*RT_NS_100US)
362 {
363 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
364 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
365 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
366 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
367 }
368 else if (cNsResolution > RT_NS_100US)
369 {
370 pGVMM->nsMinSleepAlone = cNsResolution / 2;
371 pGVMM->nsMinSleepCompany = cNsResolution / 4;
372 pGVMM->nsEarlyWakeUp1 = 0;
373 pGVMM->nsEarlyWakeUp2 = 0;
374 }
375 else
376 {
377 pGVMM->nsMinSleepAlone = 2000;
378 pGVMM->nsMinSleepCompany = 2000;
379 pGVMM->nsEarlyWakeUp1 = 0;
380 pGVMM->nsEarlyWakeUp2 = 0;
381 }
382
383 /* The host CPU data. */
384 pGVMM->cHostCpus = cHostCpus;
385 uint32_t iCpu = cHostCpus;
386 RTCPUSET PossibleSet;
387 RTMpGetSet(&PossibleSet);
388 while (iCpu-- > 0)
389 {
390 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
391#ifdef GVMM_SCHED_WITH_PPT
392 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
393 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
394 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
395 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
396 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
397 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
398 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
399 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
400 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
401 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
402 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
403 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
404#endif
405
406 if (RTCpuSetIsMember(&PossibleSet, iCpu))
407 {
408 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
409 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
410
411#ifdef GVMM_SCHED_WITH_PPT
412 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
413 50*1000*1000 /* whatever */,
414 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
415 gvmmR0SchedPeriodicPreemptionTimerCallback,
416 &pGVMM->aHostCpus[iCpu]);
417 if (RT_SUCCESS(rc))
418 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
419 if (RT_FAILURE(rc))
420 {
421 while (iCpu < cHostCpus)
422 {
423 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
424 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
425 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
426 iCpu++;
427 }
428 break;
429 }
430#endif
431 }
432 else
433 {
434 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
435 pGVMM->aHostCpus[iCpu].u32Magic = 0;
436 }
437 }
438 if (RT_SUCCESS(rc))
439 {
440 g_pGVMM = pGVMM;
441 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
442 return VINF_SUCCESS;
443 }
444
445 /* bail out. */
446 RTSemFastMutexDestroy(pGVMM->UsedLock);
447 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
448 }
449 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
450 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
451 }
452
453 RTMemFree(pGVMM);
454 return rc;
455}
456
457
458/**
459 * Terminates the GVM.
460 *
461 * This is called while owning the loader semaphore (see supdrvLdrFree()).
462 * And unless something is wrong, there should be absolutely no VMs
463 * registered at this point.
464 */
465GVMMR0DECL(void) GVMMR0Term(void)
466{
467 LogFlow(("GVMMR0Term:\n"));
468
469 PGVMM pGVMM = g_pGVMM;
470 g_pGVMM = NULL;
471 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
472 {
473 SUPR0Printf("GVMMR0Term: pGVMM=%p\n", pGVMM);
474 return;
475 }
476
477 /*
478 * First of all, stop all active timers.
479 */
480 uint32_t cActiveTimers = 0;
481 uint32_t iCpu = pGVMM->cHostCpus;
482 while (iCpu-- > 0)
483 {
484 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
485#ifdef GVMM_SCHED_WITH_PPT
486 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
487 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
488 cActiveTimers++;
489#endif
490 }
491 if (cActiveTimers)
492 RTThreadSleep(1); /* fudge */
493
494 /*
495 * Invalidate the and free resources.
496 */
497 pGVMM->u32Magic = ~GVMM_MAGIC;
498 RTSemFastMutexDestroy(pGVMM->UsedLock);
499 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
500 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
501 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
502
503 pGVMM->iFreeHead = 0;
504 if (pGVMM->iUsedHead)
505 {
506 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
507 pGVMM->iUsedHead = 0;
508 }
509
510#ifdef GVMM_SCHED_WITH_PPT
511 iCpu = pGVMM->cHostCpus;
512 while (iCpu-- > 0)
513 {
514 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
515 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
516 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
517 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
518 }
519#endif
520
521 RTMemFree(pGVMM);
522}
523
524
525/**
526 * A quick hack for setting global config values.
527 *
528 * @returns VBox status code.
529 *
530 * @param pSession The session handle. Used for authentication.
531 * @param pszName The variable name.
532 * @param u64Value The new value.
533 */
534GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
535{
536 /*
537 * Validate input.
538 */
539 PGVMM pGVMM;
540 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
541 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
542 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
543
544 /*
545 * String switch time!
546 */
547 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
548 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
549 int rc = VINF_SUCCESS;
550 pszName += sizeof("/GVMM/") - 1;
551 if (!strcmp(pszName, "cEMTsMeansCompany"))
552 {
553 if (u64Value <= UINT32_MAX)
554 pGVMM->cEMTsMeansCompany = u64Value;
555 else
556 rc = VERR_OUT_OF_RANGE;
557 }
558 else if (!strcmp(pszName, "MinSleepAlone"))
559 {
560 if (u64Value <= RT_NS_100MS)
561 pGVMM->nsMinSleepAlone = u64Value;
562 else
563 rc = VERR_OUT_OF_RANGE;
564 }
565 else if (!strcmp(pszName, "MinSleepCompany"))
566 {
567 if (u64Value <= RT_NS_100MS)
568 pGVMM->nsMinSleepCompany = u64Value;
569 else
570 rc = VERR_OUT_OF_RANGE;
571 }
572 else if (!strcmp(pszName, "EarlyWakeUp1"))
573 {
574 if (u64Value <= RT_NS_100MS)
575 pGVMM->nsEarlyWakeUp1 = u64Value;
576 else
577 rc = VERR_OUT_OF_RANGE;
578 }
579 else if (!strcmp(pszName, "EarlyWakeUp2"))
580 {
581 if (u64Value <= RT_NS_100MS)
582 pGVMM->nsEarlyWakeUp2 = u64Value;
583 else
584 rc = VERR_OUT_OF_RANGE;
585 }
586 else
587 rc = VERR_CFGM_VALUE_NOT_FOUND;
588 return rc;
589}
590
591
592/**
593 * A quick hack for getting global config values.
594 *
595 * @returns VBox status code.
596 *
597 * @param pSession The session handle. Used for authentication.
598 * @param pszName The variable name.
599 * @param u64Value The new value.
600 */
601GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
602{
603 /*
604 * Validate input.
605 */
606 PGVMM pGVMM;
607 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
608 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
609 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
610 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
611
612 /*
613 * String switch time!
614 */
615 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
616 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
617 int rc = VINF_SUCCESS;
618 pszName += sizeof("/GVMM/") - 1;
619 if (!strcmp(pszName, "cEMTsMeansCompany"))
620 *pu64Value = pGVMM->cEMTsMeansCompany;
621 else if (!strcmp(pszName, "MinSleepAlone"))
622 *pu64Value = pGVMM->nsMinSleepAlone;
623 else if (!strcmp(pszName, "MinSleepCompany"))
624 *pu64Value = pGVMM->nsMinSleepCompany;
625 else if (!strcmp(pszName, "EarlyWakeUp1"))
626 *pu64Value = pGVMM->nsEarlyWakeUp1;
627 else if (!strcmp(pszName, "EarlyWakeUp2"))
628 *pu64Value = pGVMM->nsEarlyWakeUp2;
629 else
630 rc = VERR_CFGM_VALUE_NOT_FOUND;
631 return rc;
632}
633
634
635/**
636 * Try acquire the 'used' lock.
637 *
638 * @returns IPRT status code, see RTSemFastMutexRequest.
639 * @param pGVMM The GVMM instance data.
640 */
641DECLINLINE(int) gvmmR0UsedLock(PGVMM pGVMM)
642{
643 LogFlow(("++gvmmR0UsedLock(%p)\n", pGVMM));
644 int rc = RTSemFastMutexRequest(pGVMM->UsedLock);
645 LogFlow(("gvmmR0UsedLock(%p)->%Rrc\n", pGVMM, rc));
646 return rc;
647}
648
649
650/**
651 * Release the 'used' lock.
652 *
653 * @returns IPRT status code, see RTSemFastMutexRelease.
654 * @param pGVMM The GVMM instance data.
655 */
656DECLINLINE(int) gvmmR0UsedUnlock(PGVMM pGVMM)
657{
658 LogFlow(("--gvmmR0UsedUnlock(%p)\n", pGVMM));
659 int rc = RTSemFastMutexRelease(pGVMM->UsedLock);
660 AssertRC(rc);
661 return rc;
662}
663
664
665/**
666 * Try acquire the 'create & destroy' lock.
667 *
668 * @returns IPRT status code, see RTSemFastMutexRequest.
669 * @param pGVMM The GVMM instance data.
670 */
671DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
672{
673 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
674 int rc = RTSemFastMutexRequest(pGVMM->CreateDestroyLock);
675 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
676 return rc;
677}
678
679
680/**
681 * Release the 'create & destroy' lock.
682 *
683 * @returns IPRT status code, see RTSemFastMutexRequest.
684 * @param pGVMM The GVMM instance data.
685 */
686DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
687{
688 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
689 int rc = RTSemFastMutexRelease(pGVMM->CreateDestroyLock);
690 AssertRC(rc);
691 return rc;
692}
693
694
695/**
696 * Request wrapper for the GVMMR0CreateVM API.
697 *
698 * @returns VBox status code.
699 * @param pReq The request buffer.
700 */
701GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq)
702{
703 /*
704 * Validate the request.
705 */
706 if (!VALID_PTR(pReq))
707 return VERR_INVALID_POINTER;
708 if (pReq->Hdr.cbReq != sizeof(*pReq))
709 return VERR_INVALID_PARAMETER;
710 if (!VALID_PTR(pReq->pSession))
711 return VERR_INVALID_POINTER;
712
713 /*
714 * Execute it.
715 */
716 PVM pVM;
717 pReq->pVMR0 = NULL;
718 pReq->pVMR3 = NIL_RTR3PTR;
719 int rc = GVMMR0CreateVM(pReq->pSession, pReq->cCpus, &pVM);
720 if (RT_SUCCESS(rc))
721 {
722 pReq->pVMR0 = pVM;
723 pReq->pVMR3 = pVM->pVMR3;
724 }
725 return rc;
726}
727
728
729/**
730 * Allocates the VM structure and registers it with GVM.
731 *
732 * The caller will become the VM owner and there by the EMT.
733 *
734 * @returns VBox status code.
735 * @param pSession The support driver session.
736 * @param cCpus Number of virtual CPUs for the new VM.
737 * @param ppVM Where to store the pointer to the VM structure.
738 *
739 * @thread EMT.
740 */
741GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
742{
743 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
744 PGVMM pGVMM;
745 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
746
747 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
748 *ppVM = NULL;
749
750 if ( cCpus == 0
751 || cCpus > VMM_MAX_CPU_COUNT)
752 return VERR_INVALID_PARAMETER;
753
754 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
755 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
756 RTPROCESS ProcId = RTProcSelf();
757 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
758
759 /*
760 * The whole allocation process is protected by the lock.
761 */
762 int rc = gvmmR0CreateDestroyLock(pGVMM);
763 AssertRCReturn(rc, rc);
764
765 /*
766 * Allocate a handle first so we don't waste resources unnecessarily.
767 */
768 uint16_t iHandle = pGVMM->iFreeHead;
769 if (iHandle)
770 {
771 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
772
773 /* consistency checks, a bit paranoid as always. */
774 if ( !pHandle->pVM
775 && !pHandle->pGVM
776 && !pHandle->pvObj
777 && pHandle->iSelf == iHandle)
778 {
779 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
780 if (pHandle->pvObj)
781 {
782 /*
783 * Move the handle from the free to used list and perform permission checks.
784 */
785 rc = gvmmR0UsedLock(pGVMM);
786 AssertRC(rc);
787
788 pGVMM->iFreeHead = pHandle->iNext;
789 pHandle->iNext = pGVMM->iUsedHead;
790 pGVMM->iUsedHead = iHandle;
791 pGVMM->cVMs++;
792
793 pHandle->pVM = NULL;
794 pHandle->pGVM = NULL;
795 pHandle->pSession = pSession;
796 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
797 pHandle->ProcId = NIL_RTPROCESS;
798
799 gvmmR0UsedUnlock(pGVMM);
800
801 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
802 if (RT_SUCCESS(rc))
803 {
804 /*
805 * Allocate the global VM structure (GVM) and initialize it.
806 */
807 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
808 if (pGVM)
809 {
810 pGVM->u32Magic = GVM_MAGIC;
811 pGVM->hSelf = iHandle;
812 pGVM->pVM = NULL;
813 pGVM->cCpus = cCpus;
814
815 gvmmR0InitPerVMData(pGVM);
816 GMMR0InitPerVMData(pGVM);
817
818 /*
819 * Allocate the shared VM structure and associated page array.
820 */
821 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
822 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
823 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
824 if (RT_SUCCESS(rc))
825 {
826 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
827 memset(pVM, 0, cPages << PAGE_SHIFT);
828 pVM->enmVMState = VMSTATE_CREATING;
829 pVM->pVMR0 = pVM;
830 pVM->pSession = pSession;
831 pVM->hSelf = iHandle;
832 pVM->cbSelf = cbVM;
833 pVM->cCpus = cCpus;
834 pVM->uCpuExecutionCap = 100; /* default is no cap. */
835 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
836 AssertCompileMemberAlignment(VM, cpum, 64);
837 AssertCompileMemberAlignment(VM, tm, 64);
838 AssertCompileMemberAlignment(VM, aCpus, PAGE_SIZE);
839
840 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
841 if (RT_SUCCESS(rc))
842 {
843 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
844 for (uint32_t iPage = 0; iPage < cPages; iPage++)
845 {
846 paPages[iPage].uReserved = 0;
847 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
848 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
849 }
850
851 /*
852 * Map them into ring-3.
853 */
854 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
855 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
856 if (RT_SUCCESS(rc))
857 {
858 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
859 AssertPtr((void *)pVM->pVMR3);
860
861 /* Initialize all the VM pointers. */
862 for (uint32_t i = 0; i < cCpus; i++)
863 {
864 pVM->aCpus[i].pVMR0 = pVM;
865 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
866 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
867 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
868 }
869
870 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
871 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
872 NIL_RTR0PROCESS);
873 if (RT_SUCCESS(rc))
874 {
875 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
876 AssertPtr((void *)pVM->paVMPagesR3);
877
878 /* complete the handle - take the UsedLock sem just to be careful. */
879 rc = gvmmR0UsedLock(pGVMM);
880 AssertRC(rc);
881
882 pHandle->pVM = pVM;
883 pHandle->pGVM = pGVM;
884 pHandle->hEMT0 = hEMT0;
885 pHandle->ProcId = ProcId;
886 pGVM->pVM = pVM;
887 pGVM->aCpus[0].hEMT = hEMT0;
888 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
889 pGVMM->cEMTs += cCpus;
890
891 rc = VMMR0ThreadCtxHooksCreate(&pVM->aCpus[0]);
892 if (RT_SUCCESS(rc))
893 {
894 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pVM, ProcId, (void *)hEMT0, cCpus);
895
896 gvmmR0UsedUnlock(pGVMM);
897 gvmmR0CreateDestroyUnlock(pGVMM);
898
899 *ppVM = pVM;
900 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
901 return VINF_SUCCESS;
902 }
903 }
904
905 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
906 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
907 }
908 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
909 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
910 }
911 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
912 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
913 }
914 }
915 }
916 /* else: The user wasn't permitted to create this VM. */
917
918 /*
919 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
920 * object reference here. A little extra mess because of non-recursive lock.
921 */
922 void *pvObj = pHandle->pvObj;
923 pHandle->pvObj = NULL;
924 gvmmR0CreateDestroyUnlock(pGVMM);
925
926 SUPR0ObjRelease(pvObj, pSession);
927
928 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
929 return rc;
930 }
931
932 rc = VERR_NO_MEMORY;
933 }
934 else
935 rc = VERR_GVMM_IPE_1;
936 }
937 else
938 rc = VERR_GVM_TOO_MANY_VMS;
939
940 gvmmR0CreateDestroyUnlock(pGVMM);
941 return rc;
942}
943
944
945/**
946 * Initializes the per VM data belonging to GVMM.
947 *
948 * @param pGVM Pointer to the global VM structure.
949 */
950static void gvmmR0InitPerVMData(PGVM pGVM)
951{
952 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
953 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
954 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
955 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
956 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
957 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
958 pGVM->gvmm.s.fDoneVMMR0Init = false;
959 pGVM->gvmm.s.fDoneVMMR0Term = false;
960
961 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
962 {
963 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
964 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
965 }
966}
967
968
969/**
970 * Does the VM initialization.
971 *
972 * @returns VBox status code.
973 * @param pVM Pointer to the VM.
974 */
975GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
976{
977 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
978
979 /*
980 * Validate the VM structure, state and handle.
981 */
982 PGVM pGVM;
983 PGVMM pGVMM;
984 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
985 if (RT_SUCCESS(rc))
986 {
987 if ( !pGVM->gvmm.s.fDoneVMMR0Init
988 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
989 {
990 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
991 {
992 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
993 if (RT_FAILURE(rc))
994 {
995 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
996 break;
997 }
998 }
999 }
1000 else
1001 rc = VERR_WRONG_ORDER;
1002 }
1003
1004 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1005 return rc;
1006}
1007
1008
1009/**
1010 * Indicates that we're done with the ring-0 initialization
1011 * of the VM.
1012 *
1013 * @param pVM Pointer to the VM.
1014 * @thread EMT(0)
1015 */
1016GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1017{
1018 /* Validate the VM structure, state and handle. */
1019 PGVM pGVM;
1020 PGVMM pGVMM;
1021 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1022 AssertRCReturnVoid(rc);
1023
1024 /* Set the indicator. */
1025 pGVM->gvmm.s.fDoneVMMR0Init = true;
1026}
1027
1028
1029/**
1030 * Indicates that we're doing the ring-0 termination of the VM.
1031 *
1032 * @returns true if termination hasn't been done already, false if it has.
1033 * @param pVM Pointer to the VM.
1034 * @param pGVM Pointer to the global VM structure. Optional.
1035 * @thread EMT(0)
1036 */
1037GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1038{
1039 /* Validate the VM structure, state and handle. */
1040 AssertPtrNullReturn(pGVM, false);
1041 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1042 if (!pGVM)
1043 {
1044 PGVMM pGVMM;
1045 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1046 AssertRCReturn(rc, false);
1047 }
1048
1049 /* Set the indicator. */
1050 if (pGVM->gvmm.s.fDoneVMMR0Term)
1051 return false;
1052 pGVM->gvmm.s.fDoneVMMR0Term = true;
1053 return true;
1054}
1055
1056
1057/**
1058 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1059 *
1060 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1061 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1062 * would've been nice if the caller was actually the EMT thread or that we somehow
1063 * could've associated the calling thread with the VM up front.
1064 *
1065 * @returns VBox status code.
1066 * @param pVM Pointer to the VM.
1067 *
1068 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1069 */
1070GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM)
1071{
1072 LogFlow(("GVMMR0DestroyVM: pVM=%p\n", pVM));
1073 PGVMM pGVMM;
1074 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1075
1076 /*
1077 * Validate the VM structure, state and caller.
1078 */
1079 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1080 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1081 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState),
1082 VERR_WRONG_ORDER);
1083
1084 uint32_t hGVM = pVM->hSelf;
1085 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1086 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1087
1088 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1089 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1090
1091 RTPROCESS ProcId = RTProcSelf();
1092 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1093 AssertReturn( ( pHandle->hEMT0 == hSelf
1094 && pHandle->ProcId == ProcId)
1095 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1096
1097 /*
1098 * Lookup the handle and destroy the object.
1099 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1100 * object, we take some precautions against racing callers just in case...
1101 */
1102 int rc = gvmmR0CreateDestroyLock(pGVMM);
1103 AssertRC(rc);
1104
1105 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1106 if ( pHandle->pVM == pVM
1107 && ( ( pHandle->hEMT0 == hSelf
1108 && pHandle->ProcId == ProcId)
1109 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1110 && VALID_PTR(pHandle->pvObj)
1111 && VALID_PTR(pHandle->pSession)
1112 && VALID_PTR(pHandle->pGVM)
1113 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1114 {
1115 void *pvObj = pHandle->pvObj;
1116 pHandle->pvObj = NULL;
1117 gvmmR0CreateDestroyUnlock(pGVMM);
1118
1119 for (VMCPUID idCpu = 0; idCpu < pVM->cCpus; idCpu++)
1120 {
1121 /** @todo Can we busy wait here for all thread-context hooks to be
1122 * deregistered before releasing (destroying) it? Only until we find a
1123 * solution for not deregistering hooks everytime we're leaving HMR0
1124 * context. */
1125 VMMR0ThreadCtxHooksRelease(&pVM->aCpus[idCpu]);
1126 }
1127
1128 SUPR0ObjRelease(pvObj, pHandle->pSession);
1129 }
1130 else
1131 {
1132 SUPR0Printf("GVMMR0DestroyVM: pHandle=%p:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1133 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1134 gvmmR0CreateDestroyUnlock(pGVMM);
1135 rc = VERR_GVMM_IPE_2;
1136 }
1137
1138 return rc;
1139}
1140
1141
1142/**
1143 * Performs VM cleanup task as part of object destruction.
1144 *
1145 * @param pGVM The GVM pointer.
1146 */
1147static void gvmmR0CleanupVM(PGVM pGVM)
1148{
1149 if ( pGVM->gvmm.s.fDoneVMMR0Init
1150 && !pGVM->gvmm.s.fDoneVMMR0Term)
1151 {
1152 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1153 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1154 {
1155 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1156 VMMR0TermVM(pGVM->pVM, pGVM);
1157 }
1158 else
1159 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1160 }
1161
1162 GMMR0CleanupVM(pGVM);
1163}
1164
1165
1166/**
1167 * Handle destructor.
1168 *
1169 * @param pvGVMM The GVM instance pointer.
1170 * @param pvHandle The handle pointer.
1171 */
1172static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle)
1173{
1174 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvGVMM, pvHandle));
1175
1176 /*
1177 * Some quick, paranoid, input validation.
1178 */
1179 PGVMHANDLE pHandle = (PGVMHANDLE)pvHandle;
1180 AssertPtr(pHandle);
1181 PGVMM pGVMM = (PGVMM)pvGVMM;
1182 Assert(pGVMM == g_pGVMM);
1183 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1184 if ( !iHandle
1185 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1186 || iHandle != pHandle->iSelf)
1187 {
1188 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1189 return;
1190 }
1191
1192 int rc = gvmmR0CreateDestroyLock(pGVMM);
1193 AssertRC(rc);
1194 rc = gvmmR0UsedLock(pGVMM);
1195 AssertRC(rc);
1196
1197 /*
1198 * This is a tad slow but a doubly linked list is too much hassle.
1199 */
1200 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1201 {
1202 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1203 gvmmR0UsedUnlock(pGVMM);
1204 gvmmR0CreateDestroyUnlock(pGVMM);
1205 return;
1206 }
1207
1208 if (pGVMM->iUsedHead == iHandle)
1209 pGVMM->iUsedHead = pHandle->iNext;
1210 else
1211 {
1212 uint16_t iPrev = pGVMM->iUsedHead;
1213 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1214 while (iPrev)
1215 {
1216 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1217 {
1218 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1219 gvmmR0UsedUnlock(pGVMM);
1220 gvmmR0CreateDestroyUnlock(pGVMM);
1221 return;
1222 }
1223 if (RT_UNLIKELY(c-- <= 0))
1224 {
1225 iPrev = 0;
1226 break;
1227 }
1228
1229 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1230 break;
1231 iPrev = pGVMM->aHandles[iPrev].iNext;
1232 }
1233 if (!iPrev)
1234 {
1235 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1236 gvmmR0UsedUnlock(pGVMM);
1237 gvmmR0CreateDestroyUnlock(pGVMM);
1238 return;
1239 }
1240
1241 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1242 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1243 }
1244 pHandle->iNext = 0;
1245 pGVMM->cVMs--;
1246
1247 /*
1248 * Do the global cleanup round.
1249 */
1250 PGVM pGVM = pHandle->pGVM;
1251 if ( VALID_PTR(pGVM)
1252 && pGVM->u32Magic == GVM_MAGIC)
1253 {
1254 pGVMM->cEMTs -= pGVM->cCpus;
1255 gvmmR0UsedUnlock(pGVMM);
1256
1257 gvmmR0CleanupVM(pGVM);
1258
1259 /*
1260 * Do the GVMM cleanup - must be done last.
1261 */
1262 /* The VM and VM pages mappings/allocations. */
1263 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1264 {
1265 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1266 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1267 }
1268
1269 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1270 {
1271 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1272 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1273 }
1274
1275 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1276 {
1277 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1278 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1279 }
1280
1281 if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
1282 {
1283 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
1284 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1285 }
1286
1287 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1288 {
1289 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1290 {
1291 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1292 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1293 }
1294 }
1295
1296 /* the GVM structure itself. */
1297 pGVM->u32Magic |= UINT32_C(0x80000000);
1298 RTMemFree(pGVM);
1299
1300 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1301 rc = gvmmR0UsedLock(pGVMM);
1302 AssertRC(rc);
1303 }
1304 /* else: GVMMR0CreateVM cleanup. */
1305
1306 /*
1307 * Free the handle.
1308 */
1309 pHandle->iNext = pGVMM->iFreeHead;
1310 pGVMM->iFreeHead = iHandle;
1311 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1312 ASMAtomicWriteNullPtr(&pHandle->pVM);
1313 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1314 ASMAtomicWriteNullPtr(&pHandle->pSession);
1315 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1316 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1317
1318 gvmmR0UsedUnlock(pGVMM);
1319 gvmmR0CreateDestroyUnlock(pGVMM);
1320 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1321}
1322
1323
1324/**
1325 * Registers the calling thread as the EMT of a Virtual CPU.
1326 *
1327 * Note that VCPU 0 is automatically registered during VM creation.
1328 *
1329 * @returns VBox status code
1330 * @param pVM Pointer to the VM.
1331 * @param idCpu VCPU id.
1332 */
1333GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu)
1334{
1335 AssertReturn(idCpu != 0, VERR_NOT_OWNER);
1336
1337 /*
1338 * Validate the VM structure, state and handle.
1339 */
1340 PGVM pGVM;
1341 PGVMM pGVMM;
1342 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /* fTakeUsedLock */);
1343 if (RT_FAILURE(rc))
1344 return rc;
1345
1346 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1347 AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED);
1348 Assert(pGVM->cCpus == pVM->cCpus);
1349 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1350
1351 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1352
1353 rc = VMMR0ThreadCtxHooksCreate(&pVM->aCpus[idCpu]);
1354 return rc;
1355}
1356
1357
1358/**
1359 * Lookup a GVM structure by its handle.
1360 *
1361 * @returns The GVM pointer on success, NULL on failure.
1362 * @param hGVM The global VM handle. Asserts on bad handle.
1363 */
1364GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1365{
1366 PGVMM pGVMM;
1367 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1368
1369 /*
1370 * Validate.
1371 */
1372 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1373 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1374
1375 /*
1376 * Look it up.
1377 */
1378 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1379 AssertPtrReturn(pHandle->pVM, NULL);
1380 AssertPtrReturn(pHandle->pvObj, NULL);
1381 PGVM pGVM = pHandle->pGVM;
1382 AssertPtrReturn(pGVM, NULL);
1383 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1384
1385 return pHandle->pGVM;
1386}
1387
1388
1389/**
1390 * Lookup a GVM structure by the shared VM structure.
1391 *
1392 * The calling thread must be in the same process as the VM. All current lookups
1393 * are by threads inside the same process, so this will not be an issue.
1394 *
1395 * @returns VBox status code.
1396 * @param pVM Pointer to the VM.
1397 * @param ppGVM Where to store the GVM pointer.
1398 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1399 * @param fTakeUsedLock Whether to take the used lock or not.
1400 * Be very careful if not taking the lock as it's possible that
1401 * the VM will disappear then.
1402 *
1403 * @remark This will not assert on an invalid pVM but try return silently.
1404 */
1405static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1406{
1407 RTPROCESS ProcId = RTProcSelf();
1408 PGVMM pGVMM;
1409 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1410
1411 /*
1412 * Validate.
1413 */
1414 if (RT_UNLIKELY( !VALID_PTR(pVM)
1415 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1416 return VERR_INVALID_POINTER;
1417 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1418 || pVM->enmVMState >= VMSTATE_TERMINATED))
1419 return VERR_INVALID_POINTER;
1420
1421 uint16_t hGVM = pVM->hSelf;
1422 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1423 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1424 return VERR_INVALID_HANDLE;
1425
1426 /*
1427 * Look it up.
1428 */
1429 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1430 PGVM pGVM;
1431 if (fTakeUsedLock)
1432 {
1433 int rc = gvmmR0UsedLock(pGVMM);
1434 AssertRCReturn(rc, rc);
1435
1436 pGVM = pHandle->pGVM;
1437 if (RT_UNLIKELY( pHandle->pVM != pVM
1438 || pHandle->ProcId != ProcId
1439 || !VALID_PTR(pHandle->pvObj)
1440 || !VALID_PTR(pGVM)
1441 || pGVM->pVM != pVM))
1442 {
1443 gvmmR0UsedUnlock(pGVMM);
1444 return VERR_INVALID_HANDLE;
1445 }
1446 }
1447 else
1448 {
1449 if (RT_UNLIKELY(pHandle->pVM != pVM))
1450 return VERR_INVALID_HANDLE;
1451 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1452 return VERR_INVALID_HANDLE;
1453 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1454 return VERR_INVALID_HANDLE;
1455
1456 pGVM = pHandle->pGVM;
1457 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1458 return VERR_INVALID_HANDLE;
1459 if (RT_UNLIKELY(pGVM->pVM != pVM))
1460 return VERR_INVALID_HANDLE;
1461 }
1462
1463 *ppGVM = pGVM;
1464 *ppGVMM = pGVMM;
1465 return VINF_SUCCESS;
1466}
1467
1468
1469/**
1470 * Lookup a GVM structure by the shared VM structure.
1471 *
1472 * @returns VBox status code.
1473 * @param pVM Pointer to the VM.
1474 * @param ppGVM Where to store the GVM pointer.
1475 *
1476 * @remark This will not take the 'used'-lock because it doesn't do
1477 * nesting and this function will be used from under the lock.
1478 */
1479GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1480{
1481 PGVMM pGVMM;
1482 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1483}
1484
1485
1486/**
1487 * Lookup a GVM structure by the shared VM structure and ensuring that the
1488 * caller is an EMT thread.
1489 *
1490 * @returns VBox status code.
1491 * @param pVM Pointer to the VM.
1492 * @param idCpu The Virtual CPU ID of the calling EMT.
1493 * @param ppGVM Where to store the GVM pointer.
1494 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1495 * @thread EMT
1496 *
1497 * @remark This will assert in all failure paths.
1498 */
1499static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1500{
1501 PGVMM pGVMM;
1502 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1503
1504 /*
1505 * Validate.
1506 */
1507 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1508 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1509
1510 uint16_t hGVM = pVM->hSelf;
1511 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1512 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1513
1514 /*
1515 * Look it up.
1516 */
1517 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1518 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1519 RTPROCESS ProcId = RTProcSelf();
1520 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1521 AssertPtrReturn(pHandle->pvObj, VERR_NOT_OWNER);
1522
1523 PGVM pGVM = pHandle->pGVM;
1524 AssertPtrReturn(pGVM, VERR_NOT_OWNER);
1525 AssertReturn(pGVM->pVM == pVM, VERR_NOT_OWNER);
1526 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1527 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1528 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1529
1530 *ppGVM = pGVM;
1531 *ppGVMM = pGVMM;
1532 return VINF_SUCCESS;
1533}
1534
1535
1536/**
1537 * Lookup a GVM structure by the shared VM structure
1538 * and ensuring that the caller is the EMT thread.
1539 *
1540 * @returns VBox status code.
1541 * @param pVM Pointer to the VM.
1542 * @param idCpu The Virtual CPU ID of the calling EMT.
1543 * @param ppGVM Where to store the GVM pointer.
1544 * @thread EMT
1545 */
1546GVMMR0DECL(int) GVMMR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM)
1547{
1548 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
1549 PGVMM pGVMM;
1550 return gvmmR0ByVMAndEMT(pVM, idCpu, ppGVM, &pGVMM);
1551}
1552
1553
1554/**
1555 * Lookup a VM by its global handle.
1556 *
1557 * @returns Pointer to the VM on success, NULL on failure.
1558 * @param hGVM The global VM handle. Asserts on bad handle.
1559 */
1560GVMMR0DECL(PVM) GVMMR0GetVMByHandle(uint32_t hGVM)
1561{
1562 PGVM pGVM = GVMMR0ByHandle(hGVM);
1563 return pGVM ? pGVM->pVM : NULL;
1564}
1565
1566
1567/**
1568 * Looks up the VM belonging to the specified EMT thread.
1569 *
1570 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1571 * unnecessary kernel panics when the EMT thread hits an assertion. The
1572 * call may or not be an EMT thread.
1573 *
1574 * @returns Pointer to the VM on success, NULL on failure.
1575 * @param hEMT The native thread handle of the EMT.
1576 * NIL_RTNATIVETHREAD means the current thread
1577 */
1578GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1579{
1580 /*
1581 * No Assertions here as we're usually called in a AssertMsgN or
1582 * RTAssert* context.
1583 */
1584 PGVMM pGVMM = g_pGVMM;
1585 if ( !VALID_PTR(pGVMM)
1586 || pGVMM->u32Magic != GVMM_MAGIC)
1587 return NULL;
1588
1589 if (hEMT == NIL_RTNATIVETHREAD)
1590 hEMT = RTThreadNativeSelf();
1591 RTPROCESS ProcId = RTProcSelf();
1592
1593 /*
1594 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1595 */
1596 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1597 {
1598 if ( pGVMM->aHandles[i].iSelf == i
1599 && pGVMM->aHandles[i].ProcId == ProcId
1600 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1601 && VALID_PTR(pGVMM->aHandles[i].pVM)
1602 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1603 {
1604 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1605 return pGVMM->aHandles[i].pVM;
1606
1607 /* This is fearly safe with the current process per VM approach. */
1608 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1609 VMCPUID const cCpus = pGVM->cCpus;
1610 if ( cCpus < 1
1611 || cCpus > VMM_MAX_CPU_COUNT)
1612 continue;
1613 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1614 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1615 return pGVMM->aHandles[i].pVM;
1616 }
1617 }
1618 return NULL;
1619}
1620
1621
1622/**
1623 * This is will wake up expired and soon-to-be expired VMs.
1624 *
1625 * @returns Number of VMs that has been woken up.
1626 * @param pGVMM Pointer to the GVMM instance data.
1627 * @param u64Now The current time.
1628 */
1629static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1630{
1631 /*
1632 * Skip this if we've got disabled because of high resolution wakeups or by
1633 * the user.
1634 */
1635 if ( !pGVMM->nsEarlyWakeUp1
1636 && !pGVMM->nsEarlyWakeUp2)
1637 return 0;
1638
1639/** @todo Rewrite this algorithm. See performance defect XYZ. */
1640
1641 /*
1642 * A cheap optimization to stop wasting so much time here on big setups.
1643 */
1644 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1645 if ( pGVMM->cHaltedEMTs == 0
1646 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1647 return 0;
1648
1649 /*
1650 * The first pass will wake up VMs which have actually expired
1651 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1652 */
1653 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1654 uint64_t u64Min = UINT64_MAX;
1655 unsigned cWoken = 0;
1656 unsigned cHalted = 0;
1657 unsigned cTodo2nd = 0;
1658 unsigned cTodo3rd = 0;
1659 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1660 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1661 i = pGVMM->aHandles[i].iNext)
1662 {
1663 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1664 if ( VALID_PTR(pCurGVM)
1665 && pCurGVM->u32Magic == GVM_MAGIC)
1666 {
1667 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1668 {
1669 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1670 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1671 if (u64)
1672 {
1673 if (u64 <= u64Now)
1674 {
1675 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1676 {
1677 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1678 AssertRC(rc);
1679 cWoken++;
1680 }
1681 }
1682 else
1683 {
1684 cHalted++;
1685 if (u64 <= uNsEarlyWakeUp1)
1686 cTodo2nd++;
1687 else if (u64 <= uNsEarlyWakeUp2)
1688 cTodo3rd++;
1689 else if (u64 < u64Min)
1690 u64 = u64Min;
1691 }
1692 }
1693 }
1694 }
1695 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1696 }
1697
1698 if (cTodo2nd)
1699 {
1700 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1701 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1702 i = pGVMM->aHandles[i].iNext)
1703 {
1704 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1705 if ( VALID_PTR(pCurGVM)
1706 && pCurGVM->u32Magic == GVM_MAGIC)
1707 {
1708 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1709 {
1710 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1711 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1712 if ( u64
1713 && u64 <= uNsEarlyWakeUp1)
1714 {
1715 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1716 {
1717 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1718 AssertRC(rc);
1719 cWoken++;
1720 }
1721 }
1722 }
1723 }
1724 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1725 }
1726 }
1727
1728 if (cTodo3rd)
1729 {
1730 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1731 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1732 i = pGVMM->aHandles[i].iNext)
1733 {
1734 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1735 if ( VALID_PTR(pCurGVM)
1736 && pCurGVM->u32Magic == GVM_MAGIC)
1737 {
1738 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1739 {
1740 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1741 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1742 if ( u64
1743 && u64 <= uNsEarlyWakeUp2)
1744 {
1745 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1746 {
1747 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1748 AssertRC(rc);
1749 cWoken++;
1750 }
1751 }
1752 }
1753 }
1754 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1755 }
1756 }
1757
1758 /*
1759 * Set the minimum value.
1760 */
1761 pGVMM->uNsNextEmtWakeup = u64Min;
1762
1763 return cWoken;
1764}
1765
1766
1767/**
1768 * Halt the EMT thread.
1769 *
1770 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
1771 * VERR_INTERRUPTED if a signal was scheduled for the thread.
1772 * @param pVM Pointer to the VM.
1773 * @param idCpu The Virtual CPU ID of the calling EMT.
1774 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
1775 * @thread EMT(idCpu).
1776 */
1777GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
1778{
1779 LogFlow(("GVMMR0SchedHalt: pVM=%p\n", pVM));
1780
1781 /*
1782 * Validate the VM structure, state and handle.
1783 */
1784 PGVM pGVM;
1785 PGVMM pGVMM;
1786 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
1787 if (RT_FAILURE(rc))
1788 return rc;
1789 pGVM->gvmm.s.StatsSched.cHaltCalls++;
1790
1791 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
1792 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
1793
1794 /*
1795 * Take the UsedList semaphore, get the current time
1796 * and check if anyone needs waking up.
1797 * Interrupts must NOT be disabled at this point because we ask for GIP time!
1798 */
1799 rc = gvmmR0UsedLock(pGVMM);
1800 AssertRC(rc);
1801
1802 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
1803
1804 /* GIP hack: We might are frequently sleeping for short intervals where the
1805 difference between GIP and system time matters on systems with high resolution
1806 system time. So, convert the input from GIP to System time in that case. */
1807 Assert(ASMGetFlags() & X86_EFL_IF);
1808 const uint64_t u64NowSys = RTTimeSystemNanoTS();
1809 const uint64_t u64NowGip = RTTimeNanoTS();
1810 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
1811
1812 /*
1813 * Go to sleep if we must...
1814 * Cap the sleep time to 1 second to be on the safe side.
1815 */
1816 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
1817 if ( u64NowGip < u64ExpireGipTime
1818 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
1819 ? pGVMM->nsMinSleepCompany
1820 : pGVMM->nsMinSleepAlone))
1821 {
1822 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
1823 if (cNsInterval > RT_NS_1SEC)
1824 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
1825 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
1826 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
1827 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
1828 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
1829 gvmmR0UsedUnlock(pGVMM);
1830
1831 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
1832 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
1833 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
1834
1835 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
1836 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
1837
1838 /* Reset the semaphore to try prevent a few false wake-ups. */
1839 if (rc == VINF_SUCCESS)
1840 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1841 else if (rc == VERR_TIMEOUT)
1842 {
1843 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
1844 rc = VINF_SUCCESS;
1845 }
1846 }
1847 else
1848 {
1849 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
1850 gvmmR0UsedUnlock(pGVMM);
1851 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1852 }
1853
1854 return rc;
1855}
1856
1857
1858/**
1859 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
1860 * the a sleeping EMT.
1861 *
1862 * @retval VINF_SUCCESS if successfully woken up.
1863 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1864 *
1865 * @param pGVM The global (ring-0) VM structure.
1866 * @param pGVCpu The global (ring-0) VCPU structure.
1867 */
1868DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
1869{
1870 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
1871
1872 /*
1873 * Signal the semaphore regardless of whether it's current blocked on it.
1874 *
1875 * The reason for this is that there is absolutely no way we can be 100%
1876 * certain that it isn't *about* go to go to sleep on it and just got
1877 * delayed a bit en route. So, we will always signal the semaphore when
1878 * the it is flagged as halted in the VMM.
1879 */
1880/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
1881 int rc;
1882 if (pGVCpu->gvmm.s.u64HaltExpire)
1883 {
1884 rc = VINF_SUCCESS;
1885 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
1886 }
1887 else
1888 {
1889 rc = VINF_GVM_NOT_BLOCKED;
1890 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
1891 }
1892
1893 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
1894 AssertRC(rc2);
1895
1896 return rc;
1897}
1898
1899
1900/**
1901 * Wakes up the halted EMT thread so it can service a pending request.
1902 *
1903 * @returns VBox status code.
1904 * @retval VINF_SUCCESS if successfully woken up.
1905 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1906 *
1907 * @param pVM Pointer to the VM.
1908 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1909 * @param fTakeUsedLock Take the used lock or not
1910 * @thread Any but EMT.
1911 */
1912GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1913{
1914 /*
1915 * Validate input and take the UsedLock.
1916 */
1917 PGVM pGVM;
1918 PGVMM pGVMM;
1919 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1920 if (RT_SUCCESS(rc))
1921 {
1922 if (idCpu < pGVM->cCpus)
1923 {
1924 /*
1925 * Do the actual job.
1926 */
1927 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
1928
1929 if (fTakeUsedLock)
1930 {
1931 /*
1932 * While we're here, do a round of scheduling.
1933 */
1934 Assert(ASMGetFlags() & X86_EFL_IF);
1935 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
1936 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
1937 }
1938 }
1939 else
1940 rc = VERR_INVALID_CPU_ID;
1941
1942 if (fTakeUsedLock)
1943 {
1944 int rc2 = gvmmR0UsedUnlock(pGVMM);
1945 AssertRC(rc2);
1946 }
1947 }
1948
1949 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
1950 return rc;
1951}
1952
1953
1954/**
1955 * Wakes up the halted EMT thread so it can service a pending request.
1956 *
1957 * @returns VBox status code.
1958 * @retval VINF_SUCCESS if successfully woken up.
1959 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1960 *
1961 * @param pVM Pointer to the VM.
1962 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1963 * @thread Any but EMT.
1964 */
1965GVMMR0DECL(int) GVMMR0SchedWakeUp(PVM pVM, VMCPUID idCpu)
1966{
1967 return GVMMR0SchedWakeUpEx(pVM, idCpu, true /* fTakeUsedLock */);
1968}
1969
1970/**
1971 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
1972 * the Virtual CPU if it's still busy executing guest code.
1973 *
1974 * @returns VBox status code.
1975 * @retval VINF_SUCCESS if poked successfully.
1976 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1977 *
1978 * @param pGVM The global (ring-0) VM structure.
1979 * @param pVCpu Pointer to the VMCPU.
1980 */
1981DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
1982{
1983 pGVM->gvmm.s.StatsSched.cPokeCalls++;
1984
1985 RTCPUID idHostCpu = pVCpu->idHostCpu;
1986 if ( idHostCpu == NIL_RTCPUID
1987 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
1988 {
1989 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
1990 return VINF_GVM_NOT_BUSY_IN_GC;
1991 }
1992
1993 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
1994 RTMpPokeCpu(idHostCpu);
1995 return VINF_SUCCESS;
1996}
1997
1998/**
1999 * Pokes an EMT if it's still busy running guest code.
2000 *
2001 * @returns VBox status code.
2002 * @retval VINF_SUCCESS if poked successfully.
2003 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2004 *
2005 * @param pVM Pointer to the VM.
2006 * @param idCpu The ID of the virtual CPU to poke.
2007 * @param fTakeUsedLock Take the used lock or not
2008 */
2009GVMMR0DECL(int) GVMMR0SchedPokeEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
2010{
2011 /*
2012 * Validate input and take the UsedLock.
2013 */
2014 PGVM pGVM;
2015 PGVMM pGVMM;
2016 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
2017 if (RT_SUCCESS(rc))
2018 {
2019 if (idCpu < pGVM->cCpus)
2020 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2021 else
2022 rc = VERR_INVALID_CPU_ID;
2023
2024 if (fTakeUsedLock)
2025 {
2026 int rc2 = gvmmR0UsedUnlock(pGVMM);
2027 AssertRC(rc2);
2028 }
2029 }
2030
2031 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2032 return rc;
2033}
2034
2035
2036/**
2037 * Pokes an EMT if it's still busy running guest code.
2038 *
2039 * @returns VBox status code.
2040 * @retval VINF_SUCCESS if poked successfully.
2041 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2042 *
2043 * @param pVM Pointer to the VM.
2044 * @param idCpu The ID of the virtual CPU to poke.
2045 */
2046GVMMR0DECL(int) GVMMR0SchedPoke(PVM pVM, VMCPUID idCpu)
2047{
2048 return GVMMR0SchedPokeEx(pVM, idCpu, true /* fTakeUsedLock */);
2049}
2050
2051
2052/**
2053 * Wakes up a set of halted EMT threads so they can service pending request.
2054 *
2055 * @returns VBox status code, no informational stuff.
2056 *
2057 * @param pVM Pointer to the VM.
2058 * @param pSleepSet The set of sleepers to wake up.
2059 * @param pPokeSet The set of CPUs to poke.
2060 */
2061GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2062{
2063 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2064 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2065 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2066
2067 /*
2068 * Validate input and take the UsedLock.
2069 */
2070 PGVM pGVM;
2071 PGVMM pGVMM;
2072 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /* fTakeUsedLock */);
2073 if (RT_SUCCESS(rc))
2074 {
2075 rc = VINF_SUCCESS;
2076 VMCPUID idCpu = pGVM->cCpus;
2077 while (idCpu-- > 0)
2078 {
2079 /* Don't try poke or wake up ourselves. */
2080 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2081 continue;
2082
2083 /* just ignore errors for now. */
2084 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2085 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2086 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2087 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2088 }
2089
2090 int rc2 = gvmmR0UsedUnlock(pGVMM);
2091 AssertRC(rc2);
2092 }
2093
2094 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2095 return rc;
2096}
2097
2098
2099/**
2100 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2101 *
2102 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2103 * @param pVM Pointer to the VM.
2104 * @param pReq Pointer to the request packet.
2105 */
2106GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2107{
2108 /*
2109 * Validate input and pass it on.
2110 */
2111 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2112 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2113
2114 return GVMMR0SchedWakeUpAndPokeCpus(pVM, &pReq->SleepSet, &pReq->PokeSet);
2115}
2116
2117
2118
2119/**
2120 * Poll the schedule to see if someone else should get a chance to run.
2121 *
2122 * This is a bit hackish and will not work too well if the machine is
2123 * under heavy load from non-VM processes.
2124 *
2125 * @returns VINF_SUCCESS if not yielded.
2126 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2127 * @param pVM Pointer to the VM.
2128 * @param idCpu The Virtual CPU ID of the calling EMT.
2129 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2130 * @param fYield Whether to yield or not.
2131 * This is for when we're spinning in the halt loop.
2132 * @thread EMT(idCpu).
2133 */
2134GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield)
2135{
2136 /*
2137 * Validate input.
2138 */
2139 PGVM pGVM;
2140 PGVMM pGVMM;
2141 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
2142 if (RT_SUCCESS(rc))
2143 {
2144 rc = gvmmR0UsedLock(pGVMM);
2145 AssertRC(rc);
2146 pGVM->gvmm.s.StatsSched.cPollCalls++;
2147
2148 Assert(ASMGetFlags() & X86_EFL_IF);
2149 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2150
2151 if (!fYield)
2152 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2153 else
2154 {
2155 /** @todo implement this... */
2156 rc = VERR_NOT_IMPLEMENTED;
2157 }
2158
2159 gvmmR0UsedUnlock(pGVMM);
2160 }
2161
2162 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2163 return rc;
2164}
2165
2166
#ifdef GVMM_SCHED_WITH_PPT
/**
 * Timer callback for the periodic preemption timer.
 *
 * Counts ticks and, once per historization interval, stores the frequency
 * desired since the previous record in the history ring.  The timer is then
 * left alone, reprogrammed or stopped depending on the highest frequency
 * found in the history.
 *
 * @param   pTimer  The timer handle.
 * @param   pvUser  Pointer to the per cpu structure.
 * @param   iTick   The current tick.
 */
static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PGVMMHOSTCPU pHostCpu = (PGVMMHOSTCPU)pvUser;
    NOREF(pTimer); NOREF(iTick);

    /*
     * Termination check.
     */
    if (pHostCpu->u32Magic != GVMMHOSTCPU_MAGIC)
        return;

    /*
     * Do the house keeping under the per-CPU spinlock.
     */
    RTSpinlockAcquire(pHostCpu->Ppt.hSpinlock);

    /* Nothing more to do until a full historization interval has elapsed. */
    if (++pHostCpu->Ppt.iTickHistorization < pHostCpu->Ppt.cTicksHistoriziationInterval)
    {
        RTSpinlockReleaseNoInts(pHostCpu->Ppt.hSpinlock);
        return;
    }

    /*
     * Historicize the max frequency and start a new interval.
     */
    uint32_t const idxSlot = ++pHostCpu->Ppt.iHzHistory % RT_ELEMENTS(pHostCpu->Ppt.aHzHistory);
    pHostCpu->Ppt.aHzHistory[idxSlot] = pHostCpu->Ppt.uDesiredHz;
    pHostCpu->Ppt.iTickHistorization  = 0;
    pHostCpu->Ppt.uDesiredHz          = 0;

    /*
     * Find the highest frequency in the history and compare it with the
     * frequency the timer is currently running at.
     */
    uint32_t uMaxHz = 0;
    for (uint32_t idx = 0; idx < RT_ELEMENTS(pHostCpu->Ppt.aHzHistory); idx++)
        if (pHostCpu->Ppt.aHzHistory[idx] > uMaxHz)
            uMaxHz = pHostCpu->Ppt.aHzHistory[idx];

    if (uMaxHz == pHostCpu->Ppt.uTimerHz)
    {
        /* Already running at the right frequency. */
        RTSpinlockReleaseNoInts(pHostCpu->Ppt.hSpinlock);
        return;
    }

    if (!uMaxHz)
    {
        /*
         * Stop it.  The lock is dropped before calling into the timer API.
         */
        pHostCpu->Ppt.fStarted    = false;
        pHostCpu->Ppt.uTimerHz    = 0;
        pHostCpu->Ppt.cNsInterval = 0;
        RTSpinlockReleaseNoInts(pHostCpu->Ppt.hSpinlock);

        /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
        RTTimerStop(pTimer);
        return;
    }

    /*
     * Reprogram it.  The lock is dropped before calling into the timer API.
     */
    pHostCpu->Ppt.cChanges++;
    pHostCpu->Ppt.iTickHistorization = 0;
    pHostCpu->Ppt.uTimerHz           = uMaxHz;
    uint32_t const cNsNewInterval    = RT_NS_1SEC / uMaxHz;
    pHostCpu->Ppt.cNsInterval        = cNsNewInterval;
    if (cNsNewInterval >= GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
        pHostCpu->Ppt.cTicksHistoriziationInterval = 1;
    else
        pHostCpu->Ppt.cTicksHistoriziationInterval = (  GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                                      + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
                                                   / cNsNewInterval;
    RTSpinlockReleaseNoInts(pHostCpu->Ppt.hSpinlock);

    /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
    RTTimerChangeInterval(pTimer, cNsNewInterval);
}
#endif /* GVMM_SCHED_WITH_PPT */
2249
2250
2251/**
2252 * Updates the periodic preemption timer for the calling CPU.
2253 *
2254 * The caller must have disabled preemption!
2255 * The caller must check that the host can do high resolution timers.
2256 *
2257 * @param pVM Pointer to the VM.
2258 * @param idHostCpu The current host CPU id.
2259 * @param uHz The desired frequency.
2260 */
2261GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2262{
2263 NOREF(pVM);
2264#ifdef GVMM_SCHED_WITH_PPT
2265 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2266 Assert(RTTimerCanDoHighResolution());
2267
2268 /*
2269 * Resolve the per CPU data.
2270 */
2271 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2272 PGVMM pGVMM = g_pGVMM;
2273 if ( !VALID_PTR(pGVMM)
2274 || pGVMM->u32Magic != GVMM_MAGIC)
2275 return;
2276 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2277 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2278 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2279 && pCpu->idCpu == idHostCpu,
2280 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2281
2282 /*
2283 * Check whether we need to do anything about the timer.
2284 * We have to be a little bit careful since we might be race the timer
2285 * callback here.
2286 */
2287 if (uHz > 16384)
2288 uHz = 16384; /** @todo add a query method for this! */
2289 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2290 && uHz >= pCpu->Ppt.uMinHz
2291 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2292 {
2293 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2294
2295 pCpu->Ppt.uDesiredHz = uHz;
2296 uint32_t cNsInterval = 0;
2297 if (!pCpu->Ppt.fStarted)
2298 {
2299 pCpu->Ppt.cStarts++;
2300 pCpu->Ppt.fStarted = true;
2301 pCpu->Ppt.fStarting = true;
2302 pCpu->Ppt.iTickHistorization = 0;
2303 pCpu->Ppt.uTimerHz = uHz;
2304 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2305 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2306 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2307 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2308 / cNsInterval;
2309 else
2310 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2311 }
2312
2313 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock);
2314
2315 if (cNsInterval)
2316 {
2317 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2318 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2319 AssertRC(rc);
2320
2321 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2322 if (RT_FAILURE(rc))
2323 pCpu->Ppt.fStarted = false;
2324 pCpu->Ppt.fStarting = false;
2325 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock);
2326 }
2327 }
2328#else /* !GVMM_SCHED_WITH_PPT */
2329 NOREF(idHostCpu); NOREF(uHz);
2330#endif /* !GVMM_SCHED_WITH_PPT */
2331}
2332
2333
2334/**
2335 * Retrieves the GVMM statistics visible to the caller.
2336 *
2337 * @returns VBox status code.
2338 *
2339 * @param pStats Where to put the statistics.
2340 * @param pSession The current session.
2341 * @param pVM The VM to obtain statistics for. Optional.
2342 */
2343GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2344{
2345 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2346
2347 /*
2348 * Validate input.
2349 */
2350 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2351 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2352 pStats->cVMs = 0; /* (crash before taking the sem...) */
2353
2354 /*
2355 * Take the lock and get the VM statistics.
2356 */
2357 PGVMM pGVMM;
2358 if (pVM)
2359 {
2360 PGVM pGVM;
2361 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2362 if (RT_FAILURE(rc))
2363 return rc;
2364 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2365 }
2366 else
2367 {
2368 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2369 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2370
2371 int rc = gvmmR0UsedLock(pGVMM);
2372 AssertRCReturn(rc, rc);
2373 }
2374
2375 /*
2376 * Enumerate the VMs and add the ones visible to the statistics.
2377 */
2378 pStats->cVMs = 0;
2379 pStats->cEMTs = 0;
2380 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2381
2382 for (unsigned i = pGVMM->iUsedHead;
2383 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2384 i = pGVMM->aHandles[i].iNext)
2385 {
2386 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2387 void *pvObj = pGVMM->aHandles[i].pvObj;
2388 if ( VALID_PTR(pvObj)
2389 && VALID_PTR(pGVM)
2390 && pGVM->u32Magic == GVM_MAGIC
2391 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2392 {
2393 pStats->cVMs++;
2394 pStats->cEMTs += pGVM->cCpus;
2395
2396 pStats->SchedSum.cHaltCalls += pGVM->gvmm.s.StatsSched.cHaltCalls;
2397 pStats->SchedSum.cHaltBlocking += pGVM->gvmm.s.StatsSched.cHaltBlocking;
2398 pStats->SchedSum.cHaltTimeouts += pGVM->gvmm.s.StatsSched.cHaltTimeouts;
2399 pStats->SchedSum.cHaltNotBlocking += pGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2400 pStats->SchedSum.cHaltWakeUps += pGVM->gvmm.s.StatsSched.cHaltWakeUps;
2401
2402 pStats->SchedSum.cWakeUpCalls += pGVM->gvmm.s.StatsSched.cWakeUpCalls;
2403 pStats->SchedSum.cWakeUpNotHalted += pGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2404 pStats->SchedSum.cWakeUpWakeUps += pGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2405
2406 pStats->SchedSum.cPokeCalls += pGVM->gvmm.s.StatsSched.cPokeCalls;
2407 pStats->SchedSum.cPokeNotBusy += pGVM->gvmm.s.StatsSched.cPokeNotBusy;
2408
2409 pStats->SchedSum.cPollCalls += pGVM->gvmm.s.StatsSched.cPollCalls;
2410 pStats->SchedSum.cPollHalts += pGVM->gvmm.s.StatsSched.cPollHalts;
2411 pStats->SchedSum.cPollWakeUps += pGVM->gvmm.s.StatsSched.cPollWakeUps;
2412 }
2413 }
2414
2415 /*
2416 * Copy out the per host CPU statistics.
2417 */
2418 uint32_t iDstCpu = 0;
2419 uint32_t cSrcCpus = pGVMM->cHostCpus;
2420 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2421 {
2422 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2423 {
2424 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2425 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2426#ifdef GVMM_SCHED_WITH_PPT
2427 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2428 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2429 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2430 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2431#else
2432 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2433 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2434 pStats->aHostCpus[iDstCpu].cChanges = 0;
2435 pStats->aHostCpus[iDstCpu].cStarts = 0;
2436#endif
2437 iDstCpu++;
2438 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2439 break;
2440 }
2441 }
2442 pStats->cHostCpus = iDstCpu;
2443
2444 gvmmR0UsedUnlock(pGVMM);
2445
2446 return VINF_SUCCESS;
2447}
2448
2449
2450/**
2451 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2452 *
2453 * @returns see GVMMR0QueryStatistics.
2454 * @param pVM Pointer to the VM. Optional.
2455 * @param pReq Pointer to the request packet.
2456 */
2457GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq)
2458{
2459 /*
2460 * Validate input and pass it on.
2461 */
2462 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2463 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2464
2465 return GVMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pVM);
2466}
2467
2468
2469/**
2470 * Resets the specified GVMM statistics.
2471 *
2472 * @returns VBox status code.
2473 *
2474 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2475 * @param pSession The current session.
2476 * @param pVM The VM to reset statistics for. Optional.
2477 */
2478GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2479{
2480 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2481
2482 /*
2483 * Validate input.
2484 */
2485 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2486 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2487
2488 /*
2489 * Take the lock and get the VM statistics.
2490 */
2491 PGVMM pGVMM;
2492 if (pVM)
2493 {
2494 PGVM pGVM;
2495 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2496 if (RT_FAILURE(rc))
2497 return rc;
2498# define MAYBE_RESET_FIELD(field) \
2499 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2500 MAYBE_RESET_FIELD(cHaltCalls);
2501 MAYBE_RESET_FIELD(cHaltBlocking);
2502 MAYBE_RESET_FIELD(cHaltTimeouts);
2503 MAYBE_RESET_FIELD(cHaltNotBlocking);
2504 MAYBE_RESET_FIELD(cHaltWakeUps);
2505 MAYBE_RESET_FIELD(cWakeUpCalls);
2506 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2507 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2508 MAYBE_RESET_FIELD(cPokeCalls);
2509 MAYBE_RESET_FIELD(cPokeNotBusy);
2510 MAYBE_RESET_FIELD(cPollCalls);
2511 MAYBE_RESET_FIELD(cPollHalts);
2512 MAYBE_RESET_FIELD(cPollWakeUps);
2513# undef MAYBE_RESET_FIELD
2514 }
2515 else
2516 {
2517 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2518
2519 int rc = gvmmR0UsedLock(pGVMM);
2520 AssertRCReturn(rc, rc);
2521 }
2522
2523 /*
2524 * Enumerate the VMs and add the ones visible to the statistics.
2525 */
2526 if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0))
2527 {
2528 for (unsigned i = pGVMM->iUsedHead;
2529 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2530 i = pGVMM->aHandles[i].iNext)
2531 {
2532 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2533 void *pvObj = pGVMM->aHandles[i].pvObj;
2534 if ( VALID_PTR(pvObj)
2535 && VALID_PTR(pGVM)
2536 && pGVM->u32Magic == GVM_MAGIC
2537 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2538 {
2539# define MAYBE_RESET_FIELD(field) \
2540 do { if (pStats->SchedSum. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2541 MAYBE_RESET_FIELD(cHaltCalls);
2542 MAYBE_RESET_FIELD(cHaltBlocking);
2543 MAYBE_RESET_FIELD(cHaltTimeouts);
2544 MAYBE_RESET_FIELD(cHaltNotBlocking);
2545 MAYBE_RESET_FIELD(cHaltWakeUps);
2546 MAYBE_RESET_FIELD(cWakeUpCalls);
2547 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2548 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2549 MAYBE_RESET_FIELD(cPokeCalls);
2550 MAYBE_RESET_FIELD(cPokeNotBusy);
2551 MAYBE_RESET_FIELD(cPollCalls);
2552 MAYBE_RESET_FIELD(cPollHalts);
2553 MAYBE_RESET_FIELD(cPollWakeUps);
2554# undef MAYBE_RESET_FIELD
2555 }
2556 }
2557 }
2558
2559 gvmmR0UsedUnlock(pGVMM);
2560
2561 return VINF_SUCCESS;
2562}
2563
2564
2565/**
2566 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2567 *
2568 * @returns see GVMMR0ResetStatistics.
2569 * @param pVM Pointer to the VM. Optional.
2570 * @param pReq Pointer to the request packet.
2571 */
2572GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PVM pVM, PGVMMRESETSTATISTICSSREQ pReq)
2573{
2574 /*
2575 * Validate input and pass it on.
2576 */
2577 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2578 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2579
2580 return GVMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pVM);
2581}
2582
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette