VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 33172

Last change on this file since 33172 was 33172, checked in by vboxsync, 14 years ago

VMEmt,GVMM: Some more tuning...

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 86.6 KB
Line 
1/* $Id: GVMMR0.cpp 33172 2010-10-15 23:33:49Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2010 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * TMCalcHostTimerFrequency() in turn takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*******************************************************************************
50* Header Files *
51*******************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/gvmm.h>
54#include <VBox/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/gvm.h>
57#include <VBox/vm.h>
58#include <VBox/vmm.h>
59#include <VBox/param.h>
60#include <VBox/err.h>
61#include <iprt/asm.h>
62#include <iprt/asm-amd64-x86.h>
63#include <iprt/mem.h>
64#include <iprt/semaphore.h>
65#include <iprt/time.h>
66#include <VBox/log.h>
67#include <iprt/thread.h>
68#include <iprt/process.h>
69#include <iprt/param.h>
70#include <iprt/string.h>
71#include <iprt/assert.h>
72#include <iprt/mem.h>
73#include <iprt/memobj.h>
74#include <iprt/mp.h>
75#include <iprt/cpuset.h>
76#include <iprt/spinlock.h>
77#include <iprt/timer.h>
78
79
80/*******************************************************************************
81* Defined Constants And Macros *
82*******************************************************************************/
83#if defined(RT_OS_LINUX) || defined(DOXYGEN_RUNNING)
84/** Define this to enable the periodic preemption timer. */
85# define GVMM_SCHED_WITH_PPT
86#endif
87
88
89/*******************************************************************************
90* Structures and Typedefs *
91*******************************************************************************/
92
93/**
94 * Global VM handle.
95 */
96typedef struct GVMHANDLE
97{
98 /** The index of the next handle in the list (free or used). (0 is nil.) */
99 uint16_t volatile iNext;
100 /** Our own index / handle value. */
101 uint16_t iSelf;
102 /** The pointer to the ring-0 only (aka global) VM structure. */
103 PGVM pGVM;
104 /** The ring-0 mapping of the shared VM instance data. */
105 PVM pVM;
106 /** The virtual machine object. */
107 void *pvObj;
108 /** The session this VM is associated with. */
109 PSUPDRVSESSION pSession;
110 /** The ring-0 handle of the EMT0 thread.
111 * This is used for ownership checks as well as looking up a VM handle by thread
112 * at times like assertions. */
113 RTNATIVETHREAD hEMT0;
114 /** The process ID of the handle owner.
115 * This is used for access checks. */
116 RTPROCESS ProcId;
117} GVMHANDLE;
118/** Pointer to a global VM handle. */
119typedef GVMHANDLE *PGVMHANDLE;
120
/** Number of GVM handles, counting the NIL handle (entry 0) as one of them.
 * 64-bit hosts get 1024 entries, everything else 128. */
#if HC_ARCH_BITS != 64
# define GVMM_MAX_HANDLES   128
#else
# define GVMM_MAX_HANDLES   1024
#endif
127
128/**
129 * Per host CPU GVMM data.
130 */
131typedef struct GVMMHOSTCPU
132{
133 /** Magic number (GVMMHOSTCPU_MAGIC). */
134 uint32_t volatile u32Magic;
135 /** The CPU ID. */
136 RTCPUID idCpu;
137 /** The CPU set index. */
138 uint32_t idxCpuSet;
139
140#ifdef GVMM_SCHED_WITH_PPT
141 /** Periodic preemption timer data. */
142 struct
143 {
144 /** The handle to the periodic preemption timer. */
145 PRTTIMER pTimer;
146 /** Spinlock protecting the data below. */
147 RTSPINLOCK hSpinlock;
148 /** The smalles Hz that we need to care about. (static) */
149 uint32_t uMinHz;
150 /** The number of ticks between each historization. */
151 uint32_t cTicksHistoriziationInterval;
152 /** The current historization tick (counting up to
153 * cTicksHistoriziationInterval and then resetting). */
154 uint32_t iTickHistorization;
155 /** The current timer interval. This is set to 0 when inactive. */
156 uint32_t cNsInterval;
157 /** The current timer frequency. This is set to 0 when inactive. */
158 uint32_t uTimerHz;
159 /** The current max frequency reported by the EMTs.
160 * This gets historicize and reset by the timer callback. This is
161 * read without holding the spinlock, so needs atomic updating. */
162 uint32_t volatile uDesiredHz;
163 /** Whether the timer was started or not. */
164 bool volatile fStarted;
165 /** Set if we're starting timer. */
166 bool volatile fStarting;
167 /** The index of the next history entry (mod it). */
168 uint32_t iHzHistory;
169 /** Hitoricized uDesiredHz values. The array wraps around, new entries
170 * are added at iHzHistory. This is updated approximately every
171 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
172 uint32_t aHzHistory[8];
173 /** Statistics counter for recording the number of interval changes. */
174 uint32_t cChanges;
175 /** Statistics counter for recording the number of timer starts. */
176 uint32_t cStarts;
177 } Ppt;
178#endif /* GVMM_SCHED_WITH_PPT */
179
180} GVMMHOSTCPU;
181/** Pointer to the per host CPU GVMM data. */
182typedef GVMMHOSTCPU *PGVMMHOSTCPU;
183/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
184#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
185/** The interval on history entry should cover (approximately) give in
186 * nanoseconds. */
187#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
188
189
190/**
191 * The GVMM instance data.
192 */
193typedef struct GVMM
194{
195 /** Eyecatcher / magic. */
196 uint32_t u32Magic;
197 /** The index of the head of the free handle chain. (0 is nil.) */
198 uint16_t volatile iFreeHead;
199 /** The index of the head of the active handle chain. (0 is nil.) */
200 uint16_t volatile iUsedHead;
201 /** The number of VMs. */
202 uint16_t volatile cVMs;
203 /** Alignment padding. */
204 uint16_t u16Reserved;
205 /** The number of EMTs. */
206 uint32_t volatile cEMTs;
207 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
208 uint32_t volatile cHaltedEMTs;
209 /** Alignment padding. */
210 uint32_t u32Alignment;
211 /** When the next halted or sleeping EMT will wake up.
212 * This is set to 0 when it needs recalculating and to UINT64_MAX when
213 * there are no halted or sleeping EMTs in the GVMM. */
214 uint64_t uNsNextEmtWakeup;
215 /** The lock used to serialize VM creation, destruction and associated events that
216 * isn't performance critical. Owners may acquire the list lock. */
217 RTSEMFASTMUTEX CreateDestroyLock;
218 /** The lock used to serialize used list updates and accesses.
219 * This indirectly includes scheduling since the scheduler will have to walk the
220 * used list to examin running VMs. Owners may not acquire any other locks. */
221 RTSEMFASTMUTEX UsedLock;
222 /** The handle array.
223 * The size of this array defines the maximum number of currently running VMs.
224 * The first entry is unused as it represents the NIL handle. */
225 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
226
227 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
228 * The number of EMTs that means we no longer consider ourselves alone on a
229 * CPU/Core.
230 */
231 uint32_t cEMTsMeansCompany;
232 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
233 * The minimum sleep time for when we're alone, in nano seconds.
234 */
235 uint32_t nsMinSleepAlone;
236 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
237 * The minimum sleep time for when we've got company, in nano seconds.
238 */
239 uint32_t nsMinSleepCompany;
240 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
241 * The limit for the first round of early wakeups, given in nano seconds.
242 */
243 uint32_t nsEarlyWakeUp1;
244 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
245 * The limit for the second round of early wakeups, given in nano seconds.
246 */
247 uint32_t nsEarlyWakeUp2;
248
249 /** The number of entries in the host CPU array (aHostCpus). */
250 uint32_t cHostCpus;
251 /** Per host CPU data (variable length). */
252 GVMMHOSTCPU aHostCpus[1];
253} GVMM;
254/** Pointer to the GVMM instance data. */
255typedef GVMM *PGVMM;
256
257/** The GVMM::u32Magic value (Charlie Haden). */
258#define GVMM_MAGIC 0x19370806
259
260
261
262/*******************************************************************************
263* Global Variables *
264*******************************************************************************/
265/** Pointer to the GVMM instance data.
266 * (Just my general dislike for global variables.) */
267static PGVMM g_pGVMM = NULL;
268
/** Fetches g_pGVMM into a local variable and validates it (pointer + magic).
 * If validation fails, the invoking function returns @a rc.
 *
 * @param   pGVMM   The name of the pGVMM variable to assign.
 * @param   rc      The value to return on failure.  Use VERR_INTERNAL_ERROR
 *                  for VBox status codes.
 */
#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
    do \
    { \
        (pGVMM) = g_pGVMM; \
        AssertPtrReturn((pGVMM), (rc)); \
        AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
    } while (0)
282
/** Fetches g_pGVMM into a local variable and validates it (pointer + magic),
 * variant for functions returning void.
 * If validation fails, the invoking function returns.
 *
 * @param   pGVMM   The name of the pGVMM variable to assign.
 */
#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
    do \
    { \
        (pGVMM) = g_pGVMM; \
        AssertPtrReturnVoid((pGVMM)); \
        AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
    } while (0)
294
295
296/*******************************************************************************
297* Internal Functions *
298*******************************************************************************/
299static void gvmmR0InitPerVMData(PGVM pGVM);
300static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
301static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
302static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM);
303#ifdef GVMM_SCHED_WITH_PPT
304static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
305#endif
306
307
308/**
309 * Initializes the GVMM.
310 *
311 * This is called while owninng the loader sempahore (see supdrvIOCtl_LdrLoad()).
312 *
313 * @returns VBox status code.
314 */
315GVMMR0DECL(int) GVMMR0Init(void)
316{
317 LogFlow(("GVMMR0Init:\n"));
318
319 /*
320 * Allocate and initialize the instance data.
321 */
322 uint32_t cHostCpus = RTMpGetArraySize();
323 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_INTERNAL_ERROR_2);
324
325 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF(GVMM, aHostCpus[cHostCpus]));
326 if (!pGVMM)
327 return VERR_NO_MEMORY;
328 int rc = RTSemFastMutexCreate(&pGVMM->CreateDestroyLock);
329 if (RT_SUCCESS(rc))
330 {
331 rc = RTSemFastMutexCreate(&pGVMM->UsedLock);
332 if (RT_SUCCESS(rc))
333 {
334 pGVMM->u32Magic = GVMM_MAGIC;
335 pGVMM->iUsedHead = 0;
336 pGVMM->iFreeHead = 1;
337
338 /* the nil handle */
339 pGVMM->aHandles[0].iSelf = 0;
340 pGVMM->aHandles[0].iNext = 0;
341
342 /* the tail */
343 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
344 pGVMM->aHandles[i].iSelf = i;
345 pGVMM->aHandles[i].iNext = 0; /* nil */
346
347 /* the rest */
348 while (i-- > 1)
349 {
350 pGVMM->aHandles[i].iSelf = i;
351 pGVMM->aHandles[i].iNext = i + 1;
352 }
353
354 /* The default configuration values. */
355 uint32_t cNsResolution = RTSemEventMultiGetResolution();
356 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted to relative to the cpu count or something... */
357 if (cNsResolution >= 5*RT_NS_100US)
358 {
359 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
360 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
361 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
362 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
363 }
364 else if (cNsResolution > RT_NS_100US)
365 {
366 pGVMM->nsMinSleepAlone = cNsResolution / 2;
367 pGVMM->nsMinSleepCompany = cNsResolution / 4;
368 pGVMM->nsEarlyWakeUp1 = 0;
369 pGVMM->nsEarlyWakeUp2 = 0;
370 }
371 else
372 {
373 pGVMM->nsMinSleepAlone = 2000;
374 pGVMM->nsMinSleepCompany = 2000;
375 pGVMM->nsEarlyWakeUp1 = 0;
376 pGVMM->nsEarlyWakeUp2 = 0;
377 }
378
379 /* The host CPU data. */
380 pGVMM->cHostCpus = cHostCpus;
381 uint32_t iCpu = cHostCpus;
382 RTCPUSET PossibleSet;
383 RTMpGetSet(&PossibleSet);
384 while (iCpu-- > 0)
385 {
386 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
387#ifdef GVMM_SCHED_WITH_PPT
388 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
389 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
390 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
391 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
392 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
393 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
394 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
395 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
396 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
397 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
398 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
399 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
400#endif
401
402 if (RTCpuSetIsMember(&PossibleSet, iCpu))
403 {
404 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
405 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
406
407#ifdef GVMM_SCHED_WITH_PPT
408 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
409 50*1000*1000 /* whatever */,
410 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
411 gvmmR0SchedPeriodicPreemptionTimerCallback,
412 &pGVMM->aHostCpus[iCpu]);
413 if (RT_SUCCESS(rc))
414 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
415 if (RT_FAILURE(rc))
416 {
417 while (iCpu < cHostCpus)
418 {
419 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
420 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
421 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
422 iCpu++;
423 }
424 break;
425 }
426#endif
427 }
428 else
429 {
430 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
431 pGVMM->aHostCpus[iCpu].u32Magic = 0;
432 }
433 }
434 if (RT_SUCCESS(rc))
435 {
436 g_pGVMM = pGVMM;
437 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
438 return VINF_SUCCESS;
439 }
440
441 /* bail out. */
442 RTSemFastMutexDestroy(pGVMM->UsedLock);
443 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
444 }
445 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
446 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
447 }
448
449 RTMemFree(pGVMM);
450 return rc;
451}
452
453
454/**
455 * Terminates the GVM.
456 *
457 * This is called while owning the loader semaphore (see supdrvLdrFree()).
458 * And unless something is wrong, there should be absolutely no VMs
459 * registered at this point.
460 */
461GVMMR0DECL(void) GVMMR0Term(void)
462{
463 LogFlow(("GVMMR0Term:\n"));
464
465 PGVMM pGVMM = g_pGVMM;
466 g_pGVMM = NULL;
467 if (RT_UNLIKELY(!VALID_PTR(pGVMM)))
468 {
469 SUPR0Printf("GVMMR0Term: pGVMM=%p\n", pGVMM);
470 return;
471 }
472
473 /*
474 * First of all, stop all active timers.
475 */
476 uint32_t cActiveTimers = 0;
477 uint32_t iCpu = pGVMM->cHostCpus;
478 while (iCpu-- > 0)
479 {
480 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
481#ifdef GVMM_SCHED_WITH_PPT
482 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
483 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
484 cActiveTimers++;
485#endif
486 }
487 if (cActiveTimers)
488 RTThreadSleep(1); /* fudge */
489
490 /*
491 * Invalidate the and free resources.
492 */
493 pGVMM->u32Magic = ~GVMM_MAGIC;
494 RTSemFastMutexDestroy(pGVMM->UsedLock);
495 pGVMM->UsedLock = NIL_RTSEMFASTMUTEX;
496 RTSemFastMutexDestroy(pGVMM->CreateDestroyLock);
497 pGVMM->CreateDestroyLock = NIL_RTSEMFASTMUTEX;
498
499 pGVMM->iFreeHead = 0;
500 if (pGVMM->iUsedHead)
501 {
502 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
503 pGVMM->iUsedHead = 0;
504 }
505
506#ifdef GVMM_SCHED_WITH_PPT
507 iCpu = pGVMM->cHostCpus;
508 while (iCpu-- > 0)
509 {
510 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
511 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
512 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
513 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
514 }
515#endif
516
517 RTMemFree(pGVMM);
518}
519
520
521/**
522 * A quick hack for setting global config values.
523 *
524 * @returns VBox status code.
525 *
526 * @param pSession The session handle. Used for authentication.
527 * @param pszName The variable name.
528 * @param u64Value The new value.
529 */
530GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
531{
532 /*
533 * Validate input.
534 */
535 PGVMM pGVMM;
536 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
537 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
538 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
539
540 /*
541 * String switch time!
542 */
543 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
544 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
545 int rc = VINF_SUCCESS;
546 pszName += sizeof("/GVMM/") - 1;
547 if (!strcmp(pszName, "cEMTsMeansCompany"))
548 {
549 if (u64Value <= UINT32_MAX)
550 pGVMM->cEMTsMeansCompany = u64Value;
551 else
552 rc = VERR_OUT_OF_RANGE;
553 }
554 else if (!strcmp(pszName, "MinSleepAlone"))
555 {
556 if (u64Value <= RT_NS_100MS)
557 pGVMM->nsMinSleepAlone = u64Value;
558 else
559 rc = VERR_OUT_OF_RANGE;
560 }
561 else if (!strcmp(pszName, "MinSleepCompany"))
562 {
563 if (u64Value <= RT_NS_100MS)
564 pGVMM->nsMinSleepCompany = u64Value;
565 else
566 rc = VERR_OUT_OF_RANGE;
567 }
568 else if (!strcmp(pszName, "EarlyWakeUp1"))
569 {
570 if (u64Value <= RT_NS_100MS)
571 pGVMM->nsEarlyWakeUp1 = u64Value;
572 else
573 rc = VERR_OUT_OF_RANGE;
574 }
575 else if (!strcmp(pszName, "EarlyWakeUp2"))
576 {
577 if (u64Value <= RT_NS_100MS)
578 pGVMM->nsEarlyWakeUp2 = u64Value;
579 else
580 rc = VERR_OUT_OF_RANGE;
581 }
582 else
583 rc = VERR_CFGM_VALUE_NOT_FOUND;
584 return rc;
585}
586
587
588/**
589 * A quick hack for getting global config values.
590 *
591 * @returns VBox status code.
592 *
593 * @param pSession The session handle. Used for authentication.
594 * @param pszName The variable name.
595 * @param u64Value The new value.
596 */
597GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
598{
599 /*
600 * Validate input.
601 */
602 PGVMM pGVMM;
603 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
604 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
605 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
606 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
607
608 /*
609 * String switch time!
610 */
611 if (strncmp(pszName, "/GVMM/", sizeof("/GVMM/") - 1))
612 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
613 int rc = VINF_SUCCESS;
614 pszName += sizeof("/GVMM/") - 1;
615 if (!strcmp(pszName, "cEMTsMeansCompany"))
616 *pu64Value = pGVMM->cEMTsMeansCompany;
617 else if (!strcmp(pszName, "MinSleepAlone"))
618 *pu64Value = pGVMM->nsMinSleepAlone;
619 else if (!strcmp(pszName, "MinSleepCompany"))
620 *pu64Value = pGVMM->nsMinSleepCompany;
621 else if (!strcmp(pszName, "EarlyWakeUp1"))
622 *pu64Value = pGVMM->nsEarlyWakeUp1;
623 else if (!strcmp(pszName, "EarlyWakeUp2"))
624 *pu64Value = pGVMM->nsEarlyWakeUp2;
625 else
626 rc = VERR_CFGM_VALUE_NOT_FOUND;
627 return rc;
628}
629
630
631/**
632 * Try acquire the 'used' lock.
633 *
634 * @returns IPRT status code, see RTSemFastMutexRequest.
635 * @param pGVMM The GVMM instance data.
636 */
637DECLINLINE(int) gvmmR0UsedLock(PGVMM pGVMM)
638{
639 LogFlow(("++gvmmR0UsedLock(%p)\n", pGVMM));
640 int rc = RTSemFastMutexRequest(pGVMM->UsedLock);
641 LogFlow(("gvmmR0UsedLock(%p)->%Rrc\n", pGVMM, rc));
642 return rc;
643}
644
645
646/**
647 * Release the 'used' lock.
648 *
649 * @returns IPRT status code, see RTSemFastMutexRelease.
650 * @param pGVMM The GVMM instance data.
651 */
652DECLINLINE(int) gvmmR0UsedUnlock(PGVMM pGVMM)
653{
654 LogFlow(("--gvmmR0UsedUnlock(%p)\n", pGVMM));
655 int rc = RTSemFastMutexRelease(pGVMM->UsedLock);
656 AssertRC(rc);
657 return rc;
658}
659
660
661/**
662 * Try acquire the 'create & destroy' lock.
663 *
664 * @returns IPRT status code, see RTSemFastMutexRequest.
665 * @param pGVMM The GVMM instance data.
666 */
667DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
668{
669 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
670 int rc = RTSemFastMutexRequest(pGVMM->CreateDestroyLock);
671 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
672 return rc;
673}
674
675
676/**
677 * Release the 'create & destroy' lock.
678 *
679 * @returns IPRT status code, see RTSemFastMutexRequest.
680 * @param pGVMM The GVMM instance data.
681 */
682DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
683{
684 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
685 int rc = RTSemFastMutexRelease(pGVMM->CreateDestroyLock);
686 AssertRC(rc);
687 return rc;
688}
689
690
691/**
692 * Request wrapper for the GVMMR0CreateVM API.
693 *
694 * @returns VBox status code.
695 * @param pReq The request buffer.
696 */
697GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq)
698{
699 /*
700 * Validate the request.
701 */
702 if (!VALID_PTR(pReq))
703 return VERR_INVALID_POINTER;
704 if (pReq->Hdr.cbReq != sizeof(*pReq))
705 return VERR_INVALID_PARAMETER;
706 if (!VALID_PTR(pReq->pSession))
707 return VERR_INVALID_POINTER;
708
709 /*
710 * Execute it.
711 */
712 PVM pVM;
713 pReq->pVMR0 = NULL;
714 pReq->pVMR3 = NIL_RTR3PTR;
715 int rc = GVMMR0CreateVM(pReq->pSession, pReq->cCpus, &pVM);
716 if (RT_SUCCESS(rc))
717 {
718 pReq->pVMR0 = pVM;
719 pReq->pVMR3 = pVM->pVMR3;
720 }
721 return rc;
722}
723
724
725/**
726 * Allocates the VM structure and registers it with GVM.
727 *
728 * The caller will become the VM owner and there by the EMT.
729 *
730 * @returns VBox status code.
731 * @param pSession The support driver session.
732 * @param cCpus Number of virtual CPUs for the new VM.
733 * @param ppVM Where to store the pointer to the VM structure.
734 *
735 * @thread EMT.
736 */
737GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PVM *ppVM)
738{
739 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
740 PGVMM pGVMM;
741 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
742
743 AssertPtrReturn(ppVM, VERR_INVALID_POINTER);
744 *ppVM = NULL;
745
746 if ( cCpus == 0
747 || cCpus > VMM_MAX_CPU_COUNT)
748 return VERR_INVALID_PARAMETER;
749
750 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
751 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR);
752 RTNATIVETHREAD ProcId = RTProcSelf();
753 AssertReturn(ProcId != NIL_RTPROCESS, VERR_INTERNAL_ERROR);
754
755 /*
756 * The whole allocation process is protected by the lock.
757 */
758 int rc = gvmmR0CreateDestroyLock(pGVMM);
759 AssertRCReturn(rc, rc);
760
761 /*
762 * Allocate a handle first so we don't waste resources unnecessarily.
763 */
764 uint16_t iHandle = pGVMM->iFreeHead;
765 if (iHandle)
766 {
767 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
768
769 /* consistency checks, a bit paranoid as always. */
770 if ( !pHandle->pVM
771 && !pHandle->pGVM
772 && !pHandle->pvObj
773 && pHandle->iSelf == iHandle)
774 {
775 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
776 if (pHandle->pvObj)
777 {
778 /*
779 * Move the handle from the free to used list and perform permission checks.
780 */
781 rc = gvmmR0UsedLock(pGVMM);
782 AssertRC(rc);
783
784 pGVMM->iFreeHead = pHandle->iNext;
785 pHandle->iNext = pGVMM->iUsedHead;
786 pGVMM->iUsedHead = iHandle;
787 pGVMM->cVMs++;
788
789 pHandle->pVM = NULL;
790 pHandle->pGVM = NULL;
791 pHandle->pSession = pSession;
792 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
793 pHandle->ProcId = NIL_RTPROCESS;
794
795 gvmmR0UsedUnlock(pGVMM);
796
797 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
798 if (RT_SUCCESS(rc))
799 {
800 /*
801 * Allocate the global VM structure (GVM) and initialize it.
802 */
803 PGVM pGVM = (PGVM)RTMemAllocZ(RT_UOFFSETOF(GVM, aCpus[cCpus]));
804 if (pGVM)
805 {
806 pGVM->u32Magic = GVM_MAGIC;
807 pGVM->hSelf = iHandle;
808 pGVM->pVM = NULL;
809 pGVM->cCpus = cCpus;
810
811 gvmmR0InitPerVMData(pGVM);
812 GMMR0InitPerVMData(pGVM);
813
814 /*
815 * Allocate the shared VM structure and associated page array.
816 */
817 const uint32_t cbVM = RT_UOFFSETOF(VM, aCpus[cCpus]);
818 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
819#ifdef RT_OS_DARWIN /** @todo Figure out why this is broken. Is it only on snow leopard? */
820 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, (cPages + 1) << PAGE_SHIFT, false /* fExecutable */);
821#else
822 rc = RTR0MemObjAllocLow(&pGVM->gvmm.s.VMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
823#endif
824 if (RT_SUCCESS(rc))
825 {
826 PVM pVM = (PVM)RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj); AssertPtr(pVM);
827 memset(pVM, 0, cPages << PAGE_SHIFT);
828 pVM->enmVMState = VMSTATE_CREATING;
829 pVM->pVMR0 = pVM;
830 pVM->pSession = pSession;
831 pVM->hSelf = iHandle;
832 pVM->cbSelf = cbVM;
833 pVM->cCpus = cCpus;
834 pVM->uCpuExecutionCap = 100; /* default is no cap. */
835 pVM->offVMCPU = RT_UOFFSETOF(VM, aCpus);
836
837 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
838 if (RT_SUCCESS(rc))
839 {
840 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
841 for (uint32_t iPage = 0; iPage < cPages; iPage++)
842 {
843 paPages[iPage].uReserved = 0;
844 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
845 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
846 }
847
848 /*
849 * Map them into ring-3.
850 */
851 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
852 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
853 if (RT_SUCCESS(rc))
854 {
855 pVM->pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
856 AssertPtr((void *)pVM->pVMR3);
857
858 /* Initialize all the VM pointers. */
859 for (uint32_t i = 0; i < cCpus; i++)
860 {
861 pVM->aCpus[i].pVMR0 = pVM;
862 pVM->aCpus[i].pVMR3 = pVM->pVMR3;
863 pVM->aCpus[i].idHostCpu = NIL_RTCPUID;
864 pVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
865 }
866
867 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1, 0,
868 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS);
869 if (RT_SUCCESS(rc))
870 {
871 pVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
872 AssertPtr((void *)pVM->paVMPagesR3);
873
874 /* complete the handle - take the UsedLock sem just to be careful. */
875 rc = gvmmR0UsedLock(pGVMM);
876 AssertRC(rc);
877
878 pHandle->pVM = pVM;
879 pHandle->pGVM = pGVM;
880 pHandle->hEMT0 = hEMT0;
881 pHandle->ProcId = ProcId;
882 pGVM->pVM = pVM;
883 pGVM->aCpus[0].hEMT = hEMT0;
884 pVM->aCpus[0].hNativeThreadR0 = hEMT0;
885 pGVMM->cEMTs += cCpus;
886
887 gvmmR0UsedUnlock(pGVMM);
888 gvmmR0CreateDestroyUnlock(pGVMM);
889
890 *ppVM = pVM;
891 Log(("GVMMR0CreateVM: pVM=%p pVMR3=%p pGVM=%p hGVM=%d\n", pVM, pVM->pVMR3, pGVM, iHandle));
892 return VINF_SUCCESS;
893 }
894
895 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
896 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
897 }
898 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */);
899 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
900 }
901 RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */);
902 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
903 }
904 }
905 }
906 /* else: The user wasn't permitted to create this VM. */
907
908 /*
909 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
910 * object reference here. A little extra mess because of non-recursive lock.
911 */
912 void *pvObj = pHandle->pvObj;
913 pHandle->pvObj = NULL;
914 gvmmR0CreateDestroyUnlock(pGVMM);
915
916 SUPR0ObjRelease(pvObj, pSession);
917
918 SUPR0Printf("GVMMR0CreateVM: failed, rc=%d\n", rc);
919 return rc;
920 }
921
922 rc = VERR_NO_MEMORY;
923 }
924 else
925 rc = VERR_INTERNAL_ERROR;
926 }
927 else
928 rc = VERR_GVM_TOO_MANY_VMS;
929
930 gvmmR0CreateDestroyUnlock(pGVMM);
931 return rc;
932}
933
934
935/**
936 * Initializes the per VM data belonging to GVMM.
937 *
938 * @param pGVM Pointer to the global VM structure.
939 */
940static void gvmmR0InitPerVMData(PGVM pGVM)
941{
942 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
943 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
944 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
945 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
946 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
947 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
948 pGVM->gvmm.s.fDoneVMMR0Init = false;
949 pGVM->gvmm.s.fDoneVMMR0Term = false;
950
951 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
952 {
953 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
954 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
955 }
956}
957
958
959/**
960 * Does the VM initialization.
961 *
962 * @returns VBox status code.
963 * @param pVM Pointer to the shared VM structure.
964 */
965GVMMR0DECL(int) GVMMR0InitVM(PVM pVM)
966{
967 LogFlow(("GVMMR0InitVM: pVM=%p\n", pVM));
968
969 /*
970 * Validate the VM structure, state and handle.
971 */
972 PGVM pGVM;
973 PGVMM pGVMM;
974 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
975 if (RT_SUCCESS(rc))
976 {
977 if ( !pGVM->gvmm.s.fDoneVMMR0Init
978 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
979 {
980 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
981 {
982 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
983 if (RT_FAILURE(rc))
984 {
985 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
986 break;
987 }
988 }
989 }
990 else
991 rc = VERR_WRONG_ORDER;
992 }
993
994 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
995 return rc;
996}
997
998
999/**
1000 * Indicates that we're done with the ring-0 initialization
1001 * of the VM.
1002 *
1003 * @param pVM Pointer to the shared VM structure.
1004 * @thread EMT(0)
1005 */
1006GVMMR0DECL(void) GVMMR0DoneInitVM(PVM pVM)
1007{
1008 /* Validate the VM structure, state and handle. */
1009 PGVM pGVM;
1010 PGVMM pGVMM;
1011 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1012 AssertRCReturnVoid(rc);
1013
1014 /* Set the indicator. */
1015 pGVM->gvmm.s.fDoneVMMR0Init = true;
1016}
1017
1018
1019/**
1020 * Indicates that we're doing the ring-0 termination of the VM.
1021 *
1022 * @returns true if termination hasn't been done already, false if it has.
1023 * @param pVM Pointer to the shared VM structure.
1024 * @param pGVM Pointer to the global VM structure. Optional.
1025 * @thread EMT(0)
1026 */
1027GVMMR0DECL(bool) GVMMR0DoingTermVM(PVM pVM, PGVM pGVM)
1028{
1029 /* Validate the VM structure, state and handle. */
1030 AssertPtrNullReturn(pGVM, false);
1031 AssertReturn(!pGVM || pGVM->u32Magic == GVM_MAGIC, false);
1032 if (!pGVM)
1033 {
1034 PGVMM pGVMM;
1035 int rc = gvmmR0ByVMAndEMT(pVM, 0 /* idCpu */, &pGVM, &pGVMM);
1036 AssertRCReturn(rc, false);
1037 }
1038
1039 /* Set the indicator. */
1040 if (pGVM->gvmm.s.fDoneVMMR0Term)
1041 return false;
1042 pGVM->gvmm.s.fDoneVMMR0Term = true;
1043 return true;
1044}
1045
1046
1047/**
1048 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1049 *
1050 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1051 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1052 * would've been nice if the caller was actually the EMT thread or that we somehow
1053 * could've associated the calling thread with the VM up front.
1054 *
1055 * @returns VBox status code.
1056 * @param pVM Where to store the pointer to the VM structure.
1057 *
1058 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1059 */
1060GVMMR0DECL(int) GVMMR0DestroyVM(PVM pVM)
1061{
1062 LogFlow(("GVMMR0DestroyVM: pVM=%p\n", pVM));
1063 PGVMM pGVMM;
1064 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1065
1066
1067 /*
1068 * Validate the VM structure, state and caller.
1069 */
1070 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1071 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1072 AssertMsgReturn(pVM->enmVMState >= VMSTATE_CREATING && pVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pVM->enmVMState), VERR_WRONG_ORDER);
1073
1074 uint32_t hGVM = pVM->hSelf;
1075 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1076 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1077
1078 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1079 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1080
1081 RTPROCESS ProcId = RTProcSelf();
1082 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1083 AssertReturn( ( pHandle->hEMT0 == hSelf
1084 && pHandle->ProcId == ProcId)
1085 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1086
1087 /*
1088 * Lookup the handle and destroy the object.
1089 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1090 * object, we take some precautions against racing callers just in case...
1091 */
1092 int rc = gvmmR0CreateDestroyLock(pGVMM);
1093 AssertRC(rc);
1094
1095 /* be careful here because we might theoretically be racing someone else cleaning up. */
1096 if ( pHandle->pVM == pVM
1097 && ( ( pHandle->hEMT0 == hSelf
1098 && pHandle->ProcId == ProcId)
1099 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1100 && VALID_PTR(pHandle->pvObj)
1101 && VALID_PTR(pHandle->pSession)
1102 && VALID_PTR(pHandle->pGVM)
1103 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1104 {
1105 void *pvObj = pHandle->pvObj;
1106 pHandle->pvObj = NULL;
1107 gvmmR0CreateDestroyUnlock(pGVMM);
1108
1109 SUPR0ObjRelease(pvObj, pHandle->pSession);
1110 }
1111 else
1112 {
1113 SUPR0Printf("GVMMR0DestroyVM: pHandle=%p:{.pVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pVM=%p hSelf=%p\n",
1114 pHandle, pHandle->pVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pVM, hSelf);
1115 gvmmR0CreateDestroyUnlock(pGVMM);
1116 rc = VERR_INTERNAL_ERROR;
1117 }
1118
1119 return rc;
1120}
1121
1122
1123/**
1124 * Performs VM cleanup task as part of object destruction.
1125 *
1126 * @param pGVM The GVM pointer.
1127 */
1128static void gvmmR0CleanupVM(PGVM pGVM)
1129{
1130 if ( pGVM->gvmm.s.fDoneVMMR0Init
1131 && !pGVM->gvmm.s.fDoneVMMR0Term)
1132 {
1133 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1134 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM->pVM)
1135 {
1136 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1137 VMMR0TermVM(pGVM->pVM, pGVM);
1138 }
1139 else
1140 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM->pVM));
1141 }
1142
1143 GMMR0CleanupVM(pGVM);
1144}
1145
1146
/**
 * Handle destructor.
 *
 * Unlinks the handle from the used list, performs the VM cleanup and frees the
 * GVMM owned resources (memory objects, halt semaphores and the GVM structure
 * itself) when the handle has a valid GVM attached, and finally puts the
 * handle back on the free list.
 *
 * On any consistency check failure the function logs and returns early
 * without freeing the handle.
 *
 * @param   pvObj       The object pointer; only used for logging here.
 * @param   pvGVMM      The GVMM instance pointer.
 * @param   pvHandle    The handle pointer.
 */
static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle)
{
    LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvGVMM, pvHandle));

    /*
     * Some quick, paranoid, input validation.
     * The handle index is derived from the pointer and must be non-zero, in
     * range and equal to the index the handle records for itself.
     */
    PGVMHANDLE pHandle = (PGVMHANDLE)pvHandle;
    AssertPtr(pHandle);
    PGVMM pGVMM = (PGVMM)pvGVMM;
    Assert(pGVMM == g_pGVMM);
    const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
    if (   !iHandle
        || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
        || iHandle != pHandle->iSelf)
    {
        SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
        return;
    }

    /* Take the create/destroy lock first, then the used-list lock. */
    int rc = gvmmR0CreateDestroyLock(pGVMM);
    AssertRC(rc);
    rc = gvmmR0UsedLock(pGVMM);
    AssertRC(rc);

    /*
     * Unlink the handle from the used list.
     * This is a tad slow but a doubly linked list is too much hassle.
     */
    if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
    {
        SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
        gvmmR0UsedUnlock(pGVMM);
        gvmmR0CreateDestroyUnlock(pGVMM);
        return;
    }

    if (pGVMM->iUsedHead == iHandle)
        pGVMM->iUsedHead = pHandle->iNext;
    else
    {
        /* Walk the list looking for the predecessor.  The countdown 'c' bounds
           the walk so a cyclic (corrupt) list cannot make us loop forever. */
        uint16_t iPrev = pGVMM->iUsedHead;
        int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
        while (iPrev)
        {
            if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
            {
                SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
                gvmmR0UsedUnlock(pGVMM);
                gvmmR0CreateDestroyUnlock(pGVMM);
                return;
            }
            if (RT_UNLIKELY(c-- <= 0))
            {
                /* Gave up; zero iPrev so the "not found" path below is taken. */
                iPrev = 0;
                break;
            }

            if (pGVMM->aHandles[iPrev].iNext == iHandle)
                break;
            iPrev = pGVMM->aHandles[iPrev].iNext;
        }
        if (!iPrev)
        {
            SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
            gvmmR0UsedUnlock(pGVMM);
            gvmmR0CreateDestroyUnlock(pGVMM);
            return;
        }

        Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
        pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
    }
    pHandle->iNext = 0;
    pGVMM->cVMs--;

    /*
     * Do the global cleanup round.
     * This is skipped when no valid GVM structure is attached to the handle.
     */
    PGVM pGVM = pHandle->pGVM;
    if (   VALID_PTR(pGVM)
        && pGVM->u32Magic == GVM_MAGIC)
    {
        pGVMM->cEMTs -= pGVM->cCpus;
        /* The used lock is dropped for the duration of the cleanup; the
           create/destroy lock remains held. */
        gvmmR0UsedUnlock(pGVMM);

        gvmmR0CleanupVM(pGVM);

        /*
         * Do the GVMM cleanup - must be done last.
         */
        /* The VM and VM pages mappings/allocations.  Mapping objects are freed
           before the memory objects they map. */
        if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
        {
            rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
            pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
        }

        if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
        {
            rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
            pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
        }

        if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
        {
            rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
            pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
        }

        if (pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ)
        {
            rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, false /* fFreeMappings */); AssertRC(rc);
            pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
        }

        /* The per-VCPU halt semaphores. */
        for (VMCPUID i = 0; i < pGVM->cCpus; i++)
        {
            if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
            {
                rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
                pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
            }
        }

        /* The GVM structure itself.  The magic is altered before freeing so
           that it no longer compares equal to GVM_MAGIC. */
        pGVM->u32Magic |= UINT32_C(0x80000000);
        RTMemFree(pGVM);

        /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
        rc = gvmmR0UsedLock(pGVMM);
        AssertRC(rc);
    }
    /* else: no valid GVM attached - GVMMR0CreateVM cleanup; the used lock is still held. */

    /*
     * Free the handle: push it onto the free list and reset all its fields.
     */
    pHandle->iNext = pGVMM->iFreeHead;
    pGVMM->iFreeHead = iHandle;
    ASMAtomicWriteNullPtr(&pHandle->pGVM);
    ASMAtomicWriteNullPtr(&pHandle->pVM);
    ASMAtomicWriteNullPtr(&pHandle->pvObj);
    ASMAtomicWriteNullPtr(&pHandle->pSession);
    ASMAtomicWriteSize(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
    ASMAtomicWriteSize(&pHandle->ProcId, NIL_RTPROCESS);

    gvmmR0UsedUnlock(pGVMM);
    gvmmR0CreateDestroyUnlock(pGVMM);
    LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
}
1303
1304
1305/**
1306 * Registers the calling thread as the EMT of a Virtual CPU.
1307 *
1308 * Note that VCPU 0 is automatically registered during VM creation.
1309 *
1310 * @returns VBox status code
1311 * @param pVM The shared VM structure (the ring-0 mapping).
1312 * @param idCpu VCPU id.
1313 */
1314GVMMR0DECL(int) GVMMR0RegisterVCpu(PVM pVM, VMCPUID idCpu)
1315{
1316 AssertReturn(idCpu != 0, VERR_NOT_OWNER);
1317
1318 /*
1319 * Validate the VM structure, state and handle.
1320 */
1321 PGVM pGVM;
1322 PGVMM pGVMM;
1323 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, false /* fTakeUsedLock */);
1324 if (RT_FAILURE(rc))
1325 return rc;
1326
1327 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1328 AssertReturn(pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD, VERR_ACCESS_DENIED);
1329 Assert(pGVM->cCpus == pVM->cCpus);
1330 Assert(pVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1331
1332 pVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1333
1334 return VINF_SUCCESS;
1335}
1336
1337
1338/**
1339 * Lookup a GVM structure by its handle.
1340 *
1341 * @returns The GVM pointer on success, NULL on failure.
1342 * @param hGVM The global VM handle. Asserts on bad handle.
1343 */
1344GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1345{
1346 PGVMM pGVMM;
1347 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1348
1349 /*
1350 * Validate.
1351 */
1352 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1353 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1354
1355 /*
1356 * Look it up.
1357 */
1358 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1359 AssertPtrReturn(pHandle->pVM, NULL);
1360 AssertPtrReturn(pHandle->pvObj, NULL);
1361 PGVM pGVM = pHandle->pGVM;
1362 AssertPtrReturn(pGVM, NULL);
1363 AssertReturn(pGVM->pVM == pHandle->pVM, NULL);
1364
1365 return pHandle->pGVM;
1366}
1367
1368
1369/**
1370 * Lookup a GVM structure by the shared VM structure.
1371 *
1372 * The calling thread must be in the same process as the VM. All current lookups
1373 * are by threads inside the same process, so this will not be an issue.
1374 *
1375 * @returns VBox status code.
1376 * @param pVM The shared VM structure (the ring-0 mapping).
1377 * @param ppGVM Where to store the GVM pointer.
1378 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1379 * @param fTakeUsedLock Whether to take the used lock or not.
1380 * Be very careful if not taking the lock as it's possible that
1381 * the VM will disappear then.
1382 *
1383 * @remark This will not assert on an invalid pVM but try return sliently.
1384 */
1385static int gvmmR0ByVM(PVM pVM, PGVM *ppGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1386{
1387 RTPROCESS ProcId = RTProcSelf();
1388 PGVMM pGVMM;
1389 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1390
1391 /*
1392 * Validate.
1393 */
1394 if (RT_UNLIKELY( !VALID_PTR(pVM)
1395 || ((uintptr_t)pVM & PAGE_OFFSET_MASK)))
1396 return VERR_INVALID_POINTER;
1397 if (RT_UNLIKELY( pVM->enmVMState < VMSTATE_CREATING
1398 || pVM->enmVMState >= VMSTATE_TERMINATED))
1399 return VERR_INVALID_POINTER;
1400
1401 uint16_t hGVM = pVM->hSelf;
1402 if (RT_UNLIKELY( hGVM == NIL_GVM_HANDLE
1403 || hGVM >= RT_ELEMENTS(pGVMM->aHandles)))
1404 return VERR_INVALID_HANDLE;
1405
1406 /*
1407 * Look it up.
1408 */
1409 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1410 PGVM pGVM;
1411 if (fTakeUsedLock)
1412 {
1413 int rc = gvmmR0UsedLock(pGVMM);
1414 AssertRCReturn(rc, rc);
1415
1416 pGVM = pHandle->pGVM;
1417 if (RT_UNLIKELY( pHandle->pVM != pVM
1418 || pHandle->ProcId != ProcId
1419 || !VALID_PTR(pHandle->pvObj)
1420 || !VALID_PTR(pGVM)
1421 || pGVM->pVM != pVM))
1422 {
1423 gvmmR0UsedUnlock(pGVMM);
1424 return VERR_INVALID_HANDLE;
1425 }
1426 }
1427 else
1428 {
1429 if (RT_UNLIKELY(pHandle->pVM != pVM))
1430 return VERR_INVALID_HANDLE;
1431 if (RT_UNLIKELY(pHandle->ProcId != ProcId))
1432 return VERR_INVALID_HANDLE;
1433 if (RT_UNLIKELY(!VALID_PTR(pHandle->pvObj)))
1434 return VERR_INVALID_HANDLE;
1435
1436 pGVM = pHandle->pGVM;
1437 if (RT_UNLIKELY(!VALID_PTR(pGVM)))
1438 return VERR_INVALID_HANDLE;
1439 if (RT_UNLIKELY(pGVM->pVM != pVM))
1440 return VERR_INVALID_HANDLE;
1441 }
1442
1443 *ppGVM = pGVM;
1444 *ppGVMM = pGVMM;
1445 return VINF_SUCCESS;
1446}
1447
1448
1449/**
1450 * Lookup a GVM structure by the shared VM structure.
1451 *
1452 * @returns VBox status code.
1453 * @param pVM The shared VM structure (the ring-0 mapping).
1454 * @param ppGVM Where to store the GVM pointer.
1455 *
1456 * @remark This will not take the 'used'-lock because it doesn't do
1457 * nesting and this function will be used from under the lock.
1458 */
1459GVMMR0DECL(int) GVMMR0ByVM(PVM pVM, PGVM *ppGVM)
1460{
1461 PGVMM pGVMM;
1462 return gvmmR0ByVM(pVM, ppGVM, &pGVMM, false /* fTakeUsedLock */);
1463}
1464
1465
1466/**
1467 * Lookup a GVM structure by the shared VM structure and ensuring that the
1468 * caller is an EMT thread.
1469 *
1470 * @returns VBox status code.
1471 * @param pVM The shared VM structure (the ring-0 mapping).
1472 * @param idCpu The Virtual CPU ID of the calling EMT.
1473 * @param ppGVM Where to store the GVM pointer.
1474 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1475 * @thread EMT
1476 *
1477 * @remark This will assert in all failure paths.
1478 */
1479static int gvmmR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM, PGVMM *ppGVMM)
1480{
1481 PGVMM pGVMM;
1482 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
1483
1484 /*
1485 * Validate.
1486 */
1487 AssertPtrReturn(pVM, VERR_INVALID_POINTER);
1488 AssertReturn(!((uintptr_t)pVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1489
1490 uint16_t hGVM = pVM->hSelf;
1491 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_HANDLE);
1492 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_HANDLE);
1493
1494 /*
1495 * Look it up.
1496 */
1497 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1498 AssertReturn(pHandle->pVM == pVM, VERR_NOT_OWNER);
1499 RTPROCESS ProcId = RTProcSelf();
1500 AssertReturn(pHandle->ProcId == ProcId, VERR_NOT_OWNER);
1501 AssertPtrReturn(pHandle->pvObj, VERR_INTERNAL_ERROR);
1502
1503 PGVM pGVM = pHandle->pGVM;
1504 AssertPtrReturn(pGVM, VERR_INTERNAL_ERROR);
1505 AssertReturn(pGVM->pVM == pVM, VERR_INTERNAL_ERROR);
1506 RTNATIVETHREAD hAllegedEMT = RTThreadNativeSelf();
1507 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1508 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_INTERNAL_ERROR);
1509
1510 *ppGVM = pGVM;
1511 *ppGVMM = pGVMM;
1512 return VINF_SUCCESS;
1513}
1514
1515
1516/**
1517 * Lookup a GVM structure by the shared VM structure
1518 * and ensuring that the caller is the EMT thread.
1519 *
1520 * @returns VBox status code.
1521 * @param pVM The shared VM structure (the ring-0 mapping).
1522 * @param idCpu The Virtual CPU ID of the calling EMT.
1523 * @param ppGVM Where to store the GVM pointer.
1524 * @thread EMT
1525 */
1526GVMMR0DECL(int) GVMMR0ByVMAndEMT(PVM pVM, VMCPUID idCpu, PGVM *ppGVM)
1527{
1528 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
1529 PGVMM pGVMM;
1530 return gvmmR0ByVMAndEMT(pVM, idCpu, ppGVM, &pGVMM);
1531}
1532
1533
1534/**
1535 * Lookup a VM by its global handle.
1536 *
1537 * @returns The VM handle on success, NULL on failure.
1538 * @param hGVM The global VM handle. Asserts on bad handle.
1539 */
1540GVMMR0DECL(PVM) GVMMR0GetVMByHandle(uint32_t hGVM)
1541{
1542 PGVM pGVM = GVMMR0ByHandle(hGVM);
1543 return pGVM ? pGVM->pVM : NULL;
1544}
1545
1546
1547/**
1548 * Looks up the VM belonging to the specified EMT thread.
1549 *
1550 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1551 * unnecessary kernel panics when the EMT thread hits an assertion. The
1552 * call may or not be an EMT thread.
1553 *
1554 * @returns The VM handle on success, NULL on failure.
1555 * @param hEMT The native thread handle of the EMT.
1556 * NIL_RTNATIVETHREAD means the current thread
1557 */
1558GVMMR0DECL(PVM) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1559{
1560 /*
1561 * No Assertions here as we're usually called in a AssertMsgN or
1562 * RTAssert* context.
1563 */
1564 PGVMM pGVMM = g_pGVMM;
1565 if ( !VALID_PTR(pGVMM)
1566 || pGVMM->u32Magic != GVMM_MAGIC)
1567 return NULL;
1568
1569 if (hEMT == NIL_RTNATIVETHREAD)
1570 hEMT = RTThreadNativeSelf();
1571 RTPROCESS ProcId = RTProcSelf();
1572
1573 /*
1574 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1575 */
1576 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1577 {
1578 if ( pGVMM->aHandles[i].iSelf == i
1579 && pGVMM->aHandles[i].ProcId == ProcId
1580 && VALID_PTR(pGVMM->aHandles[i].pvObj)
1581 && VALID_PTR(pGVMM->aHandles[i].pVM)
1582 && VALID_PTR(pGVMM->aHandles[i].pGVM))
1583 {
1584 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1585 return pGVMM->aHandles[i].pVM;
1586
1587 /* This is fearly safe with the current process per VM approach. */
1588 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1589 VMCPUID const cCpus = pGVM->cCpus;
1590 if ( cCpus < 1
1591 || cCpus > VMM_MAX_CPU_COUNT)
1592 continue;
1593 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1594 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1595 return pGVMM->aHandles[i].pVM;
1596 }
1597 }
1598 return NULL;
1599}
1600
1601
1602/**
1603 * This is will wake up expired and soon-to-be expired VMs.
1604 *
1605 * @returns Number of VMs that has been woken up.
1606 * @param pGVMM Pointer to the GVMM instance data.
1607 * @param u64Now The current time.
1608 */
1609static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1610{
1611 /*
1612 * Skip this if we've got disabled because of high resolution wakeups or by
1613 * the user.
1614 */
1615 if ( !pGVMM->nsEarlyWakeUp1
1616 && !pGVMM->nsEarlyWakeUp2)
1617 return 0;
1618
1619/** @todo Rewrite this algorithm. See performance defect XYZ. */
1620
1621 /*
1622 * A cheap optimization to stop wasting so much time here on big setups.
1623 */
1624 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1625 if ( pGVMM->cHaltedEMTs == 0
1626 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1627 return 0;
1628
1629 /*
1630 * The first pass will wake up VMs which have actually expired
1631 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1632 */
1633 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1634 uint64_t u64Min = UINT64_MAX;
1635 unsigned cWoken = 0;
1636 unsigned cHalted = 0;
1637 unsigned cTodo2nd = 0;
1638 unsigned cTodo3rd = 0;
1639 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1640 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1641 i = pGVMM->aHandles[i].iNext)
1642 {
1643 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1644 if ( VALID_PTR(pCurGVM)
1645 && pCurGVM->u32Magic == GVM_MAGIC)
1646 {
1647 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1648 {
1649 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1650 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1651 if (u64)
1652 {
1653 if (u64 <= u64Now)
1654 {
1655 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1656 {
1657 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1658 AssertRC(rc);
1659 cWoken++;
1660 }
1661 }
1662 else
1663 {
1664 cHalted++;
1665 if (u64 <= uNsEarlyWakeUp1)
1666 cTodo2nd++;
1667 else if (u64 <= uNsEarlyWakeUp2)
1668 cTodo3rd++;
1669 else if (u64 < u64Min)
1670 u64 = u64Min;
1671 }
1672 }
1673 }
1674 }
1675 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1676 }
1677
1678 if (cTodo2nd)
1679 {
1680 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1681 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1682 i = pGVMM->aHandles[i].iNext)
1683 {
1684 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1685 if ( VALID_PTR(pCurGVM)
1686 && pCurGVM->u32Magic == GVM_MAGIC)
1687 {
1688 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1689 {
1690 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1691 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1692 if ( u64
1693 && u64 <= uNsEarlyWakeUp1)
1694 {
1695 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1696 {
1697 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1698 AssertRC(rc);
1699 cWoken++;
1700 }
1701 }
1702 }
1703 }
1704 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1705 }
1706 }
1707
1708 if (cTodo3rd)
1709 {
1710 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1711 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1712 i = pGVMM->aHandles[i].iNext)
1713 {
1714 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1715 if ( VALID_PTR(pCurGVM)
1716 && pCurGVM->u32Magic == GVM_MAGIC)
1717 {
1718 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1719 {
1720 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1721 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1722 if ( u64
1723 && u64 <= uNsEarlyWakeUp2)
1724 {
1725 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1726 {
1727 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1728 AssertRC(rc);
1729 cWoken++;
1730 }
1731 }
1732 }
1733 }
1734 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1735 }
1736 }
1737
1738 /*
1739 * Set the minimum value.
1740 */
1741 pGVMM->uNsNextEmtWakeup = u64Min;
1742
1743 return cWoken;
1744}
1745
1746
1747/**
1748 * Halt the EMT thread.
1749 *
1750 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
1751 * VERR_INTERRUPTED if a signal was scheduled for the thread.
1752 * @param pVM Pointer to the shared VM structure.
1753 * @param idCpu The Virtual CPU ID of the calling EMT.
1754 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
1755 * @thread EMT(idCpu).
1756 */
1757GVMMR0DECL(int) GVMMR0SchedHalt(PVM pVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
1758{
1759 LogFlow(("GVMMR0SchedHalt: pVM=%p\n", pVM));
1760
1761 /*
1762 * Validate the VM structure, state and handle.
1763 */
1764 PGVM pGVM;
1765 PGVMM pGVMM;
1766 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
1767 if (RT_FAILURE(rc))
1768 return rc;
1769 pGVM->gvmm.s.StatsSched.cHaltCalls++;
1770
1771 PGVMCPU pCurGVCpu = &pGVM->aCpus[idCpu];
1772 Assert(!pCurGVCpu->gvmm.s.u64HaltExpire);
1773
1774 /*
1775 * Take the UsedList semaphore, get the current time
1776 * and check if anyone needs waking up.
1777 * Interrupts must NOT be disabled at this point because we ask for GIP time!
1778 */
1779 rc = gvmmR0UsedLock(pGVMM);
1780 AssertRC(rc);
1781
1782 pCurGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
1783
1784 /* GIP hack: We might are frequently sleeping for short intervals where the
1785 difference between GIP and system time matters on systems with high resolution
1786 system time. So, convert the input from GIP to System time in that case. */
1787 Assert(ASMGetFlags() & X86_EFL_IF);
1788 const uint64_t u64NowSys = RTTimeSystemNanoTS();
1789 const uint64_t u64NowGip = RTTimeNanoTS();
1790 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
1791
1792 /*
1793 * Go to sleep if we must...
1794 * Cap the sleep time to 1 second to be on the safe side.
1795 */
1796 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
1797 if ( u64NowGip < u64ExpireGipTime
1798 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
1799 ? pGVMM->nsMinSleepCompany
1800 : pGVMM->nsMinSleepAlone))
1801 {
1802 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
1803 if (cNsInterval > RT_NS_1SEC)
1804 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
1805 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
1806 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
1807 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
1808 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
1809 gvmmR0UsedUnlock(pGVMM);
1810
1811 rc = RTSemEventMultiWaitEx(pCurGVCpu->gvmm.s.HaltEventMulti,
1812 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
1813 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
1814
1815 ASMAtomicWriteU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0);
1816 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
1817
1818 /* Reset the semaphore to try prevent a few false wake-ups. */
1819 if (rc == VINF_SUCCESS)
1820 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1821 else if (rc == VERR_TIMEOUT)
1822 {
1823 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
1824 rc = VINF_SUCCESS;
1825 }
1826 }
1827 else
1828 {
1829 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
1830 gvmmR0UsedUnlock(pGVMM);
1831 RTSemEventMultiReset(pCurGVCpu->gvmm.s.HaltEventMulti);
1832 }
1833
1834 return rc;
1835}
1836
1837
1838/**
1839 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
1840 * the a sleeping EMT.
1841 *
1842 * @retval VINF_SUCCESS if successfully woken up.
1843 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1844 *
1845 * @param pGVM The global (ring-0) VM structure.
1846 * @param pGVCpu The global (ring-0) VCPU structure.
1847 */
1848DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
1849{
1850 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
1851
1852 /*
1853 * Signal the semaphore regardless of whether it's current blocked on it.
1854 *
1855 * The reason for this is that there is absolutely no way we can be 100%
1856 * certain that it isn't *about* go to go to sleep on it and just got
1857 * delayed a bit en route. So, we will always signal the semaphore when
1858 * the it is flagged as halted in the VMM.
1859 */
1860/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
1861 int rc;
1862 if (pGVCpu->gvmm.s.u64HaltExpire)
1863 {
1864 rc = VINF_SUCCESS;
1865 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
1866 }
1867 else
1868 {
1869 rc = VINF_GVM_NOT_BLOCKED;
1870 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
1871 }
1872
1873 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
1874 AssertRC(rc2);
1875
1876 return rc;
1877}
1878
1879
1880/**
1881 * Wakes up the halted EMT thread so it can service a pending request.
1882 *
1883 * @returns VBox status code.
1884 * @retval VINF_SUCCESS if successfully woken up.
1885 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1886 *
1887 * @param pVM Pointer to the shared VM structure.
1888 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1889 * @param fTakeUsedLock Take the used lock or not
1890 * @thread Any but EMT.
1891 */
1892GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1893{
1894 /*
1895 * Validate input and take the UsedLock.
1896 */
1897 PGVM pGVM;
1898 PGVMM pGVMM;
1899 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1900 if (RT_SUCCESS(rc))
1901 {
1902 if (idCpu < pGVM->cCpus)
1903 {
1904 /*
1905 * Do the actual job.
1906 */
1907 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
1908
1909 if (fTakeUsedLock)
1910 {
1911 /*
1912 * While we're here, do a round of scheduling.
1913 */
1914 Assert(ASMGetFlags() & X86_EFL_IF);
1915 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
1916 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
1917 }
1918 }
1919 else
1920 rc = VERR_INVALID_CPU_ID;
1921
1922 if (fTakeUsedLock)
1923 {
1924 int rc2 = gvmmR0UsedUnlock(pGVMM);
1925 AssertRC(rc2);
1926 }
1927 }
1928
1929 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
1930 return rc;
1931}
1932
1933
1934/**
1935 * Wakes up the halted EMT thread so it can service a pending request.
1936 *
1937 * @returns VBox status code.
1938 * @retval VINF_SUCCESS if successfully woken up.
1939 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
1940 *
1941 * @param pVM Pointer to the shared VM structure.
1942 * @param idCpu The Virtual CPU ID of the EMT to wake up.
1943 * @thread Any but EMT.
1944 */
1945GVMMR0DECL(int) GVMMR0SchedWakeUp(PVM pVM, VMCPUID idCpu)
1946{
1947 return GVMMR0SchedWakeUpEx(pVM, idCpu, true /* fTakeUsedLock */);
1948}
1949
1950/**
1951 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
1952 * the Virtual CPU if it's still busy executing guest code.
1953 *
1954 * @returns VBox status code.
1955 * @retval VINF_SUCCESS if poked successfully.
1956 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1957 *
1958 * @param pGVM The global (ring-0) VM structure.
1959 * @param pVCpu The Virtual CPU handle.
1960 */
1961DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPU pVCpu)
1962{
1963 pGVM->gvmm.s.StatsSched.cPokeCalls++;
1964
1965 RTCPUID idHostCpu = pVCpu->idHostCpu;
1966 if ( idHostCpu == NIL_RTCPUID
1967 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
1968 {
1969 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
1970 return VINF_GVM_NOT_BUSY_IN_GC;
1971 }
1972
1973 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
1974 RTMpPokeCpu(idHostCpu);
1975 return VINF_SUCCESS;
1976}
1977
1978/**
1979 * Pokes an EMT if it's still busy running guest code.
1980 *
1981 * @returns VBox status code.
1982 * @retval VINF_SUCCESS if poked successfully.
1983 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
1984 *
1985 * @param pVM Pointer to the shared VM structure.
1986 * @param idCpu The ID of the virtual CPU to poke.
1987 * @param fTakeUsedLock Take the used lock or not
1988 */
1989GVMMR0DECL(int) GVMMR0SchedPokeEx(PVM pVM, VMCPUID idCpu, bool fTakeUsedLock)
1990{
1991 /*
1992 * Validate input and take the UsedLock.
1993 */
1994 PGVM pGVM;
1995 PGVMM pGVMM;
1996 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, fTakeUsedLock);
1997 if (RT_SUCCESS(rc))
1998 {
1999 if (idCpu < pGVM->cCpus)
2000 rc = gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2001 else
2002 rc = VERR_INVALID_CPU_ID;
2003
2004 if (fTakeUsedLock)
2005 {
2006 int rc2 = gvmmR0UsedUnlock(pGVMM);
2007 AssertRC(rc2);
2008 }
2009 }
2010
2011 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2012 return rc;
2013}
2014
2015
2016/**
2017 * Pokes an EMT if it's still busy running guest code.
2018 *
2019 * @returns VBox status code.
2020 * @retval VINF_SUCCESS if poked successfully.
2021 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2022 *
2023 * @param pVM Pointer to the shared VM structure.
2024 * @param idCpu The ID of the virtual CPU to poke.
2025 */
2026GVMMR0DECL(int) GVMMR0SchedPoke(PVM pVM, VMCPUID idCpu)
2027{
2028 return GVMMR0SchedPokeEx(pVM, idCpu, true /* fTakeUsedLock */);
2029}
2030
2031
2032/**
2033 * Wakes up a set of halted EMT threads so they can service pending request.
2034 *
2035 * @returns VBox status code, no informational stuff.
2036 *
2037 * @param pVM Pointer to the shared VM structure.
2038 * @param pSleepSet The set of sleepers to wake up.
2039 * @param pPokeSet The set of CPUs to poke.
2040 */
2041GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PVM pVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2042{
2043 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2044 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2045 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2046
2047 /*
2048 * Validate input and take the UsedLock.
2049 */
2050 PGVM pGVM;
2051 PGVMM pGVMM;
2052 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /* fTakeUsedLock */);
2053 if (RT_SUCCESS(rc))
2054 {
2055 rc = VINF_SUCCESS;
2056 VMCPUID idCpu = pGVM->cCpus;
2057 while (idCpu-- > 0)
2058 {
2059 /* Don't try poke or wake up ourselves. */
2060 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2061 continue;
2062
2063 /* just ignore errors for now. */
2064 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2065 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2066 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2067 gvmmR0SchedPokeOne(pGVM, &pVM->aCpus[idCpu]);
2068 }
2069
2070 int rc2 = gvmmR0UsedUnlock(pGVMM);
2071 AssertRC(rc2);
2072 }
2073
2074 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2075 return rc;
2076}
2077
2078
2079/**
2080 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2081 *
2082 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2083 * @param pVM Pointer to the shared VM structure.
2084 * @param pReq The request packet.
2085 */
2086GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PVM pVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2087{
2088 /*
2089 * Validate input and pass it on.
2090 */
2091 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2092 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2093
2094 return GVMMR0SchedWakeUpAndPokeCpus(pVM, &pReq->SleepSet, &pReq->PokeSet);
2095}
2096
2097
2098
2099/**
2100 * Poll the schedule to see if someone else should get a chance to run.
2101 *
2102 * This is a bit hackish and will not work too well if the machine is
2103 * under heavy load from non-VM processes.
2104 *
2105 * @returns VINF_SUCCESS if not yielded.
2106 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2107 * @param pVM Pointer to the shared VM structure.
2108 * @param idCpu The Virtual CPU ID of the calling EMT.
2109 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2110 * @param fYield Whether to yield or not.
2111 * This is for when we're spinning in the halt loop.
2112 * @thread EMT(idCpu).
2113 */
2114GVMMR0DECL(int) GVMMR0SchedPoll(PVM pVM, VMCPUID idCpu, bool fYield)
2115{
2116 /*
2117 * Validate input.
2118 */
2119 PGVM pGVM;
2120 PGVMM pGVMM;
2121 int rc = gvmmR0ByVMAndEMT(pVM, idCpu, &pGVM, &pGVMM);
2122 if (RT_SUCCESS(rc))
2123 {
2124 rc = gvmmR0UsedLock(pGVMM);
2125 AssertRC(rc);
2126 pGVM->gvmm.s.StatsSched.cPollCalls++;
2127
2128 Assert(ASMGetFlags() & X86_EFL_IF);
2129 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2130
2131 if (!fYield)
2132 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2133 else
2134 {
2135 /** @todo implement this... */
2136 rc = VERR_NOT_IMPLEMENTED;
2137 }
2138
2139 gvmmR0UsedUnlock(pGVMM);
2140 }
2141
2142 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2143 return rc;
2144}
2145
2146
#ifdef GVMM_SCHED_WITH_PPT
/**
 * Timer callback for the periodic preemption timer.
 *
 * Counts ticks and, once per historization interval, records the desired
 * frequency in the history ring.  The timer is then left alone, reprogrammed
 * to the highest frequency found in the history, or stopped when the history
 * holds nothing but zeros.
 *
 * @param   pTimer      The timer handle.
 * @param   pvUser      Pointer to the per cpu structure.
 * @param   iTick       The current tick. (Not used.)
 */
static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;

    /*
     * Termination check.
     */
    if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
        return;

    /*
     * Do the house keeping under the per-CPU spinlock.
     */
    RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
    RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);

    /* Historization interval not reached yet: nothing more to do on this tick. */
    if (++pCpu->Ppt.iTickHistorization < pCpu->Ppt.cTicksHistoriziationInterval)
    {
        RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
        return;
    }

    /*
     * Historicize the max frequency and start a fresh interval.
     */
    uint32_t const idxSlot = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
    pCpu->Ppt.aHzHistory[idxSlot] = pCpu->Ppt.uDesiredHz;
    pCpu->Ppt.iTickHistorization  = 0;
    pCpu->Ppt.uDesiredHz          = 0;

    /*
     * Find the highest frequency in the history.
     */
    uint32_t uPeakHz = 0;
    for (uint32_t idx = 0; idx < RT_ELEMENTS(pCpu->Ppt.aHzHistory); idx++)
    {
        if (pCpu->Ppt.aHzHistory[idx] > uPeakHz)
            uPeakHz = pCpu->Ppt.aHzHistory[idx];
    }

    /*
     * The timer already runs at that frequency: leave it alone.
     */
    if (uPeakHz == pCpu->Ppt.uTimerHz)
    {
        RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
        return;
    }

    /*
     * Nothing was requested during the whole history: stop it.
     * The spinlock is released before calling into the timer API.
     */
    if (!uPeakHz)
    {
        pCpu->Ppt.fStarted    = false;
        pCpu->Ppt.uTimerHz    = 0;
        pCpu->Ppt.cNsInterval = 0;
        RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);

        /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
        RTTimerStop(pTimer);
        return;
    }

    /*
     * Otherwise reprogram it to the new peak frequency.
     */
    pCpu->Ppt.cChanges++;
    pCpu->Ppt.iTickHistorization = 0;
    pCpu->Ppt.uTimerHz           = uPeakHz;
    uint32_t const cNsNewInterval = RT_NS_1SEC / uPeakHz;
    pCpu->Ppt.cNsInterval        = cNsNewInterval;
    if (cNsNewInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
        pCpu->Ppt.cTicksHistoriziationInterval = (  GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
                                                  + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
                                               / cNsNewInterval;
    else
        pCpu->Ppt.cTicksHistoriziationInterval = 1;
    RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);

    /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
    RTTimerChangeInterval(pTimer, cNsNewInterval);
}
#endif /* GVMM_SCHED_WITH_PPT */
2229
2230
2231/**
2232 * Updates the periodic preemption timer for the calling CPU.
2233 *
2234 * The caller must have disabled preemption!
2235 * The caller must check that the host can do high resolution timers.
2236 *
2237 * @param pVM The VM handle.
2238 * @param idHostCpu The current host CPU id.
2239 * @param uHz The desired frequency.
2240 */
2241GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PVM pVM, RTCPUID idHostCpu, uint32_t uHz)
2242{
2243#ifdef GVMM_SCHED_WITH_PPT
2244 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2245 Assert(RTTimerCanDoHighResolution());
2246
2247 /*
2248 * Resolve the per CPU data.
2249 */
2250 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2251 PGVMM pGVMM = g_pGVMM;
2252 if ( !VALID_PTR(pGVMM)
2253 || pGVMM->u32Magic != GVMM_MAGIC)
2254 return;
2255 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2256 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2257 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2258 && pCpu->idCpu == idHostCpu,
2259 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2260
2261 /*
2262 * Check whether we need to do anything about the timer.
2263 * We have to be a little bit careful since we might be race the timer
2264 * callback here.
2265 */
2266 if (uHz > 16384)
2267 uHz = 16384; /** @todo add a query method for this! */
2268 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2269 && uHz >= pCpu->Ppt.uMinHz
2270 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2271 {
2272 RTSPINLOCKTMP Tmp = RTSPINLOCKTMP_INITIALIZER;
2273 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2274
2275 pCpu->Ppt.uDesiredHz = uHz;
2276 uint32_t cNsInterval = 0;
2277 if (!pCpu->Ppt.fStarted)
2278 {
2279 pCpu->Ppt.cStarts++;
2280 pCpu->Ppt.fStarted = true;
2281 pCpu->Ppt.fStarting = true;
2282 pCpu->Ppt.iTickHistorization = 0;
2283 pCpu->Ppt.uTimerHz = uHz;
2284 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2285 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2286 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2287 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2288 / cNsInterval;
2289 else
2290 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2291 }
2292
2293 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2294
2295 if (cNsInterval)
2296 {
2297 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2298 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2299 AssertRC(rc);
2300
2301 RTSpinlockAcquireNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2302 if (RT_FAILURE(rc))
2303 pCpu->Ppt.fStarted = false;
2304 pCpu->Ppt.fStarting = false;
2305 RTSpinlockReleaseNoInts(pCpu->Ppt.hSpinlock, &Tmp);
2306 }
2307 }
2308#endif /* GVMM_SCHED_WITH_PPT */
2309}
2310
2311
2312/**
2313 * Retrieves the GVMM statistics visible to the caller.
2314 *
2315 * @returns VBox status code.
2316 *
2317 * @param pStats Where to put the statistics.
2318 * @param pSession The current session.
2319 * @param pVM The VM to obtain statistics for. Optional.
2320 */
2321GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2322{
2323 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2324
2325 /*
2326 * Validate input.
2327 */
2328 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2329 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2330 pStats->cVMs = 0; /* (crash before taking the sem...) */
2331
2332 /*
2333 * Take the lock and get the VM statistics.
2334 */
2335 PGVMM pGVMM;
2336 if (pVM)
2337 {
2338 PGVM pGVM;
2339 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2340 if (RT_FAILURE(rc))
2341 return rc;
2342 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2343 }
2344 else
2345 {
2346 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
2347 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2348
2349 int rc = gvmmR0UsedLock(pGVMM);
2350 AssertRCReturn(rc, rc);
2351 }
2352
2353 /*
2354 * Enumerate the VMs and add the ones visibile to the statistics.
2355 */
2356 pStats->cVMs = 0;
2357 pStats->cEMTs = 0;
2358 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2359
2360 for (unsigned i = pGVMM->iUsedHead;
2361 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2362 i = pGVMM->aHandles[i].iNext)
2363 {
2364 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2365 void *pvObj = pGVMM->aHandles[i].pvObj;
2366 if ( VALID_PTR(pvObj)
2367 && VALID_PTR(pGVM)
2368 && pGVM->u32Magic == GVM_MAGIC
2369 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2370 {
2371 pStats->cVMs++;
2372 pStats->cEMTs += pGVM->cCpus;
2373
2374 pStats->SchedSum.cHaltCalls += pGVM->gvmm.s.StatsSched.cHaltCalls;
2375 pStats->SchedSum.cHaltBlocking += pGVM->gvmm.s.StatsSched.cHaltBlocking;
2376 pStats->SchedSum.cHaltTimeouts += pGVM->gvmm.s.StatsSched.cHaltTimeouts;
2377 pStats->SchedSum.cHaltNotBlocking += pGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2378 pStats->SchedSum.cHaltWakeUps += pGVM->gvmm.s.StatsSched.cHaltWakeUps;
2379
2380 pStats->SchedSum.cWakeUpCalls += pGVM->gvmm.s.StatsSched.cWakeUpCalls;
2381 pStats->SchedSum.cWakeUpNotHalted += pGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2382 pStats->SchedSum.cWakeUpWakeUps += pGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2383
2384 pStats->SchedSum.cPokeCalls += pGVM->gvmm.s.StatsSched.cPokeCalls;
2385 pStats->SchedSum.cPokeNotBusy += pGVM->gvmm.s.StatsSched.cPokeNotBusy;
2386
2387 pStats->SchedSum.cPollCalls += pGVM->gvmm.s.StatsSched.cPollCalls;
2388 pStats->SchedSum.cPollHalts += pGVM->gvmm.s.StatsSched.cPollHalts;
2389 pStats->SchedSum.cPollWakeUps += pGVM->gvmm.s.StatsSched.cPollWakeUps;
2390 }
2391 }
2392
2393 /*
2394 * Copy out the per host CPU statistics.
2395 */
2396 uint32_t iDstCpu = 0;
2397 uint32_t cSrcCpus = pGVMM->cHostCpus;
2398 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2399 {
2400 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2401 {
2402 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2403 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2404#ifdef GVMM_SCHED_WITH_PPT
2405 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2406 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2407 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2408 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2409#else
2410 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2411 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2412 pStats->aHostCpus[iDstCpu].cChanges = 0;
2413 pStats->aHostCpus[iDstCpu].cStarts = 0;
2414#endif
2415 iDstCpu++;
2416 }
2417 }
2418 pStats->cHostCpus = iDstCpu;
2419
2420 gvmmR0UsedUnlock(pGVMM);
2421
2422 return VINF_SUCCESS;
2423}
2424
2425
2426/**
2427 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2428 *
2429 * @returns see GVMMR0QueryStatistics.
2430 * @param pVM Pointer to the shared VM structure. Optional.
2431 * @param pReq The request packet.
2432 */
2433GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PVM pVM, PGVMMQUERYSTATISTICSSREQ pReq)
2434{
2435 /*
2436 * Validate input and pass it on.
2437 */
2438 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2439 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2440
2441 return GVMMR0QueryStatistics(&pReq->Stats, pReq->pSession, pVM);
2442}
2443
2444
2445/**
2446 * Resets the specified GVMM statistics.
2447 *
2448 * @returns VBox status code.
2449 *
2450 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2451 * @param pSession The current session.
2452 * @param pVM The VM to reset statistics for. Optional.
2453 */
2454GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PVM pVM)
2455{
2456 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pVM=%p\n", pStats, pSession, pVM));
2457
2458 /*
2459 * Validate input.
2460 */
2461 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2462 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2463
2464 /*
2465 * Take the lock and get the VM statistics.
2466 */
2467 PGVMM pGVMM;
2468 if (pVM)
2469 {
2470 PGVM pGVM;
2471 int rc = gvmmR0ByVM(pVM, &pGVM, &pGVMM, true /*fTakeUsedLock*/);
2472 if (RT_FAILURE(rc))
2473 return rc;
2474# define MAYBE_RESET_FIELD(field) \
2475 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2476 MAYBE_RESET_FIELD(cHaltCalls);
2477 MAYBE_RESET_FIELD(cHaltBlocking);
2478 MAYBE_RESET_FIELD(cHaltTimeouts);
2479 MAYBE_RESET_FIELD(cHaltNotBlocking);
2480 MAYBE_RESET_FIELD(cHaltWakeUps);
2481 MAYBE_RESET_FIELD(cWakeUpCalls);
2482 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2483 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2484 MAYBE_RESET_FIELD(cPokeCalls);
2485 MAYBE_RESET_FIELD(cPokeNotBusy);
2486 MAYBE_RESET_FIELD(cPollCalls);
2487 MAYBE_RESET_FIELD(cPollHalts);
2488 MAYBE_RESET_FIELD(cPollWakeUps);
2489# undef MAYBE_RESET_FIELD
2490 }
2491 else
2492 {
2493 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_INTERNAL_ERROR);
2494
2495 int rc = gvmmR0UsedLock(pGVMM);
2496 AssertRCReturn(rc, rc);
2497 }
2498
2499 /*
2500 * Enumerate the VMs and add the ones visibile to the statistics.
2501 */
2502 if (ASMMemIsAll8(&pStats->SchedSum, sizeof(pStats->SchedSum), 0))
2503 {
2504 for (unsigned i = pGVMM->iUsedHead;
2505 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2506 i = pGVMM->aHandles[i].iNext)
2507 {
2508 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2509 void *pvObj = pGVMM->aHandles[i].pvObj;
2510 if ( VALID_PTR(pvObj)
2511 && VALID_PTR(pGVM)
2512 && pGVM->u32Magic == GVM_MAGIC
2513 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2514 {
2515# define MAYBE_RESET_FIELD(field) \
2516 do { if (pStats->SchedSum. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2517 MAYBE_RESET_FIELD(cHaltCalls);
2518 MAYBE_RESET_FIELD(cHaltBlocking);
2519 MAYBE_RESET_FIELD(cHaltTimeouts);
2520 MAYBE_RESET_FIELD(cHaltNotBlocking);
2521 MAYBE_RESET_FIELD(cHaltWakeUps);
2522 MAYBE_RESET_FIELD(cWakeUpCalls);
2523 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2524 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2525 MAYBE_RESET_FIELD(cPokeCalls);
2526 MAYBE_RESET_FIELD(cPokeNotBusy);
2527 MAYBE_RESET_FIELD(cPollCalls);
2528 MAYBE_RESET_FIELD(cPollHalts);
2529 MAYBE_RESET_FIELD(cPollWakeUps);
2530# undef MAYBE_RESET_FIELD
2531 }
2532 }
2533 }
2534
2535 gvmmR0UsedUnlock(pGVMM);
2536
2537 return VINF_SUCCESS;
2538}
2539
2540
2541/**
2542 * VMMR0 request wrapper for GVMMR0ResetStatistics.
2543 *
2544 * @returns see GVMMR0ResetStatistics.
2545 * @param pVM Pointer to the shared VM structure. Optional.
2546 * @param pReq The request packet.
2547 */
2548GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PVM pVM, PGVMMRESETSTATISTICSSREQ pReq)
2549{
2550 /*
2551 * Validate input and pass it on.
2552 */
2553 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2554 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2555
2556 return GVMMR0ResetStatistics(&pReq->Stats, pReq->pSession, pVM);
2557}
2558
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette