VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 93368

Last change on this file since 93368 was 93115, checked in by vboxsync, 3 years ago

scm --update-copyright-year

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 122.0 KB
Line 
1/* $Id: GVMMR0.cpp 93115 2022-01-01 11:31:46Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2022 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempts VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/dbgf.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/pdm.h>
59#include <VBox/vmm/pgm.h>
60#include <VBox/vmm/vmm.h>
61#ifdef VBOX_WITH_NEM_R0
62# include <VBox/vmm/nem.h>
63#endif
64#include <VBox/vmm/vmcpuset.h>
65#include <VBox/vmm/vmcc.h>
66#include <VBox/param.h>
67#include <VBox/err.h>
68
69#include <iprt/asm.h>
70#include <iprt/asm-amd64-x86.h>
71#include <iprt/critsect.h>
72#include <iprt/mem.h>
73#include <iprt/semaphore.h>
74#include <iprt/time.h>
75#include <VBox/log.h>
76#include <iprt/thread.h>
77#include <iprt/process.h>
78#include <iprt/param.h>
79#include <iprt/string.h>
80#include <iprt/assert.h>
81#include <iprt/mem.h>
82#include <iprt/memobj.h>
83#include <iprt/mp.h>
84#include <iprt/cpuset.h>
85#include <iprt/spinlock.h>
86#include <iprt/timer.h>
87
88#include "dtrace/VBoxVMM.h"
89
90
91/*********************************************************************************************************************************
92* Defined Constants And Macros *
93*********************************************************************************************************************************/
#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)
/** Define this to enable the periodic preemption timer. */
# define GVMM_SCHED_WITH_PPT
#endif

#if /*defined(RT_OS_WINDOWS) ||*/ defined(DOXYGEN_RUNNING)
/** Define this to enable the per-EMT high resolution wakeup timers. */
# define GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
#endif


/** Special value that GVMMR0DeregisterVCpu sets.
 * Marks an EMT slot whose thread has deregistered itself; deliberately
 * distinct from NIL_RTNATIVETHREAD so a destroyed entry cannot be confused
 * with a never-registered one. */
#define GVMM_RTNATIVETHREAD_DESTROYED       (~(RTNATIVETHREAD)1)
AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
108
109
110/*********************************************************************************************************************************
111* Structures and Typedefs *
112*********************************************************************************************************************************/
113
/**
 * Global VM handle.
 *
 * One entry in the GVMM::aHandles table; associates a ring-0 GVM structure
 * with its owner process, driver session and EMT0 thread.
 */
typedef struct GVMHANDLE
{
    /** The index of the next handle in the list (free or used). (0 is nil.) */
    uint16_t volatile   iNext;
    /** Our own index / handle value. */
    uint16_t            iSelf;
    /** The process ID of the handle owner.
     * This is used for access checks. */
    RTPROCESS           ProcId;
    /** The pointer to the ring-0 only (aka global) VM structure. */
    PGVM                pGVM;
    /** The virtual machine object (SUPR0ObjRegister). */
    void               *pvObj;
    /** The session this VM is associated with. */
    PSUPDRVSESSION      pSession;
    /** The ring-0 handle of the EMT0 thread.
     * This is used for ownership checks as well as looking up a VM handle by thread
     * at times like assertions. */
    RTNATIVETHREAD      hEMT0;
} GVMHANDLE;
/** Pointer to a global VM handle. */
typedef GVMHANDLE *PGVMHANDLE;
139
/** Number of GVM handles (including the NIL handle).
 * Larger on 64-bit hosts where kernel address space for the handle table is
 * plentiful. */
#if HC_ARCH_BITS == 64
# define GVMM_MAX_HANDLES   8192
#else
# define GVMM_MAX_HANDLES   128
#endif
146
/**
 * Per host CPU GVMM data.
 */
typedef struct GVMMHOSTCPU
{
    /** Magic number (GVMMHOSTCPU_MAGIC); zero when the CPU entry is unused. */
    uint32_t volatile   u32Magic;
    /** The CPU ID. */
    RTCPUID             idCpu;
    /** The CPU set index. */
    uint32_t            idxCpuSet;

#ifdef GVMM_SCHED_WITH_PPT
    /** Periodic preemption timer data. */
    struct
    {
        /** The handle to the periodic preemption timer. */
        PRTTIMER            pTimer;
        /** Spinlock protecting the data below. */
        RTSPINLOCK          hSpinlock;
        /** The smallest Hz that we need to care about. (static) */
        uint32_t            uMinHz;
        /** The number of ticks between each historization. */
        uint32_t            cTicksHistoriziationInterval;
        /** The current historization tick (counting up to
         * cTicksHistoriziationInterval and then resetting). */
        uint32_t            iTickHistorization;
        /** The current timer interval. This is set to 0 when inactive. */
        uint32_t            cNsInterval;
        /** The current timer frequency. This is set to 0 when inactive. */
        uint32_t            uTimerHz;
        /** The current max frequency reported by the EMTs.
         * This gets historicized and reset by the timer callback. This is
         * read without holding the spinlock, so needs atomic updating. */
        uint32_t volatile   uDesiredHz;
        /** Whether the timer was started or not. */
        bool volatile       fStarted;
        /** Set if we're starting the timer. */
        bool volatile       fStarting;
        /** The index of the next history entry (mod it). */
        uint32_t            iHzHistory;
        /** Historicized uDesiredHz values. The array wraps around, new entries
         * are added at iHzHistory. This is updated approximately every
         * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
        uint32_t            aHzHistory[8];
        /** Statistics counter for recording the number of interval changes. */
        uint32_t            cChanges;
        /** Statistics counter for recording the number of timer starts. */
        uint32_t            cStarts;
    } Ppt;
#endif /* GVMM_SCHED_WITH_PPT */

} GVMMHOSTCPU;
/** Pointer to the per host CPU GVMM data. */
typedef GVMMHOSTCPU *PGVMMHOSTCPU;
/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
#define GVMMHOSTCPU_MAGIC   UINT32_C(0x19711011)
/** The interval each history entry should cover (approximately), given in
 * nanoseconds. */
#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS    UINT32_C(20000000)
207
208
/**
 * The GVMM instance data.
 *
 * A single instance of this (variable length, sized by the host CPU count)
 * is allocated by GVMMR0Init() and published via g_pGVMM.
 */
typedef struct GVMM
{
    /** Eyecatcher / magic (GVMM_MAGIC). */
    uint32_t            u32Magic;
    /** The index of the head of the free handle chain. (0 is nil.) */
    uint16_t volatile   iFreeHead;
    /** The index of the head of the active handle chain. (0 is nil.) */
    uint16_t volatile   iUsedHead;
    /** The number of VMs. */
    uint16_t volatile   cVMs;
    /** Alignment padding. */
    uint16_t            u16Reserved;
    /** The number of EMTs. */
    uint32_t volatile   cEMTs;
    /** The number of EMTs that have halted in GVMMR0SchedHalt. */
    uint32_t volatile   cHaltedEMTs;
    /** Mini lock for restricting early wake-ups to one thread. */
    bool volatile       fDoingEarlyWakeUps;
    bool                afPadding[3]; /**< explicit alignment padding. */
    /** When the next halted or sleeping EMT will wake up.
     * This is set to 0 when it needs recalculating and to UINT64_MAX when
     * there are no halted or sleeping EMTs in the GVMM. */
    uint64_t            uNsNextEmtWakeup;
    /** The lock used to serialize VM creation, destruction and associated events that
     * isn't performance critical. Owners may acquire the list lock. */
    RTCRITSECT          CreateDestroyLock;
    /** The lock used to serialize used list updates and accesses.
     * This indirectly includes scheduling since the scheduler will have to walk the
     * used list to examine running VMs. Owners may not acquire any other locks. */
    RTCRITSECTRW        UsedLock;
    /** The handle array.
     * The size of this array defines the maximum number of currently running VMs.
     * The first entry is unused as it represents the NIL handle. */
    GVMHANDLE           aHandles[GVMM_MAX_HANDLES];

    /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
     * The number of EMTs that means we no longer consider ourselves alone on a
     * CPU/Core.
     */
    uint32_t            cEMTsMeansCompany;
    /** @gcfgm{/GVMM/MinSleepAlone, 32-bit, 0, 100000000, 750000, ns}
     * The minimum sleep time for when we're alone, in nano seconds.
     */
    uint32_t            nsMinSleepAlone;
    /** @gcfgm{/GVMM/MinSleepCompany, 32-bit, 0, 100000000, 15000, ns}
     * The minimum sleep time for when we've got company, in nano seconds.
     */
    uint32_t            nsMinSleepCompany;
#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
    /** @gcfgm{/GVMM/MinSleepWithHrWakeUp, 32-bit, 0, 100000000, 5000, ns}
     * The minimum sleep time for when we've got a high-resolution wake-up timer, in
     * nano seconds.
     */
    uint32_t            nsMinSleepWithHrTimer;
#endif
    /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
     * The limit for the first round of early wake-ups, given in nano seconds.
     */
    uint32_t            nsEarlyWakeUp1;
    /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
     * The limit for the second round of early wake-ups, given in nano seconds.
     */
    uint32_t            nsEarlyWakeUp2;

    /** Set if we're doing early wake-ups.
     * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2 (both must be non-zero). */
    bool volatile       fDoEarlyWakeUps;

    /** The number of entries in the host CPU array (aHostCpus). */
    uint32_t            cHostCpus;
    /** Per host CPU data (variable length). */
    GVMMHOSTCPU         aHostCpus[1];
} GVMM;
AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
AssertCompileMemberAlignment(GVMM, UsedLock, 8);
AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
/** Pointer to the GVMM instance data. */
typedef GVMM *PGVMM;

/** The GVMM::u32Magic value (Charlie Haden). */
#define GVMM_MAGIC UINT32_C(0x19370806)
293
294
295
296/*********************************************************************************************************************************
297* Global Variables *
298*********************************************************************************************************************************/
/** Pointer to the GVMM instance data.
 * (Just my general dislike for global variables.) */
static PGVMM g_pGVMM = NULL;

/** Macro for obtaining and validating the g_pGVMM pointer.
 * On failure it will return from the invoking function with the specified return value.
 *
 * Both the pointer check and the magic check guard against use before
 * GVMMR0Init() or after GVMMR0Term().
 *
 * @param   pGVMM   The name of the pGVMM variable.
 * @param   rc      The return value on failure. Use VERR_GVMM_INSTANCE for VBox
 *                  status codes.
 */
#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
    do { \
        (pGVMM) = g_pGVMM;\
        AssertPtrReturn((pGVMM), (rc)); \
        AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
    } while (0)

/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
 * On failure it will return from the invoking function.
 *
 * @param   pGVMM   The name of the pGVMM variable.
 */
#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
    do { \
        (pGVMM) = g_pGVMM;\
        AssertPtrReturnVoid((pGVMM)); \
        AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
    } while (0)
328
329
330/*********************************************************************************************************************************
331* Internal Functions *
332*********************************************************************************************************************************/
333static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
334static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
335static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
336static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
337
338#ifdef GVMM_SCHED_WITH_PPT
339static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
340#endif
341#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
342static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
343#endif
344
345
/**
 * Initializes the GVMM.
 *
 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
 *
 * Allocates the instance data sized by the host CPU count, creates the two
 * locks, builds the handle free list, picks scheduling defaults from the
 * event semaphore resolution, and (with GVMM_SCHED_WITH_PPT) creates one
 * periodic preemption timer + spinlock per possible host CPU.  On any
 * failure everything created so far is torn down again.
 *
 * @returns VBox status code.
 */
GVMMR0DECL(int) GVMMR0Init(void)
{
    LogFlow(("GVMMR0Init:\n"));

    /*
     * Allocate and initialize the instance data.
     */
    uint32_t cHostCpus = RTMpGetArraySize();
    AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);

    PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
    if (!pGVMM)
        return VERR_NO_MEMORY;
    int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
                              "GVMM-CreateDestroyLock");
    if (RT_SUCCESS(rc))
    {
        rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
        if (RT_SUCCESS(rc))
        {
            pGVMM->u32Magic  = GVMM_MAGIC;
            pGVMM->iUsedHead = 0;
            pGVMM->iFreeHead = 1;

            /* the nil handle */
            pGVMM->aHandles[0].iSelf = 0;
            pGVMM->aHandles[0].iNext = 0;

            /* the tail */
            unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
            pGVMM->aHandles[i].iSelf = i;
            pGVMM->aHandles[i].iNext = 0; /* nil */

            /* the rest - chain each free entry to the next one */
            while (i-- > 1)
            {
                pGVMM->aHandles[i].iSelf = i;
                pGVMM->aHandles[i].iNext = i + 1;
            }

            /* The default configuration values, tuned by the wait/wake-up
               resolution the host's event semaphores can deliver. */
            uint32_t cNsResolution = RTSemEventMultiGetResolution();
            pGVMM->cEMTsMeansCompany = 1;   /** @todo should be adjusted to relative to the cpu count or something... */
            if (cNsResolution >= 5*RT_NS_100US)
            {
                /* Coarse resolution (>= 0.5 ms): use the tuned defaults. */
                pGVMM->nsMinSleepAlone   = 750000 /* ns (0.750 ms) */;  /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
                pGVMM->nsMinSleepCompany =  15000 /* ns (0.015 ms) */;
                pGVMM->nsEarlyWakeUp1    =  25000 /* ns (0.025 ms) */;
                pGVMM->nsEarlyWakeUp2    =  50000 /* ns (0.050 ms) */;
            }
            else if (cNsResolution > RT_NS_100US)
            {
                /* Medium resolution: derive the sleeps from it, skip early wake-ups. */
                pGVMM->nsMinSleepAlone   = cNsResolution / 2;
                pGVMM->nsMinSleepCompany = cNsResolution / 4;
                pGVMM->nsEarlyWakeUp1    = 0;
                pGVMM->nsEarlyWakeUp2    = 0;
            }
            else
            {
                /* High resolution (<= 0.1 ms): minimal sleeps, no early wake-ups. */
                pGVMM->nsMinSleepAlone   = 2000;
                pGVMM->nsMinSleepCompany = 2000;
                pGVMM->nsEarlyWakeUp1    = 0;
                pGVMM->nsEarlyWakeUp2    = 0;
            }
#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
            pGVMM->nsMinSleepWithHrTimer = 5000 /* ns (0.005 ms) */;
#endif
            pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;

            /* The host CPU data. */
            pGVMM->cHostCpus = cHostCpus;
            uint32_t iCpu = cHostCpus;
            RTCPUSET PossibleSet;
            RTMpGetSet(&PossibleSet);
            while (iCpu-- > 0)
            {
                pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
#ifdef GVMM_SCHED_WITH_PPT
                pGVMM->aHostCpus[iCpu].Ppt.pTimer    = NULL;
                pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
                pGVMM->aHostCpus[iCpu].Ppt.uMinHz    = 5; /** @todo Add some API which figures this one out. (not *that* important) */
                pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
                /* The remaining Ppt members (iTickHistorization, cNsInterval,
                   uTimerHz, uDesiredHz, fStarted, fStarting, iHzHistory,
                   aHzHistory) are already zeroed by RTMemAllocZ above. */
#endif

                if (RTCpuSetIsMember(&PossibleSet, iCpu))
                {
                    pGVMM->aHostCpus[iCpu].idCpu    = RTMpCpuIdFromSetIndex(iCpu);
                    pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;

#ifdef GVMM_SCHED_WITH_PPT
                    rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
                                         50*1000*1000 /* whatever */,
                                         RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
                                         gvmmR0SchedPeriodicPreemptionTimerCallback,
                                         &pGVMM->aHostCpus[iCpu]);
                    if (RT_SUCCESS(rc))
                    {
                        rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
                        if (RT_FAILURE(rc))
                            LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
                    }
                    else
                        LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
                    if (RT_FAILURE(rc))
                    {
                        /* Undo the timers/spinlocks created so far.  We iterate
                           downwards, so entries iCpu..cHostCpus-1 were touched;
                           walk back up destroying them, then abort the loop. */
                        while (iCpu < cHostCpus)
                        {
                            RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
                            RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
                            pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
                            iCpu++;
                        }
                        break;
                    }
#endif
                }
                else
                {
                    /* CPU not in the possible set - mark the entry as inactive. */
                    pGVMM->aHostCpus[iCpu].idCpu    = NIL_RTCPUID;
                    pGVMM->aHostCpus[iCpu].u32Magic = 0;
                }
            }
            if (RT_SUCCESS(rc))
            {
                /* Publish the instance and we're done. */
                g_pGVMM = pGVMM;
                LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
                return VINF_SUCCESS;
            }

            /* bail out. */
            RTCritSectRwDelete(&pGVMM->UsedLock);
        }
        else
            LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
        RTCritSectDelete(&pGVMM->CreateDestroyLock);
    }
    else
        LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));

    RTMemFree(pGVMM);
    return rc;
}
503
504
/**
 * Terminates the GVM.
 *
 * This is called while owning the loader semaphore (see supdrvLdrFree()).
 * And unless something is wrong, there should be absolutely no VMs
 * registered at this point.
 */
GVMMR0DECL(void) GVMMR0Term(void)
{
    LogFlow(("GVMMR0Term:\n"));

    /* Detach the instance pointer first so no new callers can pick it up. */
    PGVMM pGVMM = g_pGVMM;
    g_pGVMM = NULL;
    if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
    {
        SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
        return;
    }

    /*
     * First of all, stop all active timers.
     */
    uint32_t cActiveTimers = 0;
    uint32_t iCpu = pGVMM->cHostCpus;
    while (iCpu-- > 0)
    {
        /* Invalidate the per-CPU magic so a racing timer callback bails out. */
        ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
#ifdef GVMM_SCHED_WITH_PPT
        if (   pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
            && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
            cActiveTimers++;
#endif
    }
    if (cActiveTimers)
        RTThreadSleep(1); /* fudge - give in-flight timer callbacks a moment to finish */

    /*
     * Invalidate the instance data and free the resources.
     */
    pGVMM->u32Magic = ~GVMM_MAGIC;
    RTCritSectRwDelete(&pGVMM->UsedLock);
    RTCritSectDelete(&pGVMM->CreateDestroyLock);

    pGVMM->iFreeHead = 0;
    if (pGVMM->iUsedHead)
    {
        /* Shouldn't happen: VMs are still registered at module unload time. */
        SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
        pGVMM->iUsedHead = 0;
    }

#ifdef GVMM_SCHED_WITH_PPT
    iCpu = pGVMM->cHostCpus;
    while (iCpu-- > 0)
    {
        RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
        pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
        RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
        pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
    }
#endif

    RTMemFree(pGVMM);
}
568
569
570/**
571 * A quick hack for setting global config values.
572 *
573 * @returns VBox status code.
574 *
575 * @param pSession The session handle. Used for authentication.
576 * @param pszName The variable name.
577 * @param u64Value The new value.
578 */
579GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
580{
581 /*
582 * Validate input.
583 */
584 PGVMM pGVMM;
585 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
586 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
587 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
588
589 /*
590 * String switch time!
591 */
592 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
593 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
594 int rc = VINF_SUCCESS;
595 pszName += sizeof("/GVMM/") - 1;
596 if (!strcmp(pszName, "cEMTsMeansCompany"))
597 {
598 if (u64Value <= UINT32_MAX)
599 pGVMM->cEMTsMeansCompany = u64Value;
600 else
601 rc = VERR_OUT_OF_RANGE;
602 }
603 else if (!strcmp(pszName, "MinSleepAlone"))
604 {
605 if (u64Value <= RT_NS_100MS)
606 pGVMM->nsMinSleepAlone = u64Value;
607 else
608 rc = VERR_OUT_OF_RANGE;
609 }
610 else if (!strcmp(pszName, "MinSleepCompany"))
611 {
612 if (u64Value <= RT_NS_100MS)
613 pGVMM->nsMinSleepCompany = u64Value;
614 else
615 rc = VERR_OUT_OF_RANGE;
616 }
617#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
618 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
619 {
620 if (u64Value <= RT_NS_100MS)
621 pGVMM->nsMinSleepWithHrTimer = u64Value;
622 else
623 rc = VERR_OUT_OF_RANGE;
624 }
625#endif
626 else if (!strcmp(pszName, "EarlyWakeUp1"))
627 {
628 if (u64Value <= RT_NS_100MS)
629 {
630 pGVMM->nsEarlyWakeUp1 = u64Value;
631 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
632 }
633 else
634 rc = VERR_OUT_OF_RANGE;
635 }
636 else if (!strcmp(pszName, "EarlyWakeUp2"))
637 {
638 if (u64Value <= RT_NS_100MS)
639 {
640 pGVMM->nsEarlyWakeUp2 = u64Value;
641 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
642 }
643 else
644 rc = VERR_OUT_OF_RANGE;
645 }
646 else
647 rc = VERR_CFGM_VALUE_NOT_FOUND;
648 return rc;
649}
650
651
652/**
653 * A quick hack for getting global config values.
654 *
655 * @returns VBox status code.
656 *
657 * @param pSession The session handle. Used for authentication.
658 * @param pszName The variable name.
659 * @param pu64Value Where to return the value.
660 */
661GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
662{
663 /*
664 * Validate input.
665 */
666 PGVMM pGVMM;
667 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
668 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
669 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
670 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
671
672 /*
673 * String switch time!
674 */
675 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
676 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
677 int rc = VINF_SUCCESS;
678 pszName += sizeof("/GVMM/") - 1;
679 if (!strcmp(pszName, "cEMTsMeansCompany"))
680 *pu64Value = pGVMM->cEMTsMeansCompany;
681 else if (!strcmp(pszName, "MinSleepAlone"))
682 *pu64Value = pGVMM->nsMinSleepAlone;
683 else if (!strcmp(pszName, "MinSleepCompany"))
684 *pu64Value = pGVMM->nsMinSleepCompany;
685#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
686 else if (!strcmp(pszName, "MinSleepWithHrWakeUp"))
687 *pu64Value = pGVMM->nsMinSleepWithHrTimer;
688#endif
689 else if (!strcmp(pszName, "EarlyWakeUp1"))
690 *pu64Value = pGVMM->nsEarlyWakeUp1;
691 else if (!strcmp(pszName, "EarlyWakeUp2"))
692 *pu64Value = pGVMM->nsEarlyWakeUp2;
693 else
694 rc = VERR_CFGM_VALUE_NOT_FOUND;
695 return rc;
696}
697
698
/**
 * Acquire the 'used' lock in shared mode.
 *
 * This prevents destruction of the VM while we're in ring-0.
 *
 * @returns IPRT status code, see RTCritSectRwEnterShared.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
 */
#define GVMMR0_USED_SHARED_LOCK(a_pGVMM)        RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)

/**
 * Release the 'used' lock when owning it in shared mode.
 *
 * @returns IPRT status code, see RTCritSectRwLeaveShared.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_SHARED_LOCK
 */
#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM)      RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)

/**
 * Acquire the 'used' lock in exclusive mode.
 *
 * Only use this function when making changes to the used list.
 *
 * @returns IPRT status code, see RTCritSectRwEnterExcl.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_EXCLUSIVE_UNLOCK
 */
#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM)     RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)

/**
 * Release the 'used' lock when owning it in exclusive mode.
 *
 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
 * @param   a_pGVMM     The GVMM instance data.
 * @sa      GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
 */
#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM)   RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
738
739
740/**
741 * Try acquire the 'create & destroy' lock.
742 *
743 * @returns IPRT status code, see RTSemFastMutexRequest.
744 * @param pGVMM The GVMM instance data.
745 */
746DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
747{
748 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
749 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
750 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
751 return rc;
752}
753
754
755/**
756 * Release the 'create & destroy' lock.
757 *
758 * @returns IPRT status code, see RTSemFastMutexRequest.
759 * @param pGVMM The GVMM instance data.
760 */
761DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
762{
763 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
764 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
765 AssertRC(rc);
766 return rc;
767}
768
769
770/**
771 * Request wrapper for the GVMMR0CreateVM API.
772 *
773 * @returns VBox status code.
774 * @param pReq The request buffer.
775 * @param pSession The session handle. The VM will be associated with this.
776 */
777GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
778{
779 /*
780 * Validate the request.
781 */
782 if (!RT_VALID_PTR(pReq))
783 return VERR_INVALID_POINTER;
784 if (pReq->Hdr.cbReq != sizeof(*pReq))
785 return VERR_INVALID_PARAMETER;
786 if (pReq->pSession != pSession)
787 return VERR_INVALID_POINTER;
788
789 /*
790 * Execute it.
791 */
792 PGVM pGVM;
793 pReq->pVMR0 = NULL;
794 pReq->pVMR3 = NIL_RTR3PTR;
795 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
796 if (RT_SUCCESS(rc))
797 {
798 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
799 pReq->pVMR3 = pGVM->pVMR3;
800 }
801 return rc;
802}
803
804
805/**
806 * Allocates the VM structure and registers it with GVM.
807 *
808 * The caller will become the VM owner and there by the EMT.
809 *
810 * @returns VBox status code.
811 * @param pSession The support driver session.
812 * @param cCpus Number of virtual CPUs for the new VM.
813 * @param ppGVM Where to store the pointer to the VM structure.
814 *
815 * @thread EMT.
816 */
817GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
818{
819 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
820 PGVMM pGVMM;
821 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
822
823 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
824 *ppGVM = NULL;
825
826 if ( cCpus == 0
827 || cCpus > VMM_MAX_CPU_COUNT)
828 return VERR_INVALID_PARAMETER;
829
830 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
831 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
832 RTPROCESS ProcId = RTProcSelf();
833 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
834
835 /*
836 * The whole allocation process is protected by the lock.
837 */
838 int rc = gvmmR0CreateDestroyLock(pGVMM);
839 AssertRCReturn(rc, rc);
840
841 /*
842 * Only one VM per session.
843 */
844 if (SUPR0GetSessionVM(pSession) != NULL)
845 {
846 gvmmR0CreateDestroyUnlock(pGVMM);
847 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
848 return VERR_ALREADY_EXISTS;
849 }
850
851 /*
852 * Allocate a handle first so we don't waste resources unnecessarily.
853 */
854 uint16_t iHandle = pGVMM->iFreeHead;
855 if (iHandle)
856 {
857 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
858
859 /* consistency checks, a bit paranoid as always. */
860 if ( !pHandle->pGVM
861 && !pHandle->pvObj
862 && pHandle->iSelf == iHandle)
863 {
864 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
865 if (pHandle->pvObj)
866 {
867 /*
868 * Move the handle from the free to used list and perform permission checks.
869 */
870 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
871 AssertRC(rc);
872
873 pGVMM->iFreeHead = pHandle->iNext;
874 pHandle->iNext = pGVMM->iUsedHead;
875 pGVMM->iUsedHead = iHandle;
876 pGVMM->cVMs++;
877
878 pHandle->pGVM = NULL;
879 pHandle->pSession = pSession;
880 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
881 pHandle->ProcId = NIL_RTPROCESS;
882
883 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
884
885 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
886 if (RT_SUCCESS(rc))
887 {
888 /*
889 * Allocate memory for the VM structure (combined VM + GVM).
890 */
891 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
892 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
893 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
894 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
895 if (RT_SUCCESS(rc))
896 {
897 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
898 AssertPtr(pGVM);
899
900 /*
901 * Initialise the structure.
902 */
903 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
904 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
905 pGVM->gvmm.s.VMMemObj = hVMMemObj;
906 rc = GMMR0InitPerVMData(pGVM);
907 int rc2 = PGMR0InitPerVMData(pGVM);
908 int rc3 = VMMR0InitPerVMData(pGVM);
909 DBGFR0InitPerVMData(pGVM);
910 PDMR0InitPerVMData(pGVM);
911 IOMR0InitPerVMData(pGVM);
912 TMR0InitPerVMData(pGVM);
913 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
914 {
915 /*
916 * Allocate page array.
917 * This currently have to be made available to ring-3, but this is should change eventually.
918 */
919 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
920 if (RT_SUCCESS(rc))
921 {
922 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
923 for (uint32_t iPage = 0; iPage < cPages; iPage++)
924 {
925 paPages[iPage].uReserved = 0;
926 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
927 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
928 }
929
930 /*
931 * Map the page array, VM and VMCPU structures into ring-3.
932 */
933 AssertCompileSizeAlignment(VM, PAGE_SIZE);
934 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
935 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
936 0 /*offSub*/, sizeof(VM));
937 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
938 {
939 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
940 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
941 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
942 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
943 }
944 if (RT_SUCCESS(rc))
945 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
946 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
947 NIL_RTR0PROCESS);
948 if (RT_SUCCESS(rc))
949 {
950 /*
951 * Initialize all the VM pointers.
952 */
953 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
954 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
955
956 for (VMCPUID i = 0; i < cCpus; i++)
957 {
958 pGVM->aCpus[i].pVMR0 = pGVM;
959 pGVM->aCpus[i].pVMR3 = pVMR3;
960 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
961 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
962 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
963 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
964 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
965 }
966
967 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
968 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
969 ("%p\n", pGVM->paVMPagesR3));
970
971#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
972 /*
973 * Create the high resolution wake-up timer for EMT 0, ignore failures.
974 */
975 if (RTTimerCanDoHighResolution())
976 {
977 int rc4 = RTTimerCreateEx(&pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer,
978 0 /*one-shot, no interval*/,
979 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback,
980 &pGVM->aCpus[0]);
981 if (RT_FAILURE(rc4))
982 pGVM->aCpus[0].gvmm.s.hHrWakeUpTimer = NULL;
983 }
984#endif
985
986 /*
987 * Complete the handle - take the UsedLock sem just to be careful.
988 */
989 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
990 AssertRC(rc);
991
992 pHandle->pGVM = pGVM;
993 pHandle->hEMT0 = hEMT0;
994 pHandle->ProcId = ProcId;
995 pGVM->pVMR3 = pVMR3;
996 pGVM->pVMR3Unsafe = pVMR3;
997 pGVM->aCpus[0].hEMT = hEMT0;
998 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
999 pGVM->aCpus[0].cEmtHashCollisions = 0;
1000 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
1001 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1002 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
1003 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
1004 pGVMM->cEMTs += cCpus;
1005
1006 /* Associate it with the session and create the context hook for EMT0. */
1007 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
1008 if (RT_SUCCESS(rc))
1009 {
1010 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
1011 if (RT_SUCCESS(rc))
1012 {
1013 /*
1014 * Done!
1015 */
1016 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
1017
1018 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1019 gvmmR0CreateDestroyUnlock(pGVMM);
1020
1021 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1022
1023 *ppGVM = pGVM;
1024 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1025 return VINF_SUCCESS;
1026 }
1027
1028 SUPR0SetSessionVM(pSession, NULL, NULL);
1029 }
1030 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1031 }
1032
1033 /* Cleanup mappings. */
1034 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1035 {
1036 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1037 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1038 }
1039 for (VMCPUID i = 0; i < cCpus; i++)
1040 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1041 {
1042 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1043 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1044 }
1045 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1046 {
1047 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1048 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1049 }
1050 }
1051 }
1052 else
1053 {
1054 if (RT_SUCCESS_NP(rc))
1055 rc = rc2;
1056 if (RT_SUCCESS_NP(rc))
1057 rc = rc3;
1058 }
1059 }
1060 }
1061 /* else: The user wasn't permitted to create this VM. */
1062
1063 /*
1064 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1065 * object reference here. A little extra mess because of non-recursive lock.
1066 */
1067 void *pvObj = pHandle->pvObj;
1068 pHandle->pvObj = NULL;
1069 gvmmR0CreateDestroyUnlock(pGVMM);
1070
1071 SUPR0ObjRelease(pvObj, pSession);
1072
1073 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1074 return rc;
1075 }
1076
1077 rc = VERR_NO_MEMORY;
1078 }
1079 else
1080 rc = VERR_GVMM_IPE_1;
1081 }
1082 else
1083 rc = VERR_GVM_TOO_MANY_VMS;
1084
1085 gvmmR0CreateDestroyUnlock(pGVMM);
1086 return rc;
1087}
1088
1089
1090/**
1091 * Initializes the per VM data belonging to GVMM.
1092 *
1093 * @param pGVM Pointer to the global VM structure.
1094 * @param hSelf The handle.
1095 * @param cCpus The CPU count.
1096 * @param pSession The session this VM is associated with.
1097 */
1098static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1099{
1100 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1101 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1102 AssertCompileMemberAlignment(VM, cpum, 64);
1103 AssertCompileMemberAlignment(VM, tm, 64);
1104
1105 /* GVM: */
1106 pGVM->u32Magic = GVM_MAGIC;
1107 pGVM->hSelf = hSelf;
1108 pGVM->cCpus = cCpus;
1109 pGVM->pSession = pSession;
1110 pGVM->pSelf = pGVM;
1111
1112 /* VM: */
1113 pGVM->enmVMState = VMSTATE_CREATING;
1114 pGVM->hSelfUnsafe = hSelf;
1115 pGVM->pSessionUnsafe = pSession;
1116 pGVM->pVMR0ForCall = pGVM;
1117 pGVM->cCpusUnsafe = cCpus;
1118 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1119 pGVM->uStructVersion = 1;
1120 pGVM->cbSelf = sizeof(VM);
1121 pGVM->cbVCpu = sizeof(VMCPU);
1122
1123 /* GVMM: */
1124 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1125 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1126 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1127 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1128 pGVM->gvmm.s.fDoneVMMR0Init = false;
1129 pGVM->gvmm.s.fDoneVMMR0Term = false;
1130
1131 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1132 {
1133 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1134 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1135 }
1136 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1137
1138 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1139 {
1140 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1141 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1142 }
1143
1144 /*
1145 * Per virtual CPU.
1146 */
1147 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1148 {
1149 pGVM->aCpus[i].idCpu = i;
1150 pGVM->aCpus[i].idCpuUnsafe = i;
1151 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1152 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1153 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1154 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1155 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1156 pGVM->aCpus[i].pGVM = pGVM;
1157 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1158 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1159 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1160 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1161 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1162 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1163 }
1164}
1165
1166
1167/**
1168 * Does the VM initialization.
1169 *
1170 * @returns VBox status code.
1171 * @param pGVM The global (ring-0) VM structure.
1172 */
1173GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1174{
1175 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1176
1177 int rc = VERR_INTERNAL_ERROR_3;
1178 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1179 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1180 {
1181 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1182 {
1183 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1184 if (RT_FAILURE(rc))
1185 {
1186 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1187 break;
1188 }
1189 }
1190 }
1191 else
1192 rc = VERR_WRONG_ORDER;
1193
1194 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1195 return rc;
1196}
1197
1198
1199/**
1200 * Indicates that we're done with the ring-0 initialization
1201 * of the VM.
1202 *
1203 * @param pGVM The global (ring-0) VM structure.
1204 * @thread EMT(0)
1205 */
1206GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1207{
1208 /* Set the indicator. */
1209 pGVM->gvmm.s.fDoneVMMR0Init = true;
1210}
1211
1212
1213/**
1214 * Indicates that we're doing the ring-0 termination of the VM.
1215 *
1216 * @returns true if termination hasn't been done already, false if it has.
1217 * @param pGVM Pointer to the global VM structure. Optional.
1218 * @thread EMT(0) or session cleanup thread.
1219 */
1220GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1221{
1222 /* Validate the VM structure, state and handle. */
1223 AssertPtrReturn(pGVM, false);
1224
1225 /* Set the indicator. */
1226 if (pGVM->gvmm.s.fDoneVMMR0Term)
1227 return false;
1228 pGVM->gvmm.s.fDoneVMMR0Term = true;
1229 return true;
1230}
1231
1232
1233/**
1234 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1235 *
1236 * This is call from the vmR3DestroyFinalBit and from a error path in VMR3Create,
1237 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1238 * would've been nice if the caller was actually the EMT thread or that we somehow
1239 * could've associated the calling thread with the VM up front.
1240 *
1241 * @returns VBox status code.
1242 * @param pGVM The global (ring-0) VM structure.
1243 *
1244 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1245 */
1246GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1247{
1248 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1249 PGVMM pGVMM;
1250 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1251
1252 /*
1253 * Validate the VM structure, state and caller.
1254 */
1255 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1256 AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1257 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1258 VERR_WRONG_ORDER);
1259
1260 uint32_t hGVM = pGVM->hSelf;
1261 ASMCompilerBarrier();
1262 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1263 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1264
1265 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1266 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1267
1268 RTPROCESS ProcId = RTProcSelf();
1269 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1270 AssertReturn( ( pHandle->hEMT0 == hSelf
1271 && pHandle->ProcId == ProcId)
1272 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1273
1274 /*
1275 * Lookup the handle and destroy the object.
1276 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1277 * object, we take some precautions against racing callers just in case...
1278 */
1279 int rc = gvmmR0CreateDestroyLock(pGVMM);
1280 AssertRC(rc);
1281
1282 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1283 if ( pHandle->pGVM == pGVM
1284 && ( ( pHandle->hEMT0 == hSelf
1285 && pHandle->ProcId == ProcId)
1286 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1287 && RT_VALID_PTR(pHandle->pvObj)
1288 && RT_VALID_PTR(pHandle->pSession)
1289 && RT_VALID_PTR(pHandle->pGVM)
1290 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1291 {
1292 /* Check that other EMTs have deregistered. */
1293 uint32_t cNotDeregistered = 0;
1294 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1295 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1296 if (cNotDeregistered == 0)
1297 {
1298 /* Grab the object pointer. */
1299 void *pvObj = pHandle->pvObj;
1300 pHandle->pvObj = NULL;
1301 gvmmR0CreateDestroyUnlock(pGVMM);
1302
1303 SUPR0ObjRelease(pvObj, pHandle->pSession);
1304 }
1305 else
1306 {
1307 gvmmR0CreateDestroyUnlock(pGVMM);
1308 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1309 }
1310 }
1311 else
1312 {
1313 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1314 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1315 gvmmR0CreateDestroyUnlock(pGVMM);
1316 rc = VERR_GVMM_IPE_2;
1317 }
1318
1319 return rc;
1320}
1321
1322
1323/**
1324 * Performs VM cleanup task as part of object destruction.
1325 *
1326 * @param pGVM The GVM pointer.
1327 */
1328static void gvmmR0CleanupVM(PGVM pGVM)
1329{
1330 if ( pGVM->gvmm.s.fDoneVMMR0Init
1331 && !pGVM->gvmm.s.fDoneVMMR0Term)
1332 {
1333 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1334 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1335 {
1336 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1337 VMMR0TermVM(pGVM, NIL_VMCPUID);
1338 }
1339 else
1340 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1341 }
1342
1343 GMMR0CleanupVM(pGVM);
1344#ifdef VBOX_WITH_NEM_R0
1345 NEMR0CleanupVM(pGVM);
1346#endif
1347 PDMR0CleanupVM(pGVM);
1348 IOMR0CleanupVM(pGVM);
1349 DBGFR0CleanupVM(pGVM);
1350 PGMR0CleanupVM(pGVM);
1351 TMR0CleanupVM(pGVM);
1352 VMMR0CleanupVM(pGVM);
1353}
1354
1355
1356/**
1357 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1358 *
1359 * pvUser1 is the GVM instance pointer.
1360 * pvUser2 is the handle pointer.
1361 */
1362static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1363{
1364 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1365
1366 NOREF(pvObj);
1367
1368 /*
1369 * Some quick, paranoid, input validation.
1370 */
1371 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1372 AssertPtr(pHandle);
1373 PGVMM pGVMM = (PGVMM)pvUser1;
1374 Assert(pGVMM == g_pGVMM);
1375 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1376 if ( !iHandle
1377 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1378 || iHandle != pHandle->iSelf)
1379 {
1380 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1381 return;
1382 }
1383
1384 int rc = gvmmR0CreateDestroyLock(pGVMM);
1385 AssertRC(rc);
1386 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1387 AssertRC(rc);
1388
1389 /*
1390 * This is a tad slow but a doubly linked list is too much hassle.
1391 */
1392 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1393 {
1394 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1395 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1396 gvmmR0CreateDestroyUnlock(pGVMM);
1397 return;
1398 }
1399
1400 if (pGVMM->iUsedHead == iHandle)
1401 pGVMM->iUsedHead = pHandle->iNext;
1402 else
1403 {
1404 uint16_t iPrev = pGVMM->iUsedHead;
1405 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1406 while (iPrev)
1407 {
1408 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1409 {
1410 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1411 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1412 gvmmR0CreateDestroyUnlock(pGVMM);
1413 return;
1414 }
1415 if (RT_UNLIKELY(c-- <= 0))
1416 {
1417 iPrev = 0;
1418 break;
1419 }
1420
1421 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1422 break;
1423 iPrev = pGVMM->aHandles[iPrev].iNext;
1424 }
1425 if (!iPrev)
1426 {
1427 SUPR0Printf("GVM: can't find the handle previous previous of %d!\n", pHandle->iSelf);
1428 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1429 gvmmR0CreateDestroyUnlock(pGVMM);
1430 return;
1431 }
1432
1433 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1434 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1435 }
1436 pHandle->iNext = 0;
1437 pGVMM->cVMs--;
1438
1439 /*
1440 * Do the global cleanup round.
1441 */
1442 PGVM pGVM = pHandle->pGVM;
1443 if ( RT_VALID_PTR(pGVM)
1444 && pGVM->u32Magic == GVM_MAGIC)
1445 {
1446 pGVMM->cEMTs -= pGVM->cCpus;
1447
1448 if (pGVM->pSession)
1449 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1450
1451 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1452
1453 gvmmR0CleanupVM(pGVM);
1454
1455 /*
1456 * Do the GVMM cleanup - must be done last.
1457 */
1458 /* The VM and VM pages mappings/allocations. */
1459 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1460 {
1461 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1462 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1463 }
1464
1465 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1466 {
1467 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1468 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1469 }
1470
1471 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1472 {
1473 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1474 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1475 }
1476
1477 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1478 {
1479 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1480 {
1481 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1482 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1483 }
1484 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1485 {
1486 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1487 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1488 }
1489#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1490 if (pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer != NULL)
1491 {
1492 RTTimerDestroy(pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer);
1493 pGVM->aCpus[i].gvmm.s.hHrWakeUpTimer = NULL;
1494 }
1495#endif
1496 }
1497
1498 /* the GVM structure itself. */
1499 pGVM->u32Magic |= UINT32_C(0x80000000);
1500 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1501 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1502 pGVM = NULL;
1503
1504 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1505 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1506 AssertRC(rc);
1507 }
1508 /* else: GVMMR0CreateVM cleanup. */
1509
1510 /*
1511 * Free the handle.
1512 */
1513 pHandle->iNext = pGVMM->iFreeHead;
1514 pGVMM->iFreeHead = iHandle;
1515 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1516 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1517 ASMAtomicWriteNullPtr(&pHandle->pSession);
1518 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1519 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1520
1521 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1522 gvmmR0CreateDestroyUnlock(pGVMM);
1523 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1524}
1525
1526
1527/**
1528 * Registers the calling thread as the EMT of a Virtual CPU.
1529 *
1530 * Note that VCPU 0 is automatically registered during VM creation.
1531 *
1532 * @returns VBox status code
1533 * @param pGVM The global (ring-0) VM structure.
1534 * @param idCpu VCPU id to register the current thread as.
1535 */
1536GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1537{
1538 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1539
1540 /*
1541 * Validate the VM structure, state and handle.
1542 */
1543 PGVMM pGVMM;
1544 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1545 if (RT_SUCCESS(rc))
1546 {
1547 if (idCpu < pGVM->cCpus)
1548 {
1549 PGVMCPU const pGVCpu = &pGVM->aCpus[idCpu];
1550 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1551
1552 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1553
1554 /* Check that the EMT isn't already assigned to a thread. */
1555 if (pGVCpu->hEMT == NIL_RTNATIVETHREAD)
1556 {
1557 Assert(pGVCpu->hNativeThreadR0 == NIL_RTNATIVETHREAD);
1558
1559 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1560 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1561 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1562 if (RT_SUCCESS(rc))
1563 {
1564 /*
1565 * Do the assignment, then try setup the hook. Undo if that fails.
1566 */
1567 unsigned cCollisions = 0;
1568 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1569 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1570 {
1571 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1572 do
1573 {
1574 cCollisions++;
1575 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1576 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1577 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1578 }
1579 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1580 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1581
1582 pGVCpu->hNativeThreadR0 = hNativeSelf;
1583 pGVCpu->hEMT = hNativeSelf;
1584 pGVCpu->cEmtHashCollisions = (uint8_t)cCollisions;
1585 pGVCpu->gvmm.s.idxEmtHash = (uint16_t)idxHash;
1586
1587 rc = VMMR0ThreadCtxHookCreateForEmt(pGVCpu);
1588 if (RT_SUCCESS(rc))
1589 {
1590 CPUMR0RegisterVCpuThread(pGVCpu);
1591
1592#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
1593 /*
1594 * Create the high resolution wake-up timer, ignore failures.
1595 */
1596 if (RTTimerCanDoHighResolution())
1597 {
1598 int rc2 = RTTimerCreateEx(&pGVCpu->gvmm.s.hHrWakeUpTimer, 0 /*one-shot, no interval*/,
1599 RTTIMER_FLAGS_HIGH_RES, gvmmR0EmtWakeUpTimerCallback, pGVCpu);
1600 if (RT_FAILURE(rc2))
1601 pGVCpu->gvmm.s.hHrWakeUpTimer = NULL;
1602 }
1603#endif
1604 }
1605 else
1606 {
1607 pGVCpu->hNativeThreadR0 = NIL_RTNATIVETHREAD;
1608 pGVCpu->hEMT = NIL_RTNATIVETHREAD;
1609 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1610 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1611 pGVCpu->gvmm.s.idxEmtHash = UINT16_MAX;
1612 }
1613 }
1614 }
1615 else
1616 rc = VERR_ACCESS_DENIED;
1617
1618 gvmmR0CreateDestroyUnlock(pGVMM);
1619 }
1620 else
1621 rc = VERR_INVALID_CPU_ID;
1622 }
1623 return rc;
1624}
1625
1626
1627/**
1628 * Deregisters the calling thread as the EMT of a Virtual CPU.
1629 *
1630 * Note that VCPU 0 shall call GVMMR0DestroyVM intead of this API.
1631 *
1632 * @returns VBox status code
1633 * @param pGVM The global (ring-0) VM structure.
1634 * @param idCpu VCPU id to register the current thread as.
1635 */
1636GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1637{
1638 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1639
1640 /*
1641 * Validate the VM structure, state and handle.
1642 */
1643 PGVMM pGVMM;
1644 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1645 if (RT_SUCCESS(rc))
1646 {
1647 /*
1648 * Take the destruction lock and recheck the handle state to
1649 * prevent racing GVMMR0DestroyVM.
1650 */
1651 gvmmR0CreateDestroyLock(pGVMM);
1652
1653 uint32_t hSelf = pGVM->hSelf;
1654 ASMCompilerBarrier();
1655 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1656 && pGVMM->aHandles[hSelf].pvObj != NULL
1657 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1658 {
1659 /*
1660 * Do per-EMT cleanups.
1661 */
1662 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1663
1664 /*
1665 * Invalidate hEMT. We don't use NIL here as that would allow
1666 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1667 */
1668 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1669 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1670
1671 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1672 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1673 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1674 }
1675
1676 gvmmR0CreateDestroyUnlock(pGVMM);
1677 }
1678 return rc;
1679}
1680
1681
1682/**
1683 * Registers the caller as a given worker thread.
1684 *
1685 * This enables the thread to operate critical sections in ring-0.
1686 *
1687 * @returns VBox status code.
1688 * @param pGVM The global (ring-0) VM structure.
1689 * @param enmWorker The worker thread this is supposed to be.
1690 * @param hNativeSelfR3 The ring-3 native self of the caller.
1691 */
1692GVMMR0DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1693{
1694 /*
1695 * Validate input.
1696 */
1697 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1698 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1699 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1700 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1701 PGVMM pGVMM;
1702 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1703 AssertRCReturn(rc, rc);
1704 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1705
1706 /*
1707 * Grab the big lock and check the VM state again.
1708 */
1709 uint32_t const hSelf = pGVM->hSelf;
1710 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1711 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1712 && pGVMM->aHandles[hSelf].pvObj != NULL
1713 && pGVMM->aHandles[hSelf].pGVM == pGVM
1714 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1715 {
1716 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1717 {
1718 /*
1719 * Check that the thread isn't an EMT or serving in some other worker capacity.
1720 */
1721 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1722 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1723 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1724 AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread != hNativeSelf,
1725 rc = VERR_INVALID_PARAMETER);
1726 if (RT_SUCCESS(rc))
1727 {
1728 /*
1729 * Do the registration.
1730 */
1731 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1732 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1733 {
1734 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1735 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1736 rc = VINF_SUCCESS;
1737 }
1738 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1739 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1740 rc = VERR_ALREADY_EXISTS;
1741 else
1742 rc = VERR_RESOURCE_BUSY;
1743 }
1744 }
1745 else
1746 rc = VERR_VM_INVALID_VM_STATE;
1747 }
1748 else
1749 rc = VERR_INVALID_VM_HANDLE;
1750 gvmmR0CreateDestroyUnlock(pGVMM);
1751 return rc;
1752}
1753
1754
1755/**
1756 * Deregisters a workinger thread (caller).
1757 *
1758 * The worker thread cannot be re-created and re-registered, instead the given
1759 * @a enmWorker slot becomes invalid.
1760 *
1761 * @returns VBox status code.
1762 * @param pGVM The global (ring-0) VM structure.
1763 * @param enmWorker The worker thread this is supposed to be.
1764 */
1765GVMMR0DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1766{
1767 /*
1768 * Validate input.
1769 */
1770 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1771 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1772 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1773 PGVMM pGVMM;
1774 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1775 AssertRCReturn(rc, rc);
1776
1777 /*
1778 * Grab the big lock and check the VM state again.
1779 */
1780 uint32_t const hSelf = pGVM->hSelf;
1781 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1782 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1783 && pGVMM->aHandles[hSelf].pvObj != NULL
1784 && pGVMM->aHandles[hSelf].pGVM == pGVM
1785 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1786 {
1787 /*
1788 * Do the deregistration.
1789 * This will prevent any other threads register as the worker later.
1790 */
1791 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1792 {
1793 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1794 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1795 rc = VINF_SUCCESS;
1796 }
1797 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1798 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1799 rc = VINF_SUCCESS;
1800 else
1801 rc = VERR_NOT_OWNER;
1802 }
1803 else
1804 rc = VERR_INVALID_VM_HANDLE;
1805 gvmmR0CreateDestroyUnlock(pGVMM);
1806 return rc;
1807}
1808
1809
1810/**
1811 * Lookup a GVM structure by its handle.
1812 *
1813 * @returns The GVM pointer on success, NULL on failure.
1814 * @param hGVM The global VM handle. Asserts on bad handle.
1815 */
1816GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1817{
1818 PGVMM pGVMM;
1819 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1820
1821 /*
1822 * Validate.
1823 */
1824 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1825 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1826
1827 /*
1828 * Look it up.
1829 */
1830 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1831 AssertPtrReturn(pHandle->pvObj, NULL);
1832 PGVM pGVM = pHandle->pGVM;
1833 AssertPtrReturn(pGVM, NULL);
1834
1835 return pGVM;
1836}
1837
1838
1839/**
1840 * Check that the given GVM and VM structures match up.
1841 *
1842 * The calling thread must be in the same process as the VM. All current lookups
1843 * are by threads inside the same process, so this will not be an issue.
1844 *
1845 * @returns VBox status code.
1846 * @param pGVM The global (ring-0) VM structure.
1847 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1848 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1849 * shared mode when requested.
1850 *
1851 * Be very careful if not taking the lock as it's
1852 * possible that the VM will disappear then!
1853 *
1854 * @remark This will not assert on an invalid pGVM but try return silently.
1855 */
1856static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1857{
1858 /*
1859 * Check the pointers.
1860 */
1861 int rc;
1862 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1863 && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
1864 {
1865 /*
1866 * Get the pGVMM instance and check the VM handle.
1867 */
1868 PGVMM pGVMM;
1869 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1870
1871 uint16_t hGVM = pGVM->hSelf;
1872 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1873 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1874 {
1875 RTPROCESS const pidSelf = RTProcSelf();
1876 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1877 if (fTakeUsedLock)
1878 {
1879 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1880 AssertRCReturn(rc, rc);
1881 }
1882
1883 if (RT_LIKELY( pHandle->pGVM == pGVM
1884 && pHandle->ProcId == pidSelf
1885 && RT_VALID_PTR(pHandle->pvObj)))
1886 {
1887 /*
1888 * Some more VM data consistency checks.
1889 */
1890 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1891 && pGVM->hSelfUnsafe == hGVM
1892 && pGVM->pSelf == pGVM))
1893 {
1894 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1895 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1896 {
1897 *ppGVMM = pGVMM;
1898 return VINF_SUCCESS;
1899 }
1900 rc = VERR_INCONSISTENT_VM_HANDLE;
1901 }
1902 else
1903 rc = VERR_INCONSISTENT_VM_HANDLE;
1904 }
1905 else
1906 rc = VERR_INVALID_VM_HANDLE;
1907
1908 if (fTakeUsedLock)
1909 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1910 }
1911 else
1912 rc = VERR_INVALID_VM_HANDLE;
1913 }
1914 else
1915 rc = VERR_INVALID_POINTER;
1916 return rc;
1917}
1918
1919
1920/**
1921 * Validates a GVM/VM pair.
1922 *
1923 * @returns VBox status code.
1924 * @param pGVM The global (ring-0) VM structure.
1925 */
1926GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1927{
1928 PGVMM pGVMM;
1929 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1930}
1931
1932
/**
 * Check that the given GVM and VM structures match up.
 *
 * The calling thread must be in the same process as the VM. All current lookups
 * are by threads inside the same process, so this will not be an issue.
 *
 * @returns VBox status code.
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   idCpu   The (alleged) Virtual CPU ID of the calling EMT.
 * @param   ppGVMM  Where to store the pointer to the GVMM instance data.
 * @thread  EMT
 *
 * @remarks This will assert in all failure paths.
 */
static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
{
    /*
     * Check the pointers.
     */
    AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
    AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER); /* GVM is page aligned */

    /*
     * Get the pGVMM instance and check the VM handle.
     */
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);

    uint16_t hGVM = pGVM->hSelf;
    ASMCompilerBarrier(); /* make sure hSelf is read exactly once before the range check */
    AssertReturn(   hGVM != NIL_GVM_HANDLE
                 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);

    /* The handle must point back at the alleged GVM and belong to this process. */
    RTPROCESS const pidSelf = RTProcSelf();
    PGVMHANDLE      pHandle = &pGVMM->aHandles[hGVM];
    AssertReturn(   pHandle->pGVM   == pGVM
                 && pHandle->ProcId == pidSelf
                 && RT_VALID_PTR(pHandle->pvObj),
                 VERR_INVALID_HANDLE);

    /*
     * Check the EMT claim.
     */
    RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
    AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
    AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);

    /*
     * Some more VM data consistency checks.
     */
    AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
    AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
    AssertReturn(   pGVM->enmVMState >= VMSTATE_CREATING
                 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);

    *ppGVMM = pGVMM;
    return VINF_SUCCESS;
}
1991
1992
1993/**
1994 * Validates a GVM/EMT pair.
1995 *
1996 * @returns VBox status code.
1997 * @param pGVM The global (ring-0) VM structure.
1998 * @param idCpu The Virtual CPU ID of the calling EMT.
1999 * @thread EMT(idCpu)
2000 */
2001GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
2002{
2003 PGVMM pGVMM;
2004 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2005}
2006
2007
/**
 * Looks up the VM belonging to the specified EMT thread.
 *
 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
 * unnecessary kernel panics when the EMT thread hits an assertion.  The
 * caller may or may not be an EMT thread.
 *
 * @returns Pointer to the VM on success, NULL on failure.
 * @param   hEMT    The native thread handle of the EMT.
 *                  NIL_RTNATIVETHREAD means the current thread.
 */
GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
{
    /*
     * No Assertions here as we're usually called in a AssertMsgN or
     * RTAssert* context.
     */
    PGVMM pGVMM = g_pGVMM;
    if (   !RT_VALID_PTR(pGVMM)
        || pGVMM->u32Magic != GVMM_MAGIC)
        return NULL;

    if (hEMT == NIL_RTNATIVETHREAD)
        hEMT = RTThreadNativeSelf();
    RTPROCESS ProcId = RTProcSelf();

    /*
     * Search the handles in a linear fashion as we don't dare to take the lock (assert).
     */
/** @todo introduce some pid hash table here, please. */
    for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
    {
        /* Only consider live handles that belong to the calling process. */
        if (   pGVMM->aHandles[i].iSelf == i
            && pGVMM->aHandles[i].ProcId == ProcId
            && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
            && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
        {
            /* EMT(0) is cached directly in the handle for a fast check. */
            if (pGVMM->aHandles[i].hEMT0 == hEMT)
                return pGVMM->aHandles[i].pGVM;

            /* This is fairly safe with the current process per VM approach. */
            PGVM pGVM = pGVMM->aHandles[i].pGVM;
            VMCPUID const cCpus = pGVM->cCpus;
            ASMCompilerBarrier(); /* read cCpus exactly once and range check it before indexing aCpus */
            if (   cCpus < 1
                || cCpus > VMM_MAX_CPU_COUNT)
                continue;
            for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
                if (pGVM->aCpus[idCpu].hEMT == hEMT)
                    return pGVMM->aHandles[i].pGVM;
        }
    }
    return NULL;
}
2062
2063
2064/**
2065 * Looks up the GVMCPU belonging to the specified EMT thread.
2066 *
2067 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
2068 * unnecessary kernel panics when the EMT thread hits an assertion. The
2069 * call may or not be an EMT thread.
2070 *
2071 * @returns Pointer to the VM on success, NULL on failure.
2072 * @param hEMT The native thread handle of the EMT.
2073 * NIL_RTNATIVETHREAD means the current thread
2074 */
2075GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2076{
2077 /*
2078 * No Assertions here as we're usually called in a AssertMsgN,
2079 * RTAssert*, Log and LogRel contexts.
2080 */
2081 PGVMM pGVMM = g_pGVMM;
2082 if ( !RT_VALID_PTR(pGVMM)
2083 || pGVMM->u32Magic != GVMM_MAGIC)
2084 return NULL;
2085
2086 if (hEMT == NIL_RTNATIVETHREAD)
2087 hEMT = RTThreadNativeSelf();
2088 RTPROCESS ProcId = RTProcSelf();
2089
2090 /*
2091 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2092 */
2093/** @todo introduce some pid hash table here, please. */
2094 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2095 {
2096 if ( pGVMM->aHandles[i].iSelf == i
2097 && pGVMM->aHandles[i].ProcId == ProcId
2098 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2099 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2100 {
2101 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2102 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2103 return &pGVM->aCpus[0];
2104
2105 /* This is fearly safe with the current process per VM approach. */
2106 VMCPUID const cCpus = pGVM->cCpus;
2107 ASMCompilerBarrier();
2108 ASMCompilerBarrier();
2109 if ( cCpus < 1
2110 || cCpus > VMM_MAX_CPU_COUNT)
2111 continue;
2112 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2113 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2114 return &pGVM->aCpus[idCpu];
2115 }
2116 }
2117 return NULL;
2118}
2119
2120
/**
 * Get the GVMCPU structure for the given EMT.
 *
 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
 * @param   pGVM    The global (ring-0) VM structure.
 * @param   hEMT    The native thread handle of the EMT.
 *                  NIL_RTNATIVETHREAD means the current thread.
 */
GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
{
    /*
     * Validate & adjust input.
     */
    AssertPtr(pGVM);
    Assert(pGVM->u32Magic == GVM_MAGIC);
    if (hEMT == NIL_RTNATIVETHREAD /* likely */)
    {
        hEMT = RTThreadNativeSelf();
        AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
    }

    /*
     * Find the matching hash table entry.
     * See similar code in GVMMR0GetRing3ThreadForSelf.
     */
    uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
    if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
    { /* likely */ }
    else
    {
        /* Open addressing with a secondary hash step: probe until we find the
           thread or hit an empty (NIL) slot, which means it is not an EMT. */
#ifdef VBOX_STRICT
        unsigned cCollisions = 0;
#endif
        uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
        for (;;)
        {
            Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
            idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
            if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
                break;
            if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
            {
#ifdef VBOX_STRICT
                /* Strict builds double-check that the thread really isn't an EMT. */
                uint32_t idxCpu = pGVM->cCpus;
                AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
                while (idxCpu-- > 0)
                    Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
#endif
                return NULL;
            }
        }
    }

    /*
     * Validate the VCpu number and translate it into a pointer.
     */
    VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
    AssertReturn(idCpu < pGVM->cCpus, NULL);
    PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
    Assert(pGVCpu->hNativeThreadR0 == hEMT);
    Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
    return pGVCpu;
}
2184
2185
/**
 * Get the native ring-3 thread handle for the caller.
 *
 * This works for EMTs and registered workers.
 *
 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
 * @param   pGVM    The global (ring-0) VM structure.
 */
GVMMR0DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
{
    /*
     * Validate input.
     */
    AssertPtr(pGVM);
    AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
    RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
    AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);

    /*
     * Find the matching hash table entry.
     * See similar code in GVMMR0GetGVCpuByGVMandEMT.
     */
    uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
    if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
    { /* likely */ }
    else
    {
        /* Open addressing with a secondary hash step: probe until we find the
           thread or hit an empty (NIL) slot, which means it is not an EMT. */
#ifdef VBOX_STRICT
        unsigned cCollisions = 0;
#endif
        uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
        for (;;)
        {
            Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
            idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
            if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
                break;
            if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
            {
#ifdef VBOX_STRICT
                /* Strict builds double-check that the thread really isn't an EMT. */
                uint32_t idxCpu = pGVM->cCpus;
                AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
                while (idxCpu-- > 0)
                    Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
#endif

                /*
                 * Not an EMT, so see if it's a worker thread.
                 * (Indices at or below GVMMWORKERTHREAD_INVALID are deliberately skipped.)
                 */
                size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
                while (--idx > GVMMWORKERTHREAD_INVALID)
                    if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
                        return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;

                return NIL_RTNATIVETHREAD;
            }
        }
    }

    /*
     * Validate the VCpu number and translate it into a pointer.
     */
    VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
    AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
    PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
    Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
    Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
    return pGVCpu->hNativeThread;
}
2255
2256
2257/**
2258 * Converts a pointer with the GVM structure to a host physical address.
2259 *
2260 * @returns Host physical address.
2261 * @param pGVM The global (ring-0) VM structure.
2262 * @param pv The address to convert.
2263 * @thread EMT
2264 */
2265GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2266{
2267 AssertPtr(pGVM);
2268 Assert(pGVM->u32Magic == GVM_MAGIC);
2269 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2270 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
2271 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> PAGE_SHIFT) | ((uintptr_t)pv & PAGE_OFFSET_MASK);
2272}
2273
2274
2275/**
2276 * This is will wake up expired and soon-to-be expired VMs.
2277 *
2278 * @returns Number of VMs that has been woken up.
2279 * @param pGVMM Pointer to the GVMM instance data.
2280 * @param u64Now The current time.
2281 */
2282static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2283{
2284 /*
2285 * Skip this if we've got disabled because of high resolution wakeups or by
2286 * the user.
2287 */
2288 if (!pGVMM->fDoEarlyWakeUps)
2289 return 0;
2290
2291/** @todo Rewrite this algorithm. See performance defect XYZ. */
2292
2293 /*
2294 * A cheap optimization to stop wasting so much time here on big setups.
2295 */
2296 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2297 if ( pGVMM->cHaltedEMTs == 0
2298 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2299 return 0;
2300
2301 /*
2302 * Only one thread doing this at a time.
2303 */
2304 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2305 return 0;
2306
2307 /*
2308 * The first pass will wake up VMs which have actually expired
2309 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2310 */
2311 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2312 uint64_t u64Min = UINT64_MAX;
2313 unsigned cWoken = 0;
2314 unsigned cHalted = 0;
2315 unsigned cTodo2nd = 0;
2316 unsigned cTodo3rd = 0;
2317 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2318 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2319 i = pGVMM->aHandles[i].iNext)
2320 {
2321 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2322 if ( RT_VALID_PTR(pCurGVM)
2323 && pCurGVM->u32Magic == GVM_MAGIC)
2324 {
2325 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2326 {
2327 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2328 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2329 if (u64)
2330 {
2331 if (u64 <= u64Now)
2332 {
2333 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2334 {
2335 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2336 AssertRC(rc);
2337 cWoken++;
2338 }
2339 }
2340 else
2341 {
2342 cHalted++;
2343 if (u64 <= uNsEarlyWakeUp1)
2344 cTodo2nd++;
2345 else if (u64 <= uNsEarlyWakeUp2)
2346 cTodo3rd++;
2347 else if (u64 < u64Min)
2348 u64 = u64Min;
2349 }
2350 }
2351 }
2352 }
2353 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2354 }
2355
2356 if (cTodo2nd)
2357 {
2358 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2359 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2360 i = pGVMM->aHandles[i].iNext)
2361 {
2362 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2363 if ( RT_VALID_PTR(pCurGVM)
2364 && pCurGVM->u32Magic == GVM_MAGIC)
2365 {
2366 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2367 {
2368 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2369 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2370 if ( u64
2371 && u64 <= uNsEarlyWakeUp1)
2372 {
2373 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2374 {
2375 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2376 AssertRC(rc);
2377 cWoken++;
2378 }
2379 }
2380 }
2381 }
2382 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2383 }
2384 }
2385
2386 if (cTodo3rd)
2387 {
2388 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2389 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2390 i = pGVMM->aHandles[i].iNext)
2391 {
2392 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2393 if ( RT_VALID_PTR(pCurGVM)
2394 && pCurGVM->u32Magic == GVM_MAGIC)
2395 {
2396 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2397 {
2398 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2399 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2400 if ( u64
2401 && u64 <= uNsEarlyWakeUp2)
2402 {
2403 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2404 {
2405 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2406 AssertRC(rc);
2407 cWoken++;
2408 }
2409 }
2410 }
2411 }
2412 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2413 }
2414 }
2415
2416 /*
2417 * Set the minimum value.
2418 */
2419 pGVMM->uNsNextEmtWakeup = u64Min;
2420
2421 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2422 return cWoken;
2423}
2424
2425
2426#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
/**
 * Timer callback for the EMT high-resolution wake-up timer.
 *
 * @param   pTimer  The timer handle.
 * @param   pvUser  The global (ring-0) CPU structure for the EMT to wake up.
 * @param   iTick   The current tick.
 */
static DECLCALLBACK(void) gvmmR0EmtWakeUpTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
{
    PGVMCPU pGVCpu = (PGVMCPU)pvUser;
    NOREF(pTimer); NOREF(iTick);

    /* The timer has fired, so it is no longer armed. */
    pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
    if (pGVCpu->gvmm.s.u64HaltExpire != 0)
    {
        /* The EMT is still flagged as halted - signal its halt semaphore. */
        RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
        pGVCpu->gvmm.s.Stats.cWakeUpTimerHits += 1;
    }
    else
        /* The EMT already woke up by some other means. */
        pGVCpu->gvmm.s.Stats.cWakeUpTimerMisses += 1;

    /* Stats: did the timer fire on the same host CPU the EMT halted on? */
    if (RTMpCpuId() == pGVCpu->gvmm.s.idHaltedOnCpu)
        pGVCpu->gvmm.s.Stats.cWakeUpTimerSameCpu += 1;
}
2451#endif /* GVMM_SCHED_WITH_HR_WAKE_UP_TIMER */
2452
2453
/**
 * Halt the EMT thread.
 *
 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
 *          VERR_INTERRUPTED if a signal was scheduled for the thread.
 * @param   pGVM                The global (ring-0) VM structure.
 * @param   pGVCpu              The global (ring-0) CPU structure of the calling
 *                              EMT.
 * @param   u64ExpireGipTime    The time for the sleep to expire expressed as GIP time.
 * @thread  EMT(pGVCpu).
 */
GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
{
    LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
             pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);

    pGVM->gvmm.s.StatsSched.cHaltCalls++;
    Assert(!pGVCpu->gvmm.s.u64HaltExpire);

    /*
     * If we're doing early wake-ups, we must take the UsedList lock before we
     * start querying the current time.
     * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
     */
    bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
    if (fDoEarlyWakeUps)
    {
        int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
    }

    /* GIP hack: We might be frequently sleeping for short intervals where the
       difference between GIP and system time matters on systems with high resolution
       system time. So, convert the input from GIP to System time in that case. */
    Assert(ASMGetFlags() & X86_EFL_IF);
    const uint64_t u64NowSys = RTTimeSystemNanoTS();
    const uint64_t u64NowGip = RTTimeNanoTS();

    /* While we hold the lock anyway, help wake up other expired EMTs. */
    if (fDoEarlyWakeUps)
        pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);

    /*
     * Go to sleep if we must...
     * Cap the sleep time to 1 second to be on the safe side.
     */
    int rc;
    uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
    if (   u64NowGip < u64ExpireGipTime
        && (   cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
                               ? pGVMM->nsMinSleepCompany
                               : pGVMM->nsMinSleepAlone)
#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
            || (pGVCpu->gvmm.s.hHrWakeUpTimer != NULL && cNsInterval >= pGVMM->nsMinSleepWithHrTimer)
#endif
           )
       )
    {
        pGVM->gvmm.s.StatsSched.cHaltBlocking++;
        if (cNsInterval > RT_NS_1SEC)
            u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
        /* Publish the expiry time so other threads and the wake-up passes can
           see that we're halted, and account ourselves as halted. */
        ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
        ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
        if (fDoEarlyWakeUps)
        {
            if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
                pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
            GVMMR0_USED_SHARED_UNLOCK(pGVMM);
        }

#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
        /* Arm the high resolution wake-up timer when available and worthwhile. */
        if (   pGVCpu->gvmm.s.hHrWakeUpTimer != NULL
            && cNsInterval >= RT_MIN(RT_NS_1US, pGVMM->nsMinSleepWithHrTimer))
        {
            STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Start, a);
            RTTimerStart(pGVCpu->gvmm.s.hHrWakeUpTimer, cNsInterval);
            pGVCpu->gvmm.s.fHrWakeUptimerArmed = true;
            pGVCpu->gvmm.s.idHaltedOnCpu = RTMpCpuId();
            STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Start, a);
        }
#endif

        rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
                                   RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
                                   u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);

        ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
        ASMAtomicDecU32(&pGVMM->cHaltedEMTs);

#ifdef GVMM_SCHED_WITH_HR_WAKE_UP_TIMER
        /* Cancel the timer if it is still armed, i.e. it didn't fire while we slept. */
        if (!pGVCpu->gvmm.s.fHrWakeUptimerArmed)
        { /* likely */ }
        else
        {
            STAM_REL_PROFILE_START(&pGVCpu->gvmm.s.Stats.Stop, a);
            RTTimerStop(pGVCpu->gvmm.s.hHrWakeUpTimer);
            pGVCpu->gvmm.s.fHrWakeUptimerArmed = false;
            pGVCpu->gvmm.s.Stats.cWakeUpTimerCanceled += 1;
            STAM_REL_PROFILE_STOP(&pGVCpu->gvmm.s.Stats.Stop, a);
        }
#endif

        /* Reset the semaphore to try prevent a few false wake-ups. */
        if (rc == VINF_SUCCESS)
            RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
        else if (rc == VERR_TIMEOUT)
        {
            /* Timing out is the normal way of waking up; report success. */
            pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
            rc = VINF_SUCCESS;
        }
    }
    else
    {
        /* The interval is too short to bother blocking; just reset the semaphore. */
        pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
        if (fDoEarlyWakeUps)
            GVMMR0_USED_SHARED_UNLOCK(pGVMM);
        RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
        rc = VINF_SUCCESS;
    }

    return rc;
}
2576
2577
2578/**
2579 * Halt the EMT thread.
2580 *
2581 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2582 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2583 * @param pGVM The global (ring-0) VM structure.
2584 * @param idCpu The Virtual CPU ID of the calling EMT.
2585 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2586 * @thread EMT(idCpu).
2587 */
2588GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2589{
2590 PGVMM pGVMM;
2591 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2592 if (RT_SUCCESS(rc))
2593 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2594 return rc;
2595}
2596
2597
2598
2599/**
2600 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2601 * the a sleeping EMT.
2602 *
2603 * @retval VINF_SUCCESS if successfully woken up.
2604 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2605 *
2606 * @param pGVM The global (ring-0) VM structure.
2607 * @param pGVCpu The global (ring-0) VCPU structure.
2608 */
2609DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2610{
2611 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2612
2613 /*
2614 * Signal the semaphore regardless of whether it's current blocked on it.
2615 *
2616 * The reason for this is that there is absolutely no way we can be 100%
2617 * certain that it isn't *about* go to go to sleep on it and just got
2618 * delayed a bit en route. So, we will always signal the semaphore when
2619 * the it is flagged as halted in the VMM.
2620 */
2621/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2622 int rc;
2623 if (pGVCpu->gvmm.s.u64HaltExpire)
2624 {
2625 rc = VINF_SUCCESS;
2626 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2627 }
2628 else
2629 {
2630 rc = VINF_GVM_NOT_BLOCKED;
2631 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2632 }
2633
2634 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2635 AssertRC(rc2);
2636
2637 return rc;
2638}
2639
2640
/**
 * Wakes up the halted EMT thread so it can service a pending request.
 *
 * @returns VBox status code.
 * @retval  VINF_SUCCESS if successfully woken up.
 * @retval  VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
 *
 * @param   pGVM            The global (ring-0) VM structure.
 * @param   idCpu           The Virtual CPU ID of the EMT to wake up.
 * @param   fTakeUsedLock   Take the used lock or not
 * @thread  Any but EMT(idCpu).
 */
GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
{
    /*
     * Validate input and take the UsedLock.
     */
    PGVMM pGVMM;
    int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
    if (RT_SUCCESS(rc))
    {
        if (idCpu < pGVM->cCpus)
        {
            /*
             * Do the actual job.
             */
            rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);

            if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
            {
                /*
                 * While we're here, do a round of scheduling.
                 */
                Assert(ASMGetFlags() & X86_EFL_IF); /* querying GIP time requires interrupts enabled */
                const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
                pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
            }
        }
        else
            rc = VERR_INVALID_CPU_ID;

        /* gvmmR0ByGVM left the lock held on success; release it again. */
        if (fTakeUsedLock)
        {
            int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
            AssertRC(rc2);
        }
    }

    LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
    return rc;
}
2692
2693
2694/**
2695 * Wakes up the halted EMT thread so it can service a pending request.
2696 *
2697 * @returns VBox status code.
2698 * @retval VINF_SUCCESS if successfully woken up.
2699 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2700 *
2701 * @param pGVM The global (ring-0) VM structure.
2702 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2703 * @thread Any but EMT(idCpu).
2704 */
2705GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2706{
2707 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2708}
2709
2710
2711/**
2712 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2713 * parameter and no used locking.
2714 *
2715 * @returns VBox status code.
2716 * @retval VINF_SUCCESS if successfully woken up.
2717 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2718 *
2719 * @param pGVM The global (ring-0) VM structure.
2720 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2721 * @thread Any but EMT(idCpu).
2722 * @deprecated Don't use in new code if possible! Use the GVM variant.
2723 */
2724GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2725{
2726 PGVMM pGVMM;
2727 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2728 if (RT_SUCCESS(rc))
2729 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2730 return rc;
2731}
2732
2733
2734/**
2735 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2736 * the Virtual CPU if it's still busy executing guest code.
2737 *
2738 * @returns VBox status code.
2739 * @retval VINF_SUCCESS if poked successfully.
2740 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2741 *
2742 * @param pGVM The global (ring-0) VM structure.
2743 * @param pVCpu The cross context virtual CPU structure.
2744 */
2745DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2746{
2747 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2748
2749 RTCPUID idHostCpu = pVCpu->idHostCpu;
2750 if ( idHostCpu == NIL_RTCPUID
2751 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2752 {
2753 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2754 return VINF_GVM_NOT_BUSY_IN_GC;
2755 }
2756
2757 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2758 RTMpPokeCpu(idHostCpu);
2759 return VINF_SUCCESS;
2760}
2761
2762
2763/**
2764 * Pokes an EMT if it's still busy running guest code.
2765 *
2766 * @returns VBox status code.
2767 * @retval VINF_SUCCESS if poked successfully.
2768 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2769 *
2770 * @param pGVM The global (ring-0) VM structure.
2771 * @param idCpu The ID of the virtual CPU to poke.
2772 * @param fTakeUsedLock Take the used lock or not
2773 */
2774GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2775{
2776 /*
2777 * Validate input and take the UsedLock.
2778 */
2779 PGVMM pGVMM;
2780 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2781 if (RT_SUCCESS(rc))
2782 {
2783 if (idCpu < pGVM->cCpus)
2784 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2785 else
2786 rc = VERR_INVALID_CPU_ID;
2787
2788 if (fTakeUsedLock)
2789 {
2790 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2791 AssertRC(rc2);
2792 }
2793 }
2794
2795 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2796 return rc;
2797}
2798
2799
2800/**
2801 * Pokes an EMT if it's still busy running guest code.
2802 *
2803 * @returns VBox status code.
2804 * @retval VINF_SUCCESS if poked successfully.
2805 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2806 *
2807 * @param pGVM The global (ring-0) VM structure.
2808 * @param idCpu The ID of the virtual CPU to poke.
2809 */
2810GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2811{
2812 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2813}
2814
2815
2816/**
2817 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2818 * used locking.
2819 *
2820 * @returns VBox status code.
2821 * @retval VINF_SUCCESS if poked successfully.
2822 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2823 *
2824 * @param pGVM The global (ring-0) VM structure.
2825 * @param idCpu The ID of the virtual CPU to poke.
2826 *
2827 * @deprecated Don't use in new code if possible! Use the GVM variant.
2828 */
2829GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2830{
2831 PGVMM pGVMM;
2832 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2833 if (RT_SUCCESS(rc))
2834 {
2835 if (idCpu < pGVM->cCpus)
2836 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2837 else
2838 rc = VERR_INVALID_CPU_ID;
2839 }
2840 return rc;
2841}
2842
2843
/**
 * Wakes up a set of halted EMT threads so they can service pending request.
 *
 * @returns VBox status code, no informational stuff.
 *
 * @param   pGVM        The global (ring-0) VM structure.
 * @param   pSleepSet   The set of sleepers to wake up.
 * @param   pPokeSet    The set of CPUs to poke.
 */
GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
{
    AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
    AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
    RTNATIVETHREAD hSelf = RTThreadNativeSelf();

    /*
     * Validate input and take the UsedLock.
     */
    PGVMM pGVMM;
    int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
    if (RT_SUCCESS(rc))
    {
        rc = VINF_SUCCESS;
        VMCPUID idCpu = pGVM->cCpus;
        while (idCpu-- > 0)
        {
            /* Don't try poke or wake up ourselves. */
            if (pGVM->aCpus[idCpu].hEMT == hSelf)
                continue;

            /* just ignore errors for now. */
            /* The sleep set takes precedence over the poke set for a given CPU. */
            if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
                gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
            else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
                gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
        }

        /* gvmmR0ByGVM left the lock held on success; release it again. */
        int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
        AssertRC(rc2);
    }

    LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
    return rc;
}
2888
2889
2890/**
2891 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2892 *
2893 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2894 * @param pGVM The global (ring-0) VM structure.
2895 * @param pReq Pointer to the request packet.
2896 */
2897GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2898{
2899 /*
2900 * Validate input and pass it on.
2901 */
2902 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2903 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2904
2905 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2906}
2907
2908
2909
2910/**
2911 * Poll the schedule to see if someone else should get a chance to run.
2912 *
2913 * This is a bit hackish and will not work too well if the machine is
2914 * under heavy load from non-VM processes.
2915 *
2916 * @returns VINF_SUCCESS if not yielded.
2917 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2918 * @param pGVM The global (ring-0) VM structure.
2919 * @param idCpu The Virtual CPU ID of the calling EMT.
2920 * @param fYield Whether to yield or not.
2921 * This is for when we're spinning in the halt loop.
2922 * @thread EMT(idCpu).
2923 */
2924GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2925{
2926 /*
2927 * Validate input.
2928 */
2929 PGVMM pGVMM;
2930 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2931 if (RT_SUCCESS(rc))
2932 {
2933 /*
2934 * We currently only implement helping doing wakeups (fYield = false), so don't
2935 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2936 */
2937 if (!fYield && pGVMM->fDoEarlyWakeUps)
2938 {
2939 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2940 pGVM->gvmm.s.StatsSched.cPollCalls++;
2941
2942 Assert(ASMGetFlags() & X86_EFL_IF);
2943 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2944
2945 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2946
2947 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2948 }
2949 /*
2950 * Not quite sure what we could do here...
2951 */
2952 else if (fYield)
2953 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2954 else
2955 rc = VINF_SUCCESS;
2956 }
2957
2958 LogFlow(("GVMMR0SchedWakeUp: returns %Rrc\n", rc));
2959 return rc;
2960}
2961
2962
2963#ifdef GVMM_SCHED_WITH_PPT
2964/**
2965 * Timer callback for the periodic preemption timer.
2966 *
2967 * @param pTimer The timer handle.
2968 * @param pvUser Pointer to the per cpu structure.
2969 * @param iTick The current tick.
2970 */
2971static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2972{
2973 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2974 NOREF(pTimer); NOREF(iTick);
2975
2976 /*
2977 * Termination check
2978 */
2979 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2980 return;
2981
2982 /*
2983 * Do the house keeping.
2984 */
2985 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2986
2987 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2988 {
2989 /*
2990 * Historicize the max frequency.
2991 */
2992 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2993 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2994 pCpu->Ppt.iTickHistorization = 0;
2995 pCpu->Ppt.uDesiredHz = 0;
2996
2997 /*
2998 * Check if the current timer frequency.
2999 */
3000 uint32_t uHistMaxHz = 0;
3001 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
3002 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
3003 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
3004 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
3005 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3006 else if (uHistMaxHz)
3007 {
3008 /*
3009 * Reprogram it.
3010 */
3011 pCpu->Ppt.cChanges++;
3012 pCpu->Ppt.iTickHistorization = 0;
3013 pCpu->Ppt.uTimerHz = uHistMaxHz;
3014 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
3015 pCpu->Ppt.cNsInterval = cNsInterval;
3016 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3017 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3018 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3019 / cNsInterval;
3020 else
3021 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3022 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3023
3024 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
3025 RTTimerChangeInterval(pTimer, cNsInterval);
3026 }
3027 else
3028 {
3029 /*
3030 * Stop it.
3031 */
3032 pCpu->Ppt.fStarted = false;
3033 pCpu->Ppt.uTimerHz = 0;
3034 pCpu->Ppt.cNsInterval = 0;
3035 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3036
3037 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
3038 RTTimerStop(pTimer);
3039 }
3040 }
3041 else
3042 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3043}
3044#endif /* GVMM_SCHED_WITH_PPT */
3045
3046
3047/**
3048 * Updates the periodic preemption timer for the calling CPU.
3049 *
3050 * The caller must have disabled preemption!
3051 * The caller must check that the host can do high resolution timers.
3052 *
3053 * @param pGVM The global (ring-0) VM structure.
3054 * @param idHostCpu The current host CPU id.
3055 * @param uHz The desired frequency.
3056 */
3057GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
3058{
3059 NOREF(pGVM);
3060#ifdef GVMM_SCHED_WITH_PPT
3061 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
3062 Assert(RTTimerCanDoHighResolution());
3063
3064 /*
3065 * Resolve the per CPU data.
3066 */
3067 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
3068 PGVMM pGVMM = g_pGVMM;
3069 if ( !RT_VALID_PTR(pGVMM)
3070 || pGVMM->u32Magic != GVMM_MAGIC)
3071 return;
3072 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
3073 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
3074 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
3075 && pCpu->idCpu == idHostCpu,
3076 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
3077
3078 /*
3079 * Check whether we need to do anything about the timer.
3080 * We have to be a little bit careful since we might be race the timer
3081 * callback here.
3082 */
3083 if (uHz > 16384)
3084 uHz = 16384; /** @todo add a query method for this! */
3085 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
3086 && uHz >= pCpu->Ppt.uMinHz
3087 && !pCpu->Ppt.fStarting /* solaris paranoia */))
3088 {
3089 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3090
3091 pCpu->Ppt.uDesiredHz = uHz;
3092 uint32_t cNsInterval = 0;
3093 if (!pCpu->Ppt.fStarted)
3094 {
3095 pCpu->Ppt.cStarts++;
3096 pCpu->Ppt.fStarted = true;
3097 pCpu->Ppt.fStarting = true;
3098 pCpu->Ppt.iTickHistorization = 0;
3099 pCpu->Ppt.uTimerHz = uHz;
3100 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
3101 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
3102 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
3103 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
3104 / cNsInterval;
3105 else
3106 pCpu->Ppt.cTicksHistoriziationInterval = 1;
3107 }
3108
3109 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3110
3111 if (cNsInterval)
3112 {
3113 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
3114 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
3115 AssertRC(rc);
3116
3117 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
3118 if (RT_FAILURE(rc))
3119 pCpu->Ppt.fStarted = false;
3120 pCpu->Ppt.fStarting = false;
3121 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
3122 }
3123 }
3124#else /* !GVMM_SCHED_WITH_PPT */
3125 NOREF(idHostCpu); NOREF(uHz);
3126#endif /* !GVMM_SCHED_WITH_PPT */
3127}
3128
3129
3130/**
3131 * Calls @a pfnCallback for each VM in the system.
3132 *
3133 * This will enumerate the VMs while holding the global VM used list lock in
3134 * shared mode. So, only suitable for simple work. If more expensive work
3135 * needs doing, a different approach must be taken as using this API would
3136 * otherwise block VM creation and destruction.
3137 *
3138 * @returns VBox status code.
3139 * @param pfnCallback The callback function.
3140 * @param pvUser User argument to the callback.
3141 */
3142GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3143{
3144 PGVMM pGVMM;
3145 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3146
3147 int rc = VINF_SUCCESS;
3148 GVMMR0_USED_SHARED_LOCK(pGVMM);
3149 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3150 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3151 i = pGVMM->aHandles[i].iNext, cLoops++)
3152 {
3153 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3154 if ( RT_VALID_PTR(pGVM)
3155 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3156 && pGVM->u32Magic == GVM_MAGIC)
3157 {
3158 rc = pfnCallback(pGVM, pvUser);
3159 if (rc != VINF_SUCCESS)
3160 break;
3161 }
3162
3163 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3164 }
3165 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3166 return rc;
3167}
3168
3169
3170/**
3171 * Retrieves the GVMM statistics visible to the caller.
3172 *
3173 * @returns VBox status code.
3174 *
3175 * @param pStats Where to put the statistics.
3176 * @param pSession The current session.
3177 * @param pGVM The GVM to obtain statistics for. Optional.
3178 */
3179GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3180{
3181 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3182
3183 /*
3184 * Validate input.
3185 */
3186 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3187 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3188 pStats->cVMs = 0; /* (crash before taking the sem...) */
3189
3190 /*
3191 * Take the lock and get the VM statistics.
3192 */
3193 PGVMM pGVMM;
3194 if (pGVM)
3195 {
3196 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3197 if (RT_FAILURE(rc))
3198 return rc;
3199 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3200
3201 uint32_t iCpu = RT_MIN(pGVM->cCpus, RT_ELEMENTS(pStats->aVCpus));
3202 if (iCpu < RT_ELEMENTS(pStats->aVCpus))
3203 RT_BZERO(&pStats->aVCpus[iCpu], (RT_ELEMENTS(pStats->aVCpus) - iCpu) * sizeof(pStats->aVCpus[0]));
3204 while (iCpu-- > 0)
3205 pStats->aVCpus[iCpu] = pGVM->aCpus[iCpu].gvmm.s.Stats;
3206 }
3207 else
3208 {
3209 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3210 RT_ZERO(pStats->SchedVM);
3211 RT_ZERO(pStats->aVCpus);
3212
3213 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3214 AssertRCReturn(rc, rc);
3215 }
3216
3217 /*
3218 * Enumerate the VMs and add the ones visible to the statistics.
3219 */
3220 pStats->cVMs = 0;
3221 pStats->cEMTs = 0;
3222 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3223
3224 for (unsigned i = pGVMM->iUsedHead;
3225 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3226 i = pGVMM->aHandles[i].iNext)
3227 {
3228 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3229 void *pvObj = pGVMM->aHandles[i].pvObj;
3230 if ( RT_VALID_PTR(pvObj)
3231 && RT_VALID_PTR(pOtherGVM)
3232 && pOtherGVM->u32Magic == GVM_MAGIC
3233 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3234 {
3235 pStats->cVMs++;
3236 pStats->cEMTs += pOtherGVM->cCpus;
3237
3238 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3239 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3240 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3241 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3242 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3243
3244 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3245 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3246 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3247
3248 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3249 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3250
3251 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3252 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3253 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3254 }
3255 }
3256
3257 /*
3258 * Copy out the per host CPU statistics.
3259 */
3260 uint32_t iDstCpu = 0;
3261 uint32_t cSrcCpus = pGVMM->cHostCpus;
3262 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3263 {
3264 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3265 {
3266 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3267 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3268#ifdef GVMM_SCHED_WITH_PPT
3269 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3270 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3271 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3272 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3273#else
3274 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3275 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3276 pStats->aHostCpus[iDstCpu].cChanges = 0;
3277 pStats->aHostCpus[iDstCpu].cStarts = 0;
3278#endif
3279 iDstCpu++;
3280 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3281 break;
3282 }
3283 }
3284 pStats->cHostCpus = iDstCpu;
3285
3286 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3287
3288 return VINF_SUCCESS;
3289}
3290
3291
3292/**
3293 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3294 *
3295 * @returns see GVMMR0QueryStatistics.
3296 * @param pGVM The global (ring-0) VM structure. Optional.
3297 * @param pReq Pointer to the request packet.
3298 * @param pSession The current session.
3299 */
3300GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3301{
3302 /*
3303 * Validate input and pass it on.
3304 */
3305 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3306 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3307 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3308
3309 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3310}
3311
3312
3313/**
3314 * Resets the specified GVMM statistics.
3315 *
3316 * @returns VBox status code.
3317 *
3318 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
3319 * @param pSession The current session.
3320 * @param pGVM The GVM to reset statistics for. Optional.
3321 */
3322GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3323{
3324 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3325
3326 /*
3327 * Validate input.
3328 */
3329 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3330 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3331
3332 /*
3333 * Take the lock and get the VM statistics.
3334 */
3335 PGVMM pGVMM;
3336 if (pGVM)
3337 {
3338 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3339 if (RT_FAILURE(rc))
3340 return rc;
3341# define MAYBE_RESET_FIELD(field) \
3342 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3343 MAYBE_RESET_FIELD(cHaltCalls);
3344 MAYBE_RESET_FIELD(cHaltBlocking);
3345 MAYBE_RESET_FIELD(cHaltTimeouts);
3346 MAYBE_RESET_FIELD(cHaltNotBlocking);
3347 MAYBE_RESET_FIELD(cHaltWakeUps);
3348 MAYBE_RESET_FIELD(cWakeUpCalls);
3349 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3350 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3351 MAYBE_RESET_FIELD(cPokeCalls);
3352 MAYBE_RESET_FIELD(cPokeNotBusy);
3353 MAYBE_RESET_FIELD(cPollCalls);
3354 MAYBE_RESET_FIELD(cPollHalts);
3355 MAYBE_RESET_FIELD(cPollWakeUps);
3356# undef MAYBE_RESET_FIELD
3357 }
3358 else
3359 {
3360 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3361
3362 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3363 AssertRCReturn(rc, rc);
3364 }
3365
3366 /*
3367 * Enumerate the VMs and add the ones visible to the statistics.
3368 */
3369 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3370 {
3371 for (unsigned i = pGVMM->iUsedHead;
3372 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3373 i = pGVMM->aHandles[i].iNext)
3374 {
3375 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3376 void *pvObj = pGVMM->aHandles[i].pvObj;
3377 if ( RT_VALID_PTR(pvObj)
3378 && RT_VALID_PTR(pOtherGVM)
3379 && pOtherGVM->u32Magic == GVM_MAGIC
3380 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3381 {
3382# define MAYBE_RESET_FIELD(field) \
3383 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3384 MAYBE_RESET_FIELD(cHaltCalls);
3385 MAYBE_RESET_FIELD(cHaltBlocking);
3386 MAYBE_RESET_FIELD(cHaltTimeouts);
3387 MAYBE_RESET_FIELD(cHaltNotBlocking);
3388 MAYBE_RESET_FIELD(cHaltWakeUps);
3389 MAYBE_RESET_FIELD(cWakeUpCalls);
3390 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3391 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3392 MAYBE_RESET_FIELD(cPokeCalls);
3393 MAYBE_RESET_FIELD(cPokeNotBusy);
3394 MAYBE_RESET_FIELD(cPollCalls);
3395 MAYBE_RESET_FIELD(cPollHalts);
3396 MAYBE_RESET_FIELD(cPollWakeUps);
3397# undef MAYBE_RESET_FIELD
3398 }
3399 }
3400 }
3401
3402 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3403
3404 return VINF_SUCCESS;
3405}
3406
3407
3408/**
3409 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3410 *
3411 * @returns see GVMMR0ResetStatistics.
3412 * @param pGVM The global (ring-0) VM structure. Optional.
3413 * @param pReq Pointer to the request packet.
3414 * @param pSession The current session.
3415 */
3416GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3417{
3418 /*
3419 * Validate input and pass it on.
3420 */
3421 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3422 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3423 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3424
3425 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3426}
3427
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette