VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 92870

Last change on this file since 92870 was 92870, checked in by vboxsync, 3 years ago

VMM/GVMMR0: Added more LogRel statements to failure paths of GVMMR0Init to try track down the VERR_INVALID_PARAMETER that started on some boxes after enabling GVMM_SCHED_WITH_PPT.

1/* $Id: GVMMR0.cpp 92870 2021-12-10 23:48:55Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-CPU
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
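As a reading aid for the scheme described above, here is a standalone sketch (illustrative only, not part of GVMMR0.cpp) of how the effective timer frequency can be picked from the historicized samples. The helper name pptPickTimerHz and the PPT_HIST_ENTRIES constant are made up; the real work happens in gvmmR0SchedPeriodicPreemptionTimerCallback and GVMMR0SchedUpdatePeriodicPreemptionTimer further down in the file.

#include <stdint.h>

/* Assumed to match RT_ELEMENTS(GVMMHOSTCPU::Ppt.aHzHistory); 8 entries of
   GVMMHOSTCPU_PPT_HIST_INTERVAL_NS (20 ms) give the ~160 ms window mentioned above. */
#define PPT_HIST_ENTRIES 8

static uint32_t pptPickTimerHz(uint32_t const aHzHistory[PPT_HIST_ENTRIES],
                               uint32_t uDesiredHz, uint32_t uMinHz)
{
    /* The effective frequency is the max of the current request and the history window. */
    uint32_t uMaxHz = uDesiredHz;
    for (unsigned i = 0; i < PPT_HIST_ENTRIES; i++)
        if (aHzHistory[i] > uMaxHz)
            uMaxHz = aHzHistory[i];

    /* Below the per-CPU minimum it is not worth running the preemption timer at all. */
    return uMaxHz >= uMinHz ? uMaxHz : 0;
}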
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/dbgf.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/pdm.h>
59#include <VBox/vmm/pgm.h>
60#include <VBox/vmm/vmm.h>
61#ifdef VBOX_WITH_NEM_R0
62# include <VBox/vmm/nem.h>
63#endif
64#include <VBox/vmm/vmcpuset.h>
65#include <VBox/vmm/vmcc.h>
66#include <VBox/param.h>
67#include <VBox/err.h>
68
69#include <iprt/asm.h>
70#include <iprt/asm-amd64-x86.h>
71#include <iprt/critsect.h>
72#include <iprt/mem.h>
73#include <iprt/semaphore.h>
74#include <iprt/time.h>
75#include <VBox/log.h>
76#include <iprt/thread.h>
77#include <iprt/process.h>
78#include <iprt/param.h>
79#include <iprt/string.h>
80#include <iprt/assert.h>
81#include <iprt/mem.h>
82#include <iprt/memobj.h>
83#include <iprt/mp.h>
84#include <iprt/cpuset.h>
85#include <iprt/spinlock.h>
86#include <iprt/timer.h>
87
88#include "dtrace/VBoxVMM.h"
89
90
91/*********************************************************************************************************************************
92* Defined Constants And Macros *
93*********************************************************************************************************************************/
94#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(RT_OS_WINDOWS) || defined(DOXYGEN_RUNNING)
95/** Define this to enable the periodic preemption timer. */
96# define GVMM_SCHED_WITH_PPT
97#endif
98
99
100/** Special value that GVMMR0DeregisterVCpu sets. */
101#define GVMM_RTNATIVETHREAD_DESTROYED (~(RTNATIVETHREAD)1)
102AssertCompile(GVMM_RTNATIVETHREAD_DESTROYED != NIL_RTNATIVETHREAD);
103
104
105/*********************************************************************************************************************************
106* Structures and Typedefs *
107*********************************************************************************************************************************/
108
109/**
110 * Global VM handle.
111 */
112typedef struct GVMHANDLE
113{
114 /** The index of the next handle in the list (free or used). (0 is nil.) */
115 uint16_t volatile iNext;
116 /** Our own index / handle value. */
117 uint16_t iSelf;
118 /** The process ID of the handle owner.
119 * This is used for access checks. */
120 RTPROCESS ProcId;
121 /** The pointer to the ring-0 only (aka global) VM structure. */
122 PGVM pGVM;
123 /** The virtual machine object. */
124 void *pvObj;
125 /** The session this VM is associated with. */
126 PSUPDRVSESSION pSession;
127 /** The ring-0 handle of the EMT0 thread.
128 * This is used for ownership checks as well as looking up a VM handle by thread
129 * at times like assertions. */
130 RTNATIVETHREAD hEMT0;
131} GVMHANDLE;
132/** Pointer to a global VM handle. */
133typedef GVMHANDLE *PGVMHANDLE;
134
135/** Number of GVM handles (including the NIL handle). */
136#if HC_ARCH_BITS == 64
137# define GVMM_MAX_HANDLES 8192
138#else
139# define GVMM_MAX_HANDLES 128
140#endif
141
142/**
143 * Per host CPU GVMM data.
144 */
145typedef struct GVMMHOSTCPU
146{
147 /** Magic number (GVMMHOSTCPU_MAGIC). */
148 uint32_t volatile u32Magic;
149 /** The CPU ID. */
150 RTCPUID idCpu;
151 /** The CPU set index. */
152 uint32_t idxCpuSet;
153
154#ifdef GVMM_SCHED_WITH_PPT
155 /** Periodic preemption timer data. */
156 struct
157 {
158 /** The handle to the periodic preemption timer. */
159 PRTTIMER pTimer;
160 /** Spinlock protecting the data below. */
161 RTSPINLOCK hSpinlock;
162 /** The smallest Hz that we need to care about. (static) */
163 uint32_t uMinHz;
164 /** The number of ticks between each historization. */
165 uint32_t cTicksHistoriziationInterval;
166 /** The current historization tick (counting up to
167 * cTicksHistoriziationInterval and then resetting). */
168 uint32_t iTickHistorization;
169 /** The current timer interval. This is set to 0 when inactive. */
170 uint32_t cNsInterval;
171 /** The current timer frequency. This is set to 0 when inactive. */
172 uint32_t uTimerHz;
173 /** The current max frequency reported by the EMTs.
174 * This gets historicized and reset by the timer callback. This is
175 * read without holding the spinlock, so needs atomic updating. */
176 uint32_t volatile uDesiredHz;
177 /** Whether the timer was started or not. */
178 bool volatile fStarted;
179 /** Set if we're starting the timer. */
180 bool volatile fStarting;
181 /** The index of the next history entry (mod it). */
182 uint32_t iHzHistory;
183 /** Historicized uDesiredHz values. The array wraps around, new entries
184 * are added at iHzHistory. This is updated approximately every
185 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
186 uint32_t aHzHistory[8];
187 /** Statistics counter for recording the number of interval changes. */
188 uint32_t cChanges;
189 /** Statistics counter for recording the number of timer starts. */
190 uint32_t cStarts;
191 } Ppt;
192#endif /* GVMM_SCHED_WITH_PPT */
193
194} GVMMHOSTCPU;
195/** Pointer to the per host CPU GVMM data. */
196typedef GVMMHOSTCPU *PGVMMHOSTCPU;
197/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
198#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
199 /** The interval one history entry should cover (approximately), given in
200 * nanoseconds. */
201#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
202
203
204/**
205 * The GVMM instance data.
206 */
207typedef struct GVMM
208{
209 /** Eyecatcher / magic. */
210 uint32_t u32Magic;
211 /** The index of the head of the free handle chain. (0 is nil.) */
212 uint16_t volatile iFreeHead;
213 /** The index of the head of the active handle chain. (0 is nil.) */
214 uint16_t volatile iUsedHead;
215 /** The number of VMs. */
216 uint16_t volatile cVMs;
217 /** Alignment padding. */
218 uint16_t u16Reserved;
219 /** The number of EMTs. */
220 uint32_t volatile cEMTs;
221 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
222 uint32_t volatile cHaltedEMTs;
223 /** Mini lock for restricting early wake-ups to one thread. */
224 bool volatile fDoingEarlyWakeUps;
225 bool afPadding[3]; /**< explicit alignment padding. */
226 /** When the next halted or sleeping EMT will wake up.
227 * This is set to 0 when it needs recalculating and to UINT64_MAX when
228 * there are no halted or sleeping EMTs in the GVMM. */
229 uint64_t uNsNextEmtWakeup;
230 /** The lock used to serialize VM creation, destruction and associated events that
231 * aren't performance critical. Owners may acquire the list lock. */
232 RTCRITSECT CreateDestroyLock;
233 /** The lock used to serialize used list updates and accesses.
234 * This indirectly includes scheduling since the scheduler will have to walk the
235 * used list to examine running VMs. Owners may not acquire any other locks. */
236 RTCRITSECTRW UsedLock;
237 /** The handle array.
238 * The size of this array defines the maximum number of currently running VMs.
239 * The first entry is unused as it represents the NIL handle. */
240 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
241
242 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
243 * The number of EMTs that means we no longer consider ourselves alone on a
244 * CPU/Core.
245 */
246 uint32_t cEMTsMeansCompany;
247 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
248 * The minimum sleep time for when we're alone, in nano seconds.
249 */
250 uint32_t nsMinSleepAlone;
251 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
252 * The minimum sleep time for when we've got company, in nano seconds.
253 */
254 uint32_t nsMinSleepCompany;
255 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
256 * The limit for the first round of early wake-ups, given in nano seconds.
257 */
258 uint32_t nsEarlyWakeUp1;
259 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
260 * The limit for the second round of early wake-ups, given in nano seconds.
261 */
262 uint32_t nsEarlyWakeUp2;
263
264 /** Set if we're doing early wake-ups.
265 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
266 bool volatile fDoEarlyWakeUps;
267
268 /** The number of entries in the host CPU array (aHostCpus). */
269 uint32_t cHostCpus;
270 /** Per host CPU data (variable length). */
271 GVMMHOSTCPU aHostCpus[1];
272} GVMM;
273AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
274AssertCompileMemberAlignment(GVMM, UsedLock, 8);
275AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
276/** Pointer to the GVMM instance data. */
277typedef GVMM *PGVMM;
278
279/** The GVMM::u32Magic value (Charlie Haden). */
280#define GVMM_MAGIC UINT32_C(0x19370806)
281
282
283
284/*********************************************************************************************************************************
285* Global Variables *
286*********************************************************************************************************************************/
287/** Pointer to the GVMM instance data.
288 * (Just my general dislike for global variables.) */
289static PGVMM g_pGVMM = NULL;
290
291/** Macro for obtaining and validating the g_pGVMM pointer.
292 * On failure it will return from the invoking function with the specified return value.
293 *
294 * @param pGVMM The name of the pGVMM variable.
295 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
296 * status codes.
297 */
298#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
299 do { \
300 (pGVMM) = g_pGVMM;\
301 AssertPtrReturn((pGVMM), (rc)); \
302 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
303 } while (0)
304
305/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
306 * On failure it will return from the invoking function.
307 *
308 * @param pGVMM The name of the pGVMM variable.
309 */
310#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
311 do { \
312 (pGVMM) = g_pGVMM;\
313 AssertPtrReturnVoid((pGVMM)); \
314 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
315 } while (0)
316
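A minimal usage sketch of the first macro (illustrative only; the function name is made up, but this mirrors the pattern the exported GVMMR0 APIs below follow):

static int someGvmmR0Api(void)
{
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); /* returns on a missing/corrupt instance */
    /* ... pGVMM may now be dereferenced safely ... */
    return VINF_SUCCESS;
}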
317
318/*********************************************************************************************************************************
319* Internal Functions *
320*********************************************************************************************************************************/
321static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
322static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
323static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
324static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
325
326#ifdef GVMM_SCHED_WITH_PPT
327static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
328#endif
329
330
331/**
332 * Initializes the GVMM.
333 *
334 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
335 *
336 * @returns VBox status code.
337 */
338GVMMR0DECL(int) GVMMR0Init(void)
339{
340 LogFlow(("GVMMR0Init:\n"));
341
342 /*
343 * Allocate and initialize the instance data.
344 */
345 uint32_t cHostCpus = RTMpGetArraySize();
346 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
347
348 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
349 if (!pGVMM)
350 return VERR_NO_MEMORY;
351 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
352 "GVMM-CreateDestroyLock");
353 if (RT_SUCCESS(rc))
354 {
355 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
356 if (RT_SUCCESS(rc))
357 {
358 pGVMM->u32Magic = GVMM_MAGIC;
359 pGVMM->iUsedHead = 0;
360 pGVMM->iFreeHead = 1;
361
362 /* the nil handle */
363 pGVMM->aHandles[0].iSelf = 0;
364 pGVMM->aHandles[0].iNext = 0;
365
366 /* the tail */
367 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
368 pGVMM->aHandles[i].iSelf = i;
369 pGVMM->aHandles[i].iNext = 0; /* nil */
370
371 /* the rest */
372 while (i-- > 1)
373 {
374 pGVMM->aHandles[i].iSelf = i;
375 pGVMM->aHandles[i].iNext = i + 1;
376 }
377
378 /* The default configuration values. */
379 uint32_t cNsResolution = RTSemEventMultiGetResolution();
380 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
381 if (cNsResolution >= 5*RT_NS_100US)
382 {
383 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
384 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
385 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
386 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
387 }
388 else if (cNsResolution > RT_NS_100US)
389 {
390 pGVMM->nsMinSleepAlone = cNsResolution / 2;
391 pGVMM->nsMinSleepCompany = cNsResolution / 4;
392 pGVMM->nsEarlyWakeUp1 = 0;
393 pGVMM->nsEarlyWakeUp2 = 0;
394 }
395 else
396 {
397 pGVMM->nsMinSleepAlone = 2000;
398 pGVMM->nsMinSleepCompany = 2000;
399 pGVMM->nsEarlyWakeUp1 = 0;
400 pGVMM->nsEarlyWakeUp2 = 0;
401 }
402 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
403
404 /* The host CPU data. */
405 pGVMM->cHostCpus = cHostCpus;
406 uint32_t iCpu = cHostCpus;
407 RTCPUSET PossibleSet;
408 RTMpGetSet(&PossibleSet);
409 while (iCpu-- > 0)
410 {
411 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
412#ifdef GVMM_SCHED_WITH_PPT
413 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
414 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
415 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
416 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
417 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
418 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
419 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
420 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
421 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
422 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
423 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
424 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
425#endif
426
427 if (RTCpuSetIsMember(&PossibleSet, iCpu))
428 {
429 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
430 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
431
432#ifdef GVMM_SCHED_WITH_PPT
433 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
434 50*1000*1000 /* whatever */,
435 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
436 gvmmR0SchedPeriodicPreemptionTimerCallback,
437 &pGVMM->aHostCpus[iCpu]);
438 if (RT_SUCCESS(rc))
439 {
440 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
441 if (RT_FAILURE(rc))
442 LogRel(("GVMMR0Init: RTSpinlockCreate failed for #%u (%d)\n", iCpu, rc));
443 }
444 else
445 LogRel(("GVMMR0Init: RTTimerCreateEx failed for #%u (%d)\n", iCpu, rc));
446 if (RT_FAILURE(rc))
447 {
448 while (iCpu < cHostCpus)
449 {
450 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
451 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
452 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
453 iCpu++;
454 }
455 break;
456 }
457#endif
458 }
459 else
460 {
461 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
462 pGVMM->aHostCpus[iCpu].u32Magic = 0;
463 }
464 }
465 if (RT_SUCCESS(rc))
466 {
467 g_pGVMM = pGVMM;
468 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
469 return VINF_SUCCESS;
470 }
471
472 /* bail out. */
473 RTCritSectRwDelete(&pGVMM->UsedLock);
474 }
475 else
476 LogRel(("GVMMR0Init: RTCritSectRwInitEx failed (%d)\n", rc));
477 RTCritSectDelete(&pGVMM->CreateDestroyLock);
478 }
479 else
480 LogRel(("GVMMR0Init: RTCritSectInitEx failed (%d)\n", rc));
481
482 RTMemFree(pGVMM);
483 return rc;
484}
485
486
487/**
488 * Terminates the GVMM.
489 *
490 * This is called while owning the loader semaphore (see supdrvLdrFree()).
491 * And unless something is wrong, there should be absolutely no VMs
492 * registered at this point.
493 */
494GVMMR0DECL(void) GVMMR0Term(void)
495{
496 LogFlow(("GVMMR0Term:\n"));
497
498 PGVMM pGVMM = g_pGVMM;
499 g_pGVMM = NULL;
500 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
501 {
502 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
503 return;
504 }
505
506 /*
507 * First of all, stop all active timers.
508 */
509 uint32_t cActiveTimers = 0;
510 uint32_t iCpu = pGVMM->cHostCpus;
511 while (iCpu-- > 0)
512 {
513 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
514#ifdef GVMM_SCHED_WITH_PPT
515 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
516 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
517 cActiveTimers++;
518#endif
519 }
520 if (cActiveTimers)
521 RTThreadSleep(1); /* fudge */
522
523 /*
524 * Invalidate the instance data and free resources.
525 */
526 pGVMM->u32Magic = ~GVMM_MAGIC;
527 RTCritSectRwDelete(&pGVMM->UsedLock);
528 RTCritSectDelete(&pGVMM->CreateDestroyLock);
529
530 pGVMM->iFreeHead = 0;
531 if (pGVMM->iUsedHead)
532 {
533 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
534 pGVMM->iUsedHead = 0;
535 }
536
537#ifdef GVMM_SCHED_WITH_PPT
538 iCpu = pGVMM->cHostCpus;
539 while (iCpu-- > 0)
540 {
541 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
542 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
543 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
544 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
545 }
546#endif
547
548 RTMemFree(pGVMM);
549}
550
551
552/**
553 * A quick hack for setting global config values.
554 *
555 * @returns VBox status code.
556 *
557 * @param pSession The session handle. Used for authentication.
558 * @param pszName The variable name.
559 * @param u64Value The new value.
560 */
561GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
562{
563 /*
564 * Validate input.
565 */
566 PGVMM pGVMM;
567 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
568 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
569 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
570
571 /*
572 * String switch time!
573 */
574 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
575 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
576 int rc = VINF_SUCCESS;
577 pszName += sizeof("/GVMM/") - 1;
578 if (!strcmp(pszName, "cEMTsMeansCompany"))
579 {
580 if (u64Value <= UINT32_MAX)
581 pGVMM->cEMTsMeansCompany = u64Value;
582 else
583 rc = VERR_OUT_OF_RANGE;
584 }
585 else if (!strcmp(pszName, "MinSleepAlone"))
586 {
587 if (u64Value <= RT_NS_100MS)
588 pGVMM->nsMinSleepAlone = u64Value;
589 else
590 rc = VERR_OUT_OF_RANGE;
591 }
592 else if (!strcmp(pszName, "MinSleepCompany"))
593 {
594 if (u64Value <= RT_NS_100MS)
595 pGVMM->nsMinSleepCompany = u64Value;
596 else
597 rc = VERR_OUT_OF_RANGE;
598 }
599 else if (!strcmp(pszName, "EarlyWakeUp1"))
600 {
601 if (u64Value <= RT_NS_100MS)
602 {
603 pGVMM->nsEarlyWakeUp1 = u64Value;
604 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
605 }
606 else
607 rc = VERR_OUT_OF_RANGE;
608 }
609 else if (!strcmp(pszName, "EarlyWakeUp2"))
610 {
611 if (u64Value <= RT_NS_100MS)
612 {
613 pGVMM->nsEarlyWakeUp2 = u64Value;
614 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
615 }
616 else
617 rc = VERR_OUT_OF_RANGE;
618 }
619 else
620 rc = VERR_CFGM_VALUE_NOT_FOUND;
621 return rc;
622}
623
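For illustration, a hedged sketch of how a ring-0 caller holding a valid support driver session (pSession assumed valid) might use this API to raise the 'alone' sleep floor to 1 ms; values are in nanoseconds and, as the code above shows, capped at RT_NS_100MS:

int rc = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 1000000 /* 1 ms in ns */);
if (RT_FAILURE(rc))
    LogRel(("Setting /GVMM/MinSleepAlone failed: %Rrc\n", rc));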
624
625/**
626 * A quick hack for getting global config values.
627 *
628 * @returns VBox status code.
629 *
630 * @param pSession The session handle. Used for authentication.
631 * @param pszName The variable name.
632 * @param pu64Value Where to return the value.
633 */
634GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
635{
636 /*
637 * Validate input.
638 */
639 PGVMM pGVMM;
640 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
641 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
642 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
643 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
644
645 /*
646 * String switch time!
647 */
648 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
649 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
650 int rc = VINF_SUCCESS;
651 pszName += sizeof("/GVMM/") - 1;
652 if (!strcmp(pszName, "cEMTsMeansCompany"))
653 *pu64Value = pGVMM->cEMTsMeansCompany;
654 else if (!strcmp(pszName, "MinSleepAlone"))
655 *pu64Value = pGVMM->nsMinSleepAlone;
656 else if (!strcmp(pszName, "MinSleepCompany"))
657 *pu64Value = pGVMM->nsMinSleepCompany;
658 else if (!strcmp(pszName, "EarlyWakeUp1"))
659 *pu64Value = pGVMM->nsEarlyWakeUp1;
660 else if (!strcmp(pszName, "EarlyWakeUp2"))
661 *pu64Value = pGVMM->nsEarlyWakeUp2;
662 else
663 rc = VERR_CFGM_VALUE_NOT_FOUND;
664 return rc;
665}
666
667
668/**
669 * Acquire the 'used' lock in shared mode.
670 *
671 * This prevents destruction of the VM while we're in ring-0.
672 *
673 * @returns IPRT status code, see RTCritSectRwEnterShared.
674 * @param a_pGVMM The GVMM instance data.
675 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
676 */
677#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
678
679/**
680 * Release the 'used' lock when owning it in shared mode.
681 *
682 * @returns IPRT status code, see RTCritSectRwLeaveShared.
683 * @param a_pGVMM The GVMM instance data.
684 * @sa GVMMR0_USED_SHARED_LOCK
685 */
686#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
687
688/**
689 * Acquire the 'used' lock in exclusive mode.
690 *
691 * Only use this function when making changes to the used list.
692 *
693 * @returns IPRT status code, see RTCritSectRwEnterExcl.
694 * @param a_pGVMM The GVMM instance data.
695 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
696 */
697#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
698
699/**
700 * Release the 'used' lock when owning it in exclusive mode.
701 *
702 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
703 * @param a_pGVMM The GVMM instance data.
704 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
705 */
706#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
707
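For illustration, a short sketch of the intended locking pattern (not taken from this file): readers that only walk the used list take the shared lock, while code that links or unlinks handles (GVMMR0CreateVM and the handle destructor below) takes the exclusive one.

int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
AssertRC(rc);
for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
{
    /* read-only inspection of pGVMM->aHandles[i] goes here */
}
GVMMR0_USED_SHARED_UNLOCK(pGVMM);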
708
709/**
710 * Try acquire the 'create & destroy' lock.
711 *
712 * @returns IPRT status code, see RTCritSectEnter.
713 * @param pGVMM The GVMM instance data.
714 */
715DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
716{
717 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
718 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
719 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
720 return rc;
721}
722
723
724/**
725 * Release the 'create & destroy' lock.
726 *
727 * @returns IPRT status code, see RTCritSectLeave.
728 * @param pGVMM The GVMM instance data.
729 */
730DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
731{
732 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
733 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
734 AssertRC(rc);
735 return rc;
736}
737
738
739/**
740 * Request wrapper for the GVMMR0CreateVM API.
741 *
742 * @returns VBox status code.
743 * @param pReq The request buffer.
744 * @param pSession The session handle. The VM will be associated with this.
745 */
746GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
747{
748 /*
749 * Validate the request.
750 */
751 if (!RT_VALID_PTR(pReq))
752 return VERR_INVALID_POINTER;
753 if (pReq->Hdr.cbReq != sizeof(*pReq))
754 return VERR_INVALID_PARAMETER;
755 if (pReq->pSession != pSession)
756 return VERR_INVALID_POINTER;
757
758 /*
759 * Execute it.
760 */
761 PGVM pGVM;
762 pReq->pVMR0 = NULL;
763 pReq->pVMR3 = NIL_RTR3PTR;
764 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
765 if (RT_SUCCESS(rc))
766 {
767 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
768 pReq->pVMR3 = pGVM->pVMR3;
769 }
770 return rc;
771}
772
773
774/**
775 * Allocates the VM structure and registers it with GVM.
776 *
777 * The caller will become the VM owner and thereby the EMT.
778 *
779 * @returns VBox status code.
780 * @param pSession The support driver session.
781 * @param cCpus Number of virtual CPUs for the new VM.
782 * @param ppGVM Where to store the pointer to the VM structure.
783 *
784 * @thread EMT.
785 */
786GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
787{
788 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
789 PGVMM pGVMM;
790 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
791
792 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
793 *ppGVM = NULL;
794
795 if ( cCpus == 0
796 || cCpus > VMM_MAX_CPU_COUNT)
797 return VERR_INVALID_PARAMETER;
798
799 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
800 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
801 RTPROCESS ProcId = RTProcSelf();
802 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
803
804 /*
805 * The whole allocation process is protected by the lock.
806 */
807 int rc = gvmmR0CreateDestroyLock(pGVMM);
808 AssertRCReturn(rc, rc);
809
810 /*
811 * Only one VM per session.
812 */
813 if (SUPR0GetSessionVM(pSession) != NULL)
814 {
815 gvmmR0CreateDestroyUnlock(pGVMM);
816 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
817 return VERR_ALREADY_EXISTS;
818 }
819
820 /*
821 * Allocate a handle first so we don't waste resources unnecessarily.
822 */
823 uint16_t iHandle = pGVMM->iFreeHead;
824 if (iHandle)
825 {
826 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
827
828 /* consistency checks, a bit paranoid as always. */
829 if ( !pHandle->pGVM
830 && !pHandle->pvObj
831 && pHandle->iSelf == iHandle)
832 {
833 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
834 if (pHandle->pvObj)
835 {
836 /*
837 * Move the handle from the free to used list and perform permission checks.
838 */
839 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
840 AssertRC(rc);
841
842 pGVMM->iFreeHead = pHandle->iNext;
843 pHandle->iNext = pGVMM->iUsedHead;
844 pGVMM->iUsedHead = iHandle;
845 pGVMM->cVMs++;
846
847 pHandle->pGVM = NULL;
848 pHandle->pSession = pSession;
849 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
850 pHandle->ProcId = NIL_RTPROCESS;
851
852 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
853
854 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
855 if (RT_SUCCESS(rc))
856 {
857 /*
858 * Allocate memory for the VM structure (combined VM + GVM).
859 */
860 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
861 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
862 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
863 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
864 if (RT_SUCCESS(rc))
865 {
866 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
867 AssertPtr(pGVM);
868
869 /*
870 * Initialise the structure.
871 */
872 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
873 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
874 pGVM->gvmm.s.VMMemObj = hVMMemObj;
875 rc = GMMR0InitPerVMData(pGVM);
876 int rc2 = PGMR0InitPerVMData(pGVM);
877 int rc3 = VMMR0InitPerVMData(pGVM);
878 DBGFR0InitPerVMData(pGVM);
879 PDMR0InitPerVMData(pGVM);
880 IOMR0InitPerVMData(pGVM);
881 TMR0InitPerVMData(pGVM);
882 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2) && RT_SUCCESS(rc3))
883 {
884 /*
885 * Allocate page array.
886 * This currently has to be made available to ring-3, but this should change eventually.
887 */
888 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
889 if (RT_SUCCESS(rc))
890 {
891 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
892 for (uint32_t iPage = 0; iPage < cPages; iPage++)
893 {
894 paPages[iPage].uReserved = 0;
895 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
896 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
897 }
898
899 /*
900 * Map the page array, VM and VMCPU structures into ring-3.
901 */
902 AssertCompileSizeAlignment(VM, PAGE_SIZE);
903 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
904 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
905 0 /*offSub*/, sizeof(VM));
906 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
907 {
908 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
909 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
910 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
911 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
912 }
913 if (RT_SUCCESS(rc))
914 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
915 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
916 NIL_RTR0PROCESS);
917 if (RT_SUCCESS(rc))
918 {
919 /*
920 * Initialize all the VM pointers.
921 */
922 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
923 AssertMsg(RTR0MemUserIsValidAddr(pVMR3) && pVMR3 != NIL_RTR3PTR, ("%p\n", pVMR3));
924
925 for (VMCPUID i = 0; i < cCpus; i++)
926 {
927 pGVM->aCpus[i].pVMR0 = pGVM;
928 pGVM->aCpus[i].pVMR3 = pVMR3;
929 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
930 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
931 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
932 AssertMsg(RTR0MemUserIsValidAddr(pGVM->apCpusR3[i]) && pGVM->apCpusR3[i] != NIL_RTR3PTR,
933 ("apCpusR3[%u]=%p\n", i, pGVM->apCpusR3[i]));
934 }
935
936 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
937 AssertMsg(RTR0MemUserIsValidAddr(pGVM->paVMPagesR3) && pGVM->paVMPagesR3 != NIL_RTR3PTR,
938 ("%p\n", pGVM->paVMPagesR3));
939
940 /*
941 * Complete the handle - take the UsedLock sem just to be careful.
942 */
943 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
944 AssertRC(rc);
945
946 pHandle->pGVM = pGVM;
947 pHandle->hEMT0 = hEMT0;
948 pHandle->ProcId = ProcId;
949 pGVM->pVMR3 = pVMR3;
950 pGVM->pVMR3Unsafe = pVMR3;
951 pGVM->aCpus[0].hEMT = hEMT0;
952 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
953 pGVM->aCpus[0].cEmtHashCollisions = 0;
954 uint32_t const idxHash = GVMM_EMT_HASH_1(hEMT0);
955 pGVM->aCpus[0].gvmm.s.idxEmtHash = (uint16_t)idxHash;
956 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hEMT0;
957 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = 0;
958 pGVMM->cEMTs += cCpus;
959
960 /* Associate it with the session and create the context hook for EMT0. */
961 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
962 if (RT_SUCCESS(rc))
963 {
964 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
965 if (RT_SUCCESS(rc))
966 {
967 /*
968 * Done!
969 */
970 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
971
972 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
973 gvmmR0CreateDestroyUnlock(pGVMM);
974
975 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
976
977 *ppGVM = pGVM;
978 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
979 return VINF_SUCCESS;
980 }
981
982 SUPR0SetSessionVM(pSession, NULL, NULL);
983 }
984 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
985 }
986
987 /* Cleanup mappings. */
988 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
989 {
990 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
991 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
992 }
993 for (VMCPUID i = 0; i < cCpus; i++)
994 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
995 {
996 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
997 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
998 }
999 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1000 {
1001 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1002 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1003 }
1004 }
1005 }
1006 else
1007 {
1008 if (RT_SUCCESS_NP(rc))
1009 rc = rc2;
1010 if (RT_SUCCESS_NP(rc))
1011 rc = rc3;
1012 }
1013 }
1014 }
1015 /* else: The user wasn't permitted to create this VM. */
1016
1017 /*
1018 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1019 * object reference here. A little extra mess because of the non-recursive lock.
1020 */
1021 void *pvObj = pHandle->pvObj;
1022 pHandle->pvObj = NULL;
1023 gvmmR0CreateDestroyUnlock(pGVMM);
1024
1025 SUPR0ObjRelease(pvObj, pSession);
1026
1027 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1028 return rc;
1029 }
1030
1031 rc = VERR_NO_MEMORY;
1032 }
1033 else
1034 rc = VERR_GVMM_IPE_1;
1035 }
1036 else
1037 rc = VERR_GVM_TOO_MANY_VMS;
1038
1039 gvmmR0CreateDestroyUnlock(pGVMM);
1040 return rc;
1041}
1042
1043
1044/**
1045 * Initializes the per VM data belonging to GVMM.
1046 *
1047 * @param pGVM Pointer to the global VM structure.
1048 * @param hSelf The handle.
1049 * @param cCpus The CPU count.
1050 * @param pSession The session this VM is associated with.
1051 */
1052static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1053{
1054 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1055 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1056 AssertCompileMemberAlignment(VM, cpum, 64);
1057 AssertCompileMemberAlignment(VM, tm, 64);
1058
1059 /* GVM: */
1060 pGVM->u32Magic = GVM_MAGIC;
1061 pGVM->hSelf = hSelf;
1062 pGVM->cCpus = cCpus;
1063 pGVM->pSession = pSession;
1064 pGVM->pSelf = pGVM;
1065
1066 /* VM: */
1067 pGVM->enmVMState = VMSTATE_CREATING;
1068 pGVM->hSelfUnsafe = hSelf;
1069 pGVM->pSessionUnsafe = pSession;
1070 pGVM->pVMR0ForCall = pGVM;
1071 pGVM->cCpusUnsafe = cCpus;
1072 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1073 pGVM->uStructVersion = 1;
1074 pGVM->cbSelf = sizeof(VM);
1075 pGVM->cbVCpu = sizeof(VMCPU);
1076
1077 /* GVMM: */
1078 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1079 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1080 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1081 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1082 pGVM->gvmm.s.fDoneVMMR0Init = false;
1083 pGVM->gvmm.s.fDoneVMMR0Term = false;
1084
1085 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); i++)
1086 {
1087 pGVM->gvmm.s.aWorkerThreads[i].hNativeThread = NIL_RTNATIVETHREAD;
1088 pGVM->gvmm.s.aWorkerThreads[i].hNativeThreadR3 = NIL_RTNATIVETHREAD;
1089 }
1090 pGVM->gvmm.s.aWorkerThreads[0].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED; /* invalid entry */
1091
1092 for (size_t i = 0; i < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash); i++)
1093 {
1094 pGVM->gvmm.s.aEmtHash[i].hNativeEmt = NIL_RTNATIVETHREAD;
1095 pGVM->gvmm.s.aEmtHash[i].idVCpu = NIL_VMCPUID;
1096 }
1097
1098 /*
1099 * Per virtual CPU.
1100 */
1101 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1102 {
1103 pGVM->aCpus[i].idCpu = i;
1104 pGVM->aCpus[i].idCpuUnsafe = i;
1105 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1106 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1107 pGVM->aCpus[i].gvmm.s.idxEmtHash = UINT16_MAX;
1108 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1109 pGVM->aCpus[i].pGVM = pGVM;
1110 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1111 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1112 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1113 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1114 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1115 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1116 }
1117}
1118
1119
1120/**
1121 * Does the VM initialization.
1122 *
1123 * @returns VBox status code.
1124 * @param pGVM The global (ring-0) VM structure.
1125 */
1126GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1127{
1128 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1129
1130 int rc = VERR_INTERNAL_ERROR_3;
1131 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1132 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1133 {
1134 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1135 {
1136 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1137 if (RT_FAILURE(rc))
1138 {
1139 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1140 break;
1141 }
1142 }
1143 }
1144 else
1145 rc = VERR_WRONG_ORDER;
1146
1147 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1148 return rc;
1149}
1150
1151
1152/**
1153 * Indicates that we're done with the ring-0 initialization
1154 * of the VM.
1155 *
1156 * @param pGVM The global (ring-0) VM structure.
1157 * @thread EMT(0)
1158 */
1159GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1160{
1161 /* Set the indicator. */
1162 pGVM->gvmm.s.fDoneVMMR0Init = true;
1163}
1164
1165
1166/**
1167 * Indicates that we're doing the ring-0 termination of the VM.
1168 *
1169 * @returns true if termination hasn't been done already, false if it has.
1170 * @param pGVM Pointer to the global VM structure. Optional.
1171 * @thread EMT(0) or session cleanup thread.
1172 */
1173GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1174{
1175 /* Validate the VM structure, state and handle. */
1176 AssertPtrReturn(pGVM, false);
1177
1178 /* Set the indicator. */
1179 if (pGVM->gvmm.s.fDoneVMMR0Term)
1180 return false;
1181 pGVM->gvmm.s.fDoneVMMR0Term = true;
1182 return true;
1183}
1184
1185
1186/**
1187 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1188 *
1189 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1190 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1191 * would've been nice if the caller was actually the EMT thread or if we somehow
1192 * could've associated the calling thread with the VM up front.
1193 *
1194 * @returns VBox status code.
1195 * @param pGVM The global (ring-0) VM structure.
1196 *
1197 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1198 */
1199GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1200{
1201 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1202 PGVMM pGVMM;
1203 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1204
1205 /*
1206 * Validate the VM structure, state and caller.
1207 */
1208 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1209 AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1210 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1211 VERR_WRONG_ORDER);
1212
1213 uint32_t hGVM = pGVM->hSelf;
1214 ASMCompilerBarrier();
1215 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1216 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1217
1218 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1219 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1220
1221 RTPROCESS ProcId = RTProcSelf();
1222 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1223 AssertReturn( ( pHandle->hEMT0 == hSelf
1224 && pHandle->ProcId == ProcId)
1225 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1226
1227 /*
1228 * Lookup the handle and destroy the object.
1229 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1230 * object, we take some precautions against racing callers just in case...
1231 */
1232 int rc = gvmmR0CreateDestroyLock(pGVMM);
1233 AssertRC(rc);
1234
1235 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1236 if ( pHandle->pGVM == pGVM
1237 && ( ( pHandle->hEMT0 == hSelf
1238 && pHandle->ProcId == ProcId)
1239 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1240 && RT_VALID_PTR(pHandle->pvObj)
1241 && RT_VALID_PTR(pHandle->pSession)
1242 && RT_VALID_PTR(pHandle->pGVM)
1243 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1244 {
1245 /* Check that other EMTs have deregistered. */
1246 uint32_t cNotDeregistered = 0;
1247 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1248 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != GVMM_RTNATIVETHREAD_DESTROYED;
1249 if (cNotDeregistered == 0)
1250 {
1251 /* Grab the object pointer. */
1252 void *pvObj = pHandle->pvObj;
1253 pHandle->pvObj = NULL;
1254 gvmmR0CreateDestroyUnlock(pGVMM);
1255
1256 SUPR0ObjRelease(pvObj, pHandle->pSession);
1257 }
1258 else
1259 {
1260 gvmmR0CreateDestroyUnlock(pGVMM);
1261 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1262 }
1263 }
1264 else
1265 {
1266 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1267 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1268 gvmmR0CreateDestroyUnlock(pGVMM);
1269 rc = VERR_GVMM_IPE_2;
1270 }
1271
1272 return rc;
1273}
1274
1275
1276/**
1277 * Performs VM cleanup tasks as part of object destruction.
1278 *
1279 * @param pGVM The GVM pointer.
1280 */
1281static void gvmmR0CleanupVM(PGVM pGVM)
1282{
1283 if ( pGVM->gvmm.s.fDoneVMMR0Init
1284 && !pGVM->gvmm.s.fDoneVMMR0Term)
1285 {
1286 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1287 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1288 {
1289 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1290 VMMR0TermVM(pGVM, NIL_VMCPUID);
1291 }
1292 else
1293 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1294 }
1295
1296 GMMR0CleanupVM(pGVM);
1297#ifdef VBOX_WITH_NEM_R0
1298 NEMR0CleanupVM(pGVM);
1299#endif
1300 PDMR0CleanupVM(pGVM);
1301 IOMR0CleanupVM(pGVM);
1302 DBGFR0CleanupVM(pGVM);
1303 PGMR0CleanupVM(pGVM);
1304 TMR0CleanupVM(pGVM);
1305 VMMR0CleanupVM(pGVM);
1306}
1307
1308
1309/**
1310 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1311 *
1312 * pvUser1 is the GVM instance pointer.
1313 * pvUser2 is the handle pointer.
1314 */
1315static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1316{
1317 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1318
1319 NOREF(pvObj);
1320
1321 /*
1322 * Some quick, paranoid, input validation.
1323 */
1324 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1325 AssertPtr(pHandle);
1326 PGVMM pGVMM = (PGVMM)pvUser1;
1327 Assert(pGVMM == g_pGVMM);
1328 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1329 if ( !iHandle
1330 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1331 || iHandle != pHandle->iSelf)
1332 {
1333 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1334 return;
1335 }
1336
1337 int rc = gvmmR0CreateDestroyLock(pGVMM);
1338 AssertRC(rc);
1339 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1340 AssertRC(rc);
1341
1342 /*
1343 * This is a tad slow but a doubly linked list is too much hassle.
1344 */
1345 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1346 {
1347 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1348 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1349 gvmmR0CreateDestroyUnlock(pGVMM);
1350 return;
1351 }
1352
1353 if (pGVMM->iUsedHead == iHandle)
1354 pGVMM->iUsedHead = pHandle->iNext;
1355 else
1356 {
1357 uint16_t iPrev = pGVMM->iUsedHead;
1358 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1359 while (iPrev)
1360 {
1361 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1362 {
1363 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1364 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1365 gvmmR0CreateDestroyUnlock(pGVMM);
1366 return;
1367 }
1368 if (RT_UNLIKELY(c-- <= 0))
1369 {
1370 iPrev = 0;
1371 break;
1372 }
1373
1374 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1375 break;
1376 iPrev = pGVMM->aHandles[iPrev].iNext;
1377 }
1378 if (!iPrev)
1379 {
1380 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1381 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1382 gvmmR0CreateDestroyUnlock(pGVMM);
1383 return;
1384 }
1385
1386 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1387 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1388 }
1389 pHandle->iNext = 0;
1390 pGVMM->cVMs--;
1391
1392 /*
1393 * Do the global cleanup round.
1394 */
1395 PGVM pGVM = pHandle->pGVM;
1396 if ( RT_VALID_PTR(pGVM)
1397 && pGVM->u32Magic == GVM_MAGIC)
1398 {
1399 pGVMM->cEMTs -= pGVM->cCpus;
1400
1401 if (pGVM->pSession)
1402 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1403
1404 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1405
1406 gvmmR0CleanupVM(pGVM);
1407
1408 /*
1409 * Do the GVMM cleanup - must be done last.
1410 */
1411 /* The VM and VM pages mappings/allocations. */
1412 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1413 {
1414 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1415 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1416 }
1417
1418 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1419 {
1420 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1421 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1422 }
1423
1424 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1425 {
1426 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1427 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1428 }
1429
1430 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1431 {
1432 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1433 {
1434 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1435 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1436 }
1437 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1438 {
1439 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1440 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1441 }
1442 }
1443
1444 /* the GVM structure itself. */
1445 pGVM->u32Magic |= UINT32_C(0x80000000);
1446 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1447 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1448 pGVM = NULL;
1449
1450 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1451 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1452 AssertRC(rc);
1453 }
1454 /* else: GVMMR0CreateVM cleanup. */
1455
1456 /*
1457 * Free the handle.
1458 */
1459 pHandle->iNext = pGVMM->iFreeHead;
1460 pGVMM->iFreeHead = iHandle;
1461 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1462 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1463 ASMAtomicWriteNullPtr(&pHandle->pSession);
1464 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1465 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1466
1467 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1468 gvmmR0CreateDestroyUnlock(pGVMM);
1469 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1470}
1471
1472
1473/**
1474 * Registers the calling thread as the EMT of a Virtual CPU.
1475 *
1476 * Note that VCPU 0 is automatically registered during VM creation.
1477 *
1478 * @returns VBox status code
1479 * @param pGVM The global (ring-0) VM structure.
1480 * @param idCpu VCPU id to register the current thread as.
1481 */
1482GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1483{
1484 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1485
1486 /*
1487 * Validate the VM structure, state and handle.
1488 */
1489 PGVMM pGVMM;
1490 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */);
1491 if (RT_SUCCESS(rc))
1492 {
1493 if (idCpu < pGVM->cCpus)
1494 {
1495 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1496
1497 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1498
1499 /* Check that the EMT isn't already assigned to a thread. */
1500 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1501 {
1502 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1503
1504 /* A thread may only be one EMT (this makes sure hNativeSelf isn't NIL). */
1505 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1506 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1507 if (RT_SUCCESS(rc))
1508 {
1509 /*
1510 * Do the assignment, then try setup the hook. Undo if that fails.
1511 */
1512 unsigned cCollisions = 0;
1513 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
1514 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD)
1515 {
1516 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
1517 do
1518 {
1519 cCollisions++;
1520 Assert(cCollisions < GVMM_EMT_HASH_SIZE);
1521 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
1522 } while (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt != NIL_RTNATIVETHREAD);
1523 }
1524 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = hNativeSelf;
1525 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = idCpu;
1526 pGVM->aCpus[idCpu].hNativeThreadR0 = hNativeSelf;
1527 pGVM->aCpus[idCpu].hEMT = hNativeSelf;
1528 pGVM->aCpus[idCpu].cEmtHashCollisions = (uint8_t)cCollisions;
1529 pGVM->aCpus[idCpu].gvmm.s.idxEmtHash = (uint16_t)idxHash;
1530
1531 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1532 if (RT_SUCCESS(rc))
1533 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1534 else
1535 {
1536 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1537 pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1538 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = NIL_RTNATIVETHREAD;
1539 pGVM->gvmm.s.aEmtHash[idxHash].idVCpu = NIL_VMCPUID;
1540 pGVM->aCpus[idCpu].gvmm.s.idxEmtHash = UINT16_MAX;
1541 }
1542 }
1543 }
1544 else
1545 rc = VERR_ACCESS_DENIED;
1546
1547 gvmmR0CreateDestroyUnlock(pGVMM);
1548 }
1549 else
1550 rc = VERR_INVALID_CPU_ID;
1551 }
1552 return rc;
1553}
1554
1555
1556/**
1557 * Deregisters the calling thread as the EMT of a Virtual CPU.
1558 *
1559 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1560 *
1561 * @returns VBox status code
1562 * @param pGVM The global (ring-0) VM structure.
1563 * @param idCpu VCPU id to deregister the current thread as.
1564 */
1565GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1566{
1567 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1568
1569 /*
1570 * Validate the VM structure, state and handle.
1571 */
1572 PGVMM pGVMM;
1573 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1574 if (RT_SUCCESS(rc))
1575 {
1576 /*
1577 * Take the destruction lock and recheck the handle state to
1578 * prevent racing GVMMR0DestroyVM.
1579 */
1580 gvmmR0CreateDestroyLock(pGVMM);
1581
1582 uint32_t hSelf = pGVM->hSelf;
1583 ASMCompilerBarrier();
1584 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1585 && pGVMM->aHandles[hSelf].pvObj != NULL
1586 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1587 {
1588 /*
1589 * Do per-EMT cleanups.
1590 */
1591 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1592
1593 /*
1594 * Invalidate hEMT. We don't use NIL here as that would allow
1595 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1596 */
1597 pGVM->aCpus[idCpu].hEMT = GVMM_RTNATIVETHREAD_DESTROYED;
1598 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1599
1600 uint32_t const idxHash = pGVM->aCpus[idCpu].gvmm.s.idxEmtHash;
1601 if (idxHash < RT_ELEMENTS(pGVM->gvmm.s.aEmtHash))
1602 pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt = GVMM_RTNATIVETHREAD_DESTROYED;
1603 }
1604
1605 gvmmR0CreateDestroyUnlock(pGVMM);
1606 }
1607 return rc;
1608}
1609
1610
1611/**
1612 * Registers the caller as a given worker thread.
1613 *
1614 * This enables the thread to operate critical sections in ring-0.
1615 *
1616 * @returns VBox status code.
1617 * @param pGVM The global (ring-0) VM structure.
1618 * @param enmWorker The worker thread this is supposed to be.
1619 * @param hNativeSelfR3 The ring-3 native self of the caller.
1620 */
1621GVMMR0DECL(int) GVMMR0RegisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
1622{
1623 /*
1624 * Validate input.
1625 */
1626 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1627 AssertReturn(hNativeSelfR3 != NIL_RTNATIVETHREAD, VERR_INVALID_HANDLE);
1628 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1629 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1630 PGVMM pGVMM;
1631 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1632 AssertRCReturn(rc, rc);
1633 AssertReturn(pGVM->enmVMState < VMSTATE_DESTROYING, VERR_VM_INVALID_VM_STATE);
1634
1635 /*
1636 * Grab the big lock and check the VM state again.
1637 */
1638 uint32_t const hSelf = pGVM->hSelf;
1639 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1640 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1641 && pGVMM->aHandles[hSelf].pvObj != NULL
1642 && pGVMM->aHandles[hSelf].pGVM == pGVM
1643 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1644 {
1645 if (pGVM->enmVMState < VMSTATE_DESTROYING)
1646 {
1647 /*
1648 * Check that the thread isn't an EMT or serving in some other worker capacity.
1649 */
1650 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1651 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1652 for (size_t idx = 0; idx < RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads); idx++)
1653 AssertBreakStmt(idx == (size_t)enmWorker || pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread != hNativeSelf,
1654 rc = VERR_INVALID_PARAMETER);
1655 if (RT_SUCCESS(rc))
1656 {
1657 /*
1658 * Do the registration.
1659 */
1660 if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == NIL_RTNATIVETHREAD
1661 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == NIL_RTNATIVETHREAD)
1662 {
1663 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = hNativeSelf;
1664 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = hNativeSelfR3;
1665 rc = VINF_SUCCESS;
1666 }
1667 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeSelf
1668 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == hNativeSelfR3)
1669 rc = VERR_ALREADY_EXISTS;
1670 else
1671 rc = VERR_RESOURCE_BUSY;
1672 }
1673 }
1674 else
1675 rc = VERR_VM_INVALID_VM_STATE;
1676 }
1677 else
1678 rc = VERR_INVALID_VM_HANDLE;
1679 gvmmR0CreateDestroyUnlock(pGVMM);
1680 return rc;
1681}
1682
1683
1684/**
1685 * Deregisters a worker thread (the caller).
1686 *
1687 * The worker thread cannot be re-created and re-registered; instead the given
1688 * @a enmWorker slot becomes invalid.
1689 *
1690 * @returns VBox status code.
1691 * @param pGVM The global (ring-0) VM structure.
1692 * @param enmWorker The worker thread this is supposed to be.
1693 */
1694GVMMR0DECL(int) GVMMR0DeregisterWorkerThread(PGVM pGVM, GVMMWORKERTHREAD enmWorker)
1695{
1696 /*
1697 * Validate input.
1698 */
1699 AssertReturn(enmWorker > GVMMWORKERTHREAD_INVALID && enmWorker < GVMMWORKERTHREAD_END, VERR_INVALID_PARAMETER);
1700 RTNATIVETHREAD const hNativeThread = RTThreadNativeSelf();
1701 AssertReturn(hNativeThread != NIL_RTNATIVETHREAD, VERR_INTERNAL_ERROR_3);
1702 PGVMM pGVMM;
1703 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1704 AssertRCReturn(rc, rc);
1705
1706 /*
1707 * Grab the big lock and check the VM state again.
1708 */
1709 uint32_t const hSelf = pGVM->hSelf;
1710 gvmmR0CreateDestroyLock(pGVMM); /** @todo per-VM lock? */
1711 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1712 && pGVMM->aHandles[hSelf].pvObj != NULL
1713 && pGVMM->aHandles[hSelf].pGVM == pGVM
1714 && pGVMM->aHandles[hSelf].ProcId == RTProcSelf())
1715 {
1716 /*
1717 * Do the deregistration.
1718         * This will prevent any other thread from registering as the worker later.
1719 */
1720 if (pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == hNativeThread)
1721 {
1722 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread = GVMM_RTNATIVETHREAD_DESTROYED;
1723 pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 = GVMM_RTNATIVETHREAD_DESTROYED;
1724 rc = VINF_SUCCESS;
1725 }
1726 else if ( pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThread == GVMM_RTNATIVETHREAD_DESTROYED
1727 && pGVM->gvmm.s.aWorkerThreads[enmWorker].hNativeThreadR3 == GVMM_RTNATIVETHREAD_DESTROYED)
1728 rc = VINF_SUCCESS;
1729 else
1730 rc = VERR_NOT_OWNER;
1731 }
1732 else
1733 rc = VERR_INVALID_VM_HANDLE;
1734 gvmmR0CreateDestroyUnlock(pGVMM);
1735 return rc;
1736}
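
/*
 * A minimal usage sketch (compiled out) of the two calls above: the ring-3 side
 * passes down its native thread handle, the ring-0 worker registers before doing
 * any work and deregisters on the way out.  Which GVMMWORKERTHREAD slot to use
 * depends on the caller and is left as a parameter here.
 */
#if 0
static int exampleWorkerThreadBody(PGVM pGVM, GVMMWORKERTHREAD enmWorker, RTNATIVETHREAD hNativeSelfR3)
{
    int rc = GVMMR0RegisterWorkerThread(pGVM, enmWorker, hNativeSelfR3);
    if (RT_FAILURE(rc))
        return rc;

    /* ... the actual worker loop goes here ... */

    /* Note that the slot cannot be re-registered after this. */
    return GVMMR0DeregisterWorkerThread(pGVM, enmWorker);
}
#endif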
1737
1738
1739/**
1740 * Lookup a GVM structure by its handle.
1741 *
1742 * @returns The GVM pointer on success, NULL on failure.
1743 * @param hGVM The global VM handle. Asserts on bad handle.
1744 */
1745GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1746{
1747 PGVMM pGVMM;
1748 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1749
1750 /*
1751 * Validate.
1752 */
1753 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1754 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1755
1756 /*
1757 * Look it up.
1758 */
1759 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1760 AssertPtrReturn(pHandle->pvObj, NULL);
1761 PGVM pGVM = pHandle->pGVM;
1762 AssertPtrReturn(pGVM, NULL);
1763
1764 return pGVM;
1765}
1766
1767
1768/**
1769 * Check that the given GVM and VM structures match up.
1770 *
1771 * The calling thread must be in the same process as the VM. All current lookups
1772 * are by threads inside the same process, so this will not be an issue.
1773 *
1774 * @returns VBox status code.
1775 * @param pGVM The global (ring-0) VM structure.
1776 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1777 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1778 * shared mode when requested.
1779 *
1780 * Be very careful if not taking the lock as it's
1781 * possible that the VM will disappear then!
1782 *
1783 * @remark This will not assert on an invalid pGVM but try to return silently.
1784 */
1785static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1786{
1787 /*
1788 * Check the pointers.
1789 */
1790 int rc;
1791 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1792 && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
1793 {
1794 /*
1795 * Get the pGVMM instance and check the VM handle.
1796 */
1797 PGVMM pGVMM;
1798 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1799
1800 uint16_t hGVM = pGVM->hSelf;
1801 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1802 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1803 {
1804 RTPROCESS const pidSelf = RTProcSelf();
1805 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1806 if (fTakeUsedLock)
1807 {
1808 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1809 AssertRCReturn(rc, rc);
1810 }
1811
1812 if (RT_LIKELY( pHandle->pGVM == pGVM
1813 && pHandle->ProcId == pidSelf
1814 && RT_VALID_PTR(pHandle->pvObj)))
1815 {
1816 /*
1817 * Some more VM data consistency checks.
1818 */
1819 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1820 && pGVM->hSelfUnsafe == hGVM
1821 && pGVM->pSelf == pGVM))
1822 {
1823 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1824 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1825 {
1826 *ppGVMM = pGVMM;
1827 return VINF_SUCCESS;
1828 }
1829 rc = VERR_INCONSISTENT_VM_HANDLE;
1830 }
1831 else
1832 rc = VERR_INCONSISTENT_VM_HANDLE;
1833 }
1834 else
1835 rc = VERR_INVALID_VM_HANDLE;
1836
1837 if (fTakeUsedLock)
1838 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1839 }
1840 else
1841 rc = VERR_INVALID_VM_HANDLE;
1842 }
1843 else
1844 rc = VERR_INVALID_POINTER;
1845 return rc;
1846}
1847
1848
1849/**
1850 * Validates a GVM/VM pair.
1851 *
1852 * @returns VBox status code.
1853 * @param pGVM The global (ring-0) VM structure.
1854 */
1855GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1856{
1857 PGVMM pGVMM;
1858 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1859}
1860
1861
1862/**
1863 * Check that the given GVM and VM structures match up.
1864 *
1865 * The calling thread must be in the same process as the VM. All current lookups
1866 * are by threads inside the same process, so this will not be an issue.
1867 *
1868 * @returns VBox status code.
1869 * @param pGVM The global (ring-0) VM structure.
1870 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1871 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1872 * @thread EMT
1873 *
1874 * @remarks This will assert in all failure paths.
1875 */
1876static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1877{
1878 /*
1879 * Check the pointers.
1880 */
1881 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1882 AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1883
1884 /*
1885 * Get the pGVMM instance and check the VM handle.
1886 */
1887 PGVMM pGVMM;
1888 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1889
1890 uint16_t hGVM = pGVM->hSelf;
1891 ASMCompilerBarrier();
1892 AssertReturn( hGVM != NIL_GVM_HANDLE
1893 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1894
1895 RTPROCESS const pidSelf = RTProcSelf();
1896 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1897 AssertReturn( pHandle->pGVM == pGVM
1898 && pHandle->ProcId == pidSelf
1899 && RT_VALID_PTR(pHandle->pvObj),
1900 VERR_INVALID_HANDLE);
1901
1902 /*
1903 * Check the EMT claim.
1904 */
1905 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1906 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1907 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1908
1909 /*
1910 * Some more VM data consistency checks.
1911 */
1912 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1913 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1914 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1915 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1916
1917 *ppGVMM = pGVMM;
1918 return VINF_SUCCESS;
1919}
1920
1921
1922/**
1923 * Validates a GVM/EMT pair.
1924 *
1925 * @returns VBox status code.
1926 * @param pGVM The global (ring-0) VM structure.
1927 * @param idCpu The Virtual CPU ID of the calling EMT.
1928 * @thread EMT(idCpu)
1929 */
1930GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
1931{
1932 PGVMM pGVMM;
1933 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1934}
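
/*
 * A minimal sketch (compiled out) of the usual pattern in ring-0 request
 * handlers: validate the GVM/EMT pair before touching per-VCPU state.  The
 * actual work is elided.
 */
#if 0
static int exampleRing0Operation(PGVM pGVM, VMCPUID idCpu)
{
    int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
    if (RT_FAILURE(rc))
        return rc;
    PGVMCPU pGVCpu = &pGVM->aCpus[idCpu]; /* safe now, idCpu has been range checked */
    NOREF(pGVCpu);
    /* ... do the actual work on pGVCpu here ... */
    return VINF_SUCCESS;
}
#endif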
1935
1936
1937/**
1938 * Looks up the VM belonging to the specified EMT thread.
1939 *
1940 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1941 * unnecessary kernel panics when the EMT thread hits an assertion. The
1942 * caller may or may not be an EMT thread.
1943 *
1944 * @returns Pointer to the VM on success, NULL on failure.
1945 * @param hEMT The native thread handle of the EMT.
1946 *                      NIL_RTNATIVETHREAD means the current thread.
1947 */
1948GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1949{
1950 /*
1951     * No Assertions here as we're usually called in an AssertMsgN or
1952 * RTAssert* context.
1953 */
1954 PGVMM pGVMM = g_pGVMM;
1955 if ( !RT_VALID_PTR(pGVMM)
1956 || pGVMM->u32Magic != GVMM_MAGIC)
1957 return NULL;
1958
1959 if (hEMT == NIL_RTNATIVETHREAD)
1960 hEMT = RTThreadNativeSelf();
1961 RTPROCESS ProcId = RTProcSelf();
1962
1963 /*
1964 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1965 */
1966/** @todo introduce some pid hash table here, please. */
1967 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1968 {
1969 if ( pGVMM->aHandles[i].iSelf == i
1970 && pGVMM->aHandles[i].ProcId == ProcId
1971 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1972 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1973 {
1974 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1975 return pGVMM->aHandles[i].pGVM;
1976
1977            /* This is fairly safe with the current process-per-VM approach. */
1978 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1979 VMCPUID const cCpus = pGVM->cCpus;
1980 ASMCompilerBarrier();
1981 if ( cCpus < 1
1982 || cCpus > VMM_MAX_CPU_COUNT)
1983 continue;
1984 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1985 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1986 return pGVMM->aHandles[i].pGVM;
1987 }
1988 }
1989 return NULL;
1990}
1991
1992
1993/**
1994 * Looks up the GVMCPU belonging to the specified EMT thread.
1995 *
1996 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1997 * unnecessary kernel panics when the EMT thread hits an assertion. The
1998 * caller may or may not be an EMT thread.
1999 *
2000 * @returns Pointer to the VCpu structure on success, NULL on failure.
2001 * @param hEMT The native thread handle of the EMT.
2002 *                      NIL_RTNATIVETHREAD means the current thread.
2003 */
2004GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
2005{
2006 /*
2007     * No Assertions here as we're usually called in an AssertMsgN,
2008 * RTAssert*, Log and LogRel contexts.
2009 */
2010 PGVMM pGVMM = g_pGVMM;
2011 if ( !RT_VALID_PTR(pGVMM)
2012 || pGVMM->u32Magic != GVMM_MAGIC)
2013 return NULL;
2014
2015 if (hEMT == NIL_RTNATIVETHREAD)
2016 hEMT = RTThreadNativeSelf();
2017 RTPROCESS ProcId = RTProcSelf();
2018
2019 /*
2020 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
2021 */
2022/** @todo introduce some pid hash table here, please. */
2023 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
2024 {
2025 if ( pGVMM->aHandles[i].iSelf == i
2026 && pGVMM->aHandles[i].ProcId == ProcId
2027 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2028 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
2029 {
2030 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2031 if (pGVMM->aHandles[i].hEMT0 == hEMT)
2032 return &pGVM->aCpus[0];
2033
2034            /* This is fairly safe with the current process-per-VM approach. */
2035 VMCPUID const cCpus = pGVM->cCpus;
2036            ASMCompilerBarrier();
2038 if ( cCpus < 1
2039 || cCpus > VMM_MAX_CPU_COUNT)
2040 continue;
2041 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
2042 if (pGVM->aCpus[idCpu].hEMT == hEMT)
2043 return &pGVM->aCpus[idCpu];
2044 }
2045 }
2046 return NULL;
2047}
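
/*
 * A minimal sketch (compiled out) of how assertion/logging code might use the
 * lock-free lookup above to tag a message with the current VCPU, if any.
 */
#if 0
static void exampleLogCurrentVCpu(void)
{
    PGVMCPU pGVCpu = GVMMR0GetGVCpuByEMT(NIL_RTNATIVETHREAD);
    if (pGVCpu)
        SUPR0Printf("current thread is EMT #%u\n", pGVCpu->idCpu);
    else
        SUPR0Printf("current thread is not an EMT\n");
}
#endif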
2048
2049
2050/**
2051 * Get the GVMCPU structure for the given EMT.
2052 *
2053 * @returns The VCpu structure for @a hEMT, NULL if not an EMT.
2054 * @param pGVM The global (ring-0) VM structure.
2055 * @param hEMT The native thread handle of the EMT.
2056 *                      NIL_RTNATIVETHREAD means the current thread.
2057 */
2058GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByGVMandEMT(PGVM pGVM, RTNATIVETHREAD hEMT)
2059{
2060 /*
2061 * Validate & adjust input.
2062 */
2063 AssertPtr(pGVM);
2064 Assert(pGVM->u32Magic == GVM_MAGIC);
2065 if (hEMT == NIL_RTNATIVETHREAD /* likely */)
2066 {
2067 hEMT = RTThreadNativeSelf();
2068 AssertReturn(hEMT != NIL_RTNATIVETHREAD, NULL);
2069 }
2070
2071 /*
2072 * Find the matching hash table entry.
2073 * See similar code in GVMMR0GetRing3ThreadForSelf.
2074 */
2075 uint32_t idxHash = GVMM_EMT_HASH_1(hEMT);
2076 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2077 { /* likely */ }
2078 else
2079 {
2080#ifdef VBOX_STRICT
2081 unsigned cCollisions = 0;
2082#endif
2083 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hEMT);
2084 for (;;)
2085 {
2086 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2087 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2088 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hEMT)
2089 break;
2090 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2091 {
2092#ifdef VBOX_STRICT
2093 uint32_t idxCpu = pGVM->cCpus;
2094 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2095 while (idxCpu-- > 0)
2096 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hEMT);
2097#endif
2098 return NULL;
2099 }
2100 }
2101 }
2102
2103 /*
2104 * Validate the VCpu number and translate it into a pointer.
2105 */
2106 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2107 AssertReturn(idCpu < pGVM->cCpus, NULL);
2108 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2109 Assert(pGVCpu->hNativeThreadR0 == hEMT);
2110 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2111 return pGVCpu;
2112}
2113
2114
2115/**
2116 * Get the native ring-3 thread handle for the caller.
2117 *
2118 * This works for EMTs and registered workers.
2119 *
2120 * @returns ring-3 native thread handle or NIL_RTNATIVETHREAD.
2121 * @param pGVM The global (ring-0) VM structure.
2122 */
2123GVMMR0DECL(RTNATIVETHREAD) GVMMR0GetRing3ThreadForSelf(PGVM pGVM)
2124{
2125 /*
2126 * Validate input.
2127 */
2128 AssertPtr(pGVM);
2129 AssertReturn(pGVM->u32Magic == GVM_MAGIC, NIL_RTNATIVETHREAD);
2130 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
2131 AssertReturn(hNativeSelf != NIL_RTNATIVETHREAD, NIL_RTNATIVETHREAD);
2132
2133 /*
2134 * Find the matching hash table entry.
2135 * See similar code in GVMMR0GetGVCpuByGVMandEMT.
2136 */
2137 uint32_t idxHash = GVMM_EMT_HASH_1(hNativeSelf);
2138 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2139 { /* likely */ }
2140 else
2141 {
2142#ifdef VBOX_STRICT
2143 unsigned cCollisions = 0;
2144#endif
2145 uint32_t const idxHash2 = GVMM_EMT_HASH_2(hNativeSelf);
2146 for (;;)
2147 {
2148 Assert(cCollisions++ < GVMM_EMT_HASH_SIZE);
2149 idxHash = (idxHash + idxHash2) % GVMM_EMT_HASH_SIZE;
2150 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == hNativeSelf)
2151 break;
2152 if (pGVM->gvmm.s.aEmtHash[idxHash].hNativeEmt == NIL_RTNATIVETHREAD)
2153 {
2154#ifdef VBOX_STRICT
2155 uint32_t idxCpu = pGVM->cCpus;
2156 AssertStmt(idxCpu < VMM_MAX_CPU_COUNT, idxCpu = VMM_MAX_CPU_COUNT);
2157 while (idxCpu-- > 0)
2158 Assert(pGVM->aCpus[idxCpu].hNativeThreadR0 != hNativeSelf);
2159#endif
2160
2161 /*
2162 * Not an EMT, so see if it's a worker thread.
2163 */
2164 size_t idx = RT_ELEMENTS(pGVM->gvmm.s.aWorkerThreads);
2165 while (--idx > GVMMWORKERTHREAD_INVALID)
2166 if (pGVM->gvmm.s.aWorkerThreads[idx].hNativeThread == hNativeSelf)
2167 return pGVM->gvmm.s.aWorkerThreads[idx].hNativeThreadR3;
2168
2169 return NIL_RTNATIVETHREAD;
2170 }
2171 }
2172 }
2173
2174 /*
2175 * Validate the VCpu number and translate it into a pointer.
2176 */
2177 VMCPUID const idCpu = pGVM->gvmm.s.aEmtHash[idxHash].idVCpu;
2178 AssertReturn(idCpu < pGVM->cCpus, NIL_RTNATIVETHREAD);
2179 PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];
2180 Assert(pGVCpu->hNativeThreadR0 == hNativeSelf);
2181 Assert(pGVCpu->gvmm.s.idxEmtHash == idxHash);
2182 return pGVCpu->hNativeThread;
2183}
2184
2185
2186/**
2187 * Converts a pointer with the GVM structure to a host physical address.
2188 *
2189 * @returns Host physical address.
2190 * @param pGVM The global (ring-0) VM structure.
2191 * @param pv The address to convert.
2192 * @thread EMT
2193 */
2194GVMMR0DECL(RTHCPHYS) GVMMR0ConvertGVMPtr2HCPhys(PGVM pGVM, void *pv)
2195{
2196 AssertPtr(pGVM);
2197 Assert(pGVM->u32Magic == GVM_MAGIC);
2198 uintptr_t const off = (uintptr_t)pv - (uintptr_t)pGVM;
2199 Assert(off < RT_UOFFSETOF_DYN(GVM, aCpus[pGVM->cCpus]));
2200 return RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, off >> PAGE_SHIFT) | ((uintptr_t)pv & PAGE_OFFSET_MASK);
2201}
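
/*
 * A minimal sketch (compiled out): translate an address inside the GVM/VCpu
 * allocation into a host physical address, e.g. for handing to hardware.
 * Taking the address of aCpus[0] is just an example of an address inside the
 * range the assertion above permits.
 */
#if 0
static RTHCPHYS exampleGvmFieldPhysAddr(PGVM pGVM)
{
    return GVMMR0ConvertGVMPtr2HCPhys(pGVM, &pGVM->aCpus[0]);
}
#endif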
2202
2203
2204/**
2205 * This will wake up expired and soon-to-be-expired VMs.
2206 *
2207 * @returns The number of VMs that have been woken up.
2208 * @param pGVMM Pointer to the GVMM instance data.
2209 * @param u64Now The current time.
2210 */
2211static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
2212{
2213 /*
2214     * Skip this if we've been disabled, either because of high resolution
2215     * wake-ups or by the user.
2216 */
2217 if (!pGVMM->fDoEarlyWakeUps)
2218 return 0;
2219
2220/** @todo Rewrite this algorithm. See performance defect XYZ. */
2221
2222 /*
2223 * A cheap optimization to stop wasting so much time here on big setups.
2224 */
2225 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
2226 if ( pGVMM->cHaltedEMTs == 0
2227 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
2228 return 0;
2229
2230 /*
2231 * Only one thread doing this at a time.
2232 */
2233 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
2234 return 0;
2235
2236 /*
2237 * The first pass will wake up VMs which have actually expired
2238 * and look for VMs that should be woken up in the 2nd and 3rd passes.
2239 */
2240 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
2241 uint64_t u64Min = UINT64_MAX;
2242 unsigned cWoken = 0;
2243 unsigned cHalted = 0;
2244 unsigned cTodo2nd = 0;
2245 unsigned cTodo3rd = 0;
2246 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2247 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2248 i = pGVMM->aHandles[i].iNext)
2249 {
2250 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2251 if ( RT_VALID_PTR(pCurGVM)
2252 && pCurGVM->u32Magic == GVM_MAGIC)
2253 {
2254 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2255 {
2256 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2257 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2258 if (u64)
2259 {
2260 if (u64 <= u64Now)
2261 {
2262 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2263 {
2264 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2265 AssertRC(rc);
2266 cWoken++;
2267 }
2268 }
2269 else
2270 {
2271 cHalted++;
2272 if (u64 <= uNsEarlyWakeUp1)
2273 cTodo2nd++;
2274 else if (u64 <= uNsEarlyWakeUp2)
2275 cTodo3rd++;
2276 else if (u64 < u64Min)
2277                            u64Min = u64;
2278 }
2279 }
2280 }
2281 }
2282 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2283 }
2284
2285 if (cTodo2nd)
2286 {
2287 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2288 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2289 i = pGVMM->aHandles[i].iNext)
2290 {
2291 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2292 if ( RT_VALID_PTR(pCurGVM)
2293 && pCurGVM->u32Magic == GVM_MAGIC)
2294 {
2295 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2296 {
2297 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2298 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2299 if ( u64
2300 && u64 <= uNsEarlyWakeUp1)
2301 {
2302 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2303 {
2304 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2305 AssertRC(rc);
2306 cWoken++;
2307 }
2308 }
2309 }
2310 }
2311 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2312 }
2313 }
2314
2315 if (cTodo3rd)
2316 {
2317 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2318 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2319 i = pGVMM->aHandles[i].iNext)
2320 {
2321 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2322 if ( RT_VALID_PTR(pCurGVM)
2323 && pCurGVM->u32Magic == GVM_MAGIC)
2324 {
2325 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2326 {
2327 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2328 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2329 if ( u64
2330 && u64 <= uNsEarlyWakeUp2)
2331 {
2332 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2333 {
2334 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2335 AssertRC(rc);
2336 cWoken++;
2337 }
2338 }
2339 }
2340 }
2341 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2342 }
2343 }
2344
2345 /*
2346 * Set the minimum value.
2347 */
2348 pGVMM->uNsNextEmtWakeup = u64Min;
2349
2350 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2351 return cWoken;
2352}
2353
2354
2355/**
2356 * Halt the EMT thread.
2357 *
2358 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2359 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2360 * @param pGVM The global (ring-0) VM structure.
2361 * @param pGVCpu The global (ring-0) CPU structure of the calling
2362 * EMT.
2363 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2364 * @thread EMT(pGVCpu).
2365 */
2366GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2367{
2368 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2369 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2370 PGVMM pGVMM;
2371 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2372
2373 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2374 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2375
2376 /*
2377 * If we're doing early wake-ups, we must take the UsedList lock before we
2378 * start querying the current time.
2379 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2380 */
2381 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2382 if (fDoEarlyWakeUps)
2383 {
2384 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2385 }
2386
2387 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2388
2389    /* GIP hack: We might be frequently sleeping for short intervals where the
2390 difference between GIP and system time matters on systems with high resolution
2391 system time. So, convert the input from GIP to System time in that case. */
2392 Assert(ASMGetFlags() & X86_EFL_IF);
2393 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2394 const uint64_t u64NowGip = RTTimeNanoTS();
2395
2396 if (fDoEarlyWakeUps)
2397 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2398
2399 /*
2400 * Go to sleep if we must...
2401 * Cap the sleep time to 1 second to be on the safe side.
2402 */
2403 int rc;
2404 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2405 if ( u64NowGip < u64ExpireGipTime
2406 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2407 ? pGVMM->nsMinSleepCompany
2408 : pGVMM->nsMinSleepAlone))
2409 {
2410 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2411 if (cNsInterval > RT_NS_1SEC)
2412 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2413 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2414 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2415 if (fDoEarlyWakeUps)
2416 {
2417 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2418 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2419 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2420 }
2421
2422 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2423 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2424 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2425
2426 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2427 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2428
2429        /* Reset the semaphore to try to prevent a few false wake-ups. */
2430 if (rc == VINF_SUCCESS)
2431 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2432 else if (rc == VERR_TIMEOUT)
2433 {
2434 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2435 rc = VINF_SUCCESS;
2436 }
2437 }
2438 else
2439 {
2440 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2441 if (fDoEarlyWakeUps)
2442 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2443 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2444 rc = VINF_SUCCESS;
2445 }
2446
2447 return rc;
2448}
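
/*
 * A minimal sketch (compiled out) of an EMT blocking until a deadline or an
 * external wake-up; the 1 ms relative deadline is just an example value.
 */
#if 0
static int exampleHaltForOneMs(PGVM pGVM, PGVMCPU pGVCpu)
{
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + RT_NS_1MS; /* GIP time */
    int rc = GVMMR0SchedHalt(pGVM, pGVCpu, u64ExpireGipTime);
    /* VINF_SUCCESS: timed out or woken up; VERR_INTERRUPTED: a signal is
       pending and the EMT should return to ring-3. */
    return rc;
}
#endif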
2449
2450
2451/**
2452 * Halt the EMT thread.
2453 *
2454 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2455 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2456 * @param pGVM The global (ring-0) VM structure.
2457 * @param idCpu The Virtual CPU ID of the calling EMT.
2458 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2459 * @thread EMT(idCpu).
2460 */
2461GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2462{
2463 PGVMM pGVMM;
2464 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2465 if (RT_SUCCESS(rc))
2466 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2467 return rc;
2468}
2469
2470
2471
2472/**
2473 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2474 * a sleeping EMT.
2475 *
2476 * @retval VINF_SUCCESS if successfully woken up.
2477 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2478 *
2479 * @param pGVM The global (ring-0) VM structure.
2480 * @param pGVCpu The global (ring-0) VCPU structure.
2481 */
2482DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2483{
2484 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2485
2486 /*
2487     * Signal the semaphore regardless of whether it's currently blocked on it.
2488     *
2489     * The reason for this is that there is absolutely no way we can be 100%
2490     * certain that it isn't *about* to go to sleep on it and just got
2491     * delayed a bit en route. So, we will always signal the semaphore when
2492     * it is flagged as halted in the VMM.
2493 */
2494/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2495 int rc;
2496 if (pGVCpu->gvmm.s.u64HaltExpire)
2497 {
2498 rc = VINF_SUCCESS;
2499 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2500 }
2501 else
2502 {
2503 rc = VINF_GVM_NOT_BLOCKED;
2504 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2505 }
2506
2507 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2508 AssertRC(rc2);
2509
2510 return rc;
2511}
2512
2513
2514/**
2515 * Wakes up the halted EMT thread so it can service a pending request.
2516 *
2517 * @returns VBox status code.
2518 * @retval VINF_SUCCESS if successfully woken up.
2519 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2520 *
2521 * @param pGVM The global (ring-0) VM structure.
2522 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2523 * @param fTakeUsedLock Whether to take the used lock or not.
2524 * @thread Any but EMT(idCpu).
2525 */
2526GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2527{
2528 /*
2529 * Validate input and take the UsedLock.
2530 */
2531 PGVMM pGVMM;
2532 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2533 if (RT_SUCCESS(rc))
2534 {
2535 if (idCpu < pGVM->cCpus)
2536 {
2537 /*
2538 * Do the actual job.
2539 */
2540 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2541
2542 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2543 {
2544 /*
2545 * While we're here, do a round of scheduling.
2546 */
2547 Assert(ASMGetFlags() & X86_EFL_IF);
2548 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2549 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2550 }
2551 }
2552 else
2553 rc = VERR_INVALID_CPU_ID;
2554
2555 if (fTakeUsedLock)
2556 {
2557 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2558 AssertRC(rc2);
2559 }
2560 }
2561
2562 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2563 return rc;
2564}
2565
2566
2567/**
2568 * Wakes up the halted EMT thread so it can service a pending request.
2569 *
2570 * @returns VBox status code.
2571 * @retval VINF_SUCCESS if successfully woken up.
2572 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2573 *
2574 * @param pGVM The global (ring-0) VM structure.
2575 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2576 * @thread Any but EMT(idCpu).
2577 */
2578GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2579{
2580 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2581}
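
/*
 * A minimal sketch (compiled out): kick a halted EMT from another ring-0
 * thread after queueing work for it (the queueing itself is not shown).
 */
#if 0
static void exampleKickEmt(PGVM pGVM, VMCPUID idCpu)
{
    int rc = GVMMR0SchedWakeUp(pGVM, idCpu);
    AssertRC(rc); /* VINF_GVM_NOT_BLOCKED just means it wasn't halted. */
}
#endif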
2582
2583
2584/**
2585 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2586 * parameter and no used locking.
2587 *
2588 * @returns VBox status code.
2589 * @retval VINF_SUCCESS if successfully woken up.
2590 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2591 *
2592 * @param pGVM The global (ring-0) VM structure.
2593 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2594 * @thread Any but EMT(idCpu).
2595 * @deprecated Don't use in new code if possible! Use the GVM variant.
2596 */
2597GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2598{
2599 PGVMM pGVMM;
2600 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2601 if (RT_SUCCESS(rc))
2602 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2603 return rc;
2604}
2605
2606
2607/**
2608 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2609 * the Virtual CPU if it's still busy executing guest code.
2610 *
2611 * @returns VBox status code.
2612 * @retval VINF_SUCCESS if poked successfully.
2613 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2614 *
2615 * @param pGVM The global (ring-0) VM structure.
2616 * @param pVCpu The cross context virtual CPU structure.
2617 */
2618DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2619{
2620 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2621
2622 RTCPUID idHostCpu = pVCpu->idHostCpu;
2623 if ( idHostCpu == NIL_RTCPUID
2624 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2625 {
2626 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2627 return VINF_GVM_NOT_BUSY_IN_GC;
2628 }
2629
2630 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2631 RTMpPokeCpu(idHostCpu);
2632 return VINF_SUCCESS;
2633}
2634
2635
2636/**
2637 * Pokes an EMT if it's still busy running guest code.
2638 *
2639 * @returns VBox status code.
2640 * @retval VINF_SUCCESS if poked successfully.
2641 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2642 *
2643 * @param pGVM The global (ring-0) VM structure.
2644 * @param idCpu The ID of the virtual CPU to poke.
2645 * @param fTakeUsedLock Whether to take the used lock or not.
2646 */
2647GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2648{
2649 /*
2650 * Validate input and take the UsedLock.
2651 */
2652 PGVMM pGVMM;
2653 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2654 if (RT_SUCCESS(rc))
2655 {
2656 if (idCpu < pGVM->cCpus)
2657 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2658 else
2659 rc = VERR_INVALID_CPU_ID;
2660
2661 if (fTakeUsedLock)
2662 {
2663 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2664 AssertRC(rc2);
2665 }
2666 }
2667
2668    LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2669 return rc;
2670}
2671
2672
2673/**
2674 * Pokes an EMT if it's still busy running guest code.
2675 *
2676 * @returns VBox status code.
2677 * @retval VINF_SUCCESS if poked successfully.
2678 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2679 *
2680 * @param pGVM The global (ring-0) VM structure.
2681 * @param idCpu The ID of the virtual CPU to poke.
2682 */
2683GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2684{
2685 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2686}
2687
2688
2689/**
2690 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2691 * used locking.
2692 *
2693 * @returns VBox status code.
2694 * @retval VINF_SUCCESS if poked successfully.
2695 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2696 *
2697 * @param pGVM The global (ring-0) VM structure.
2698 * @param idCpu The ID of the virtual CPU to poke.
2699 *
2700 * @deprecated Don't use in new code if possible! Use the GVM variant.
2701 */
2702GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2703{
2704 PGVMM pGVMM;
2705 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2706 if (RT_SUCCESS(rc))
2707 {
2708 if (idCpu < pGVM->cCpus)
2709 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2710 else
2711 rc = VERR_INVALID_CPU_ID;
2712 }
2713 return rc;
2714}
2715
2716
2717/**
2718 * Wakes up a set of halted EMT threads so they can service pending requests.
2719 *
2720 * @returns VBox status code, no informational stuff.
2721 *
2722 * @param pGVM The global (ring-0) VM structure.
2723 * @param pSleepSet The set of sleepers to wake up.
2724 * @param pPokeSet The set of CPUs to poke.
2725 */
2726GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2727{
2728 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2729 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2730 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2731
2732 /*
2733 * Validate input and take the UsedLock.
2734 */
2735 PGVMM pGVMM;
2736 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2737 if (RT_SUCCESS(rc))
2738 {
2739 rc = VINF_SUCCESS;
2740 VMCPUID idCpu = pGVM->cCpus;
2741 while (idCpu-- > 0)
2742 {
2743            /* Don't try to poke or wake up ourselves. */
2744 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2745 continue;
2746
2747 /* just ignore errors for now. */
2748 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2749 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2750 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2751 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2752 }
2753
2754 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2755 AssertRC(rc2);
2756 }
2757
2758 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2759 return rc;
2760}
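
/*
 * A minimal sketch (compiled out): wake up every other EMT that is halted and
 * poke the ones still executing guest code, e.g. when a VM-wide change must be
 * noticed quickly.  Assumes the VMCPUSET_EMPTY/VMCPUSET_ADD helpers from
 * VBox/vmm/vmcpuset.h.
 */
#if 0
static int exampleKickAllOtherEmts(PGVM pGVM, VMCPUID idSelf)
{
    VMCPUSET SleepSet, PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
        if (idCpu != idSelf)
        {
            VMCPUSET_ADD(&SleepSet, idCpu); /* woken if halted ... */
            VMCPUSET_ADD(&PokeSet, idCpu);  /* ... poked if busy in guest code */
        }
    return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
}
#endif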
2761
2762
2763/**
2764 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2765 *
2766 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2767 * @param pGVM The global (ring-0) VM structure.
2768 * @param pReq Pointer to the request packet.
2769 */
2770GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2771{
2772 /*
2773 * Validate input and pass it on.
2774 */
2775 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2776 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2777
2778 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2779}
2780
2781
2782
2783/**
2784 * Poll the schedule to see if someone else should get a chance to run.
2785 *
2786 * This is a bit hackish and will not work too well if the machine is
2787 * under heavy load from non-VM processes.
2788 *
2789 * @returns VINF_SUCCESS if not yielded.
2790 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2791 * @param pGVM The global (ring-0) VM structure.
2792 * @param idCpu The Virtual CPU ID of the calling EMT.
2793 * @param fYield Whether to yield or not.
2794 * This is for when we're spinning in the halt loop.
2795 * @thread EMT(idCpu).
2796 */
2797GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2798{
2799 /*
2800 * Validate input.
2801 */
2802 PGVMM pGVMM;
2803 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2804 if (RT_SUCCESS(rc))
2805 {
2806 /*
2807         * We currently only implement helping with wake-ups (fYield = false), so don't
2808 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2809 */
2810 if (!fYield && pGVMM->fDoEarlyWakeUps)
2811 {
2812 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2813 pGVM->gvmm.s.StatsSched.cPollCalls++;
2814
2815 Assert(ASMGetFlags() & X86_EFL_IF);
2816 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2817
2818 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2819
2820 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2821 }
2822 /*
2823 * Not quite sure what we could do here...
2824 */
2825 else if (fYield)
2826 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2827 else
2828 rc = VINF_SUCCESS;
2829 }
2830
2831    LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2832 return rc;
2833}
2834
2835
2836#ifdef GVMM_SCHED_WITH_PPT
2837/**
2838 * Timer callback for the periodic preemption timer.
2839 *
2840 * @param pTimer The timer handle.
2841 * @param pvUser Pointer to the per cpu structure.
2842 * @param iTick The current tick.
2843 */
2844static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2845{
2846 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2847 NOREF(pTimer); NOREF(iTick);
2848
2849 /*
2850 * Termination check
2851 */
2852 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2853 return;
2854
2855 /*
2856     * Do the housekeeping.
2857 */
2858 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2859
2860 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2861 {
2862 /*
2863 * Historicize the max frequency.
2864 */
2865 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2866 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2867 pCpu->Ppt.iTickHistorization = 0;
2868 pCpu->Ppt.uDesiredHz = 0;
2869
2870 /*
2871         * Check if the current timer frequency needs changing.
2872 */
2873 uint32_t uHistMaxHz = 0;
2874 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2875 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2876 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2877 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2878 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2879 else if (uHistMaxHz)
2880 {
2881 /*
2882 * Reprogram it.
2883 */
2884 pCpu->Ppt.cChanges++;
2885 pCpu->Ppt.iTickHistorization = 0;
2886 pCpu->Ppt.uTimerHz = uHistMaxHz;
2887 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2888 pCpu->Ppt.cNsInterval = cNsInterval;
2889 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2890 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2891 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2892 / cNsInterval;
2893 else
2894 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2895 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2896
2897 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2898 RTTimerChangeInterval(pTimer, cNsInterval);
2899 }
2900 else
2901 {
2902 /*
2903 * Stop it.
2904 */
2905 pCpu->Ppt.fStarted = false;
2906 pCpu->Ppt.uTimerHz = 0;
2907 pCpu->Ppt.cNsInterval = 0;
2908 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2909
2910 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2911 RTTimerStop(pTimer);
2912 }
2913 }
2914 else
2915 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2916}
2917#endif /* GVMM_SCHED_WITH_PPT */
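
/*
 * A worked sketch (compiled out) of the interval math used above, assuming a
 * 20 ms historization interval (the real GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
 * value may differ) and a requested 1000 Hz timer.
 */
#if 0
static void examplePptIntervalMath(void)
{
    uint32_t const cNsHistInterval = 20 * RT_NS_1MS;    /* assumed value */
    uint32_t const uHz             = 1000;
    uint32_t const cNsInterval     = RT_NS_1SEC / uHz;  /* 1 000 000 ns per tick */
    uint32_t const cTicksPerSlot   = (cNsHistInterval + cNsHistInterval / 2 - 1) / cNsInterval;
    /* (20 000 000 + 10 000 000 - 1) / 1 000 000 = 29, i.e. each history slot
       spans roughly 1.5 times the historization interval, expressed in ticks. */
    NOREF(cTicksPerSlot);
}
#endif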
2918
2919
2920/**
2921 * Updates the periodic preemption timer for the calling CPU.
2922 *
2923 * The caller must have disabled preemption!
2924 * The caller must check that the host can do high resolution timers.
2925 *
2926 * @param pGVM The global (ring-0) VM structure.
2927 * @param idHostCpu The current host CPU id.
2928 * @param uHz The desired frequency.
2929 */
2930GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
2931{
2932 NOREF(pGVM);
2933#ifdef GVMM_SCHED_WITH_PPT
2934 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2935 Assert(RTTimerCanDoHighResolution());
2936
2937 /*
2938 * Resolve the per CPU data.
2939 */
2940 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2941 PGVMM pGVMM = g_pGVMM;
2942 if ( !RT_VALID_PTR(pGVMM)
2943 || pGVMM->u32Magic != GVMM_MAGIC)
2944 return;
2945 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2946 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2947 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2948 && pCpu->idCpu == idHostCpu,
2949                         ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2950
2951 /*
2952 * Check whether we need to do anything about the timer.
2953     * We have to be a little bit careful since we might race the timer
2954 * callback here.
2955 */
2956 if (uHz > 16384)
2957 uHz = 16384; /** @todo add a query method for this! */
2958 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2959 && uHz >= pCpu->Ppt.uMinHz
2960 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2961 {
2962 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2963
2964 pCpu->Ppt.uDesiredHz = uHz;
2965 uint32_t cNsInterval = 0;
2966 if (!pCpu->Ppt.fStarted)
2967 {
2968 pCpu->Ppt.cStarts++;
2969 pCpu->Ppt.fStarted = true;
2970 pCpu->Ppt.fStarting = true;
2971 pCpu->Ppt.iTickHistorization = 0;
2972 pCpu->Ppt.uTimerHz = uHz;
2973 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2974 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2975 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2976 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2977 / cNsInterval;
2978 else
2979 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2980 }
2981
2982 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2983
2984 if (cNsInterval)
2985 {
2986 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2987 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2988 AssertRC(rc);
2989
2990 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2991 if (RT_FAILURE(rc))
2992 pCpu->Ppt.fStarted = false;
2993 pCpu->Ppt.fStarting = false;
2994 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2995 }
2996 }
2997#else /* !GVMM_SCHED_WITH_PPT */
2998 NOREF(idHostCpu); NOREF(uHz);
2999#endif /* !GVMM_SCHED_WITH_PPT */
3000}
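
/*
 * A minimal sketch (compiled out) of the caller side: preemption must be
 * disabled and the host must support high resolution timers before reporting
 * a frequency hint.  The 2000 Hz hint is just an example value.
 */
#if 0
static void exampleReportTimerHint(PGVM pGVM)
{
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    RTThreadPreemptDisable(&PreemptState);
    if (RTTimerCanDoHighResolution())
        GVMMR0SchedUpdatePeriodicPreemptionTimer(pGVM, RTMpCpuId(), 2000 /*uHz*/);
    RTThreadPreemptRestore(&PreemptState);
}
#endif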
3001
3002
3003/**
3004 * Calls @a pfnCallback for each VM in the system.
3005 *
3006 * This will enumerate the VMs while holding the global VM used list lock in
3007 * shared mode. So, only suitable for simple work. If more expensive work
3008 * needs doing, a different approach must be taken as using this API would
3009 * otherwise block VM creation and destruction.
3010 *
3011 * @returns VBox status code.
3012 * @param pfnCallback The callback function.
3013 * @param pvUser User argument to the callback.
3014 */
3015GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
3016{
3017 PGVMM pGVMM;
3018 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3019
3020 int rc = VINF_SUCCESS;
3021 GVMMR0_USED_SHARED_LOCK(pGVMM);
3022 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
3023 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3024 i = pGVMM->aHandles[i].iNext, cLoops++)
3025 {
3026 PGVM pGVM = pGVMM->aHandles[i].pGVM;
3027 if ( RT_VALID_PTR(pGVM)
3028 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
3029 && pGVM->u32Magic == GVM_MAGIC)
3030 {
3031 rc = pfnCallback(pGVM, pvUser);
3032 if (rc != VINF_SUCCESS)
3033 break;
3034 }
3035
3036 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
3037 }
3038 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3039 return rc;
3040}
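
/*
 * A minimal sketch (compiled out) of an enumeration callback: sum up the EMT
 * count of all VMs visible to the GVMM.  The callback signature matches how
 * pfnCallback is invoked above.
 */
#if 0
static DECLCALLBACK(int) exampleCountCpusCallback(PGVM pGVM, void *pvUser)
{
    *(uint32_t *)pvUser += pGVM->cCpus;
    return VINF_SUCCESS; /* anything else stops the enumeration */
}

static uint32_t exampleCountAllVCpus(void)
{
    uint32_t cCpus = 0;
    GVMMR0EnumVMs(exampleCountCpusCallback, &cCpus);
    return cCpus;
}
#endif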
3041
3042
3043/**
3044 * Retrieves the GVMM statistics visible to the caller.
3045 *
3046 * @returns VBox status code.
3047 *
3048 * @param pStats Where to put the statistics.
3049 * @param pSession The current session.
3050 * @param pGVM The GVM to obtain statistics for. Optional.
3051 */
3052GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3053{
3054 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3055
3056 /*
3057 * Validate input.
3058 */
3059 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3060 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3061 pStats->cVMs = 0; /* (crash before taking the sem...) */
3062
3063 /*
3064 * Take the lock and get the VM statistics.
3065 */
3066 PGVMM pGVMM;
3067 if (pGVM)
3068 {
3069 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3070 if (RT_FAILURE(rc))
3071 return rc;
3072 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
3073 }
3074 else
3075 {
3076 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3077 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
3078
3079 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3080 AssertRCReturn(rc, rc);
3081 }
3082
3083 /*
3084 * Enumerate the VMs and add the ones visible to the statistics.
3085 */
3086 pStats->cVMs = 0;
3087 pStats->cEMTs = 0;
3088 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
3089
3090 for (unsigned i = pGVMM->iUsedHead;
3091 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3092 i = pGVMM->aHandles[i].iNext)
3093 {
3094 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3095 void *pvObj = pGVMM->aHandles[i].pvObj;
3096 if ( RT_VALID_PTR(pvObj)
3097 && RT_VALID_PTR(pOtherGVM)
3098 && pOtherGVM->u32Magic == GVM_MAGIC
3099 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3100 {
3101 pStats->cVMs++;
3102 pStats->cEMTs += pOtherGVM->cCpus;
3103
3104 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
3105 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
3106 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
3107 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
3108 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
3109
3110 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
3111 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
3112 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
3113
3114 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
3115 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
3116
3117 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
3118 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
3119 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
3120 }
3121 }
3122
3123 /*
3124 * Copy out the per host CPU statistics.
3125 */
3126 uint32_t iDstCpu = 0;
3127 uint32_t cSrcCpus = pGVMM->cHostCpus;
3128 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
3129 {
3130 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
3131 {
3132 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
3133 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
3134#ifdef GVMM_SCHED_WITH_PPT
3135 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
3136 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
3137 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
3138 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
3139#else
3140 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
3141 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
3142 pStats->aHostCpus[iDstCpu].cChanges = 0;
3143 pStats->aHostCpus[iDstCpu].cStarts = 0;
3144#endif
3145 iDstCpu++;
3146 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
3147 break;
3148 }
3149 }
3150 pStats->cHostCpus = iDstCpu;
3151
3152 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3153
3154 return VINF_SUCCESS;
3155}
3156
3157
3158/**
3159 * VMMR0 request wrapper for GVMMR0QueryStatistics.
3160 *
3161 * @returns see GVMMR0QueryStatistics.
3162 * @param pGVM The global (ring-0) VM structure. Optional.
3163 * @param pReq Pointer to the request packet.
3164 * @param pSession The current session.
3165 */
3166GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3167{
3168 /*
3169 * Validate input and pass it on.
3170 */
3171 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3172 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3173 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3174
3175 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
3176}
3177
3178
3179/**
3180 * Resets the specified GVMM statistics.
3181 *
3182 * @returns VBox status code.
3183 *
3184 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
3185 * @param pSession The current session.
3186 * @param pGVM The GVM to reset statistics for. Optional.
3187 */
3188GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
3189{
3190 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
3191
3192 /*
3193 * Validate input.
3194 */
3195 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
3196 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
3197
3198 /*
3199 * Take the lock and get the VM statistics.
3200 */
3201 PGVMM pGVMM;
3202 if (pGVM)
3203 {
3204 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
3205 if (RT_FAILURE(rc))
3206 return rc;
3207# define MAYBE_RESET_FIELD(field) \
3208 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3209 MAYBE_RESET_FIELD(cHaltCalls);
3210 MAYBE_RESET_FIELD(cHaltBlocking);
3211 MAYBE_RESET_FIELD(cHaltTimeouts);
3212 MAYBE_RESET_FIELD(cHaltNotBlocking);
3213 MAYBE_RESET_FIELD(cHaltWakeUps);
3214 MAYBE_RESET_FIELD(cWakeUpCalls);
3215 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3216 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3217 MAYBE_RESET_FIELD(cPokeCalls);
3218 MAYBE_RESET_FIELD(cPokeNotBusy);
3219 MAYBE_RESET_FIELD(cPollCalls);
3220 MAYBE_RESET_FIELD(cPollHalts);
3221 MAYBE_RESET_FIELD(cPollWakeUps);
3222# undef MAYBE_RESET_FIELD
3223 }
3224 else
3225 {
3226 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
3227
3228 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
3229 AssertRCReturn(rc, rc);
3230 }
3231
3232 /*
3233 * Enumerate the VMs and add the ones visible to the statistics.
3234 */
3235 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
3236 {
3237 for (unsigned i = pGVMM->iUsedHead;
3238 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
3239 i = pGVMM->aHandles[i].iNext)
3240 {
3241 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
3242 void *pvObj = pGVMM->aHandles[i].pvObj;
3243 if ( RT_VALID_PTR(pvObj)
3244 && RT_VALID_PTR(pOtherGVM)
3245 && pOtherGVM->u32Magic == GVM_MAGIC
3246 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
3247 {
3248# define MAYBE_RESET_FIELD(field) \
3249 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
3250 MAYBE_RESET_FIELD(cHaltCalls);
3251 MAYBE_RESET_FIELD(cHaltBlocking);
3252 MAYBE_RESET_FIELD(cHaltTimeouts);
3253 MAYBE_RESET_FIELD(cHaltNotBlocking);
3254 MAYBE_RESET_FIELD(cHaltWakeUps);
3255 MAYBE_RESET_FIELD(cWakeUpCalls);
3256 MAYBE_RESET_FIELD(cWakeUpNotHalted);
3257 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3258 MAYBE_RESET_FIELD(cPokeCalls);
3259 MAYBE_RESET_FIELD(cPokeNotBusy);
3260 MAYBE_RESET_FIELD(cPollCalls);
3261 MAYBE_RESET_FIELD(cPollHalts);
3262 MAYBE_RESET_FIELD(cPollWakeUps);
3263# undef MAYBE_RESET_FIELD
3264 }
3265 }
3266 }
3267
3268 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3269
3270 return VINF_SUCCESS;
3271}
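
/*
 * A minimal sketch (compiled out): reset only the halt counters of one VM.
 * Non-zero fields in the passed-in statistics select what gets cleared, as
 * the MAYBE_RESET_FIELD logic above shows.
 */
#if 0
static int exampleResetHaltStats(PGVM pGVM, PSUPDRVSESSION pSession)
{
    GVMMSTATS Stats;
    RT_ZERO(Stats);
    Stats.SchedVM.cHaltCalls    = 1; /* non-zero => reset this field */
    Stats.SchedVM.cHaltBlocking = 1;
    Stats.SchedVM.cHaltTimeouts = 1;
    return GVMMR0ResetStatistics(&Stats, pSession, pGVM);
}
#endif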
3272
3273
3274/**
3275 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3276 *
3277 * @returns see GVMMR0ResetStatistics.
3278 * @param pGVM The global (ring-0) VM structure. Optional.
3279 * @param pReq Pointer to the request packet.
3280 * @param pSession The current session.
3281 */
3282GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3283{
3284 /*
3285 * Validate input and pass it on.
3286 */
3287 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3288 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3289 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3290
3291 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3292}
3293