VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@87792

Last change on this file since 87792 was 87792, checked in by vboxsync, 4 years ago

VMM/TM: Moved the timers off the hyper heap. Replaced the relative offset addressing with queue array indexing. bugref:9943

  • Property svn:eol-style set to native
  • Property svn:keywords set to Id Revision
File size: 106.5 KB
1/* $Id: GVMMR0.cpp 87792 2021-02-18 18:38:24Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered, this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-cpu
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/dbgf.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/pdm.h>
59#include <VBox/vmm/pgm.h>
60#include <VBox/vmm/vmm.h>
61#ifdef VBOX_WITH_NEM_R0
62# include <VBox/vmm/nem.h>
63#endif
64#include <VBox/vmm/vmcpuset.h>
65#include <VBox/vmm/vmcc.h>
66#include <VBox/param.h>
67#include <VBox/err.h>
68
69#include <iprt/asm.h>
70#include <iprt/asm-amd64-x86.h>
71#include <iprt/critsect.h>
72#include <iprt/mem.h>
73#include <iprt/semaphore.h>
74#include <iprt/time.h>
75#include <VBox/log.h>
76#include <iprt/thread.h>
77#include <iprt/process.h>
78#include <iprt/param.h>
79#include <iprt/string.h>
80#include <iprt/assert.h>
81#include <iprt/mem.h>
82#include <iprt/memobj.h>
83#include <iprt/mp.h>
84#include <iprt/cpuset.h>
85#include <iprt/spinlock.h>
86#include <iprt/timer.h>
87
88#include "dtrace/VBoxVMM.h"
89
90
91/*********************************************************************************************************************************
92* Defined Constants And Macros *
93*********************************************************************************************************************************/
94#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
95/** Define this to enable the periodic preemption timer. */
96# define GVMM_SCHED_WITH_PPT
97#endif
98
99
100/** @def GVMM_CHECK_SMAP_SETUP
101 * SMAP check setup. */
102/** @def GVMM_CHECK_SMAP_CHECK
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
104 * it will be logged and @a a_BadExpr is executed. */
105/** @def GVMM_CHECK_SMAP_CHECK2
106 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
107 * be logged, written to the VMs assertion text buffer, and @a a_BadExpr is
108 * executed. */
109#if (defined(VBOX_STRICT) || 1) && !defined(VBOX_WITH_RAM_IN_KERNEL)
110# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
111# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
112 do { \
113 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
114 { \
115 RTCCUINTREG fEflCheck = ASMGetFlags(); \
116 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
117 { /* likely */ } \
118 else \
119 { \
120 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
121 a_BadExpr; \
122 } \
123 } \
124 } while (0)
125# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) \
126 do { \
127 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
128 { \
129 RTCCUINTREG fEflCheck = ASMGetFlags(); \
130 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
131 { /* likely */ } \
132 else \
133 { \
134 SUPR0BadContext((a_pGVM) ? (a_pGVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
135 a_BadExpr; \
136 } \
137 } \
138 } while (0)
139#else
140# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
141# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
142# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) NOREF(fKernelFeatures)
143#endif
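
/*
 * Illustrative usage sketch (not part of the original file): how a ring-0
 * entry point might employ the SMAP check macros defined above.  The function
 * name and body are hypothetical; RT_NOTHING is used as the "do nothing"
 * bad-expression argument.
 */
#if 0
GVMMR0DECL(int) GVMMR0ExampleEntryPoint(PGVM pGVM)
{
    GVMM_CHECK_SMAP_SETUP();                    /* caches SUPR0GetKernelFeatures() */
    GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);   /* log a cleared EFLAGS.AC and carry on */
    /* ... work that may touch ring-3 mapped memory ... */
    GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
    return VINF_SUCCESS;
}
#endif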
144
145
146
147/*********************************************************************************************************************************
148* Structures and Typedefs *
149*********************************************************************************************************************************/
150
151/**
152 * Global VM handle.
153 */
154typedef struct GVMHANDLE
155{
156 /** The index of the next handle in the list (free or used). (0 is nil.) */
157 uint16_t volatile iNext;
158 /** Our own index / handle value. */
159 uint16_t iSelf;
160 /** The process ID of the handle owner.
161 * This is used for access checks. */
162 RTPROCESS ProcId;
163 /** The pointer to the ring-0 only (aka global) VM structure. */
164 PGVM pGVM;
165 /** The virtual machine object. */
166 void *pvObj;
167 /** The session this VM is associated with. */
168 PSUPDRVSESSION pSession;
169 /** The ring-0 handle of the EMT0 thread.
170 * This is used for ownership checks as well as looking up a VM handle by thread
171 * at times like assertions. */
172 RTNATIVETHREAD hEMT0;
173} GVMHANDLE;
174/** Pointer to a global VM handle. */
175typedef GVMHANDLE *PGVMHANDLE;
176
177/** Number of GVM handles (including the NIL handle). */
178#if HC_ARCH_BITS == 64
179# define GVMM_MAX_HANDLES 8192
180#else
181# define GVMM_MAX_HANDLES 128
182#endif
183
184/**
185 * Per host CPU GVMM data.
186 */
187typedef struct GVMMHOSTCPU
188{
189 /** Magic number (GVMMHOSTCPU_MAGIC). */
190 uint32_t volatile u32Magic;
191 /** The CPU ID. */
192 RTCPUID idCpu;
193 /** The CPU set index. */
194 uint32_t idxCpuSet;
195
196#ifdef GVMM_SCHED_WITH_PPT
197 /** Periodic preemption timer data. */
198 struct
199 {
200 /** The handle to the periodic preemption timer. */
201 PRTTIMER pTimer;
202 /** Spinlock protecting the data below. */
203 RTSPINLOCK hSpinlock;
204 /** The smallest Hz that we need to care about. (static) */
205 uint32_t uMinHz;
206 /** The number of ticks between each historization. */
207 uint32_t cTicksHistoriziationInterval;
208 /** The current historization tick (counting up to
209 * cTicksHistoriziationInterval and then resetting). */
210 uint32_t iTickHistorization;
211 /** The current timer interval. This is set to 0 when inactive. */
212 uint32_t cNsInterval;
213 /** The current timer frequency. This is set to 0 when inactive. */
214 uint32_t uTimerHz;
215 /** The current max frequency reported by the EMTs.
216 * This gets historicized and reset by the timer callback. This is
217 * read without holding the spinlock, so needs atomic updating. */
218 uint32_t volatile uDesiredHz;
219 /** Whether the timer was started or not. */
220 bool volatile fStarted;
221 /** Set if we're starting timer. */
222 bool volatile fStarting;
223 /** The index of the next history entry (mod it). */
224 uint32_t iHzHistory;
225 /** Historicized uDesiredHz values. The array wraps around, new entries
226 * are added at iHzHistory. This is updated approximately every
227 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
228 uint32_t aHzHistory[8];
229 /** Statistics counter for recording the number of interval changes. */
230 uint32_t cChanges;
231 /** Statistics counter for recording the number of timer starts. */
232 uint32_t cStarts;
233 } Ppt;
234#endif /* GVMM_SCHED_WITH_PPT */
235
236} GVMMHOSTCPU;
237/** Pointer to the per host CPU GVMM data. */
238typedef GVMMHOSTCPU *PGVMMHOSTCPU;
239/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
240#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
241/** The interval one history entry should cover (approximately), given in
242 * nanoseconds. */
243#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
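
/*
 * Illustrative sketch (not part of the original file): one way the timer
 * frequency described in the @page comment above could be derived from the
 * Ppt fields, i.e. the maximum of the current uDesiredHz and the roughly
 * 160 ms of historized values (8 entries x 20 ms).  The helper name is
 * hypothetical; the real logic lives in the timer callback and in
 * GVMMR0SchedUpdatePeriodicPreemptionTimer.
 */
#if 0
static uint32_t gvmmR0ExamplePickTimerHz(PGVMMHOSTCPU pCpu)
{
    uint32_t uHz = ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz);
    for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
        uHz = RT_MAX(uHz, pCpu->Ppt.aHzHistory[i]);
    return uHz >= pCpu->Ppt.uMinHz ? uHz : 0; /* 0 = leave the timer stopped */
}
#endif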
244
245
246/**
247 * The GVMM instance data.
248 */
249typedef struct GVMM
250{
251 /** Eyecatcher / magic. */
252 uint32_t u32Magic;
253 /** The index of the head of the free handle chain. (0 is nil.) */
254 uint16_t volatile iFreeHead;
255 /** The index of the head of the active handle chain. (0 is nil.) */
256 uint16_t volatile iUsedHead;
257 /** The number of VMs. */
258 uint16_t volatile cVMs;
259 /** Alignment padding. */
260 uint16_t u16Reserved;
261 /** The number of EMTs. */
262 uint32_t volatile cEMTs;
263 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
264 uint32_t volatile cHaltedEMTs;
265 /** Mini lock for restricting early wake-ups to one thread. */
266 bool volatile fDoingEarlyWakeUps;
267 bool afPadding[3]; /**< explicit alignment padding. */
268 /** When the next halted or sleeping EMT will wake up.
269 * This is set to 0 when it needs recalculating and to UINT64_MAX when
270 * there are no halted or sleeping EMTs in the GVMM. */
271 uint64_t uNsNextEmtWakeup;
272 /** The lock used to serialize VM creation, destruction and associated events that
273 * aren't performance critical. Owners may acquire the list lock. */
274 RTCRITSECT CreateDestroyLock;
275 /** The lock used to serialize used list updates and accesses.
276 * This indirectly includes scheduling since the scheduler will have to walk the
277 * used list to examine running VMs. Owners may not acquire any other locks. */
278 RTCRITSECTRW UsedLock;
279 /** The handle array.
280 * The size of this array defines the maximum number of currently running VMs.
281 * The first entry is unused as it represents the NIL handle. */
282 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
283
284 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
285 * The number of EMTs that means we no longer consider ourselves alone on a
286 * CPU/Core.
287 */
288 uint32_t cEMTsMeansCompany;
289 /** @gcfgm{/GVMM/MinSleepAlone,32-bit, 0, 100000000, 750000, ns}
290 * The minimum sleep time for when we're alone, in nanoseconds.
291 */
292 uint32_t nsMinSleepAlone;
293 /** @gcfgm{/GVMM/MinSleepCompany,32-bit,0, 100000000, 15000, ns}
294 * The minimum sleep time for when we've got company, in nanoseconds.
295 */
296 uint32_t nsMinSleepCompany;
297 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
298 * The limit for the first round of early wake-ups, given in nanoseconds.
299 */
300 uint32_t nsEarlyWakeUp1;
301 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
302 * The limit for the second round of early wake-ups, given in nanoseconds.
303 */
304 uint32_t nsEarlyWakeUp2;
305
306 /** Set if we're doing early wake-ups.
307 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
308 bool volatile fDoEarlyWakeUps;
309
310 /** The number of entries in the host CPU array (aHostCpus). */
311 uint32_t cHostCpus;
312 /** Per host CPU data (variable length). */
313 GVMMHOSTCPU aHostCpus[1];
314} GVMM;
315AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
316AssertCompileMemberAlignment(GVMM, UsedLock, 8);
317AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
318/** Pointer to the GVMM instance data. */
319typedef GVMM *PGVMM;
320
321/** The GVMM::u32Magic value (Charlie Haden). */
322#define GVMM_MAGIC UINT32_C(0x19370806)
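
/*
 * Illustrative sketch (not part of the original file): how a handle moves
 * from the free chain to the used chain, mirroring what GVMMR0CreateVM does
 * further down while holding the exclusive 'used' lock.  The standalone
 * helper is hypothetical; the member names are the real ones.
 */
#if 0
static uint16_t gvmmR0ExampleAllocHandle(PGVMM pGVMM)
{
    uint16_t const iHandle = pGVMM->iFreeHead;  /* 0 is the nil handle, i.e. list empty */
    if (iHandle)
    {
        PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
        pGVMM->iFreeHead = pHandle->iNext;      /* unlink from the free chain */
        pHandle->iNext   = pGVMM->iUsedHead;    /* link into the used chain */
        pGVMM->iUsedHead = iHandle;
        pGVMM->cVMs++;
    }
    return iHandle;
}
#endif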
323
324
325
326/*********************************************************************************************************************************
327* Global Variables *
328*********************************************************************************************************************************/
329/** Pointer to the GVMM instance data.
330 * (Just my general dislike for global variables.) */
331static PGVMM g_pGVMM = NULL;
332
333/** Macro for obtaining and validating the g_pGVMM pointer.
334 * On failure it will return from the invoking function with the specified return value.
335 *
336 * @param pGVMM The name of the pGVMM variable.
337 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
338 * status codes.
339 */
340#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
341 do { \
342 (pGVMM) = g_pGVMM;\
343 AssertPtrReturn((pGVMM), (rc)); \
344 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
345 } while (0)
346
347/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
348 * On failure it will return from the invoking function.
349 *
350 * @param pGVMM The name of the pGVMM variable.
351 */
352#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
353 do { \
354 (pGVMM) = g_pGVMM;\
355 AssertPtrReturnVoid((pGVMM)); \
356 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
357 } while (0)
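
/*
 * Illustrative usage sketch (hypothetical function, not part of this file):
 * ring-0 entry points fetch and validate the GVMM instance through the macro
 * above before touching any of its members, as GVMMR0SetConfig and friends
 * below do.
 */
#if 0
GVMMR0DECL(int) GVMMR0ExampleQueryVMCount(uint32_t *pcVMs)
{
    PGVMM pGVMM;
    GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE); /* returns on a missing or corrupt instance */
    AssertPtrReturn(pcVMs, VERR_INVALID_POINTER);
    *pcVMs = pGVMM->cVMs;
    return VINF_SUCCESS;
}
#endif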
358
359
360/*********************************************************************************************************************************
361* Internal Functions *
362*********************************************************************************************************************************/
363static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
364static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
365static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
366static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
367
368#ifdef GVMM_SCHED_WITH_PPT
369static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
370#endif
371
372
373/**
374 * Initializes the GVMM.
375 *
376 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
377 *
378 * @returns VBox status code.
379 */
380GVMMR0DECL(int) GVMMR0Init(void)
381{
382 LogFlow(("GVMMR0Init:\n"));
383
384 /*
385 * Allocate and initialize the instance data.
386 */
387 uint32_t cHostCpus = RTMpGetArraySize();
388 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
389
390 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
391 if (!pGVMM)
392 return VERR_NO_MEMORY;
393 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
394 "GVMM-CreateDestroyLock");
395 if (RT_SUCCESS(rc))
396 {
397 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
398 if (RT_SUCCESS(rc))
399 {
400 pGVMM->u32Magic = GVMM_MAGIC;
401 pGVMM->iUsedHead = 0;
402 pGVMM->iFreeHead = 1;
403
404 /* the nil handle */
405 pGVMM->aHandles[0].iSelf = 0;
406 pGVMM->aHandles[0].iNext = 0;
407
408 /* the tail */
409 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
410 pGVMM->aHandles[i].iSelf = i;
411 pGVMM->aHandles[i].iNext = 0; /* nil */
412
413 /* the rest */
414 while (i-- > 1)
415 {
416 pGVMM->aHandles[i].iSelf = i;
417 pGVMM->aHandles[i].iNext = i + 1;
418 }
419
420 /* The default configuration values. */
421 uint32_t cNsResolution = RTSemEventMultiGetResolution();
422 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the cpu count or something... */
423 if (cNsResolution >= 5*RT_NS_100US)
424 {
425 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
426 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
427 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
428 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
429 }
430 else if (cNsResolution > RT_NS_100US)
431 {
432 pGVMM->nsMinSleepAlone = cNsResolution / 2;
433 pGVMM->nsMinSleepCompany = cNsResolution / 4;
434 pGVMM->nsEarlyWakeUp1 = 0;
435 pGVMM->nsEarlyWakeUp2 = 0;
436 }
437 else
438 {
439 pGVMM->nsMinSleepAlone = 2000;
440 pGVMM->nsMinSleepCompany = 2000;
441 pGVMM->nsEarlyWakeUp1 = 0;
442 pGVMM->nsEarlyWakeUp2 = 0;
443 }
444 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
445
446 /* The host CPU data. */
447 pGVMM->cHostCpus = cHostCpus;
448 uint32_t iCpu = cHostCpus;
449 RTCPUSET PossibleSet;
450 RTMpGetSet(&PossibleSet);
451 while (iCpu-- > 0)
452 {
453 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
454#ifdef GVMM_SCHED_WITH_PPT
455 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
456 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
457 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
458 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
459 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
460 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
461 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
463 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
464 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
465 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
466 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
467#endif
468
469 if (RTCpuSetIsMember(&PossibleSet, iCpu))
470 {
471 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
472 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
473
474#ifdef GVMM_SCHED_WITH_PPT
475 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
476 50*1000*1000 /* whatever */,
477 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
478 gvmmR0SchedPeriodicPreemptionTimerCallback,
479 &pGVMM->aHostCpus[iCpu]);
480 if (RT_SUCCESS(rc))
481 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
482 if (RT_FAILURE(rc))
483 {
484 while (iCpu < cHostCpus)
485 {
486 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
487 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
488 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
489 iCpu++;
490 }
491 break;
492 }
493#endif
494 }
495 else
496 {
497 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
498 pGVMM->aHostCpus[iCpu].u32Magic = 0;
499 }
500 }
501 if (RT_SUCCESS(rc))
502 {
503 g_pGVMM = pGVMM;
504 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
505 return VINF_SUCCESS;
506 }
507
508 /* bail out. */
509 RTCritSectRwDelete(&pGVMM->UsedLock);
510 }
511 RTCritSectDelete(&pGVMM->CreateDestroyLock);
512 }
513
514 RTMemFree(pGVMM);
515 return rc;
516}
517
518
519/**
520 * Terminates the GVMM.
521 *
522 * This is called while owning the loader semaphore (see supdrvLdrFree()).
523 * And unless something is wrong, there should be absolutely no VMs
524 * registered at this point.
525 */
526GVMMR0DECL(void) GVMMR0Term(void)
527{
528 LogFlow(("GVMMR0Term:\n"));
529
530 PGVMM pGVMM = g_pGVMM;
531 g_pGVMM = NULL;
532 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
533 {
534 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
535 return;
536 }
537
538 /*
539 * First of all, stop all active timers.
540 */
541 uint32_t cActiveTimers = 0;
542 uint32_t iCpu = pGVMM->cHostCpus;
543 while (iCpu-- > 0)
544 {
545 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
546#ifdef GVMM_SCHED_WITH_PPT
547 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
548 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
549 cActiveTimers++;
550#endif
551 }
552 if (cActiveTimers)
553 RTThreadSleep(1); /* fudge */
554
555 /*
556 * Invalidate the instance data and free resources.
557 */
558 pGVMM->u32Magic = ~GVMM_MAGIC;
559 RTCritSectRwDelete(&pGVMM->UsedLock);
560 RTCritSectDelete(&pGVMM->CreateDestroyLock);
561
562 pGVMM->iFreeHead = 0;
563 if (pGVMM->iUsedHead)
564 {
565 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
566 pGVMM->iUsedHead = 0;
567 }
568
569#ifdef GVMM_SCHED_WITH_PPT
570 iCpu = pGVMM->cHostCpus;
571 while (iCpu-- > 0)
572 {
573 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
574 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
575 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
576 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
577 }
578#endif
579
580 RTMemFree(pGVMM);
581}
582
583
584/**
585 * A quick hack for setting global config values.
586 *
587 * @returns VBox status code.
588 *
589 * @param pSession The session handle. Used for authentication.
590 * @param pszName The variable name.
591 * @param u64Value The new value.
592 */
593GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
594{
595 /*
596 * Validate input.
597 */
598 PGVMM pGVMM;
599 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
600 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
601 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
602
603 /*
604 * String switch time!
605 */
606 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
607 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
608 int rc = VINF_SUCCESS;
609 pszName += sizeof("/GVMM/") - 1;
610 if (!strcmp(pszName, "cEMTsMeansCompany"))
611 {
612 if (u64Value <= UINT32_MAX)
613 pGVMM->cEMTsMeansCompany = u64Value;
614 else
615 rc = VERR_OUT_OF_RANGE;
616 }
617 else if (!strcmp(pszName, "MinSleepAlone"))
618 {
619 if (u64Value <= RT_NS_100MS)
620 pGVMM->nsMinSleepAlone = u64Value;
621 else
622 rc = VERR_OUT_OF_RANGE;
623 }
624 else if (!strcmp(pszName, "MinSleepCompany"))
625 {
626 if (u64Value <= RT_NS_100MS)
627 pGVMM->nsMinSleepCompany = u64Value;
628 else
629 rc = VERR_OUT_OF_RANGE;
630 }
631 else if (!strcmp(pszName, "EarlyWakeUp1"))
632 {
633 if (u64Value <= RT_NS_100MS)
634 {
635 pGVMM->nsEarlyWakeUp1 = u64Value;
636 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
637 }
638 else
639 rc = VERR_OUT_OF_RANGE;
640 }
641 else if (!strcmp(pszName, "EarlyWakeUp2"))
642 {
643 if (u64Value <= RT_NS_100MS)
644 {
645 pGVMM->nsEarlyWakeUp2 = u64Value;
646 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
647 }
648 else
649 rc = VERR_OUT_OF_RANGE;
650 }
651 else
652 rc = VERR_CFGM_VALUE_NOT_FOUND;
653 return rc;
654}
655
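
/*
 * Illustrative call sketch (hypothetical values, not part of this file): how
 * a ring-0 caller holding a valid session could tweak the scheduler tunables
 * accepted by GVMMR0SetConfig above.  The names correspond to the @gcfgm
 * entries documented on the GVMM structure.
 */
#if 0
    int rcCfg = GVMMR0SetConfig(pSession, "/GVMM/MinSleepAlone", 500000 /* ns */);
    if (RT_SUCCESS(rcCfg))
        rcCfg = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp1", 0); /* a zero here disables early wake-ups */
#endif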
656
657/**
658 * A quick hack for getting global config values.
659 *
660 * @returns VBox status code.
661 *
662 * @param pSession The session handle. Used for authentication.
663 * @param pszName The variable name.
664 * @param pu64Value Where to return the value.
665 */
666GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
667{
668 /*
669 * Validate input.
670 */
671 PGVMM pGVMM;
672 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
673 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
674 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
675 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
676
677 /*
678 * String switch time!
679 */
680 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
681 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
682 int rc = VINF_SUCCESS;
683 pszName += sizeof("/GVMM/") - 1;
684 if (!strcmp(pszName, "cEMTsMeansCompany"))
685 *pu64Value = pGVMM->cEMTsMeansCompany;
686 else if (!strcmp(pszName, "MinSleepAlone"))
687 *pu64Value = pGVMM->nsMinSleepAlone;
688 else if (!strcmp(pszName, "MinSleepCompany"))
689 *pu64Value = pGVMM->nsMinSleepCompany;
690 else if (!strcmp(pszName, "EarlyWakeUp1"))
691 *pu64Value = pGVMM->nsEarlyWakeUp1;
692 else if (!strcmp(pszName, "EarlyWakeUp2"))
693 *pu64Value = pGVMM->nsEarlyWakeUp2;
694 else
695 rc = VERR_CFGM_VALUE_NOT_FOUND;
696 return rc;
697}
698
699
700/**
701 * Acquire the 'used' lock in shared mode.
702 *
703 * This prevents destruction of the VM while we're in ring-0.
704 *
705 * @returns IPRT status code, see RTCritSectRwEnterShared.
706 * @param a_pGVMM The GVMM instance data.
707 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
708 */
709#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
710
711/**
712 * Release the 'used' lock when owning it in shared mode.
713 *
714 * @returns IPRT status code, see RTCritSectRwLeaveShared.
715 * @param a_pGVMM The GVMM instance data.
716 * @sa GVMMR0_USED_SHARED_LOCK
717 */
718#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
719
720/**
721 * Acquire the 'used' lock in exclusive mode.
722 *
723 * Only use this function when making changes to the used list.
724 *
725 * @returns IPRT status code, see RTCritSectRwEnterExcl.
726 * @param a_pGVMM The GVMM instance data.
727 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
728 */
729#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
730
731/**
732 * Release the 'used' lock when owning it in exclusive mode.
733 *
734 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
735 * @param a_pGVMM The GVMM instance data.
736 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
737 */
738#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
739
740
741/**
742 * Try acquire the 'create & destroy' lock.
743 *
744 * @returns IPRT status code, see RTCritSectEnter.
745 * @param pGVMM The GVMM instance data.
746 */
747DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
748{
749 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
750 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
751 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
752 return rc;
753}
754
755
756/**
757 * Release the 'create & destroy' lock.
758 *
759 * @returns IPRT status code, see RTCritSectLeave.
760 * @param pGVMM The GVMM instance data.
761 */
762DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
763{
764 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
765 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
766 AssertRC(rc);
767 return rc;
768}
769
770
771/**
772 * Request wrapper for the GVMMR0CreateVM API.
773 *
774 * @returns VBox status code.
775 * @param pReq The request buffer.
776 * @param pSession The session handle. The VM will be associated with this.
777 */
778GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
779{
780 /*
781 * Validate the request.
782 */
783 if (!RT_VALID_PTR(pReq))
784 return VERR_INVALID_POINTER;
785 if (pReq->Hdr.cbReq != sizeof(*pReq))
786 return VERR_INVALID_PARAMETER;
787 if (pReq->pSession != pSession)
788 return VERR_INVALID_POINTER;
789
790 /*
791 * Execute it.
792 */
793 PGVM pGVM;
794 pReq->pVMR0 = NULL;
795 pReq->pVMR3 = NIL_RTR3PTR;
796 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
797 if (RT_SUCCESS(rc))
798 {
799 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
800 pReq->pVMR3 = pGVM->pVMR3;
801 }
802 return rc;
803}
804
805
806/**
807 * Allocates the VM structure and registers it with GVM.
808 *
809 * The caller will become the VM owner and thereby the EMT.
810 *
811 * @returns VBox status code.
812 * @param pSession The support driver session.
813 * @param cCpus Number of virtual CPUs for the new VM.
814 * @param ppGVM Where to store the pointer to the VM structure.
815 *
816 * @thread EMT.
817 */
818GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
819{
820 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
821 PGVMM pGVMM;
822 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
823
824 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
825 *ppGVM = NULL;
826
827 if ( cCpus == 0
828 || cCpus > VMM_MAX_CPU_COUNT)
829 return VERR_INVALID_PARAMETER;
830
831 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
832 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
833 RTPROCESS ProcId = RTProcSelf();
834 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
835
836 /*
837 * The whole allocation process is protected by the lock.
838 */
839 int rc = gvmmR0CreateDestroyLock(pGVMM);
840 AssertRCReturn(rc, rc);
841
842 /*
843 * Only one VM per session.
844 */
845 if (SUPR0GetSessionVM(pSession) != NULL)
846 {
847 gvmmR0CreateDestroyUnlock(pGVMM);
848 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
849 return VERR_ALREADY_EXISTS;
850 }
851
852 /*
853 * Allocate a handle first so we don't waste resources unnecessarily.
854 */
855 uint16_t iHandle = pGVMM->iFreeHead;
856 if (iHandle)
857 {
858 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
859
860 /* consistency checks, a bit paranoid as always. */
861 if ( !pHandle->pGVM
862 && !pHandle->pvObj
863 && pHandle->iSelf == iHandle)
864 {
865 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
866 if (pHandle->pvObj)
867 {
868 /*
869 * Move the handle from the free to used list and perform permission checks.
870 */
871 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
872 AssertRC(rc);
873
874 pGVMM->iFreeHead = pHandle->iNext;
875 pHandle->iNext = pGVMM->iUsedHead;
876 pGVMM->iUsedHead = iHandle;
877 pGVMM->cVMs++;
878
879 pHandle->pGVM = NULL;
880 pHandle->pSession = pSession;
881 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
882 pHandle->ProcId = NIL_RTPROCESS;
883
884 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
885
886 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
887 if (RT_SUCCESS(rc))
888 {
889 /*
890 * Allocate memory for the VM structure (combined VM + GVM).
891 */
892 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
893 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
894 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
895 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
896 if (RT_SUCCESS(rc))
897 {
898 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
899 AssertPtr(pGVM);
900
901 /*
902 * Initialise the structure.
903 */
904 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
905 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
906 pGVM->gvmm.s.VMMemObj = hVMMemObj;
907 rc = GMMR0InitPerVMData(pGVM);
908 int rc2 = PGMR0InitPerVMData(pGVM);
909 DBGFR0InitPerVMData(pGVM);
910 PDMR0InitPerVMData(pGVM);
911 IOMR0InitPerVMData(pGVM);
912 TMR0InitPerVMData(pGVM);
913 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2))
914 {
915 /*
916 * Allocate page array.
917 * This currently has to be made available to ring-3, but that should change eventually.
918 */
919 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
920 if (RT_SUCCESS(rc))
921 {
922 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
923 for (uint32_t iPage = 0; iPage < cPages; iPage++)
924 {
925 paPages[iPage].uReserved = 0;
926 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
927 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
928 }
929
930 /*
931 * Map the page array, VM and VMCPU structures into ring-3.
932 */
933 AssertCompileSizeAlignment(VM, PAGE_SIZE);
934 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
935 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
936 0 /*offSub*/, sizeof(VM));
937 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
938 {
939 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
940 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
941 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
942 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
943 }
944 if (RT_SUCCESS(rc))
945 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
946 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
947 NIL_RTR0PROCESS);
948 if (RT_SUCCESS(rc))
949 {
950 /*
951 * Initialize all the VM pointers.
952 */
953 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
954 AssertPtr((void *)pVMR3);
955
956 for (VMCPUID i = 0; i < cCpus; i++)
957 {
958 pGVM->aCpus[i].pVMR0 = pGVM;
959 pGVM->aCpus[i].pVMR3 = pVMR3;
960 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
961 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
962 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
963 AssertPtr((void *)pGVM->apCpusR3[i]);
964 }
965
966 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
967 AssertPtr((void *)pGVM->paVMPagesR3);
968
969 /*
970 * Complete the handle - take the UsedLock sem just to be careful.
971 */
972 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
973 AssertRC(rc);
974
975 pHandle->pGVM = pGVM;
976 pHandle->hEMT0 = hEMT0;
977 pHandle->ProcId = ProcId;
978 pGVM->pVMR3 = pVMR3;
979 pGVM->pVMR3Unsafe = pVMR3;
980 pGVM->aCpus[0].hEMT = hEMT0;
981 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
982 pGVMM->cEMTs += cCpus;
983
984 /* Associate it with the session and create the context hook for EMT0. */
985 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
986 if (RT_SUCCESS(rc))
987 {
988 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
989 if (RT_SUCCESS(rc))
990 {
991 /*
992 * Done!
993 */
994 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
995
996 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
997 gvmmR0CreateDestroyUnlock(pGVMM);
998
999 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1000
1001 *ppGVM = pGVM;
1002 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1003 return VINF_SUCCESS;
1004 }
1005
1006 SUPR0SetSessionVM(pSession, NULL, NULL);
1007 }
1008 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1009 }
1010
1011 /* Cleanup mappings. */
1012 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1013 {
1014 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1015 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1016 }
1017 for (VMCPUID i = 0; i < cCpus; i++)
1018 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1019 {
1020 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1021 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1022 }
1023 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1024 {
1025 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1026 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1027 }
1028 }
1029 }
1030 else if (RT_SUCCESS(rc))
1031 rc = rc2;
1032 }
1033 }
1034 /* else: The user wasn't permitted to create this VM. */
1035
1036 /*
1037 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1038 * object reference here. A little extra mess because of the non-recursive lock.
1039 */
1040 void *pvObj = pHandle->pvObj;
1041 pHandle->pvObj = NULL;
1042 gvmmR0CreateDestroyUnlock(pGVMM);
1043
1044 SUPR0ObjRelease(pvObj, pSession);
1045
1046 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1047 return rc;
1048 }
1049
1050 rc = VERR_NO_MEMORY;
1051 }
1052 else
1053 rc = VERR_GVMM_IPE_1;
1054 }
1055 else
1056 rc = VERR_GVM_TOO_MANY_VMS;
1057
1058 gvmmR0CreateDestroyUnlock(pGVMM);
1059 return rc;
1060}
1061
1062
1063/**
1064 * Initializes the per VM data belonging to GVMM.
1065 *
1066 * @param pGVM Pointer to the global VM structure.
1067 * @param hSelf The handle.
1068 * @param cCpus The CPU count.
1069 * @param pSession The session this VM is associated with.
1070 */
1071static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1072{
1073 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1074 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1075 AssertCompileMemberAlignment(VM, cpum, 64);
1076 AssertCompileMemberAlignment(VM, tm, 64);
1077
1078 /* GVM: */
1079 pGVM->u32Magic = GVM_MAGIC;
1080 pGVM->hSelf = hSelf;
1081 pGVM->cCpus = cCpus;
1082 pGVM->pSession = pSession;
1083 pGVM->pSelf = pGVM;
1084
1085 /* VM: */
1086 pGVM->enmVMState = VMSTATE_CREATING;
1087 pGVM->hSelfUnsafe = hSelf;
1088 pGVM->pSessionUnsafe = pSession;
1089 pGVM->pVMR0ForCall = pGVM;
1090 pGVM->cCpusUnsafe = cCpus;
1091 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1092 pGVM->uStructVersion = 1;
1093 pGVM->cbSelf = sizeof(VM);
1094 pGVM->cbVCpu = sizeof(VMCPU);
1095
1096 /* GVMM: */
1097 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1098 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1099 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1100 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1101 pGVM->gvmm.s.fDoneVMMR0Init = false;
1102 pGVM->gvmm.s.fDoneVMMR0Term = false;
1103
1104 /*
1105 * Per virtual CPU.
1106 */
1107 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1108 {
1109 pGVM->aCpus[i].idCpu = i;
1110 pGVM->aCpus[i].idCpuUnsafe = i;
1111 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1112 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1113 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1114 pGVM->aCpus[i].pGVM = pGVM;
1115 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1116 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1117 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1118 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1119 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1120 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1121 }
1122}
1123
1124
1125/**
1126 * Does the VM initialization.
1127 *
1128 * @returns VBox status code.
1129 * @param pGVM The global (ring-0) VM structure.
1130 */
1131GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1132{
1133 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1134
1135 int rc = VERR_INTERNAL_ERROR_3;
1136 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1137 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1138 {
1139 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1140 {
1141 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1142 if (RT_FAILURE(rc))
1143 {
1144 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1145 break;
1146 }
1147 }
1148 }
1149 else
1150 rc = VERR_WRONG_ORDER;
1151
1152 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1153 return rc;
1154}
1155
1156
1157/**
1158 * Indicates that we're done with the ring-0 initialization
1159 * of the VM.
1160 *
1161 * @param pGVM The global (ring-0) VM structure.
1162 * @thread EMT(0)
1163 */
1164GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1165{
1166 /* Set the indicator. */
1167 pGVM->gvmm.s.fDoneVMMR0Init = true;
1168}
1169
1170
1171/**
1172 * Indicates that we're doing the ring-0 termination of the VM.
1173 *
1174 * @returns true if termination hasn't been done already, false if it has.
1175 * @param pGVM Pointer to the global VM structure. Optional.
1176 * @thread EMT(0) or session cleanup thread.
1177 */
1178GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1179{
1180 /* Validate the VM structure, state and handle. */
1181 AssertPtrReturn(pGVM, false);
1182
1183 /* Set the indicator. */
1184 if (pGVM->gvmm.s.fDoneVMMR0Term)
1185 return false;
1186 pGVM->gvmm.s.fDoneVMMR0Term = true;
1187 return true;
1188}
1189
1190
1191/**
1192 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1193 *
1194 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1195 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1196 * would've been nice if the caller was actually the EMT thread or if we somehow
1197 * could've associated the calling thread with the VM up front.
1198 *
1199 * @returns VBox status code.
1200 * @param pGVM The global (ring-0) VM structure.
1201 *
1202 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1203 */
1204GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1205{
1206 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1207 PGVMM pGVMM;
1208 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1209
1210 /*
1211 * Validate the VM structure, state and caller.
1212 */
1213 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1214 AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1215 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1216 VERR_WRONG_ORDER);
1217
1218 uint32_t hGVM = pGVM->hSelf;
1219 ASMCompilerBarrier();
1220 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1221 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1222
1223 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1224 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1225
1226 RTPROCESS ProcId = RTProcSelf();
1227 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1228 AssertReturn( ( pHandle->hEMT0 == hSelf
1229 && pHandle->ProcId == ProcId)
1230 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1231
1232 /*
1233 * Lookup the handle and destroy the object.
1234 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1235 * object, we take some precautions against racing callers just in case...
1236 */
1237 int rc = gvmmR0CreateDestroyLock(pGVMM);
1238 AssertRC(rc);
1239
1240 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1241 if ( pHandle->pGVM == pGVM
1242 && ( ( pHandle->hEMT0 == hSelf
1243 && pHandle->ProcId == ProcId)
1244 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1245 && RT_VALID_PTR(pHandle->pvObj)
1246 && RT_VALID_PTR(pHandle->pSession)
1247 && RT_VALID_PTR(pHandle->pGVM)
1248 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1249 {
1250 /* Check that other EMTs have deregistered. */
1251 uint32_t cNotDeregistered = 0;
1252 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1253 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1254 if (cNotDeregistered == 0)
1255 {
1256 /* Grab the object pointer. */
1257 void *pvObj = pHandle->pvObj;
1258 pHandle->pvObj = NULL;
1259 gvmmR0CreateDestroyUnlock(pGVMM);
1260
1261 SUPR0ObjRelease(pvObj, pHandle->pSession);
1262 }
1263 else
1264 {
1265 gvmmR0CreateDestroyUnlock(pGVMM);
1266 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1267 }
1268 }
1269 else
1270 {
1271 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1272 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1273 gvmmR0CreateDestroyUnlock(pGVMM);
1274 rc = VERR_GVMM_IPE_2;
1275 }
1276
1277 return rc;
1278}
1279
1280
1281/**
1282 * Performs VM cleanup task as part of object destruction.
1283 *
1284 * @param pGVM The GVM pointer.
1285 */
1286static void gvmmR0CleanupVM(PGVM pGVM)
1287{
1288 if ( pGVM->gvmm.s.fDoneVMMR0Init
1289 && !pGVM->gvmm.s.fDoneVMMR0Term)
1290 {
1291 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1292 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1293 {
1294 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1295 VMMR0TermVM(pGVM, NIL_VMCPUID);
1296 }
1297 else
1298 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1299 }
1300
1301 GMMR0CleanupVM(pGVM);
1302#ifdef VBOX_WITH_NEM_R0
1303 NEMR0CleanupVM(pGVM);
1304#endif
1305 PDMR0CleanupVM(pGVM);
1306 IOMR0CleanupVM(pGVM);
1307 DBGFR0CleanupVM(pGVM);
1308 PGMR0CleanupVM(pGVM);
1309 TMR0CleanupVM(pGVM);
1310
1311 AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
1312 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1313 {
1314 /** @todo Can we busy wait here for all thread-context hooks to be
1315 * deregistered before releasing (destroying) it? Only until we find a
1316 * solution for not deregistering hooks every time we're leaving HMR0
1317 * context. */
1318 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1319 }
1320}
1321
1322
1323/**
1324 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1325 *
1326 * pvUser1 is the GVM instance pointer.
1327 * pvUser2 is the handle pointer.
1328 */
1329static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1330{
1331 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1332
1333 NOREF(pvObj);
1334
1335 /*
1336 * Some quick, paranoid, input validation.
1337 */
1338 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1339 AssertPtr(pHandle);
1340 PGVMM pGVMM = (PGVMM)pvUser1;
1341 Assert(pGVMM == g_pGVMM);
1342 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1343 if ( !iHandle
1344 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1345 || iHandle != pHandle->iSelf)
1346 {
1347 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1348 return;
1349 }
1350
1351 int rc = gvmmR0CreateDestroyLock(pGVMM);
1352 AssertRC(rc);
1353 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1354 AssertRC(rc);
1355
1356 /*
1357 * This is a tad slow but a doubly linked list is too much hassle.
1358 */
1359 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1360 {
1361 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1362 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1363 gvmmR0CreateDestroyUnlock(pGVMM);
1364 return;
1365 }
1366
1367 if (pGVMM->iUsedHead == iHandle)
1368 pGVMM->iUsedHead = pHandle->iNext;
1369 else
1370 {
1371 uint16_t iPrev = pGVMM->iUsedHead;
1372 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1373 while (iPrev)
1374 {
1375 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1376 {
1377 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1378 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1379 gvmmR0CreateDestroyUnlock(pGVMM);
1380 return;
1381 }
1382 if (RT_UNLIKELY(c-- <= 0))
1383 {
1384 iPrev = 0;
1385 break;
1386 }
1387
1388 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1389 break;
1390 iPrev = pGVMM->aHandles[iPrev].iNext;
1391 }
1392 if (!iPrev)
1393 {
1394 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1395 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1396 gvmmR0CreateDestroyUnlock(pGVMM);
1397 return;
1398 }
1399
1400 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1401 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1402 }
1403 pHandle->iNext = 0;
1404 pGVMM->cVMs--;
1405
1406 /*
1407 * Do the global cleanup round.
1408 */
1409 PGVM pGVM = pHandle->pGVM;
1410 if ( RT_VALID_PTR(pGVM)
1411 && pGVM->u32Magic == GVM_MAGIC)
1412 {
1413 pGVMM->cEMTs -= pGVM->cCpus;
1414
1415 if (pGVM->pSession)
1416 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1417
1418 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1419
1420 gvmmR0CleanupVM(pGVM);
1421
1422 /*
1423 * Do the GVMM cleanup - must be done last.
1424 */
1425 /* The VM and VM pages mappings/allocations. */
1426 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1427 {
1428 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1429 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1430 }
1431
1432 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1433 {
1434 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1435 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1436 }
1437
1438 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1439 {
1440 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1441 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1442 }
1443
1444 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1445 {
1446 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1447 {
1448 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1449 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1450 }
1451 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1452 {
1453 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1454 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1455 }
1456 }
1457
1458 /* the GVM structure itself. */
1459 pGVM->u32Magic |= UINT32_C(0x80000000);
1460 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1461 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1462 pGVM = NULL;
1463
1464 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1465 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1466 AssertRC(rc);
1467 }
1468 /* else: GVMMR0CreateVM cleanup. */
1469
1470 /*
1471 * Free the handle.
1472 */
1473 pHandle->iNext = pGVMM->iFreeHead;
1474 pGVMM->iFreeHead = iHandle;
1475 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1476 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1477 ASMAtomicWriteNullPtr(&pHandle->pSession);
1478 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1479 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1480
1481 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1482 gvmmR0CreateDestroyUnlock(pGVMM);
1483 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1484}
1485
1486
1487/**
1488 * Registers the calling thread as the EMT of a Virtual CPU.
1489 *
1490 * Note that VCPU 0 is automatically registered during VM creation.
1491 *
1492 * @returns VBox status code
1493 * @param pGVM The global (ring-0) VM structure.
1494 * @param idCpu VCPU id to register the current thread as.
1495 */
1496GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1497{
1498 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1499
1500 /*
1501 * Validate the VM structure, state and handle.
1502 */
1503 PGVMM pGVMM;
1504 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1505 if (RT_SUCCESS(rc))
1506 {
1507 if (idCpu < pGVM->cCpus)
1508 {
1509 /* Check that the EMT isn't already assigned to a thread. */
1510 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1511 {
1512 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1513
1514 /* A thread may only be one EMT. */
1515 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1516 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1517 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1518 if (RT_SUCCESS(rc))
1519 {
1520 /*
1521 * Do the assignment, then try setup the hook. Undo if that fails.
1522 */
1523 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1524
1525 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1526 if (RT_SUCCESS(rc))
1527 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1528 else
1529 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1530 }
1531 }
1532 else
1533 rc = VERR_ACCESS_DENIED;
1534 }
1535 else
1536 rc = VERR_INVALID_CPU_ID;
1537 }
1538 return rc;
1539}
1540
1541
1542/**
1543 * Deregisters the calling thread as the EMT of a Virtual CPU.
1544 *
1545 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1546 *
1547 * @returns VBox status code
1548 * @param pGVM The global (ring-0) VM structure.
1549 * @param idCpu VCPU id to deregister the current thread from.
1550 */
1551GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1552{
1553 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1554
1555 /*
1556 * Validate the VM structure, state and handle.
1557 */
1558 PGVMM pGVMM;
1559 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1560 if (RT_SUCCESS(rc))
1561 {
1562 /*
1563 * Take the destruction lock and recheck the handle state to
1564 * prevent racing GVMMR0DestroyVM.
1565 */
1566 gvmmR0CreateDestroyLock(pGVMM);
1567 uint32_t hSelf = pGVM->hSelf;
1568 ASMCompilerBarrier();
1569 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1570 && pGVMM->aHandles[hSelf].pvObj != NULL
1571 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1572 {
1573 /*
1574 * Do per-EMT cleanups.
1575 */
1576 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1577
1578 /*
1579 * Invalidate hEMT. We don't use NIL here as that would allow
1580 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1581 */
1582 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1583 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1584 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1585 }
1586
1587 gvmmR0CreateDestroyUnlock(pGVMM);
1588 }
1589 return rc;
1590}
1591
1592
1593/**
1594 * Looks up a GVM structure by its handle.
1595 *
1596 * @returns The GVM pointer on success, NULL on failure.
1597 * @param hGVM The global VM handle. Asserts on bad handle.
1598 */
1599GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1600{
1601 PGVMM pGVMM;
1602 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1603
1604 /*
1605 * Validate.
1606 */
1607 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1608 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1609
1610 /*
1611 * Look it up.
1612 */
1613 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1614 AssertPtrReturn(pHandle->pvObj, NULL);
1615 PGVM pGVM = pHandle->pGVM;
1616 AssertPtrReturn(pGVM, NULL);
1617
1618 return pGVM;
1619}
1620
1621
1622/**
1623 * Check that the given GVM and VM structures match up.
1624 *
1625 * The calling thread must be in the same process as the VM. All current lookups
1626 * are by threads inside the same process, so this will not be an issue.
1627 *
1628 * @returns VBox status code.
1629 * @param pGVM The global (ring-0) VM structure.
1630 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1631 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1632 * shared mode when requested.
1633 *
1634 * Be very careful if not taking the lock as it's
1635 * possible that the VM will disappear then!
1636 *
1637 * @remark This will not assert on an invalid pGVM but try to return silently.
1638 */
1639static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1640{
1641 /*
1642 * Check the pointers.
1643 */
1644 int rc;
1645 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1646 && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
1647 {
1648 /*
1649 * Get the pGVMM instance and check the VM handle.
1650 */
1651 PGVMM pGVMM;
1652 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1653
1654 uint16_t hGVM = pGVM->hSelf;
1655 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1656 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1657 {
1658 RTPROCESS const pidSelf = RTProcSelf();
1659 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1660 if (fTakeUsedLock)
1661 {
1662 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1663 AssertRCReturn(rc, rc);
1664 }
1665
1666 if (RT_LIKELY( pHandle->pGVM == pGVM
1667 && pHandle->ProcId == pidSelf
1668 && RT_VALID_PTR(pHandle->pvObj)))
1669 {
1670 /*
1671 * Some more VM data consistency checks.
1672 */
1673 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1674 && pGVM->hSelfUnsafe == hGVM
1675 && pGVM->pSelf == pGVM))
1676 {
1677 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1678 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1679 {
1680 *ppGVMM = pGVMM;
1681 return VINF_SUCCESS;
1682 }
1683 rc = VERR_INCONSISTENT_VM_HANDLE;
1684 }
1685 else
1686 rc = VERR_INCONSISTENT_VM_HANDLE;
1687 }
1688 else
1689 rc = VERR_INVALID_VM_HANDLE;
1690
1691 if (fTakeUsedLock)
1692 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1693 }
1694 else
1695 rc = VERR_INVALID_VM_HANDLE;
1696 }
1697 else
1698 rc = VERR_INVALID_POINTER;
1699 return rc;
1700}
1701
1702
1703/**
1704 * Validates a GVM/VM pair.
1705 *
1706 * @returns VBox status code.
1707 * @param pGVM The global (ring-0) VM structure.
1708 */
1709GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1710{
1711 PGVMM pGVMM;
1712 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1713}
1714
1715
1716/**
1717 * Check that the given GVM and VM structures match up.
1718 *
1719 * The calling thread must be in the same process as the VM. All current lookups
1720 * are by threads inside the same process, so this will not be an issue.
1721 *
1722 * @returns VBox status code.
1723 * @param pGVM The global (ring-0) VM structure.
1724 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1725 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1726 * @thread EMT
1727 *
1728 * @remarks This will assert in all failure paths.
1729 */
1730static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1731{
1732 /*
1733 * Check the pointers.
1734 */
1735 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1736 AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1737
1738 /*
1739 * Get the pGVMM instance and check the VM handle.
1740 */
1741 PGVMM pGVMM;
1742 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1743
1744 uint16_t hGVM = pGVM->hSelf;
1745 ASMCompilerBarrier();
1746 AssertReturn( hGVM != NIL_GVM_HANDLE
1747 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1748
1749 RTPROCESS const pidSelf = RTProcSelf();
1750 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1751 AssertReturn( pHandle->pGVM == pGVM
1752 && pHandle->ProcId == pidSelf
1753 && RT_VALID_PTR(pHandle->pvObj),
1754 VERR_INVALID_HANDLE);
1755
1756 /*
1757 * Check the EMT claim.
1758 */
1759 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1760 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1761 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1762
1763 /*
1764 * Some more VM data consistency checks.
1765 */
1766 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1767 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1768 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1769 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1770
1771 *ppGVMM = pGVMM;
1772 return VINF_SUCCESS;
1773}
1774
1775
1776/**
1777 * Validates a GVM/EMT pair.
1778 *
1779 * @returns VBox status code.
1780 * @param pGVM The global (ring-0) VM structure.
1781 * @param idCpu The Virtual CPU ID of the calling EMT.
1782 * @thread EMT(idCpu)
1783 */
1784GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
1785{
1786 PGVMM pGVMM;
1787 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1788}
1789
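/*
 * A minimal usage sketch (not part of the build), assuming a hypothetical ring-0
 * operation entered on an EMT: validate the GVM/EMT pair first, then touch the
 * per-VCPU data. The function name and body are illustrative only.
 */
#if 0
static int exampleR0OperationOnEmt(PGVM pGVM, VMCPUID idCpu)
{
    int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
    if (RT_FAILURE(rc))
        return rc;                              /* the validator already asserted */
    PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];       /* safe: idCpu < pGVM->cCpus was checked */
    NOREF(pGVCpu);
    return VINF_SUCCESS;
}
#endif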
1790
1791/**
1792 * Looks up the VM belonging to the specified EMT thread.
1793 *
1794 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1795 * unnecessary kernel panics when the EMT thread hits an assertion. The
1796 * caller may or may not be an EMT thread.
1797 *
1798 * @returns Pointer to the VM on success, NULL on failure.
1799 * @param hEMT The native thread handle of the EMT.
1800 * NIL_RTNATIVETHREAD means the current thread.
1801 */
1802GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1803{
1804 /*
1805 * No Assertions here as we're usually called in a AssertMsgN or
1806 * RTAssert* context.
1807 */
1808 PGVMM pGVMM = g_pGVMM;
1809 if ( !RT_VALID_PTR(pGVMM)
1810 || pGVMM->u32Magic != GVMM_MAGIC)
1811 return NULL;
1812
1813 if (hEMT == NIL_RTNATIVETHREAD)
1814 hEMT = RTThreadNativeSelf();
1815 RTPROCESS ProcId = RTProcSelf();
1816
1817 /*
1818 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1819 */
1820/** @todo introduce some pid hash table here, please. */
1821 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1822 {
1823 if ( pGVMM->aHandles[i].iSelf == i
1824 && pGVMM->aHandles[i].ProcId == ProcId
1825 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1826 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1827 {
1828 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1829 return pGVMM->aHandles[i].pGVM;
1830
1831 /* This is fairly safe with the current process per VM approach. */
1832 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1833 VMCPUID const cCpus = pGVM->cCpus;
1834 ASMCompilerBarrier();
1835 if ( cCpus < 1
1836 || cCpus > VMM_MAX_CPU_COUNT)
1837 continue;
1838 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1839 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1840 return pGVMM->aHandles[i].pGVM;
1841 }
1842 }
1843 return NULL;
1844}
1845
1846
1847/**
1848 * Looks up the GVMCPU belonging to the specified EMT thread.
1849 *
1850 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1851 * unnecessary kernel panics when the EMT thread hits an assertion. The
1852 * caller may or may not be an EMT thread.
1853 *
1854 * @returns Pointer to the VCPU on success, NULL on failure.
1855 * @param hEMT The native thread handle of the EMT.
1856 * NIL_RTNATIVETHREAD means the current thread.
1857 */
1858GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
1859{
1860 /*
1861 * No Assertions here as we're usually called in a AssertMsgN,
1862 * RTAssert*, Log and LogRel contexts.
1863 */
1864 PGVMM pGVMM = g_pGVMM;
1865 if ( !RT_VALID_PTR(pGVMM)
1866 || pGVMM->u32Magic != GVMM_MAGIC)
1867 return NULL;
1868
1869 if (hEMT == NIL_RTNATIVETHREAD)
1870 hEMT = RTThreadNativeSelf();
1871 RTPROCESS ProcId = RTProcSelf();
1872
1873 /*
1874 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1875 */
1876/** @todo introduce some pid hash table here, please. */
1877 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1878 {
1879 if ( pGVMM->aHandles[i].iSelf == i
1880 && pGVMM->aHandles[i].ProcId == ProcId
1881 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1882 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1883 {
1884 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1885 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1886 return &pGVM->aCpus[0];
1887
1888 /* This is fairly safe with the current process per VM approach. */
1889 VMCPUID const cCpus = pGVM->cCpus;
1890 ASMCompilerBarrier();
1892 if ( cCpus < 1
1893 || cCpus > VMM_MAX_CPU_COUNT)
1894 continue;
1895 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1896 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1897 return &pGVM->aCpus[idCpu];
1898 }
1899 }
1900 return NULL;
1901}
1902
1903
1904/**
1905 * This will wake up expired and soon-to-be expired VMs.
1906 *
1907 * @returns Number of VMs that have been woken up.
1908 * @param pGVMM Pointer to the GVMM instance data.
1909 * @param u64Now The current time.
1910 */
1911static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1912{
1913 /*
1914 * Skip this if we've been disabled because of high resolution wakeups or by
1915 * the user.
1916 */
1917 if (!pGVMM->fDoEarlyWakeUps)
1918 return 0;
1919
1920/** @todo Rewrite this algorithm. See performance defect XYZ. */
1921
1922 /*
1923 * A cheap optimization to stop wasting so much time here on big setups.
1924 */
1925 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1926 if ( pGVMM->cHaltedEMTs == 0
1927 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1928 return 0;
1929
1930 /*
1931 * Only one thread doing this at a time.
1932 */
1933 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
1934 return 0;
1935
1936 /*
1937 * The first pass will wake up VMs which have actually expired
1938 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1939 */
1940 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1941 uint64_t u64Min = UINT64_MAX;
1942 unsigned cWoken = 0;
1943 unsigned cHalted = 0;
1944 unsigned cTodo2nd = 0;
1945 unsigned cTodo3rd = 0;
1946 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1947 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1948 i = pGVMM->aHandles[i].iNext)
1949 {
1950 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1951 if ( RT_VALID_PTR(pCurGVM)
1952 && pCurGVM->u32Magic == GVM_MAGIC)
1953 {
1954 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1955 {
1956 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1957 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1958 if (u64)
1959 {
1960 if (u64 <= u64Now)
1961 {
1962 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1963 {
1964 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1965 AssertRC(rc);
1966 cWoken++;
1967 }
1968 }
1969 else
1970 {
1971 cHalted++;
1972 if (u64 <= uNsEarlyWakeUp1)
1973 cTodo2nd++;
1974 else if (u64 <= uNsEarlyWakeUp2)
1975 cTodo3rd++;
1976 else if (u64 < u64Min)
1977 u64Min = u64;
1978 }
1979 }
1980 }
1981 }
1982 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1983 }
1984
1985 if (cTodo2nd)
1986 {
1987 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1988 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1989 i = pGVMM->aHandles[i].iNext)
1990 {
1991 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1992 if ( RT_VALID_PTR(pCurGVM)
1993 && pCurGVM->u32Magic == GVM_MAGIC)
1994 {
1995 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1996 {
1997 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1998 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1999 if ( u64
2000 && u64 <= uNsEarlyWakeUp1)
2001 {
2002 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2003 {
2004 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2005 AssertRC(rc);
2006 cWoken++;
2007 }
2008 }
2009 }
2010 }
2011 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2012 }
2013 }
2014
2015 if (cTodo3rd)
2016 {
2017 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2018 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2019 i = pGVMM->aHandles[i].iNext)
2020 {
2021 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2022 if ( RT_VALID_PTR(pCurGVM)
2023 && pCurGVM->u32Magic == GVM_MAGIC)
2024 {
2025 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2026 {
2027 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2028 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2029 if ( u64
2030 && u64 <= uNsEarlyWakeUp2)
2031 {
2032 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2033 {
2034 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2035 AssertRC(rc);
2036 cWoken++;
2037 }
2038 }
2039 }
2040 }
2041 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2042 }
2043 }
2044
2045 /*
2046 * Set the minimum value.
2047 */
2048 pGVMM->uNsNextEmtWakeup = u64Min;
2049
2050 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2051 return cWoken;
2052}
2053
2054
2055/**
2056 * Halt the EMT thread.
2057 *
2058 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2059 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2060 * @param pGVM The global (ring-0) VM structure.
2061 * @param pGVCpu The global (ring-0) CPU structure of the calling
2062 * EMT.
2063 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2064 * @thread EMT(pGVCpu).
2065 */
2066GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2067{
2068 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2069 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2070 GVMM_CHECK_SMAP_SETUP();
2071 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2072
2073 PGVMM pGVMM;
2074 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2075
2076 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2077 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2078
2079 /*
2080 * If we're doing early wake-ups, we must take the UsedList lock before we
2081 * start querying the current time.
2082 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2083 */
2084 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2085 if (fDoEarlyWakeUps)
2086 {
2087 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2088 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2089 }
2090
2091 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2092
2093 /* GIP hack: We might be frequently sleeping for short intervals where the
2094 difference between GIP and system time matters on systems with high resolution
2095 system time. So, convert the input from GIP to System time in that case. */
2096 Assert(ASMGetFlags() & X86_EFL_IF);
2097 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2098 const uint64_t u64NowGip = RTTimeNanoTS();
2099 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2100
2101 if (fDoEarlyWakeUps)
2102 {
2103 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2104 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2105 }
2106
2107 /*
2108 * Go to sleep if we must...
2109 * Cap the sleep time to 1 second to be on the safe side.
2110 */
2111 int rc;
2112 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2113 if ( u64NowGip < u64ExpireGipTime
2114 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2115 ? pGVMM->nsMinSleepCompany
2116 : pGVMM->nsMinSleepAlone))
2117 {
2118 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2119 if (cNsInterval > RT_NS_1SEC)
2120 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2121 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2122 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2123 if (fDoEarlyWakeUps)
2124 {
2125 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2126 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2127 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2128 }
2129 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2130
2131 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2132 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2133 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2134 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2135
2136 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2137 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2138
2139 /* Reset the semaphore to try to prevent a few false wake-ups. */
2140 if (rc == VINF_SUCCESS)
2141 {
2142 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2143 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2144 }
2145 else if (rc == VERR_TIMEOUT)
2146 {
2147 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2148 rc = VINF_SUCCESS;
2149 }
2150 }
2151 else
2152 {
2153 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2154 if (fDoEarlyWakeUps)
2155 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2156 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2157 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2158 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2159 rc = VINF_SUCCESS;
2160 }
2161
2162 return rc;
2163}
2164
2165
2166/**
2167 * Halt the EMT thread.
2168 *
2169 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2170 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2171 * @param pGVM The global (ring-0) VM structure.
2172 * @param idCpu The Virtual CPU ID of the calling EMT.
2173 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2174 * @thread EMT(idCpu).
2175 */
2176GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2177{
2178 GVMM_CHECK_SMAP_SETUP();
2179 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2180 PGVMM pGVMM;
2181 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2182 if (RT_SUCCESS(rc))
2183 {
2184 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2185 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2186 }
2187 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2188 return rc;
2189}
2190
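/*
 * A minimal sketch (not part of the build) of halting the calling EMT for up to
 * one millisecond, assuming pGVM/pGVCpu come from the usual VMMR0 entry path.
 * The helper name is illustrative only; u64ExpireGipTime is an absolute GIP time.
 */
#if 0
static int exampleHaltForOneMillisecond(PGVM pGVM, PGVMCPU pGVCpu)
{
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + RT_NS_1MS;
    int rc = GVMMR0SchedHalt(pGVM, pGVCpu, u64ExpireGipTime);
    /* VINF_SUCCESS covers both timeout and an explicit wake-up;
       VERR_INTERRUPTED means a signal is pending for the thread. */
    return rc;
}
#endif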
2191
2192
2193/**
2194 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2195 * a sleeping EMT.
2196 *
2197 * @retval VINF_SUCCESS if successfully woken up.
2198 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2199 *
2200 * @param pGVM The global (ring-0) VM structure.
2201 * @param pGVCpu The global (ring-0) VCPU structure.
2202 */
2203DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2204{
2205 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2206
2207 /*
2208 * Signal the semaphore regardless of whether it's currently blocked on it.
2209 *
2210 * The reason for this is that there is absolutely no way we can be 100%
2211 * certain that it isn't *about* to go to sleep on it and just got
2212 * delayed a bit en route. So, we will always signal the semaphore when
2213 * it is flagged as halted in the VMM.
2214 */
2215/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2216 int rc;
2217 if (pGVCpu->gvmm.s.u64HaltExpire)
2218 {
2219 rc = VINF_SUCCESS;
2220 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2221 }
2222 else
2223 {
2224 rc = VINF_GVM_NOT_BLOCKED;
2225 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2226 }
2227
2228 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2229 AssertRC(rc2);
2230
2231 return rc;
2232}
2233
2234
2235/**
2236 * Wakes up the halted EMT thread so it can service a pending request.
2237 *
2238 * @returns VBox status code.
2239 * @retval VINF_SUCCESS if successfully woken up.
2240 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2241 *
2242 * @param pGVM The global (ring-0) VM structure.
2243 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2244 * @param fTakeUsedLock Take the used lock or not
2245 * @thread Any but EMT(idCpu).
2246 */
2247GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2248{
2249 GVMM_CHECK_SMAP_SETUP();
2250 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2251
2252 /*
2253 * Validate input and take the UsedLock.
2254 */
2255 PGVMM pGVMM;
2256 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2257 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2258 if (RT_SUCCESS(rc))
2259 {
2260 if (idCpu < pGVM->cCpus)
2261 {
2262 /*
2263 * Do the actual job.
2264 */
2265 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2266 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2267
2268 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2269 {
2270 /*
2271 * While we're here, do a round of scheduling.
2272 */
2273 Assert(ASMGetFlags() & X86_EFL_IF);
2274 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2275 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2276 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2277 }
2278 }
2279 else
2280 rc = VERR_INVALID_CPU_ID;
2281
2282 if (fTakeUsedLock)
2283 {
2284 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2285 AssertRC(rc2);
2286 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2287 }
2288 }
2289
2290 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2291 return rc;
2292}
2293
2294
2295/**
2296 * Wakes up the halted EMT thread so it can service a pending request.
2297 *
2298 * @returns VBox status code.
2299 * @retval VINF_SUCCESS if successfully woken up.
2300 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2301 *
2302 * @param pGVM The global (ring-0) VM structure.
2303 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2304 * @thread Any but EMT(idCpu).
2305 */
2306GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2307{
2308 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2309}
2310
2311
2312/**
2313 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2314 * parameter and no used locking.
2315 *
2316 * @returns VBox status code.
2317 * @retval VINF_SUCCESS if successfully woken up.
2318 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2319 *
2320 * @param pGVM The global (ring-0) VM structure.
2321 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2322 * @thread Any but EMT(idCpu).
2323 * @deprecated Don't use in new code if possible! Use the GVM variant.
2324 */
2325GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2326{
2327 GVMM_CHECK_SMAP_SETUP();
2328 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2329 PGVMM pGVMM;
2330 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2331 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2332 if (RT_SUCCESS(rc))
2333 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2334 return rc;
2335}
2336
2337
2338/**
2339 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2340 * the Virtual CPU if it's still busy executing guest code.
2341 *
2342 * @returns VBox status code.
2343 * @retval VINF_SUCCESS if poked successfully.
2344 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2345 *
2346 * @param pGVM The global (ring-0) VM structure.
2347 * @param pVCpu The cross context virtual CPU structure.
2348 */
2349DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2350{
2351 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2352
2353 RTCPUID idHostCpu = pVCpu->idHostCpu;
2354 if ( idHostCpu == NIL_RTCPUID
2355 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2356 {
2357 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2358 return VINF_GVM_NOT_BUSY_IN_GC;
2359 }
2360
2361 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2362 RTMpPokeCpu(idHostCpu);
2363 return VINF_SUCCESS;
2364}
2365
2366
2367/**
2368 * Pokes an EMT if it's still busy running guest code.
2369 *
2370 * @returns VBox status code.
2371 * @retval VINF_SUCCESS if poked successfully.
2372 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2373 *
2374 * @param pGVM The global (ring-0) VM structure.
2375 * @param idCpu The ID of the virtual CPU to poke.
2376 * @param fTakeUsedLock Take the used lock or not
2377 */
2378GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2379{
2380 /*
2381 * Validate input and take the UsedLock.
2382 */
2383 PGVMM pGVMM;
2384 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2385 if (RT_SUCCESS(rc))
2386 {
2387 if (idCpu < pGVM->cCpus)
2388 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2389 else
2390 rc = VERR_INVALID_CPU_ID;
2391
2392 if (fTakeUsedLock)
2393 {
2394 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2395 AssertRC(rc2);
2396 }
2397 }
2398
2399 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2400 return rc;
2401}
2402
2403
2404/**
2405 * Pokes an EMT if it's still busy running guest code.
2406 *
2407 * @returns VBox status code.
2408 * @retval VINF_SUCCESS if poked successfully.
2409 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2410 *
2411 * @param pGVM The global (ring-0) VM structure.
2412 * @param idCpu The ID of the virtual CPU to poke.
2413 */
2414GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2415{
2416 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2417}
2418
2419
2420/**
2421 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2422 * used locking.
2423 *
2424 * @returns VBox status code.
2425 * @retval VINF_SUCCESS if poked successfully.
2426 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2427 *
2428 * @param pGVM The global (ring-0) VM structure.
2429 * @param idCpu The ID of the virtual CPU to poke.
2430 *
2431 * @deprecated Don't use in new code if possible! Use the GVM variant.
2432 */
2433GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2434{
2435 PGVMM pGVMM;
2436 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2437 if (RT_SUCCESS(rc))
2438 {
2439 if (idCpu < pGVM->cCpus)
2440 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2441 else
2442 rc = VERR_INVALID_CPU_ID;
2443 }
2444 return rc;
2445}
2446
2447
2448/**
2449 * Wakes up a set of halted EMT threads so they can service pending requests.
2450 *
2451 * @returns VBox status code, no informational stuff.
2452 *
2453 * @param pGVM The global (ring-0) VM structure.
2454 * @param pSleepSet The set of sleepers to wake up.
2455 * @param pPokeSet The set of CPUs to poke.
2456 */
2457GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2458{
2459 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2460 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2461 GVMM_CHECK_SMAP_SETUP();
2462 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2463 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2464
2465 /*
2466 * Validate input and take the UsedLock.
2467 */
2468 PGVMM pGVMM;
2469 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2470 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2471 if (RT_SUCCESS(rc))
2472 {
2473 rc = VINF_SUCCESS;
2474 VMCPUID idCpu = pGVM->cCpus;
2475 while (idCpu-- > 0)
2476 {
2477 /* Don't try to poke or wake up ourselves. */
2478 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2479 continue;
2480
2481 /* just ignore errors for now. */
2482 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2483 {
2484 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2485 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2486 }
2487 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2488 {
2489 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2490 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2491 }
2492 }
2493
2494 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2495 AssertRC(rc2);
2496 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2497 }
2498
2499 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2500 return rc;
2501}
2502
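/*
 * A minimal calling sketch (not part of the build), assuming the VMCPUSET_EMPTY
 * and VMCPUSET_ADD helpers from VBox/vmm/vmcpuset.h; the idCpu values and the
 * surrounding locals are illustrative only.
 */
#if 0
    VMCPUSET SleepSet, PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    VMCPUSET_ADD(&SleepSet, 1 /*idCpu*/);       /* wake a halted EMT */
    VMCPUSET_ADD(&PokeSet,  2 /*idCpu*/);       /* poke an EMT executing guest code */
    int rc = GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
#endif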
2503
2504/**
2505 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2506 *
2507 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2508 * @param pGVM The global (ring-0) VM structure.
2509 * @param pReq Pointer to the request packet.
2510 */
2511GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2512{
2513 /*
2514 * Validate input and pass it on.
2515 */
2516 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2517 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2518
2519 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2520}
2521
2522
2523
2524/**
2525 * Poll the schedule to see if someone else should get a chance to run.
2526 *
2527 * This is a bit hackish and will not work too well if the machine is
2528 * under heavy load from non-VM processes.
2529 *
2530 * @returns VINF_SUCCESS if not yielded.
2531 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2532 * @param pGVM The global (ring-0) VM structure.
2533 * @param idCpu The Virtual CPU ID of the calling EMT.
2534 * @param fYield Whether to yield or not.
2535 * This is for when we're spinning in the halt loop.
2536 * @thread EMT(idCpu).
2537 */
2538GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2539{
2540 /*
2541 * Validate input.
2542 */
2543 PGVMM pGVMM;
2544 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2545 if (RT_SUCCESS(rc))
2546 {
2547 /*
2548 * We currently only implement helping with wake-ups (fYield = false), so don't
2549 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2550 */
2551 if (!fYield && pGVMM->fDoEarlyWakeUps)
2552 {
2553 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2554 pGVM->gvmm.s.StatsSched.cPollCalls++;
2555
2556 Assert(ASMGetFlags() & X86_EFL_IF);
2557 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2558
2559 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2560
2561 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2562 }
2563 /*
2564 * Not quite sure what we could do here...
2565 */
2566 else if (fYield)
2567 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2568 else
2569 rc = VINF_SUCCESS;
2570 }
2571
2572 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2573 return rc;
2574}
2575
2576
2577#ifdef GVMM_SCHED_WITH_PPT
2578/**
2579 * Timer callback for the periodic preemption timer.
2580 *
2581 * @param pTimer The timer handle.
2582 * @param pvUser Pointer to the per cpu structure.
2583 * @param iTick The current tick.
2584 */
2585static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2586{
2587 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2588 NOREF(pTimer); NOREF(iTick);
2589
2590 /*
2591 * Termination check
2592 */
2593 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2594 return;
2595
2596 /*
2597 * Do the housekeeping.
2598 */
2599 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2600
2601 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2602 {
2603 /*
2604 * Historicize the max frequency.
2605 */
2606 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2607 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2608 pCpu->Ppt.iTickHistorization = 0;
2609 pCpu->Ppt.uDesiredHz = 0;
2610
2611 /*
2612 * Check if the current timer frequency needs adjusting.
2613 */
2614 uint32_t uHistMaxHz = 0;
2615 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2616 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2617 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2618 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2619 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2620 else if (uHistMaxHz)
2621 {
2622 /*
2623 * Reprogram it.
2624 */
2625 pCpu->Ppt.cChanges++;
2626 pCpu->Ppt.iTickHistorization = 0;
2627 pCpu->Ppt.uTimerHz = uHistMaxHz;
2628 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2629 pCpu->Ppt.cNsInterval = cNsInterval;
2630 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2631 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2632 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2633 / cNsInterval;
2634 else
2635 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2636 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2637
2638 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2639 RTTimerChangeInterval(pTimer, cNsInterval);
2640 }
2641 else
2642 {
2643 /*
2644 * Stop it.
2645 */
2646 pCpu->Ppt.fStarted = false;
2647 pCpu->Ppt.uTimerHz = 0;
2648 pCpu->Ppt.cNsInterval = 0;
2649 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2650
2651 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2652 RTTimerStop(pTimer);
2653 }
2654 }
2655 else
2656 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2657}
2658#endif /* GVMM_SCHED_WITH_PPT */
2659
2660
2661/**
2662 * Updates the periodic preemption timer for the calling CPU.
2663 *
2664 * The caller must have disabled preemption!
2665 * The caller must check that the host can do high resolution timers.
2666 *
2667 * @param pGVM The global (ring-0) VM structure.
2668 * @param idHostCpu The current host CPU id.
2669 * @param uHz The desired frequency.
2670 */
2671GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
2672{
2673 NOREF(pGVM);
2674#ifdef GVMM_SCHED_WITH_PPT
2675 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2676 Assert(RTTimerCanDoHighResolution());
2677
2678 /*
2679 * Resolve the per CPU data.
2680 */
2681 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2682 PGVMM pGVMM = g_pGVMM;
2683 if ( !RT_VALID_PTR(pGVMM)
2684 || pGVMM->u32Magic != GVMM_MAGIC)
2685 return;
2686 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2687 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2688 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2689 && pCpu->idCpu == idHostCpu,
2690 ("u32Magic=%#x idCpu=%d idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2691
2692 /*
2693 * Check whether we need to do anything about the timer.
2694 * We have to be a little bit careful since we might be racing the timer
2695 * callback here.
2696 */
2697 if (uHz > 16384)
2698 uHz = 16384; /** @todo add a query method for this! */
2699 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2700 && uHz >= pCpu->Ppt.uMinHz
2701 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2702 {
2703 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2704
2705 pCpu->Ppt.uDesiredHz = uHz;
2706 uint32_t cNsInterval = 0;
2707 if (!pCpu->Ppt.fStarted)
2708 {
2709 pCpu->Ppt.cStarts++;
2710 pCpu->Ppt.fStarted = true;
2711 pCpu->Ppt.fStarting = true;
2712 pCpu->Ppt.iTickHistorization = 0;
2713 pCpu->Ppt.uTimerHz = uHz;
2714 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2715 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2716 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2717 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2718 / cNsInterval;
2719 else
2720 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2721 }
2722
2723 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2724
2725 if (cNsInterval)
2726 {
2727 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2728 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2729 AssertRC(rc);
2730
2731 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2732 if (RT_FAILURE(rc))
2733 pCpu->Ppt.fStarted = false;
2734 pCpu->Ppt.fStarting = false;
2735 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2736 }
2737 }
2738#else /* !GVMM_SCHED_WITH_PPT */
2739 NOREF(idHostCpu); NOREF(uHz);
2740#endif /* !GVMM_SCHED_WITH_PPT */
2741}
2742
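/*
 * A minimal calling sketch (not part of the build), assuming the caller already
 * has a valid pGVM and a desired frequency uHz; the preemption handling and the
 * high resolution check mirror the documented preconditions above.
 */
#if 0
    RTTHREADPREEMPTSTATE PreemptState = RTTHREADPREEMPTSTATE_INITIALIZER;
    RTThreadPreemptDisable(&PreemptState);
    if (RTTimerCanDoHighResolution())
        GVMMR0SchedUpdatePeriodicPreemptionTimer(pGVM, RTMpCpuId(), uHz);
    RTThreadPreemptRestore(&PreemptState);
#endif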
2743
2744/**
2745 * Calls @a pfnCallback for each VM in the system.
2746 *
2747 * This will enumerate the VMs while holding the global VM used list lock in
2748 * shared mode. So, only suitable for simple work. If more expensive work
2749 * needs doing, a different approach must be taken as using this API would
2750 * otherwise block VM creation and destruction.
2751 *
2752 * @returns VBox status code.
2753 * @param pfnCallback The callback function.
2754 * @param pvUser User argument to the callback.
2755 */
2756GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
2757{
2758 PGVMM pGVMM;
2759 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2760
2761 int rc = VINF_SUCCESS;
2762 GVMMR0_USED_SHARED_LOCK(pGVMM);
2763 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
2764 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2765 i = pGVMM->aHandles[i].iNext, cLoops++)
2766 {
2767 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2768 if ( RT_VALID_PTR(pGVM)
2769 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2770 && pGVM->u32Magic == GVM_MAGIC)
2771 {
2772 rc = pfnCallback(pGVM, pvUser);
2773 if (rc != VINF_SUCCESS)
2774 break;
2775 }
2776
2777 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
2778 }
2779 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2780 return rc;
2781}
2782
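/*
 * A minimal enumeration callback sketch (not part of the build); the signature
 * is inferred from the pfnCallback(pGVM, pvUser) invocation above, and the
 * callback and counter names are illustrative only.
 */
#if 0
static DECLCALLBACK(int) exampleCountEmtsCallback(PGVM pGVM, void *pvUser)
{
    *(uint32_t *)pvUser += pGVM->cCpus;
    return VINF_SUCCESS;                        /* any other status stops the enumeration */
}
/* Usage: uint32_t cEmts = 0; int rc = GVMMR0EnumVMs(exampleCountEmtsCallback, &cEmts); */
#endif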
2783
2784/**
2785 * Retrieves the GVMM statistics visible to the caller.
2786 *
2787 * @returns VBox status code.
2788 *
2789 * @param pStats Where to put the statistics.
2790 * @param pSession The current session.
2791 * @param pGVM The GVM to obtain statistics for. Optional.
2792 */
2793GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2794{
2795 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2796
2797 /*
2798 * Validate input.
2799 */
2800 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2801 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2802 pStats->cVMs = 0; /* (crash before taking the sem...) */
2803
2804 /*
2805 * Take the lock and get the VM statistics.
2806 */
2807 PGVMM pGVMM;
2808 if (pGVM)
2809 {
2810 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2811 if (RT_FAILURE(rc))
2812 return rc;
2813 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2814 }
2815 else
2816 {
2817 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2818 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2819
2820 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2821 AssertRCReturn(rc, rc);
2822 }
2823
2824 /*
2825 * Enumerate the VMs and add the ones visible to the statistics.
2826 */
2827 pStats->cVMs = 0;
2828 pStats->cEMTs = 0;
2829 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2830
2831 for (unsigned i = pGVMM->iUsedHead;
2832 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2833 i = pGVMM->aHandles[i].iNext)
2834 {
2835 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2836 void *pvObj = pGVMM->aHandles[i].pvObj;
2837 if ( RT_VALID_PTR(pvObj)
2838 && RT_VALID_PTR(pOtherGVM)
2839 && pOtherGVM->u32Magic == GVM_MAGIC
2840 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2841 {
2842 pStats->cVMs++;
2843 pStats->cEMTs += pOtherGVM->cCpus;
2844
2845 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2846 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2847 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2848 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2849 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2850
2851 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2852 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2853 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2854
2855 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2856 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2857
2858 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2859 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2860 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2861 }
2862 }
2863
2864 /*
2865 * Copy out the per host CPU statistics.
2866 */
2867 uint32_t iDstCpu = 0;
2868 uint32_t cSrcCpus = pGVMM->cHostCpus;
2869 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2870 {
2871 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2872 {
2873 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2874 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2875#ifdef GVMM_SCHED_WITH_PPT
2876 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2877 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2878 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2879 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2880#else
2881 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2882 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2883 pStats->aHostCpus[iDstCpu].cChanges = 0;
2884 pStats->aHostCpus[iDstCpu].cStarts = 0;
2885#endif
2886 iDstCpu++;
2887 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2888 break;
2889 }
2890 }
2891 pStats->cHostCpus = iDstCpu;
2892
2893 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2894
2895 return VINF_SUCCESS;
2896}
2897
2898
2899/**
2900 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2901 *
2902 * @returns see GVMMR0QueryStatistics.
2903 * @param pGVM The global (ring-0) VM structure. Optional.
2904 * @param pReq Pointer to the request packet.
2905 * @param pSession The current session.
2906 */
2907GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2908{
2909 /*
2910 * Validate input and pass it on.
2911 */
2912 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2913 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2914 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2915
2916 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
2917}
2918
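/*
 * A minimal request-packet sketch (not part of the build), assuming the caller
 * already holds pGVM and pSession; only the fields validated above (Hdr.cbReq,
 * pSession) are filled in, and the logging line is illustrative only.
 */
#if 0
    GVMMQUERYSTATISTICSSREQ Req;
    RT_ZERO(Req);
    Req.Hdr.cbReq = sizeof(Req);
    Req.pSession  = pSession;
    int rc = GVMMR0QueryStatisticsReq(pGVM, &Req, pSession);
    if (RT_SUCCESS(rc))
        LogFlow(("cVMs=%u cEMTs=%u\n", Req.Stats.cVMs, Req.Stats.cEMTs));
#endif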
2919
2920/**
2921 * Resets the specified GVMM statistics.
2922 *
2923 * @returns VBox status code.
2924 *
2925 * @param pStats Which statistics to reset, that is, non-zero fields indicate which to reset.
2926 * @param pSession The current session.
2927 * @param pGVM The GVM to reset statistics for. Optional.
2928 */
2929GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2930{
2931 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2932
2933 /*
2934 * Validate input.
2935 */
2936 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2937 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2938
2939 /*
2940 * Take the lock and get the VM statistics.
2941 */
2942 PGVMM pGVMM;
2943 if (pGVM)
2944 {
2945 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2946 if (RT_FAILURE(rc))
2947 return rc;
2948# define MAYBE_RESET_FIELD(field) \
2949 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2950 MAYBE_RESET_FIELD(cHaltCalls);
2951 MAYBE_RESET_FIELD(cHaltBlocking);
2952 MAYBE_RESET_FIELD(cHaltTimeouts);
2953 MAYBE_RESET_FIELD(cHaltNotBlocking);
2954 MAYBE_RESET_FIELD(cHaltWakeUps);
2955 MAYBE_RESET_FIELD(cWakeUpCalls);
2956 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2957 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2958 MAYBE_RESET_FIELD(cPokeCalls);
2959 MAYBE_RESET_FIELD(cPokeNotBusy);
2960 MAYBE_RESET_FIELD(cPollCalls);
2961 MAYBE_RESET_FIELD(cPollHalts);
2962 MAYBE_RESET_FIELD(cPollWakeUps);
2963# undef MAYBE_RESET_FIELD
2964 }
2965 else
2966 {
2967 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2968
2969 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2970 AssertRCReturn(rc, rc);
2971 }
2972
2973 /*
2974 * Enumerate the VMs and reset the statistics of the ones visible to the caller.
2975 */
2976 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
2977 {
2978 for (unsigned i = pGVMM->iUsedHead;
2979 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2980 i = pGVMM->aHandles[i].iNext)
2981 {
2982 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2983 void *pvObj = pGVMM->aHandles[i].pvObj;
2984 if ( RT_VALID_PTR(pvObj)
2985 && RT_VALID_PTR(pOtherGVM)
2986 && pOtherGVM->u32Magic == GVM_MAGIC
2987 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2988 {
2989# define MAYBE_RESET_FIELD(field) \
2990 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2991 MAYBE_RESET_FIELD(cHaltCalls);
2992 MAYBE_RESET_FIELD(cHaltBlocking);
2993 MAYBE_RESET_FIELD(cHaltTimeouts);
2994 MAYBE_RESET_FIELD(cHaltNotBlocking);
2995 MAYBE_RESET_FIELD(cHaltWakeUps);
2996 MAYBE_RESET_FIELD(cWakeUpCalls);
2997 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2998 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2999 MAYBE_RESET_FIELD(cPokeCalls);
3000 MAYBE_RESET_FIELD(cPokeNotBusy);
3001 MAYBE_RESET_FIELD(cPollCalls);
3002 MAYBE_RESET_FIELD(cPollHalts);
3003 MAYBE_RESET_FIELD(cPollWakeUps);
3004# undef MAYBE_RESET_FIELD
3005 }
3006 }
3007 }
3008
3009 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3010
3011 return VINF_SUCCESS;
3012}
3013
3014
3015/**
3016 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3017 *
3018 * @returns see GVMMR0ResetStatistics.
3019 * @param pGVM The global (ring-0) VM structure. Optional.
3020 * @param pReq Pointer to the request packet.
3021 * @param pSession The current session.
3022 */
3023GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3024{
3025 /*
3026 * Validate input and pass it on.
3027 */
3028 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3029 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3030 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3031
3032 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3033}
3034