VirtualBox

source: vbox/trunk/src/VBox/VMM/VMMR0/GVMMR0.cpp@ 90438

Last change on this file since 90438 was 90379, checked in by vboxsync on 2021-07-28

VMM: Implementing blocking on critical sections in ring-0 HM context (actual code is disabled). bugref:6695

1/* $Id: GVMMR0.cpp 90379 2021-07-28 20:00:43Z vboxsync $ */
2/** @file
3 * GVMM - Global VM Manager.
4 */
5
6/*
7 * Copyright (C) 2007-2020 Oracle Corporation
8 *
9 * This file is part of VirtualBox Open Source Edition (OSE), as
10 * available from http://www.virtualbox.org. This file is free software;
11 * you can redistribute it and/or modify it under the terms of the GNU
12 * General Public License (GPL) as published by the Free Software
13 * Foundation, in version 2 as it comes in the "COPYING" file of the
14 * VirtualBox OSE distribution. VirtualBox OSE is distributed in the
15 * hope that it will be useful, but WITHOUT ANY WARRANTY of any kind.
16 */
17
18
19/** @page pg_gvmm GVMM - The Global VM Manager
20 *
21 * The Global VM Manager lives in ring-0. Its main function at the moment is
22 * to manage a list of all running VMs, keep a ring-0 only structure (GVM) for
23 * each of them, and assign them unique identifiers (so GMM can track page
24 * owners). The GVMM also manages some of the host CPU resources, like the
25 * periodic preemption timer.
26 *
27 * The GVMM will create a ring-0 object for each VM when it is registered; this
28 * is both for session cleanup purposes and for having a point where it is
29 * possible to implement usage policies later (in SUPR0ObjRegister).
30 *
31 *
32 * @section sec_gvmm_ppt Periodic Preemption Timer (PPT)
33 *
34 * On systems that sport a high resolution kernel timer API, we use per-CPU
35 * timers to generate interrupts that preempt VT-x, AMD-V and raw-mode guest
36 * execution. The timer frequency is calculated by taking the max
37 * TMCalcHostTimerFrequency for all VMs running on a CPU for the last ~160 ms
38 * (RT_ELEMENTS((PGVMMHOSTCPU)0, Ppt.aHzHistory) *
39 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS).
40 *
41 * The TMCalcHostTimerFrequency() part of things takes the max
42 * TMTimerSetFrequencyHint() value and adjusts it by the current catch-up percent,
43 * warp drive percent and some fudge factors. VMMR0.cpp reports the result via
44 * GVMMR0SchedUpdatePeriodicPreemptionTimer() before switching to the VT-x,
45 * AMD-V and raw-mode execution environments.
46 */
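
/* A worked example of the history window arithmetic described above; the names
 * are the GVMM ones from this file, only the numbers are spelled out here:
 *
 *      RT_ELEMENTS(aHzHistory) * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
 *    = 8 entries              * 20 000 000 ns (20 ms)
 *    = 160 000 000 ns, i.e. the ~160 ms sampling window mentioned above.
 *
 * The timer callback historizes uDesiredHz into aHzHistory once per interval
 * and the timer frequency follows the maximum over that window, so a burst of
 * high frequency hints keeps the timer fast for at most ~160 ms after the
 * hints stop.
 */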
47
48
49/*********************************************************************************************************************************
50* Header Files *
51*********************************************************************************************************************************/
52#define LOG_GROUP LOG_GROUP_GVMM
53#include <VBox/vmm/gvmm.h>
54#include <VBox/vmm/gmm.h>
55#include "GVMMR0Internal.h"
56#include <VBox/vmm/dbgf.h>
57#include <VBox/vmm/iom.h>
58#include <VBox/vmm/pdm.h>
59#include <VBox/vmm/pgm.h>
60#include <VBox/vmm/vmm.h>
61#ifdef VBOX_WITH_NEM_R0
62# include <VBox/vmm/nem.h>
63#endif
64#include <VBox/vmm/vmcpuset.h>
65#include <VBox/vmm/vmcc.h>
66#include <VBox/param.h>
67#include <VBox/err.h>
68
69#include <iprt/asm.h>
70#include <iprt/asm-amd64-x86.h>
71#include <iprt/critsect.h>
72#include <iprt/mem.h>
73#include <iprt/semaphore.h>
74#include <iprt/time.h>
75#include <VBox/log.h>
76#include <iprt/thread.h>
77#include <iprt/process.h>
78#include <iprt/param.h>
79#include <iprt/string.h>
80#include <iprt/assert.h>
81#include <iprt/mem.h>
82#include <iprt/memobj.h>
83#include <iprt/mp.h>
84#include <iprt/cpuset.h>
85#include <iprt/spinlock.h>
86#include <iprt/timer.h>
87
88#include "dtrace/VBoxVMM.h"
89
90
91/*********************************************************************************************************************************
92* Defined Constants And Macros *
93*********************************************************************************************************************************/
94#if defined(RT_OS_LINUX) || defined(RT_OS_SOLARIS) || defined(DOXYGEN_RUNNING)
95/** Define this to enable the periodic preemption timer. */
96# define GVMM_SCHED_WITH_PPT
97#endif
98
99
100/** @def GVMM_CHECK_SMAP_SETUP
101 * SMAP check setup. */
102/** @def GVMM_CHECK_SMAP_CHECK
103 * Checks that the AC flag is set if SMAP is enabled. If AC is not set,
104 * it will be logged and @a a_BadExpr is executed. */
105/** @def GVMM_CHECK_SMAP_CHECK2
106 * Checks that the AC flag is set if SMAP is enabled. If AC is not set, it will
107 * be logged, written to the VM's assertion text buffer, and @a a_BadExpr is
108 * executed. */
109#if (defined(VBOX_STRICT) || 1) && !defined(VBOX_WITH_RAM_IN_KERNEL)
110# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = SUPR0GetKernelFeatures()
111# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) \
112 do { \
113 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
114 { \
115 RTCCUINTREG fEflCheck = ASMGetFlags(); \
116 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
117 { /* likely */ } \
118 else \
119 { \
120 SUPR0Printf("%s, line %d: EFLAGS.AC is clear! (%#x)\n", __FUNCTION__, __LINE__, (uint32_t)fEflCheck); \
121 a_BadExpr; \
122 } \
123 } \
124 } while (0)
125# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) \
126 do { \
127 if (fKernelFeatures & SUPKERNELFEATURES_SMAP) \
128 { \
129 RTCCUINTREG fEflCheck = ASMGetFlags(); \
130 if (RT_LIKELY(fEflCheck & X86_EFL_AC)) \
131 { /* likely */ } \
132 else \
133 { \
134 SUPR0BadContext((a_pGVM) ? (a_pGVM)->pSession : NULL, __FILE__, __LINE__, "EFLAGS.AC is zero!"); \
135 a_BadExpr; \
136 } \
137 } \
138 } while (0)
139#else
140# define GVMM_CHECK_SMAP_SETUP() uint32_t const fKernelFeatures = 0
141# define GVMM_CHECK_SMAP_CHECK(a_BadExpr) NOREF(fKernelFeatures)
142# define GVMM_CHECK_SMAP_CHECK2(a_pGVM, a_BadExpr) NOREF(fKernelFeatures)
143#endif
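
/* A minimal usage sketch for the SMAP checks above. The function and the
 * choice of bad-expressions are illustrative only and not taken from this
 * file; RT_NOTHING expands to nothing, which makes that check log-only. */
#if 0
static int gvmmR0SmapUsageSketch(PGVM pGVM)
{
    GVMM_CHECK_SMAP_SETUP();                     /* caches SUPR0GetKernelFeatures() */
    GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);    /* log if EFLAGS.AC is clear on entry */
    /* ... calls that might clobber EFLAGS.AC ... */
    GVMM_CHECK_SMAP_CHECK2(pGVM, return VERR_INTERNAL_ERROR); /* log and bail out */
    return VINF_SUCCESS;
}
#endif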
144
145
146
147/*********************************************************************************************************************************
148* Structures and Typedefs *
149*********************************************************************************************************************************/
150
151/**
152 * Global VM handle.
153 */
154typedef struct GVMHANDLE
155{
156 /** The index of the next handle in the list (free or used). (0 is nil.) */
157 uint16_t volatile iNext;
158 /** Our own index / handle value. */
159 uint16_t iSelf;
160 /** The process ID of the handle owner.
161 * This is used for access checks. */
162 RTPROCESS ProcId;
163 /** The pointer to the ring-0 only (aka global) VM structure. */
164 PGVM pGVM;
165 /** The virtual machine object. */
166 void *pvObj;
167 /** The session this VM is associated with. */
168 PSUPDRVSESSION pSession;
169 /** The ring-0 handle of the EMT0 thread.
170 * This is used for ownership checks as well as looking up a VM handle by thread
171 * at times like assertions. */
172 RTNATIVETHREAD hEMT0;
173} GVMHANDLE;
174/** Pointer to a global VM handle. */
175typedef GVMHANDLE *PGVMHANDLE;
176
177/** Number of GVM handles (including the NIL handle). */
178#if HC_ARCH_BITS == 64
179# define GVMM_MAX_HANDLES 8192
180#else
181# define GVMM_MAX_HANDLES 128
182#endif
183
184/**
185 * Per host CPU GVMM data.
186 */
187typedef struct GVMMHOSTCPU
188{
189 /** Magic number (GVMMHOSTCPU_MAGIC). */
190 uint32_t volatile u32Magic;
191 /** The CPU ID. */
192 RTCPUID idCpu;
193 /** The CPU set index. */
194 uint32_t idxCpuSet;
195
196#ifdef GVMM_SCHED_WITH_PPT
197 /** Periodic preemption timer data. */
198 struct
199 {
200 /** The handle to the periodic preemption timer. */
201 PRTTIMER pTimer;
202 /** Spinlock protecting the data below. */
203 RTSPINLOCK hSpinlock;
204 /** The smallest Hz that we need to care about. (static) */
205 uint32_t uMinHz;
206 /** The number of ticks between each historization. */
207 uint32_t cTicksHistoriziationInterval;
208 /** The current historization tick (counting up to
209 * cTicksHistoriziationInterval and then resetting). */
210 uint32_t iTickHistorization;
211 /** The current timer interval. This is set to 0 when inactive. */
212 uint32_t cNsInterval;
213 /** The current timer frequency. This is set to 0 when inactive. */
214 uint32_t uTimerHz;
215 /** The current max frequency reported by the EMTs.
216 * This gets historicized and reset by the timer callback. This is
217 * read without holding the spinlock, so needs atomic updating. */
218 uint32_t volatile uDesiredHz;
219 /** Whether the timer was started or not. */
220 bool volatile fStarted;
221 /** Set if we're starting the timer. */
222 bool volatile fStarting;
223 /** The index of the next history entry (mod it). */
224 uint32_t iHzHistory;
225 /** Historicized uDesiredHz values. The array wraps around; new entries
226 * are added at iHzHistory. This is updated approximately every
227 * GVMMHOSTCPU_PPT_HIST_INTERVAL_NS by the timer callback. */
228 uint32_t aHzHistory[8];
229 /** Statistics counter for recording the number of interval changes. */
230 uint32_t cChanges;
231 /** Statistics counter for recording the number of timer starts. */
232 uint32_t cStarts;
233 } Ppt;
234#endif /* GVMM_SCHED_WITH_PPT */
235
236} GVMMHOSTCPU;
237/** Pointer to the per host CPU GVMM data. */
238typedef GVMMHOSTCPU *PGVMMHOSTCPU;
239/** The GVMMHOSTCPU::u32Magic value (Petra, Tanya & Rachel Haden). */
240#define GVMMHOSTCPU_MAGIC UINT32_C(0x19711011)
241/** The interval one history entry should cover (approximately), given in
242 * nanoseconds. */
243#define GVMMHOSTCPU_PPT_HIST_INTERVAL_NS UINT32_C(20000000)
244
245
246/**
247 * The GVMM instance data.
248 */
249typedef struct GVMM
250{
251 /** Eyecatcher / magic. */
252 uint32_t u32Magic;
253 /** The index of the head of the free handle chain. (0 is nil.) */
254 uint16_t volatile iFreeHead;
255 /** The index of the head of the active handle chain. (0 is nil.) */
256 uint16_t volatile iUsedHead;
257 /** The number of VMs. */
258 uint16_t volatile cVMs;
259 /** Alignment padding. */
260 uint16_t u16Reserved;
261 /** The number of EMTs. */
262 uint32_t volatile cEMTs;
263 /** The number of EMTs that have halted in GVMMR0SchedHalt. */
264 uint32_t volatile cHaltedEMTs;
265 /** Mini lock for restricting early wake-ups to one thread. */
266 bool volatile fDoingEarlyWakeUps;
267 bool afPadding[3]; /**< explicit alignment padding. */
268 /** When the next halted or sleeping EMT will wake up.
269 * This is set to 0 when it needs recalculating and to UINT64_MAX when
270 * there are no halted or sleeping EMTs in the GVMM. */
271 uint64_t uNsNextEmtWakeup;
272 /** The lock used to serialize VM creation, destruction and associated events that
273 * aren't performance critical. Owners may acquire the list lock. */
274 RTCRITSECT CreateDestroyLock;
275 /** The lock used to serialize used list updates and accesses.
276 * This indirectly includes scheduling since the scheduler will have to walk the
277 * used list to examine running VMs. Owners may not acquire any other locks. */
278 RTCRITSECTRW UsedLock;
279 /** The handle array.
280 * The size of this array defines the maximum number of currently running VMs.
281 * The first entry is unused as it represents the NIL handle. */
282 GVMHANDLE aHandles[GVMM_MAX_HANDLES];
283
284 /** @gcfgm{/GVMM/cEMTsMeansCompany, 32-bit, 0, UINT32_MAX, 1}
285 * The number of EMTs that means we no longer consider ourselves alone on a
286 * CPU/Core.
287 */
288 uint32_t cEMTsMeansCompany;
289 /** @gcfgm{/GVMM/MinSleepAlone, 32-bit, 0, 100000000, 750000, ns}
290 * The minimum sleep time for when we're alone, in nanoseconds.
291 */
292 uint32_t nsMinSleepAlone;
293 /** @gcfgm{/GVMM/MinSleepCompany, 32-bit, 0, 100000000, 15000, ns}
294 * The minimum sleep time for when we've got company, in nanoseconds.
295 */
296 uint32_t nsMinSleepCompany;
297 /** @gcfgm{/GVMM/EarlyWakeUp1, 32-bit, 0, 100000000, 25000, ns}
298 * The limit for the first round of early wake-ups, given in nanoseconds.
299 */
300 uint32_t nsEarlyWakeUp1;
301 /** @gcfgm{/GVMM/EarlyWakeUp2, 32-bit, 0, 100000000, 50000, ns}
302 * The limit for the second round of early wake-ups, given in nanoseconds.
303 */
304 uint32_t nsEarlyWakeUp2;
305
306 /** Set if we're doing early wake-ups.
307 * This reflects nsEarlyWakeUp1 and nsEarlyWakeUp2. */
308 bool volatile fDoEarlyWakeUps;
309
310 /** The number of entries in the host CPU array (aHostCpus). */
311 uint32_t cHostCpus;
312 /** Per host CPU data (variable length). */
313 GVMMHOSTCPU aHostCpus[1];
314} GVMM;
315AssertCompileMemberAlignment(GVMM, CreateDestroyLock, 8);
316AssertCompileMemberAlignment(GVMM, UsedLock, 8);
317AssertCompileMemberAlignment(GVMM, uNsNextEmtWakeup, 8);
318/** Pointer to the GVMM instance data. */
319typedef GVMM *PGVMM;
320
321/** The GVMM::u32Magic value (Charlie Haden). */
322#define GVMM_MAGIC UINT32_C(0x19370806)
323
324
325
326/*********************************************************************************************************************************
327* Global Variables *
328*********************************************************************************************************************************/
329/** Pointer to the GVMM instance data.
330 * (Just my general dislike for global variables.) */
331static PGVMM g_pGVMM = NULL;
332
333/** Macro for obtaining and validating the g_pGVMM pointer.
334 * On failure it will return from the invoking function with the specified return value.
335 *
336 * @param pGVMM The name of the pGVMM variable.
337 * @param rc The return value on failure. Use VERR_GVMM_INSTANCE for VBox
338 * status codes.
339 */
340#define GVMM_GET_VALID_INSTANCE(pGVMM, rc) \
341 do { \
342 (pGVMM) = g_pGVMM;\
343 AssertPtrReturn((pGVMM), (rc)); \
344 AssertMsgReturn((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic), (rc)); \
345 } while (0)
346
347/** Macro for obtaining and validating the g_pGVMM pointer, void function variant.
348 * On failure it will return from the invoking function.
349 *
350 * @param pGVMM The name of the pGVMM variable.
351 */
352#define GVMM_GET_VALID_INSTANCE_VOID(pGVMM) \
353 do { \
354 (pGVMM) = g_pGVMM;\
355 AssertPtrReturnVoid((pGVMM)); \
356 AssertMsgReturnVoid((pGVMM)->u32Magic == GVMM_MAGIC, ("%p - %#x\n", (pGVMM), (pGVMM)->u32Magic)); \
357 } while (0)
358
359
360/*********************************************************************************************************************************
361* Internal Functions *
362*********************************************************************************************************************************/
363static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession);
364static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvGVMM, void *pvHandle);
365static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock);
366static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM);
367
368#ifdef GVMM_SCHED_WITH_PPT
369static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick);
370#endif
371
372
373/**
374 * Initializes the GVMM.
375 *
376 * This is called while owning the loader semaphore (see supdrvIOCtl_LdrLoad()).
377 *
378 * @returns VBox status code.
379 */
380GVMMR0DECL(int) GVMMR0Init(void)
381{
382 LogFlow(("GVMMR0Init:\n"));
383
384 /*
385 * Allocate and initialize the instance data.
386 */
387 uint32_t cHostCpus = RTMpGetArraySize();
388 AssertMsgReturn(cHostCpus > 0 && cHostCpus < _64K, ("%d", (int)cHostCpus), VERR_GVMM_HOST_CPU_RANGE);
389
390 PGVMM pGVMM = (PGVMM)RTMemAllocZ(RT_UOFFSETOF_DYN(GVMM, aHostCpus[cHostCpus]));
391 if (!pGVMM)
392 return VERR_NO_MEMORY;
393 int rc = RTCritSectInitEx(&pGVMM->CreateDestroyLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE,
394 "GVMM-CreateDestroyLock");
395 if (RT_SUCCESS(rc))
396 {
397 rc = RTCritSectRwInitEx(&pGVMM->UsedLock, 0, NIL_RTLOCKVALCLASS, RTLOCKVAL_SUB_CLASS_NONE, "GVMM-UsedLock");
398 if (RT_SUCCESS(rc))
399 {
400 pGVMM->u32Magic = GVMM_MAGIC;
401 pGVMM->iUsedHead = 0;
402 pGVMM->iFreeHead = 1;
403
404 /* the nil handle */
405 pGVMM->aHandles[0].iSelf = 0;
406 pGVMM->aHandles[0].iNext = 0;
407
408 /* the tail */
409 unsigned i = RT_ELEMENTS(pGVMM->aHandles) - 1;
410 pGVMM->aHandles[i].iSelf = i;
411 pGVMM->aHandles[i].iNext = 0; /* nil */
412
413 /* the rest */
414 while (i-- > 1)
415 {
416 pGVMM->aHandles[i].iSelf = i;
417 pGVMM->aHandles[i].iNext = i + 1;
418 }
419
420 /* The default configuration values. */
421 uint32_t cNsResolution = RTSemEventMultiGetResolution();
422 pGVMM->cEMTsMeansCompany = 1; /** @todo should be adjusted relative to the CPU count or something... */
423 if (cNsResolution >= 5*RT_NS_100US)
424 {
425 pGVMM->nsMinSleepAlone = 750000 /* ns (0.750 ms) */; /** @todo this should be adjusted to be 75% (or something) of the scheduler granularity... */
426 pGVMM->nsMinSleepCompany = 15000 /* ns (0.015 ms) */;
427 pGVMM->nsEarlyWakeUp1 = 25000 /* ns (0.025 ms) */;
428 pGVMM->nsEarlyWakeUp2 = 50000 /* ns (0.050 ms) */;
429 }
430 else if (cNsResolution > RT_NS_100US)
431 {
432 pGVMM->nsMinSleepAlone = cNsResolution / 2;
433 pGVMM->nsMinSleepCompany = cNsResolution / 4;
434 pGVMM->nsEarlyWakeUp1 = 0;
435 pGVMM->nsEarlyWakeUp2 = 0;
436 }
437 else
438 {
439 pGVMM->nsMinSleepAlone = 2000;
440 pGVMM->nsMinSleepCompany = 2000;
441 pGVMM->nsEarlyWakeUp1 = 0;
442 pGVMM->nsEarlyWakeUp2 = 0;
443 }
444 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
445
446 /* The host CPU data. */
447 pGVMM->cHostCpus = cHostCpus;
448 uint32_t iCpu = cHostCpus;
449 RTCPUSET PossibleSet;
450 RTMpGetSet(&PossibleSet);
451 while (iCpu-- > 0)
452 {
453 pGVMM->aHostCpus[iCpu].idxCpuSet = iCpu;
454#ifdef GVMM_SCHED_WITH_PPT
455 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
456 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
457 pGVMM->aHostCpus[iCpu].Ppt.uMinHz = 5; /** @todo Add some API which figures this one out. (not *that* important) */
458 pGVMM->aHostCpus[iCpu].Ppt.cTicksHistoriziationInterval = 1;
459 //pGVMM->aHostCpus[iCpu].Ppt.iTickHistorization = 0;
460 //pGVMM->aHostCpus[iCpu].Ppt.cNsInterval = 0;
461 //pGVMM->aHostCpus[iCpu].Ppt.uTimerHz = 0;
462 //pGVMM->aHostCpus[iCpu].Ppt.uDesiredHz = 0;
463 //pGVMM->aHostCpus[iCpu].Ppt.fStarted = false;
464 //pGVMM->aHostCpus[iCpu].Ppt.fStarting = false;
465 //pGVMM->aHostCpus[iCpu].Ppt.iHzHistory = 0;
466 //pGVMM->aHostCpus[iCpu].Ppt.aHzHistory = {0};
467#endif
468
469 if (RTCpuSetIsMember(&PossibleSet, iCpu))
470 {
471 pGVMM->aHostCpus[iCpu].idCpu = RTMpCpuIdFromSetIndex(iCpu);
472 pGVMM->aHostCpus[iCpu].u32Magic = GVMMHOSTCPU_MAGIC;
473
474#ifdef GVMM_SCHED_WITH_PPT
475 rc = RTTimerCreateEx(&pGVMM->aHostCpus[iCpu].Ppt.pTimer,
476 50*1000*1000 /* whatever */,
477 RTTIMER_FLAGS_CPU(iCpu) | RTTIMER_FLAGS_HIGH_RES,
478 gvmmR0SchedPeriodicPreemptionTimerCallback,
479 &pGVMM->aHostCpus[iCpu]);
480 if (RT_SUCCESS(rc))
481 rc = RTSpinlockCreate(&pGVMM->aHostCpus[iCpu].Ppt.hSpinlock, RTSPINLOCK_FLAGS_INTERRUPT_SAFE, "GVMM/CPU");
482 if (RT_FAILURE(rc))
483 {
484 while (iCpu < cHostCpus)
485 {
486 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
487 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
488 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
489 iCpu++;
490 }
491 break;
492 }
493#endif
494 }
495 else
496 {
497 pGVMM->aHostCpus[iCpu].idCpu = NIL_RTCPUID;
498 pGVMM->aHostCpus[iCpu].u32Magic = 0;
499 }
500 }
501 if (RT_SUCCESS(rc))
502 {
503 g_pGVMM = pGVMM;
504 LogFlow(("GVMMR0Init: pGVMM=%p cHostCpus=%u\n", pGVMM, cHostCpus));
505 return VINF_SUCCESS;
506 }
507
508 /* bail out. */
509 RTCritSectRwDelete(&pGVMM->UsedLock);
510 }
511 RTCritSectDelete(&pGVMM->CreateDestroyLock);
512 }
513
514 RTMemFree(pGVMM);
515 return rc;
516}
517
518
519/**
520 * Terminates the GVMM.
521 *
522 * This is called while owning the loader semaphore (see supdrvLdrFree()).
523 * And unless something is wrong, there should be absolutely no VMs
524 * registered at this point.
525 */
526GVMMR0DECL(void) GVMMR0Term(void)
527{
528 LogFlow(("GVMMR0Term:\n"));
529
530 PGVMM pGVMM = g_pGVMM;
531 g_pGVMM = NULL;
532 if (RT_UNLIKELY(!RT_VALID_PTR(pGVMM)))
533 {
534 SUPR0Printf("GVMMR0Term: pGVMM=%RKv\n", pGVMM);
535 return;
536 }
537
538 /*
539 * First of all, stop all active timers.
540 */
541 uint32_t cActiveTimers = 0;
542 uint32_t iCpu = pGVMM->cHostCpus;
543 while (iCpu-- > 0)
544 {
545 ASMAtomicWriteU32(&pGVMM->aHostCpus[iCpu].u32Magic, ~GVMMHOSTCPU_MAGIC);
546#ifdef GVMM_SCHED_WITH_PPT
547 if ( pGVMM->aHostCpus[iCpu].Ppt.pTimer != NULL
548 && RT_SUCCESS(RTTimerStop(pGVMM->aHostCpus[iCpu].Ppt.pTimer)))
549 cActiveTimers++;
550#endif
551 }
552 if (cActiveTimers)
553 RTThreadSleep(1); /* fudge */
554
555 /*
556 * Invalidate the instance data and free resources.
557 */
558 pGVMM->u32Magic = ~GVMM_MAGIC;
559 RTCritSectRwDelete(&pGVMM->UsedLock);
560 RTCritSectDelete(&pGVMM->CreateDestroyLock);
561
562 pGVMM->iFreeHead = 0;
563 if (pGVMM->iUsedHead)
564 {
565 SUPR0Printf("GVMMR0Term: iUsedHead=%#x! (cVMs=%#x cEMTs=%#x)\n", pGVMM->iUsedHead, pGVMM->cVMs, pGVMM->cEMTs);
566 pGVMM->iUsedHead = 0;
567 }
568
569#ifdef GVMM_SCHED_WITH_PPT
570 iCpu = pGVMM->cHostCpus;
571 while (iCpu-- > 0)
572 {
573 RTTimerDestroy(pGVMM->aHostCpus[iCpu].Ppt.pTimer);
574 pGVMM->aHostCpus[iCpu].Ppt.pTimer = NULL;
575 RTSpinlockDestroy(pGVMM->aHostCpus[iCpu].Ppt.hSpinlock);
576 pGVMM->aHostCpus[iCpu].Ppt.hSpinlock = NIL_RTSPINLOCK;
577 }
578#endif
579
580 RTMemFree(pGVMM);
581}
582
583
584/**
585 * A quick hack for setting global config values.
586 *
587 * @returns VBox status code.
588 *
589 * @param pSession The session handle. Used for authentication.
590 * @param pszName The variable name.
591 * @param u64Value The new value.
592 */
593GVMMR0DECL(int) GVMMR0SetConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t u64Value)
594{
595 /*
596 * Validate input.
597 */
598 PGVMM pGVMM;
599 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
600 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
601 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
602
603 /*
604 * String switch time!
605 */
606 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
607 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
608 int rc = VINF_SUCCESS;
609 pszName += sizeof("/GVMM/") - 1;
610 if (!strcmp(pszName, "cEMTsMeansCompany"))
611 {
612 if (u64Value <= UINT32_MAX)
613 pGVMM->cEMTsMeansCompany = u64Value;
614 else
615 rc = VERR_OUT_OF_RANGE;
616 }
617 else if (!strcmp(pszName, "MinSleepAlone"))
618 {
619 if (u64Value <= RT_NS_100MS)
620 pGVMM->nsMinSleepAlone = u64Value;
621 else
622 rc = VERR_OUT_OF_RANGE;
623 }
624 else if (!strcmp(pszName, "MinSleepCompany"))
625 {
626 if (u64Value <= RT_NS_100MS)
627 pGVMM->nsMinSleepCompany = u64Value;
628 else
629 rc = VERR_OUT_OF_RANGE;
630 }
631 else if (!strcmp(pszName, "EarlyWakeUp1"))
632 {
633 if (u64Value <= RT_NS_100MS)
634 {
635 pGVMM->nsEarlyWakeUp1 = u64Value;
636 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
637 }
638 else
639 rc = VERR_OUT_OF_RANGE;
640 }
641 else if (!strcmp(pszName, "EarlyWakeUp2"))
642 {
643 if (u64Value <= RT_NS_100MS)
644 {
645 pGVMM->nsEarlyWakeUp2 = u64Value;
646 pGVMM->fDoEarlyWakeUps = pGVMM->nsEarlyWakeUp1 > 0 && pGVMM->nsEarlyWakeUp2 > 0;
647 }
648 else
649 rc = VERR_OUT_OF_RANGE;
650 }
651 else
652 rc = VERR_CFGM_VALUE_NOT_FOUND;
653 return rc;
654}
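
/* An illustrative (hypothetical) call showing the name format GVMMR0SetConfig
 * above expects; the value is arbitrary. Setting either early wake-up limit to
 * zero also clears fDoEarlyWakeUps. */
#if 0
    int rcCfg = GVMMR0SetConfig(pSession, "/GVMM/EarlyWakeUp2", 0 /* ns */);
    AssertRC(rcCfg);
#endif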
655
656
657/**
658 * A quick hack for getting global config values.
659 *
660 * @returns VBox status code.
661 *
662 * @param pSession The session handle. Used for authentication.
663 * @param pszName The variable name.
664 * @param pu64Value Where to return the value.
665 */
666GVMMR0DECL(int) GVMMR0QueryConfig(PSUPDRVSESSION pSession, const char *pszName, uint64_t *pu64Value)
667{
668 /*
669 * Validate input.
670 */
671 PGVMM pGVMM;
672 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
673 AssertPtrReturn(pSession, VERR_INVALID_HANDLE);
674 AssertPtrReturn(pszName, VERR_INVALID_POINTER);
675 AssertPtrReturn(pu64Value, VERR_INVALID_POINTER);
676
677 /*
678 * String switch time!
679 */
680 if (strncmp(pszName, RT_STR_TUPLE("/GVMM/")))
681 return VERR_CFGM_VALUE_NOT_FOUND; /* borrow status codes from CFGM... */
682 int rc = VINF_SUCCESS;
683 pszName += sizeof("/GVMM/") - 1;
684 if (!strcmp(pszName, "cEMTsMeansCompany"))
685 *pu64Value = pGVMM->cEMTsMeansCompany;
686 else if (!strcmp(pszName, "MinSleepAlone"))
687 *pu64Value = pGVMM->nsMinSleepAlone;
688 else if (!strcmp(pszName, "MinSleepCompany"))
689 *pu64Value = pGVMM->nsMinSleepCompany;
690 else if (!strcmp(pszName, "EarlyWakeUp1"))
691 *pu64Value = pGVMM->nsEarlyWakeUp1;
692 else if (!strcmp(pszName, "EarlyWakeUp2"))
693 *pu64Value = pGVMM->nsEarlyWakeUp2;
694 else
695 rc = VERR_CFGM_VALUE_NOT_FOUND;
696 return rc;
697}
698
699
700/**
701 * Acquire the 'used' lock in shared mode.
702 *
703 * This prevents destruction of the VM while we're in ring-0.
704 *
705 * @returns IPRT status code, see RTCritSectRwEnterShared.
706 * @param a_pGVMM The GVMM instance data.
707 * @sa GVMMR0_USED_SHARED_UNLOCK, GVMMR0_USED_EXCLUSIVE_LOCK
708 */
709#define GVMMR0_USED_SHARED_LOCK(a_pGVMM) RTCritSectRwEnterShared(&(a_pGVMM)->UsedLock)
710
711/**
712 * Release the 'used' lock when owning it in shared mode.
713 *
714 * @returns IPRT status code, see RTCritSectRwLeaveShared.
715 * @param a_pGVMM The GVMM instance data.
716 * @sa GVMMR0_USED_SHARED_LOCK
717 */
718#define GVMMR0_USED_SHARED_UNLOCK(a_pGVMM) RTCritSectRwLeaveShared(&(a_pGVMM)->UsedLock)
719
720/**
721 * Acquire the 'used' lock in exclusive mode.
722 *
723 * Only use this function when making changes to the used list.
724 *
725 * @returns IPRT status code, see RTCritSectRwEnterExcl.
726 * @param a_pGVMM The GVMM instance data.
727 * @sa GVMMR0_USED_EXCLUSIVE_UNLOCK
728 */
729#define GVMMR0_USED_EXCLUSIVE_LOCK(a_pGVMM) RTCritSectRwEnterExcl(&(a_pGVMM)->UsedLock)
730
731/**
732 * Release the 'used' lock when owning it in exclusive mode.
733 *
734 * @returns IPRT status code, see RTCritSectRwLeaveExcl.
735 * @param a_pGVMM The GVMM instance data.
736 * @sa GVMMR0_USED_EXCLUSIVE_LOCK, GVMMR0_USED_SHARED_UNLOCK
737 */
738#define GVMMR0_USED_EXCLUSIVE_UNLOCK(a_pGVMM) RTCritSectRwLeaveExcl(&(a_pGVMM)->UsedLock)
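
/* An illustrative pattern for reading the used list under the shared lock; the
 * loop body is hypothetical, but the traversal mirrors how this file walks the
 * handle list elsewhere (iUsedHead/iNext, with 0 as the nil index). */
#if 0
    int rcLock = GVMMR0_USED_SHARED_LOCK(pGVMM);
    AssertRC(rcLock);
    for (uint16_t i = pGVMM->iUsedHead; i != 0; i = pGVMM->aHandles[i].iNext)
    {
        PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
        /* ... inspect pCurGVM here; per the doc above, take no other locks ... */
    }
    GVMMR0_USED_SHARED_UNLOCK(pGVMM);
#endif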
739
740
741/**
742 * Try to acquire the 'create & destroy' lock.
743 *
744 * @returns IPRT status code, see RTCritSectEnter.
745 * @param pGVMM The GVMM instance data.
746 */
747DECLINLINE(int) gvmmR0CreateDestroyLock(PGVMM pGVMM)
748{
749 LogFlow(("++gvmmR0CreateDestroyLock(%p)\n", pGVMM));
750 int rc = RTCritSectEnter(&pGVMM->CreateDestroyLock);
751 LogFlow(("gvmmR0CreateDestroyLock(%p)->%Rrc\n", pGVMM, rc));
752 return rc;
753}
754
755
756/**
757 * Release the 'create & destroy' lock.
758 *
759 * @returns IPRT status code, see RTCritSectLeave.
760 * @param pGVMM The GVMM instance data.
761 */
762DECLINLINE(int) gvmmR0CreateDestroyUnlock(PGVMM pGVMM)
763{
764 LogFlow(("--gvmmR0CreateDestroyUnlock(%p)\n", pGVMM));
765 int rc = RTCritSectLeave(&pGVMM->CreateDestroyLock);
766 AssertRC(rc);
767 return rc;
768}
769
770
771/**
772 * Request wrapper for the GVMMR0CreateVM API.
773 *
774 * @returns VBox status code.
775 * @param pReq The request buffer.
776 * @param pSession The session handle. The VM will be associated with this.
777 */
778GVMMR0DECL(int) GVMMR0CreateVMReq(PGVMMCREATEVMREQ pReq, PSUPDRVSESSION pSession)
779{
780 /*
781 * Validate the request.
782 */
783 if (!RT_VALID_PTR(pReq))
784 return VERR_INVALID_POINTER;
785 if (pReq->Hdr.cbReq != sizeof(*pReq))
786 return VERR_INVALID_PARAMETER;
787 if (pReq->pSession != pSession)
788 return VERR_INVALID_POINTER;
789
790 /*
791 * Execute it.
792 */
793 PGVM pGVM;
794 pReq->pVMR0 = NULL;
795 pReq->pVMR3 = NIL_RTR3PTR;
796 int rc = GVMMR0CreateVM(pSession, pReq->cCpus, &pGVM);
797 if (RT_SUCCESS(rc))
798 {
799 pReq->pVMR0 = pGVM; /** @todo don't expose this to ring-3, use a unique random number instead. */
800 pReq->pVMR3 = pGVM->pVMR3;
801 }
802 return rc;
803}
804
805
806/**
807 * Allocates the VM structure and registers it with the GVMM.
808 *
809 * The caller will become the VM owner and thereby the EMT.
810 *
811 * @returns VBox status code.
812 * @param pSession The support driver session.
813 * @param cCpus Number of virtual CPUs for the new VM.
814 * @param ppGVM Where to store the pointer to the VM structure.
815 *
816 * @thread EMT.
817 */
818GVMMR0DECL(int) GVMMR0CreateVM(PSUPDRVSESSION pSession, uint32_t cCpus, PGVM *ppGVM)
819{
820 LogFlow(("GVMMR0CreateVM: pSession=%p\n", pSession));
821 PGVMM pGVMM;
822 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
823
824 AssertPtrReturn(ppGVM, VERR_INVALID_POINTER);
825 *ppGVM = NULL;
826
827 if ( cCpus == 0
828 || cCpus > VMM_MAX_CPU_COUNT)
829 return VERR_INVALID_PARAMETER;
830
831 RTNATIVETHREAD hEMT0 = RTThreadNativeSelf();
832 AssertReturn(hEMT0 != NIL_RTNATIVETHREAD, VERR_GVMM_BROKEN_IPRT);
833 RTPROCESS ProcId = RTProcSelf();
834 AssertReturn(ProcId != NIL_RTPROCESS, VERR_GVMM_BROKEN_IPRT);
835
836 /*
837 * The whole allocation process is protected by the lock.
838 */
839 int rc = gvmmR0CreateDestroyLock(pGVMM);
840 AssertRCReturn(rc, rc);
841
842 /*
843 * Only one VM per session.
844 */
845 if (SUPR0GetSessionVM(pSession) != NULL)
846 {
847 gvmmR0CreateDestroyUnlock(pGVMM);
848 SUPR0Printf("GVMMR0CreateVM: The session %p already got a VM: %p\n", pSession, SUPR0GetSessionVM(pSession));
849 return VERR_ALREADY_EXISTS;
850 }
851
852 /*
853 * Allocate a handle first so we don't waste resources unnecessarily.
854 */
855 uint16_t iHandle = pGVMM->iFreeHead;
856 if (iHandle)
857 {
858 PGVMHANDLE pHandle = &pGVMM->aHandles[iHandle];
859
860 /* consistency checks, a bit paranoid as always. */
861 if ( !pHandle->pGVM
862 && !pHandle->pvObj
863 && pHandle->iSelf == iHandle)
864 {
865 pHandle->pvObj = SUPR0ObjRegister(pSession, SUPDRVOBJTYPE_VM, gvmmR0HandleObjDestructor, pGVMM, pHandle);
866 if (pHandle->pvObj)
867 {
868 /*
869 * Move the handle from the free to used list and perform permission checks.
870 */
871 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
872 AssertRC(rc);
873
874 pGVMM->iFreeHead = pHandle->iNext;
875 pHandle->iNext = pGVMM->iUsedHead;
876 pGVMM->iUsedHead = iHandle;
877 pGVMM->cVMs++;
878
879 pHandle->pGVM = NULL;
880 pHandle->pSession = pSession;
881 pHandle->hEMT0 = NIL_RTNATIVETHREAD;
882 pHandle->ProcId = NIL_RTPROCESS;
883
884 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
885
886 rc = SUPR0ObjVerifyAccess(pHandle->pvObj, pSession, NULL);
887 if (RT_SUCCESS(rc))
888 {
889 /*
890 * Allocate memory for the VM structure (combined VM + GVM).
891 */
892 const uint32_t cbVM = RT_UOFFSETOF_DYN(GVM, aCpus[cCpus]);
893 const uint32_t cPages = RT_ALIGN_32(cbVM, PAGE_SIZE) >> PAGE_SHIFT;
894 RTR0MEMOBJ hVMMemObj = NIL_RTR0MEMOBJ;
895 rc = RTR0MemObjAllocPage(&hVMMemObj, cPages << PAGE_SHIFT, false /* fExecutable */);
896 if (RT_SUCCESS(rc))
897 {
898 PGVM pGVM = (PGVM)RTR0MemObjAddress(hVMMemObj);
899 AssertPtr(pGVM);
900
901 /*
902 * Initialise the structure.
903 */
904 RT_BZERO(pGVM, cPages << PAGE_SHIFT);
905 gvmmR0InitPerVMData(pGVM, iHandle, cCpus, pSession);
906 pGVM->gvmm.s.VMMemObj = hVMMemObj;
907 rc = GMMR0InitPerVMData(pGVM);
908 int rc2 = PGMR0InitPerVMData(pGVM);
909 VMMR0InitPerVMData(pGVM);
910 DBGFR0InitPerVMData(pGVM);
911 PDMR0InitPerVMData(pGVM);
912 IOMR0InitPerVMData(pGVM);
913 TMR0InitPerVMData(pGVM);
914 if (RT_SUCCESS(rc) && RT_SUCCESS(rc2))
915 {
916 /*
917 * Allocate page array.
918 * This currently has to be made available to ring-3, but this should change eventually.
919 */
920 rc = RTR0MemObjAllocPage(&pGVM->gvmm.s.VMPagesMemObj, cPages * sizeof(SUPPAGE), false /* fExecutable */);
921 if (RT_SUCCESS(rc))
922 {
923 PSUPPAGE paPages = (PSUPPAGE)RTR0MemObjAddress(pGVM->gvmm.s.VMPagesMemObj); AssertPtr(paPages);
924 for (uint32_t iPage = 0; iPage < cPages; iPage++)
925 {
926 paPages[iPage].uReserved = 0;
927 paPages[iPage].Phys = RTR0MemObjGetPagePhysAddr(pGVM->gvmm.s.VMMemObj, iPage);
928 Assert(paPages[iPage].Phys != NIL_RTHCPHYS);
929 }
930
931 /*
932 * Map the page array, VM and VMCPU structures into ring-3.
933 */
934 AssertCompileSizeAlignment(VM, PAGE_SIZE);
935 rc = RTR0MemObjMapUserEx(&pGVM->gvmm.s.VMMapObj, pGVM->gvmm.s.VMMemObj, (RTR3PTR)-1, 0,
936 RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
937 0 /*offSub*/, sizeof(VM));
938 for (VMCPUID i = 0; i < cCpus && RT_SUCCESS(rc); i++)
939 {
940 AssertCompileSizeAlignment(VMCPU, PAGE_SIZE);
941 rc = RTR0MemObjMapUserEx(&pGVM->aCpus[i].gvmm.s.VMCpuMapObj, pGVM->gvmm.s.VMMemObj,
942 (RTR3PTR)-1, 0, RTMEM_PROT_READ | RTMEM_PROT_WRITE, NIL_RTR0PROCESS,
943 RT_UOFFSETOF_DYN(GVM, aCpus[i]), sizeof(VMCPU));
944 }
945 if (RT_SUCCESS(rc))
946 rc = RTR0MemObjMapUser(&pGVM->gvmm.s.VMPagesMapObj, pGVM->gvmm.s.VMPagesMemObj, (RTR3PTR)-1,
947 0 /* uAlignment */, RTMEM_PROT_READ | RTMEM_PROT_WRITE,
948 NIL_RTR0PROCESS);
949 if (RT_SUCCESS(rc))
950 {
951 /*
952 * Initialize all the VM pointers.
953 */
954 PVMR3 pVMR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMMapObj);
955 AssertPtr((void *)pVMR3);
956
957 for (VMCPUID i = 0; i < cCpus; i++)
958 {
959 pGVM->aCpus[i].pVMR0 = pGVM;
960 pGVM->aCpus[i].pVMR3 = pVMR3;
961 pGVM->apCpusR3[i] = RTR0MemObjAddressR3(pGVM->aCpus[i].gvmm.s.VMCpuMapObj);
962 pGVM->aCpus[i].pVCpuR3 = pGVM->apCpusR3[i];
963 pGVM->apCpusR0[i] = &pGVM->aCpus[i];
964 AssertPtr((void *)pGVM->apCpusR3[i]);
965 }
966
967 pGVM->paVMPagesR3 = RTR0MemObjAddressR3(pGVM->gvmm.s.VMPagesMapObj);
968 AssertPtr((void *)pGVM->paVMPagesR3);
969
970 /*
971 * Complete the handle - take the UsedLock just to be careful.
972 */
973 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
974 AssertRC(rc);
975
976 pHandle->pGVM = pGVM;
977 pHandle->hEMT0 = hEMT0;
978 pHandle->ProcId = ProcId;
979 pGVM->pVMR3 = pVMR3;
980 pGVM->pVMR3Unsafe = pVMR3;
981 pGVM->aCpus[0].hEMT = hEMT0;
982 pGVM->aCpus[0].hNativeThreadR0 = hEMT0;
983 pGVMM->cEMTs += cCpus;
984
985 /* Associate it with the session and create the context hook for EMT0. */
986 rc = SUPR0SetSessionVM(pSession, pGVM, pGVM);
987 if (RT_SUCCESS(rc))
988 {
989 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[0]);
990 if (RT_SUCCESS(rc))
991 {
992 /*
993 * Done!
994 */
995 VBOXVMM_R0_GVMM_VM_CREATED(pGVM, pGVM, ProcId, (void *)hEMT0, cCpus);
996
997 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
998 gvmmR0CreateDestroyUnlock(pGVMM);
999
1000 CPUMR0RegisterVCpuThread(&pGVM->aCpus[0]);
1001
1002 *ppGVM = pGVM;
1003 Log(("GVMMR0CreateVM: pVMR3=%p pGVM=%p hGVM=%d\n", pVMR3, pGVM, iHandle));
1004 return VINF_SUCCESS;
1005 }
1006
1007 SUPR0SetSessionVM(pSession, NULL, NULL);
1008 }
1009 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1010 }
1011
1012 /* Cleanup mappings. */
1013 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1014 {
1015 RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */);
1016 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1017 }
1018 for (VMCPUID i = 0; i < cCpus; i++)
1019 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1020 {
1021 RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */);
1022 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1023 }
1024 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1025 {
1026 RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */);
1027 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1028 }
1029 }
1030 }
1031 else if (RT_SUCCESS(rc))
1032 rc = rc2;
1033 }
1034 }
1035 /* else: The user wasn't permitted to create this VM. */
1036
1037 /*
1038 * The handle will be freed by gvmmR0HandleObjDestructor as we release the
1039 * object reference here. A little extra mess because the lock is non-recursive.
1040 */
1041 void *pvObj = pHandle->pvObj;
1042 pHandle->pvObj = NULL;
1043 gvmmR0CreateDestroyUnlock(pGVMM);
1044
1045 SUPR0ObjRelease(pvObj, pSession);
1046
1047 SUPR0Printf("GVMMR0CreateVM: failed, rc=%Rrc\n", rc);
1048 return rc;
1049 }
1050
1051 rc = VERR_NO_MEMORY;
1052 }
1053 else
1054 rc = VERR_GVMM_IPE_1;
1055 }
1056 else
1057 rc = VERR_GVM_TOO_MANY_VMS;
1058
1059 gvmmR0CreateDestroyUnlock(pGVMM);
1060 return rc;
1061}
1062
1063
1064/**
1065 * Initializes the per VM data belonging to GVMM.
1066 *
1067 * @param pGVM Pointer to the global VM structure.
1068 * @param hSelf The handle.
1069 * @param cCpus The CPU count.
1070 * @param pSession The session this VM is associated with.
1071 */
1072static void gvmmR0InitPerVMData(PGVM pGVM, int16_t hSelf, VMCPUID cCpus, PSUPDRVSESSION pSession)
1073{
1074 AssertCompile(RT_SIZEOFMEMB(GVM,gvmm.s) <= RT_SIZEOFMEMB(GVM,gvmm.padding));
1075 AssertCompile(RT_SIZEOFMEMB(GVMCPU,gvmm.s) <= RT_SIZEOFMEMB(GVMCPU,gvmm.padding));
1076 AssertCompileMemberAlignment(VM, cpum, 64);
1077 AssertCompileMemberAlignment(VM, tm, 64);
1078
1079 /* GVM: */
1080 pGVM->u32Magic = GVM_MAGIC;
1081 pGVM->hSelf = hSelf;
1082 pGVM->cCpus = cCpus;
1083 pGVM->pSession = pSession;
1084 pGVM->pSelf = pGVM;
1085
1086 /* VM: */
1087 pGVM->enmVMState = VMSTATE_CREATING;
1088 pGVM->hSelfUnsafe = hSelf;
1089 pGVM->pSessionUnsafe = pSession;
1090 pGVM->pVMR0ForCall = pGVM;
1091 pGVM->cCpusUnsafe = cCpus;
1092 pGVM->uCpuExecutionCap = 100; /* default is no cap. */
1093 pGVM->uStructVersion = 1;
1094 pGVM->cbSelf = sizeof(VM);
1095 pGVM->cbVCpu = sizeof(VMCPU);
1096
1097 /* GVMM: */
1098 pGVM->gvmm.s.VMMemObj = NIL_RTR0MEMOBJ;
1099 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1100 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1101 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1102 pGVM->gvmm.s.fDoneVMMR0Init = false;
1103 pGVM->gvmm.s.fDoneVMMR0Term = false;
1104
1105 /*
1106 * Per virtual CPU.
1107 */
1108 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1109 {
1110 pGVM->aCpus[i].idCpu = i;
1111 pGVM->aCpus[i].idCpuUnsafe = i;
1112 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1113 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1114 pGVM->aCpus[i].hEMT = NIL_RTNATIVETHREAD;
1115 pGVM->aCpus[i].pGVM = pGVM;
1116 pGVM->aCpus[i].idHostCpu = NIL_RTCPUID;
1117 pGVM->aCpus[i].iHostCpuSet = UINT32_MAX;
1118 pGVM->aCpus[i].hNativeThread = NIL_RTNATIVETHREAD;
1119 pGVM->aCpus[i].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1120 pGVM->aCpus[i].enmState = VMCPUSTATE_STOPPED;
1121 pGVM->aCpus[i].pVCpuR0ForVtg = &pGVM->aCpus[i];
1122 }
1123}
1124
1125
1126/**
1127 * Does the VM initialization.
1128 *
1129 * @returns VBox status code.
1130 * @param pGVM The global (ring-0) VM structure.
1131 */
1132GVMMR0DECL(int) GVMMR0InitVM(PGVM pGVM)
1133{
1134 LogFlow(("GVMMR0InitVM: pGVM=%p\n", pGVM));
1135
1136 int rc = VERR_INTERNAL_ERROR_3;
1137 if ( !pGVM->gvmm.s.fDoneVMMR0Init
1138 && pGVM->aCpus[0].gvmm.s.HaltEventMulti == NIL_RTSEMEVENTMULTI)
1139 {
1140 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1141 {
1142 rc = RTSemEventMultiCreate(&pGVM->aCpus[i].gvmm.s.HaltEventMulti);
1143 if (RT_FAILURE(rc))
1144 {
1145 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1146 break;
1147 }
1148 }
1149 }
1150 else
1151 rc = VERR_WRONG_ORDER;
1152
1153 LogFlow(("GVMMR0InitVM: returns %Rrc\n", rc));
1154 return rc;
1155}
1156
1157
1158/**
1159 * Indicates that we're done with the ring-0 initialization
1160 * of the VM.
1161 *
1162 * @param pGVM The global (ring-0) VM structure.
1163 * @thread EMT(0)
1164 */
1165GVMMR0DECL(void) GVMMR0DoneInitVM(PGVM pGVM)
1166{
1167 /* Set the indicator. */
1168 pGVM->gvmm.s.fDoneVMMR0Init = true;
1169}
1170
1171
1172/**
1173 * Indicates that we're doing the ring-0 termination of the VM.
1174 *
1175 * @returns true if termination hasn't been done already, false if it has.
1176 * @param pGVM Pointer to the global VM structure. Optional.
1177 * @thread EMT(0) or session cleanup thread.
1178 */
1179GVMMR0DECL(bool) GVMMR0DoingTermVM(PGVM pGVM)
1180{
1181 /* Validate the VM structure, state and handle. */
1182 AssertPtrReturn(pGVM, false);
1183
1184 /* Set the indicator. */
1185 if (pGVM->gvmm.s.fDoneVMMR0Term)
1186 return false;
1187 pGVM->gvmm.s.fDoneVMMR0Term = true;
1188 return true;
1189}
1190
1191
1192/**
1193 * Destroys the VM, freeing all associated resources (the ring-0 ones anyway).
1194 *
1195 * This is called from vmR3DestroyFinalBit and from an error path in VMR3Create,
1196 * and the caller is not the EMT thread, unfortunately. For security reasons, it
1197 * would've been nice if the caller were actually the EMT thread or if we somehow
1198 * could've associated the calling thread with the VM up front.
1199 *
1200 * @returns VBox status code.
1201 * @param pGVM The global (ring-0) VM structure.
1202 *
1203 * @thread EMT(0) if it's associated with the VM, otherwise any thread.
1204 */
1205GVMMR0DECL(int) GVMMR0DestroyVM(PGVM pGVM)
1206{
1207 LogFlow(("GVMMR0DestroyVM: pGVM=%p\n", pGVM));
1208 PGVMM pGVMM;
1209 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1210
1211 /*
1212 * Validate the VM structure, state and caller.
1213 */
1214 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1215 AssertReturn(!((uintptr_t)pGVM & PAGE_OFFSET_MASK), VERR_INVALID_POINTER);
1216 AssertMsgReturn(pGVM->enmVMState >= VMSTATE_CREATING && pGVM->enmVMState <= VMSTATE_TERMINATED, ("%d\n", pGVM->enmVMState),
1217 VERR_WRONG_ORDER);
1218
1219 uint32_t hGVM = pGVM->hSelf;
1220 ASMCompilerBarrier();
1221 AssertReturn(hGVM != NIL_GVM_HANDLE, VERR_INVALID_VM_HANDLE);
1222 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1223
1224 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1225 AssertReturn(pHandle->pGVM == pGVM, VERR_NOT_OWNER);
1226
1227 RTPROCESS ProcId = RTProcSelf();
1228 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
1229 AssertReturn( ( pHandle->hEMT0 == hSelf
1230 && pHandle->ProcId == ProcId)
1231 || pHandle->hEMT0 == NIL_RTNATIVETHREAD, VERR_NOT_OWNER);
1232
1233 /*
1234 * Look up the handle and destroy the object.
1235 * Since the lock isn't recursive and we'll have to leave it before dereferencing the
1236 * object, we take some precautions against racing callers just in case...
1237 */
1238 int rc = gvmmR0CreateDestroyLock(pGVMM);
1239 AssertRC(rc);
1240
1241 /* Be careful here because we might theoretically be racing someone else cleaning up. */
1242 if ( pHandle->pGVM == pGVM
1243 && ( ( pHandle->hEMT0 == hSelf
1244 && pHandle->ProcId == ProcId)
1245 || pHandle->hEMT0 == NIL_RTNATIVETHREAD)
1246 && RT_VALID_PTR(pHandle->pvObj)
1247 && RT_VALID_PTR(pHandle->pSession)
1248 && RT_VALID_PTR(pHandle->pGVM)
1249 && pHandle->pGVM->u32Magic == GVM_MAGIC)
1250 {
1251 /* Check that other EMTs have deregistered. */
1252 uint32_t cNotDeregistered = 0;
1253 for (VMCPUID idCpu = 1; idCpu < pGVM->cCpus; idCpu++)
1254 cNotDeregistered += pGVM->aCpus[idCpu].hEMT != ~(RTNATIVETHREAD)1; /* see GVMMR0DeregisterVCpu for the value */
1255 if (cNotDeregistered == 0)
1256 {
1257 /* Grab the object pointer. */
1258 void *pvObj = pHandle->pvObj;
1259 pHandle->pvObj = NULL;
1260 gvmmR0CreateDestroyUnlock(pGVMM);
1261
1262 SUPR0ObjRelease(pvObj, pHandle->pSession);
1263 }
1264 else
1265 {
1266 gvmmR0CreateDestroyUnlock(pGVMM);
1267 rc = VERR_GVMM_NOT_ALL_EMTS_DEREGISTERED;
1268 }
1269 }
1270 else
1271 {
1272 SUPR0Printf("GVMMR0DestroyVM: pHandle=%RKv:{.pGVM=%p, .hEMT0=%p, .ProcId=%u, .pvObj=%p} pGVM=%p hSelf=%p\n",
1273 pHandle, pHandle->pGVM, pHandle->hEMT0, pHandle->ProcId, pHandle->pvObj, pGVM, hSelf);
1274 gvmmR0CreateDestroyUnlock(pGVMM);
1275 rc = VERR_GVMM_IPE_2;
1276 }
1277
1278 return rc;
1279}
1280
1281
1282/**
1283 * Performs VM cleanup tasks as part of object destruction.
1284 *
1285 * @param pGVM The GVM pointer.
1286 */
1287static void gvmmR0CleanupVM(PGVM pGVM)
1288{
1289 if ( pGVM->gvmm.s.fDoneVMMR0Init
1290 && !pGVM->gvmm.s.fDoneVMMR0Term)
1291 {
1292 if ( pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ
1293 && RTR0MemObjAddress(pGVM->gvmm.s.VMMemObj) == pGVM)
1294 {
1295 LogFlow(("gvmmR0CleanupVM: Calling VMMR0TermVM\n"));
1296 VMMR0TermVM(pGVM, NIL_VMCPUID);
1297 }
1298 else
1299 AssertMsgFailed(("gvmmR0CleanupVM: VMMemObj=%p pGVM=%p\n", pGVM->gvmm.s.VMMemObj, pGVM));
1300 }
1301
1302 GMMR0CleanupVM(pGVM);
1303#ifdef VBOX_WITH_NEM_R0
1304 NEMR0CleanupVM(pGVM);
1305#endif
1306 PDMR0CleanupVM(pGVM);
1307 IOMR0CleanupVM(pGVM);
1308 DBGFR0CleanupVM(pGVM);
1309 PGMR0CleanupVM(pGVM);
1310 TMR0CleanupVM(pGVM);
1311
1312 AssertCompile(NIL_RTTHREADCTXHOOK == (RTTHREADCTXHOOK)0); /* Depends on zero initialized memory working for NIL at the moment. */
1313 for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
1314 {
1315 /** @todo Can we busy wait here for all thread-context hooks to be
1316 * deregistered before releasing (destroying) it? Only until we find a
1317 * solution for not deregistering hooks every time we're leaving HMR0
1318 * context. */
1319 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1320 }
1321}
1322
1323
1324/**
1325 * @callback_method_impl{FNSUPDRVDESTRUCTOR,VM handle destructor}
1326 *
1327 * pvUser1 is the GVMM instance pointer.
1328 * pvUser2 is the handle pointer.
1329 */
1330static DECLCALLBACK(void) gvmmR0HandleObjDestructor(void *pvObj, void *pvUser1, void *pvUser2)
1331{
1332 LogFlow(("gvmmR0HandleObjDestructor: %p %p %p\n", pvObj, pvUser1, pvUser2));
1333
1334 NOREF(pvObj);
1335
1336 /*
1337 * Some quick, paranoid, input validation.
1338 */
1339 PGVMHANDLE pHandle = (PGVMHANDLE)pvUser2;
1340 AssertPtr(pHandle);
1341 PGVMM pGVMM = (PGVMM)pvUser1;
1342 Assert(pGVMM == g_pGVMM);
1343 const uint16_t iHandle = pHandle - &pGVMM->aHandles[0];
1344 if ( !iHandle
1345 || iHandle >= RT_ELEMENTS(pGVMM->aHandles)
1346 || iHandle != pHandle->iSelf)
1347 {
1348 SUPR0Printf("GVM: handle %d is out of range or corrupt (iSelf=%d)!\n", iHandle, pHandle->iSelf);
1349 return;
1350 }
1351
1352 int rc = gvmmR0CreateDestroyLock(pGVMM);
1353 AssertRC(rc);
1354 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1355 AssertRC(rc);
1356
1357 /*
1358 * This is a tad slow but a doubly linked list is too much hassle.
1359 */
1360 if (RT_UNLIKELY(pHandle->iNext >= RT_ELEMENTS(pGVMM->aHandles)))
1361 {
1362 SUPR0Printf("GVM: used list index %d is out of range!\n", pHandle->iNext);
1363 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1364 gvmmR0CreateDestroyUnlock(pGVMM);
1365 return;
1366 }
1367
1368 if (pGVMM->iUsedHead == iHandle)
1369 pGVMM->iUsedHead = pHandle->iNext;
1370 else
1371 {
1372 uint16_t iPrev = pGVMM->iUsedHead;
1373 int c = RT_ELEMENTS(pGVMM->aHandles) + 2;
1374 while (iPrev)
1375 {
1376 if (RT_UNLIKELY(iPrev >= RT_ELEMENTS(pGVMM->aHandles)))
1377 {
1378 SUPR0Printf("GVM: used list index %d is out of range!\n", iPrev);
1379 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1380 gvmmR0CreateDestroyUnlock(pGVMM);
1381 return;
1382 }
1383 if (RT_UNLIKELY(c-- <= 0))
1384 {
1385 iPrev = 0;
1386 break;
1387 }
1388
1389 if (pGVMM->aHandles[iPrev].iNext == iHandle)
1390 break;
1391 iPrev = pGVMM->aHandles[iPrev].iNext;
1392 }
1393 if (!iPrev)
1394 {
1395 SUPR0Printf("GVM: can't find the previous handle of %d!\n", pHandle->iSelf);
1396 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1397 gvmmR0CreateDestroyUnlock(pGVMM);
1398 return;
1399 }
1400
1401 Assert(pGVMM->aHandles[iPrev].iNext == iHandle);
1402 pGVMM->aHandles[iPrev].iNext = pHandle->iNext;
1403 }
1404 pHandle->iNext = 0;
1405 pGVMM->cVMs--;
1406
1407 /*
1408 * Do the global cleanup round.
1409 */
1410 PGVM pGVM = pHandle->pGVM;
1411 if ( RT_VALID_PTR(pGVM)
1412 && pGVM->u32Magic == GVM_MAGIC)
1413 {
1414 pGVMM->cEMTs -= pGVM->cCpus;
1415
1416 if (pGVM->pSession)
1417 SUPR0SetSessionVM(pGVM->pSession, NULL, NULL);
1418
1419 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1420
1421 gvmmR0CleanupVM(pGVM);
1422
1423 /*
1424 * Do the GVMM cleanup - must be done last.
1425 */
1426 /* The VM and VM pages mappings/allocations. */
1427 if (pGVM->gvmm.s.VMPagesMapObj != NIL_RTR0MEMOBJ)
1428 {
1429 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMapObj, false /* fFreeMappings */); AssertRC(rc);
1430 pGVM->gvmm.s.VMPagesMapObj = NIL_RTR0MEMOBJ;
1431 }
1432
1433 if (pGVM->gvmm.s.VMMapObj != NIL_RTR0MEMOBJ)
1434 {
1435 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMapObj, false /* fFreeMappings */); AssertRC(rc);
1436 pGVM->gvmm.s.VMMapObj = NIL_RTR0MEMOBJ;
1437 }
1438
1439 if (pGVM->gvmm.s.VMPagesMemObj != NIL_RTR0MEMOBJ)
1440 {
1441 rc = RTR0MemObjFree(pGVM->gvmm.s.VMPagesMemObj, false /* fFreeMappings */); AssertRC(rc);
1442 pGVM->gvmm.s.VMPagesMemObj = NIL_RTR0MEMOBJ;
1443 }
1444
1445 for (VMCPUID i = 0; i < pGVM->cCpus; i++)
1446 {
1447 if (pGVM->aCpus[i].gvmm.s.HaltEventMulti != NIL_RTSEMEVENTMULTI)
1448 {
1449 rc = RTSemEventMultiDestroy(pGVM->aCpus[i].gvmm.s.HaltEventMulti); AssertRC(rc);
1450 pGVM->aCpus[i].gvmm.s.HaltEventMulti = NIL_RTSEMEVENTMULTI;
1451 }
1452 if (pGVM->aCpus[i].gvmm.s.VMCpuMapObj != NIL_RTR0MEMOBJ)
1453 {
1454 rc = RTR0MemObjFree(pGVM->aCpus[i].gvmm.s.VMCpuMapObj, false /* fFreeMappings */); AssertRC(rc);
1455 pGVM->aCpus[i].gvmm.s.VMCpuMapObj = NIL_RTR0MEMOBJ;
1456 }
1457 }
1458
1459 /* the GVM structure itself. */
1460 pGVM->u32Magic |= UINT32_C(0x80000000);
1461 Assert(pGVM->gvmm.s.VMMemObj != NIL_RTR0MEMOBJ);
1462 rc = RTR0MemObjFree(pGVM->gvmm.s.VMMemObj, true /*fFreeMappings*/); AssertRC(rc);
1463 pGVM = NULL;
1464
1465 /* Re-acquire the UsedLock before freeing the handle since we're updating handle fields. */
1466 rc = GVMMR0_USED_EXCLUSIVE_LOCK(pGVMM);
1467 AssertRC(rc);
1468 }
1469 /* else: GVMMR0CreateVM cleanup. */
1470
1471 /*
1472 * Free the handle.
1473 */
1474 pHandle->iNext = pGVMM->iFreeHead;
1475 pGVMM->iFreeHead = iHandle;
1476 ASMAtomicWriteNullPtr(&pHandle->pGVM);
1477 ASMAtomicWriteNullPtr(&pHandle->pvObj);
1478 ASMAtomicWriteNullPtr(&pHandle->pSession);
1479 ASMAtomicWriteHandle(&pHandle->hEMT0, NIL_RTNATIVETHREAD);
1480 ASMAtomicWriteU32(&pHandle->ProcId, NIL_RTPROCESS);
1481
1482 GVMMR0_USED_EXCLUSIVE_UNLOCK(pGVMM);
1483 gvmmR0CreateDestroyUnlock(pGVMM);
1484 LogFlow(("gvmmR0HandleObjDestructor: returns\n"));
1485}
1486
1487
1488/**
1489 * Registers the calling thread as the EMT of a Virtual CPU.
1490 *
1491 * Note that VCPU 0 is automatically registered during VM creation.
1492 *
1493 * @returns VBox status code
1494 * @param pGVM The global (ring-0) VM structure.
1495 * @param idCpu VCPU id to register the current thread as.
1496 */
1497GVMMR0DECL(int) GVMMR0RegisterVCpu(PGVM pGVM, VMCPUID idCpu)
1498{
1499 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1500
1501 /*
1502 * Validate the VM structure, state and handle.
1503 */
1504 PGVMM pGVMM;
1505 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /* fTakeUsedLock */); /** @todo take lock here. */
1506 if (RT_SUCCESS(rc))
1507 {
1508 if (idCpu < pGVM->cCpus)
1509 {
1510 /* Check that the EMT isn't already assigned to a thread. */
1511 if (pGVM->aCpus[idCpu].hEMT == NIL_RTNATIVETHREAD)
1512 {
1513 Assert(pGVM->aCpus[idCpu].hNativeThreadR0 == NIL_RTNATIVETHREAD);
1514
1515 /* A thread may only be one EMT. */
1516 RTNATIVETHREAD const hNativeSelf = RTThreadNativeSelf();
1517 for (VMCPUID iCpu = 0; iCpu < pGVM->cCpus; iCpu++)
1518 AssertBreakStmt(pGVM->aCpus[iCpu].hEMT != hNativeSelf, rc = VERR_INVALID_PARAMETER);
1519 if (RT_SUCCESS(rc))
1520 {
1521 /*
1522 * Do the assignment, then try to set up the hook. Undo if that fails.
1523 */
1524 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = RTThreadNativeSelf();
1525
1526 rc = VMMR0ThreadCtxHookCreateForEmt(&pGVM->aCpus[idCpu]);
1527 if (RT_SUCCESS(rc))
1528 CPUMR0RegisterVCpuThread(&pGVM->aCpus[idCpu]);
1529 else
1530 pGVM->aCpus[idCpu].hNativeThreadR0 = pGVM->aCpus[idCpu].hEMT = NIL_RTNATIVETHREAD;
1531 }
1532 }
1533 else
1534 rc = VERR_ACCESS_DENIED;
1535 }
1536 else
1537 rc = VERR_INVALID_CPU_ID;
1538 }
1539 return rc;
1540}
1541
1542
1543/**
1544 * Deregisters the calling thread as the EMT of a Virtual CPU.
1545 *
1546 * Note that VCPU 0 shall call GVMMR0DestroyVM instead of this API.
1547 *
1548 * @returns VBox status code
1549 * @param pGVM The global (ring-0) VM structure.
1550 * @param idCpu VCPU id to deregister the current thread from.
1551 */
1552GVMMR0DECL(int) GVMMR0DeregisterVCpu(PGVM pGVM, VMCPUID idCpu)
1553{
1554 AssertReturn(idCpu != 0, VERR_INVALID_FUNCTION);
1555
1556 /*
1557 * Validate the VM structure, state and handle.
1558 */
1559 PGVMM pGVMM;
1560 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1561 if (RT_SUCCESS(rc))
1562 {
1563 /*
1564 * Take the destruction lock and recheck the handle state to
1565 * prevent racing GVMMR0DestroyVM.
1566 */
1567 gvmmR0CreateDestroyLock(pGVMM);
1568 uint32_t hSelf = pGVM->hSelf;
1569 ASMCompilerBarrier();
1570 if ( hSelf < RT_ELEMENTS(pGVMM->aHandles)
1571 && pGVMM->aHandles[hSelf].pvObj != NULL
1572 && pGVMM->aHandles[hSelf].pGVM == pGVM)
1573 {
1574 /*
1575 * Do per-EMT cleanups.
1576 */
1577 VMMR0ThreadCtxHookDestroyForEmt(&pGVM->aCpus[idCpu]);
1578
1579 /*
1580 * Invalidate hEMT. We don't use NIL here as that would allow
1581 * GVMMR0RegisterVCpu to be called again, and we don't want that.
1582 */
1583 AssertCompile(~(RTNATIVETHREAD)1 != NIL_RTNATIVETHREAD);
1584 pGVM->aCpus[idCpu].hEMT = ~(RTNATIVETHREAD)1;
1585 pGVM->aCpus[idCpu].hNativeThreadR0 = NIL_RTNATIVETHREAD;
1586 }
1587
1588 gvmmR0CreateDestroyUnlock(pGVMM);
1589 }
1590 return rc;
1591}
1592
1593
1594/**
1595 * Looks up a GVM structure by its handle.
1596 *
1597 * @returns The GVM pointer on success, NULL on failure.
1598 * @param hGVM The global VM handle. Asserts on bad handle.
1599 */
1600GVMMR0DECL(PGVM) GVMMR0ByHandle(uint32_t hGVM)
1601{
1602 PGVMM pGVMM;
1603 GVMM_GET_VALID_INSTANCE(pGVMM, NULL);
1604
1605 /*
1606 * Validate.
1607 */
1608 AssertReturn(hGVM != NIL_GVM_HANDLE, NULL);
1609 AssertReturn(hGVM < RT_ELEMENTS(pGVMM->aHandles), NULL);
1610
1611 /*
1612 * Look it up.
1613 */
1614 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1615 AssertPtrReturn(pHandle->pvObj, NULL);
1616 PGVM pGVM = pHandle->pGVM;
1617 AssertPtrReturn(pGVM, NULL);
1618
1619 return pGVM;
1620}
1621
1622
1623/**
1624 * Check that the given GVM and VM structures match up.
1625 *
1626 * The calling thread must be in the same process as the VM. All current lookups
1627 * are by threads inside the same process, so this will not be an issue.
1628 *
1629 * @returns VBox status code.
1630 * @param pGVM The global (ring-0) VM structure.
1631 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1632 * @param fTakeUsedLock Whether to take the used lock or not. We take it in
1633 * shared mode when requested.
1634 *
1635 * Be very careful if not taking the lock as it's
1636 * possible that the VM will disappear then!
1637 *
1638 * @remark This will not assert on an invalid pGVM but will try to return silently.
1639 */
1640static int gvmmR0ByGVM(PGVM pGVM, PGVMM *ppGVMM, bool fTakeUsedLock)
1641{
1642 /*
1643 * Check the pointers.
1644 */
1645 int rc;
1646 if (RT_LIKELY( RT_VALID_PTR(pGVM)
1647 && ((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0 ))
1648 {
1649 /*
1650 * Get the pGVMM instance and check the VM handle.
1651 */
1652 PGVMM pGVMM;
1653 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1654
1655 uint16_t hGVM = pGVM->hSelf;
1656 if (RT_LIKELY( hGVM != NIL_GVM_HANDLE
1657 && hGVM < RT_ELEMENTS(pGVMM->aHandles)))
1658 {
1659 RTPROCESS const pidSelf = RTProcSelf();
1660 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1661 if (fTakeUsedLock)
1662 {
1663 rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
1664 AssertRCReturn(rc, rc);
1665 }
1666
1667 if (RT_LIKELY( pHandle->pGVM == pGVM
1668 && pHandle->ProcId == pidSelf
1669 && RT_VALID_PTR(pHandle->pvObj)))
1670 {
1671 /*
1672 * Some more VM data consistency checks.
1673 */
1674 if (RT_LIKELY( pGVM->cCpusUnsafe == pGVM->cCpus
1675 && pGVM->hSelfUnsafe == hGVM
1676 && pGVM->pSelf == pGVM))
1677 {
1678 if (RT_LIKELY( pGVM->enmVMState >= VMSTATE_CREATING
1679 && pGVM->enmVMState <= VMSTATE_TERMINATED))
1680 {
1681 *ppGVMM = pGVMM;
1682 return VINF_SUCCESS;
1683 }
1684 rc = VERR_INCONSISTENT_VM_HANDLE;
1685 }
1686 else
1687 rc = VERR_INCONSISTENT_VM_HANDLE;
1688 }
1689 else
1690 rc = VERR_INVALID_VM_HANDLE;
1691
1692 if (fTakeUsedLock)
1693 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
1694 }
1695 else
1696 rc = VERR_INVALID_VM_HANDLE;
1697 }
1698 else
1699 rc = VERR_INVALID_POINTER;
1700 return rc;
1701}
1702
1703
1704/**
1705 * Validates a GVM/VM pair.
1706 *
1707 * @returns VBox status code.
1708 * @param pGVM The global (ring-0) VM structure.
1709 */
1710GVMMR0DECL(int) GVMMR0ValidateGVM(PGVM pGVM)
1711{
1712 PGVMM pGVMM;
1713 return gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
1714}
1715
1716
1717/**
1718 * Check that the given GVM and VM structures match up.
1719 *
1720 * The calling thread must be in the same process as the VM. All current lookups
1721 * are by threads inside the same process, so this will not be an issue.
1722 *
1723 * @returns VBox status code.
1724 * @param pGVM The global (ring-0) VM structure.
1725 * @param idCpu The (alleged) Virtual CPU ID of the calling EMT.
1726 * @param ppGVMM Where to store the pointer to the GVMM instance data.
1727 * @thread EMT
1728 *
1729 * @remarks This will assert in all failure paths.
1730 */
1731static int gvmmR0ByGVMandEMT(PGVM pGVM, VMCPUID idCpu, PGVMM *ppGVMM)
1732{
1733 /*
1734 * Check the pointers.
1735 */
1736 AssertPtrReturn(pGVM, VERR_INVALID_POINTER);
1737 AssertReturn(((uintptr_t)pGVM & PAGE_OFFSET_MASK) == 0, VERR_INVALID_POINTER);
1738
1739 /*
1740 * Get the pGVMM instance and check the VM handle.
1741 */
1742 PGVMM pGVMM;
1743 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
1744
1745 uint16_t hGVM = pGVM->hSelf;
1746 ASMCompilerBarrier();
1747 AssertReturn( hGVM != NIL_GVM_HANDLE
1748 && hGVM < RT_ELEMENTS(pGVMM->aHandles), VERR_INVALID_VM_HANDLE);
1749
1750 RTPROCESS const pidSelf = RTProcSelf();
1751 PGVMHANDLE pHandle = &pGVMM->aHandles[hGVM];
1752 AssertReturn( pHandle->pGVM == pGVM
1753 && pHandle->ProcId == pidSelf
1754 && RT_VALID_PTR(pHandle->pvObj),
1755 VERR_INVALID_HANDLE);
1756
1757 /*
1758 * Check the EMT claim.
1759 */
1760 RTNATIVETHREAD const hAllegedEMT = RTThreadNativeSelf();
1761 AssertReturn(idCpu < pGVM->cCpus, VERR_INVALID_CPU_ID);
1762 AssertReturn(pGVM->aCpus[idCpu].hEMT == hAllegedEMT, VERR_NOT_OWNER);
1763
1764 /*
1765 * Some more VM data consistency checks.
1766 */
1767 AssertReturn(pGVM->cCpusUnsafe == pGVM->cCpus, VERR_INCONSISTENT_VM_HANDLE);
1768 AssertReturn(pGVM->hSelfUnsafe == hGVM, VERR_INCONSISTENT_VM_HANDLE);
1769 AssertReturn( pGVM->enmVMState >= VMSTATE_CREATING
1770 && pGVM->enmVMState <= VMSTATE_TERMINATED, VERR_INCONSISTENT_VM_HANDLE);
1771
1772 *ppGVMM = pGVMM;
1773 return VINF_SUCCESS;
1774}
1775
1776
1777/**
1778 * Validates a GVM/EMT pair.
1779 *
1780 * @returns VBox status code.
1781 * @param pGVM The global (ring-0) VM structure.
1782 * @param idCpu The Virtual CPU ID of the calling EMT.
1783 * @thread EMT(idCpu)
1784 */
1785GVMMR0DECL(int) GVMMR0ValidateGVMandEMT(PGVM pGVM, VMCPUID idCpu)
1786{
1787 PGVMM pGVMM;
1788 return gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
1789}
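1790
/*
 * Usage sketch (illustrative only; gvmmR0ExampleOnEmt and its caller are hypothetical,
 * not part of this file): a ring-0 request handler running on an EMT would typically
 * validate the GVM/EMT pair before touching per-VCPU data.
 */
static int gvmmR0ExampleOnEmt(PGVM pGVM, VMCPUID idCpu)
{
    int rc = GVMMR0ValidateGVMandEMT(pGVM, idCpu);
    if (RT_FAILURE(rc))
        return rc;                              /* bad handle, wrong process, or not EMT(idCpu) */
    PGVMCPU pGVCpu = &pGVM->aCpus[idCpu];       /* safe: idCpu < pGVM->cCpus was checked above */
    NOREF(pGVCpu);                              /* ... do the actual per-VCPU work here ... */
    return VINF_SUCCESS;
}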
1790
1791
1792/**
1793 * Looks up the VM belonging to the specified EMT thread.
1794 *
1795 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1796 * unnecessary kernel panics when the EMT thread hits an assertion. The
1797 * call may or not be an EMT thread.
1798 *
1799 * @returns Pointer to the VM on success, NULL on failure.
1800 * @param hEMT The native thread handle of the EMT.
1801 * NIL_RTNATIVETHREAD means the current thread
1802 */
1803GVMMR0DECL(PVMCC) GVMMR0GetVMByEMT(RTNATIVETHREAD hEMT)
1804{
1805 /*
1806 * No Assertions here as we're usually called in an AssertMsgN or
1807 * RTAssert* context.
1808 */
1809 PGVMM pGVMM = g_pGVMM;
1810 if ( !RT_VALID_PTR(pGVMM)
1811 || pGVMM->u32Magic != GVMM_MAGIC)
1812 return NULL;
1813
1814 if (hEMT == NIL_RTNATIVETHREAD)
1815 hEMT = RTThreadNativeSelf();
1816 RTPROCESS ProcId = RTProcSelf();
1817
1818 /*
1819 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1820 */
1821/** @todo introduce some pid hash table here, please. */
1822 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1823 {
1824 if ( pGVMM->aHandles[i].iSelf == i
1825 && pGVMM->aHandles[i].ProcId == ProcId
1826 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1827 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1828 {
1829 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1830 return pGVMM->aHandles[i].pGVM;
1831
1832 /* This is fairly safe with the current process-per-VM approach. */
1833 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1834 VMCPUID const cCpus = pGVM->cCpus;
1835 ASMCompilerBarrier();
1836 if ( cCpus < 1
1837 || cCpus > VMM_MAX_CPU_COUNT)
1838 continue;
1839 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1840 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1841 return pGVMM->aHandles[i].pGVM;
1842 }
1843 }
1844 return NULL;
1845}
1846
1847
1848/**
1849 * Looks up the GVMCPU belonging to the specified EMT thread.
1850 *
1851 * This is used by the assertion machinery in VMMR0.cpp to avoid causing
1852 * unnecessary kernel panics when the EMT thread hits an assertion. The
1853 * caller may or may not be an EMT thread.
1854 *
1855 * @returns Pointer to the VCPU on success, NULL on failure.
1856 * @param hEMT The native thread handle of the EMT.
1857 * NIL_RTNATIVETHREAD means the current thread
1858 */
1859GVMMR0DECL(PGVMCPU) GVMMR0GetGVCpuByEMT(RTNATIVETHREAD hEMT)
1860{
1861 /*
1862 * No Assertions here as we're usually called in an AssertMsgN,
1863 * RTAssert*, Log and LogRel contexts.
1864 */
1865 PGVMM pGVMM = g_pGVMM;
1866 if ( !RT_VALID_PTR(pGVMM)
1867 || pGVMM->u32Magic != GVMM_MAGIC)
1868 return NULL;
1869
1870 if (hEMT == NIL_RTNATIVETHREAD)
1871 hEMT = RTThreadNativeSelf();
1872 RTPROCESS ProcId = RTProcSelf();
1873
1874 /*
1875 * Search the handles in a linear fashion as we don't dare to take the lock (assert).
1876 */
1877/** @todo introduce some pid hash table here, please. */
1878 for (unsigned i = 1; i < RT_ELEMENTS(pGVMM->aHandles); i++)
1879 {
1880 if ( pGVMM->aHandles[i].iSelf == i
1881 && pGVMM->aHandles[i].ProcId == ProcId
1882 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
1883 && RT_VALID_PTR(pGVMM->aHandles[i].pGVM))
1884 {
1885 PGVM pGVM = pGVMM->aHandles[i].pGVM;
1886 if (pGVMM->aHandles[i].hEMT0 == hEMT)
1887 return &pGVM->aCpus[0];
1888
1889 /* This is fairly safe with the current process-per-VM approach. */
1890 VMCPUID const cCpus = pGVM->cCpus;
1891 ASMCompilerBarrier();
1893 if ( cCpus < 1
1894 || cCpus > VMM_MAX_CPU_COUNT)
1895 continue;
1896 for (VMCPUID idCpu = 1; idCpu < cCpus; idCpu++)
1897 if (pGVM->aCpus[idCpu].hEMT == hEMT)
1898 return &pGVM->aCpus[idCpu];
1899 }
1900 }
1901 return NULL;
1902}
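1903
/*
 * Usage sketch (illustrative only; gvmmR0ExampleLogCurrentEmt is a hypothetical helper):
 * assertion or logging code that may run on any thread can look up the current EMT
 * without taking any locks; a NULL return simply means "not an EMT of any VM".
 */
static void gvmmR0ExampleLogCurrentEmt(void)
{
    PGVMCPU pGVCpu = GVMMR0GetGVCpuByEMT(NIL_RTNATIVETHREAD); /* current thread */
    if (pGVCpu)
        SUPR0Printf("GVMM: running on EMT%u\n", pGVCpu->idCpu);
}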
1903
1904
1905/**
1906 * This will wake up expired and soon-to-be-expired VMs.
1907 *
1908 * @returns Number of VMs that have been woken up.
1909 * @param pGVMM Pointer to the GVMM instance data.
1910 * @param u64Now The current time.
1911 */
1912static unsigned gvmmR0SchedDoWakeUps(PGVMM pGVMM, uint64_t u64Now)
1913{
1914 /*
1915 * Skip this if we've been disabled, either because of high resolution
1916 * wakeups or by the user.
1917 */
1918 if (!pGVMM->fDoEarlyWakeUps)
1919 return 0;
1920
1921/** @todo Rewrite this algorithm. See performance defect XYZ. */
1922
1923 /*
1924 * A cheap optimization to stop wasting so much time here on big setups.
1925 */
1926 const uint64_t uNsEarlyWakeUp2 = u64Now + pGVMM->nsEarlyWakeUp2;
1927 if ( pGVMM->cHaltedEMTs == 0
1928 || uNsEarlyWakeUp2 > pGVMM->uNsNextEmtWakeup)
1929 return 0;
1930
1931 /*
1932 * Only one thread doing this at a time.
1933 */
1934 if (!ASMAtomicCmpXchgBool(&pGVMM->fDoingEarlyWakeUps, true, false))
1935 return 0;
1936
1937 /*
1938 * The first pass will wake up VMs which have actually expired
1939 * and look for VMs that should be woken up in the 2nd and 3rd passes.
1940 */
1941 const uint64_t uNsEarlyWakeUp1 = u64Now + pGVMM->nsEarlyWakeUp1;
1942 uint64_t u64Min = UINT64_MAX;
1943 unsigned cWoken = 0;
1944 unsigned cHalted = 0;
1945 unsigned cTodo2nd = 0;
1946 unsigned cTodo3rd = 0;
1947 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1948 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1949 i = pGVMM->aHandles[i].iNext)
1950 {
1951 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1952 if ( RT_VALID_PTR(pCurGVM)
1953 && pCurGVM->u32Magic == GVM_MAGIC)
1954 {
1955 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1956 {
1957 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1958 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
1959 if (u64)
1960 {
1961 if (u64 <= u64Now)
1962 {
1963 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
1964 {
1965 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
1966 AssertRC(rc);
1967 cWoken++;
1968 }
1969 }
1970 else
1971 {
1972 cHalted++;
1973 if (u64 <= uNsEarlyWakeUp1)
1974 cTodo2nd++;
1975 else if (u64 <= uNsEarlyWakeUp2)
1976 cTodo3rd++;
1977 else if (u64 < u64Min)
1978 u64Min = u64;
1979 }
1980 }
1981 }
1982 }
1983 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
1984 }
1985
1986 if (cTodo2nd)
1987 {
1988 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
1989 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
1990 i = pGVMM->aHandles[i].iNext)
1991 {
1992 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
1993 if ( RT_VALID_PTR(pCurGVM)
1994 && pCurGVM->u32Magic == GVM_MAGIC)
1995 {
1996 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
1997 {
1998 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
1999 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2000 if ( u64
2001 && u64 <= uNsEarlyWakeUp1)
2002 {
2003 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2004 {
2005 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2006 AssertRC(rc);
2007 cWoken++;
2008 }
2009 }
2010 }
2011 }
2012 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2013 }
2014 }
2015
2016 if (cTodo3rd)
2017 {
2018 for (unsigned i = pGVMM->iUsedHead, cGuard = 0;
2019 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2020 i = pGVMM->aHandles[i].iNext)
2021 {
2022 PGVM pCurGVM = pGVMM->aHandles[i].pGVM;
2023 if ( RT_VALID_PTR(pCurGVM)
2024 && pCurGVM->u32Magic == GVM_MAGIC)
2025 {
2026 for (VMCPUID idCpu = 0; idCpu < pCurGVM->cCpus; idCpu++)
2027 {
2028 PGVMCPU pCurGVCpu = &pCurGVM->aCpus[idCpu];
2029 uint64_t u64 = ASMAtomicUoReadU64(&pCurGVCpu->gvmm.s.u64HaltExpire);
2030 if ( u64
2031 && u64 <= uNsEarlyWakeUp2)
2032 {
2033 if (ASMAtomicXchgU64(&pCurGVCpu->gvmm.s.u64HaltExpire, 0))
2034 {
2035 int rc = RTSemEventMultiSignal(pCurGVCpu->gvmm.s.HaltEventMulti);
2036 AssertRC(rc);
2037 cWoken++;
2038 }
2039 }
2040 }
2041 }
2042 AssertLogRelBreak(cGuard++ < RT_ELEMENTS(pGVMM->aHandles));
2043 }
2044 }
2045
2046 /*
2047 * Set the minimum value.
2048 */
2049 pGVMM->uNsNextEmtWakeup = u64Min;
2050
2051 ASMAtomicWriteBool(&pGVMM->fDoingEarlyWakeUps, false);
2052 return cWoken;
2053}
2054
2055
2056/**
2057 * Halt the EMT thread.
2058 *
2059 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2060 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2061 * @param pGVM The global (ring-0) VM structure.
2062 * @param pGVCpu The global (ring-0) CPU structure of the calling
2063 * EMT.
2064 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2065 * @thread EMT(pGVCpu).
2066 */
2067GVMMR0DECL(int) GVMMR0SchedHalt(PGVM pGVM, PGVMCPU pGVCpu, uint64_t u64ExpireGipTime)
2068{
2069 LogFlow(("GVMMR0SchedHalt: pGVM=%p pGVCpu=%p(%d) u64ExpireGipTime=%#RX64\n",
2070 pGVM, pGVCpu, pGVCpu->idCpu, u64ExpireGipTime));
2071 GVMM_CHECK_SMAP_SETUP();
2072 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2073
2074 PGVMM pGVMM;
2075 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2076
2077 pGVM->gvmm.s.StatsSched.cHaltCalls++;
2078 Assert(!pGVCpu->gvmm.s.u64HaltExpire);
2079
2080 /*
2081 * If we're doing early wake-ups, we must take the UsedList lock before we
2082 * start querying the current time.
2083 * Note! Interrupts must NOT be disabled at this point because we ask for GIP time!
2084 */
2085 bool const fDoEarlyWakeUps = pGVMM->fDoEarlyWakeUps;
2086 if (fDoEarlyWakeUps)
2087 {
2088 int rc2 = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc2);
2089 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2090 }
2091
2092 pGVCpu->gvmm.s.iCpuEmt = ASMGetApicId();
2093
2094 /* GIP hack: We might be frequently sleeping for short intervals where the
2095 difference between GIP and system time matters on systems with high resolution
2096 system time. So, convert the input from GIP to System time in that case. */
2097 Assert(ASMGetFlags() & X86_EFL_IF);
2098 const uint64_t u64NowSys = RTTimeSystemNanoTS();
2099 const uint64_t u64NowGip = RTTimeNanoTS();
2100 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2101
2102 if (fDoEarlyWakeUps)
2103 {
2104 pGVM->gvmm.s.StatsSched.cHaltWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64NowGip);
2105 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2106 }
2107
2108 /*
2109 * Go to sleep if we must...
2110 * Cap the sleep time to 1 second to be on the safe side.
2111 */
2112 int rc;
2113 uint64_t cNsInterval = u64ExpireGipTime - u64NowGip;
2114 if ( u64NowGip < u64ExpireGipTime
2115 && cNsInterval >= (pGVMM->cEMTs > pGVMM->cEMTsMeansCompany
2116 ? pGVMM->nsMinSleepCompany
2117 : pGVMM->nsMinSleepAlone))
2118 {
2119 pGVM->gvmm.s.StatsSched.cHaltBlocking++;
2120 if (cNsInterval > RT_NS_1SEC)
2121 u64ExpireGipTime = u64NowGip + RT_NS_1SEC;
2122 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, u64ExpireGipTime);
2123 ASMAtomicIncU32(&pGVMM->cHaltedEMTs);
2124 if (fDoEarlyWakeUps)
2125 {
2126 if (u64ExpireGipTime < pGVMM->uNsNextEmtWakeup)
2127 pGVMM->uNsNextEmtWakeup = u64ExpireGipTime;
2128 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2129 }
2130 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2131
2132 rc = RTSemEventMultiWaitEx(pGVCpu->gvmm.s.HaltEventMulti,
2133 RTSEMWAIT_FLAGS_ABSOLUTE | RTSEMWAIT_FLAGS_NANOSECS | RTSEMWAIT_FLAGS_INTERRUPTIBLE,
2134 u64NowGip > u64NowSys ? u64ExpireGipTime : u64NowSys + cNsInterval);
2135 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2136
2137 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2138 ASMAtomicDecU32(&pGVMM->cHaltedEMTs);
2139
2140 /* Reset the semaphore to try to prevent a few false wake-ups. */
2141 if (rc == VINF_SUCCESS)
2142 {
2143 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2144 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2145 }
2146 else if (rc == VERR_TIMEOUT)
2147 {
2148 pGVM->gvmm.s.StatsSched.cHaltTimeouts++;
2149 rc = VINF_SUCCESS;
2150 }
2151 }
2152 else
2153 {
2154 pGVM->gvmm.s.StatsSched.cHaltNotBlocking++;
2155 if (fDoEarlyWakeUps)
2156 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2157 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2158 RTSemEventMultiReset(pGVCpu->gvmm.s.HaltEventMulti);
2159 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2160 rc = VINF_SUCCESS;
2161 }
2162
2163 return rc;
2164}
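2165
/*
 * Usage sketch (illustrative only; the 1 ms deadline and the function name are
 * assumptions): an idle EMT halts until it is woken up or the GIP deadline
 * expires.  Note that GVMMR0SchedHalt caps the actual sleep at one second.
 */
static int gvmmR0ExampleHaltOneMs(PGVM pGVM, PGVMCPU pGVCpu)
{
    uint64_t const u64ExpireGipTime = RTTimeNanoTS() + RT_NS_1MS;   /* expressed as GIP time */
    int rc = GVMMR0SchedHalt(pGVM, pGVCpu, u64ExpireGipTime);
    /* VINF_SUCCESS: timed out or woken by GVMMR0SchedWakeUp*; VERR_INTERRUPTED: signal pending. */
    return rc;
}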
2165
2166
2167/**
2168 * Halt the EMT thread.
2169 *
2170 * @returns VINF_SUCCESS normal wakeup (timeout or kicked by other thread).
2171 * VERR_INTERRUPTED if a signal was scheduled for the thread.
2172 * @param pGVM The global (ring-0) VM structure.
2173 * @param idCpu The Virtual CPU ID of the calling EMT.
2174 * @param u64ExpireGipTime The time for the sleep to expire expressed as GIP time.
2175 * @thread EMT(idCpu).
2176 */
2177GVMMR0DECL(int) GVMMR0SchedHaltReq(PGVM pGVM, VMCPUID idCpu, uint64_t u64ExpireGipTime)
2178{
2179 GVMM_CHECK_SMAP_SETUP();
2180 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2181 PGVMM pGVMM;
2182 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2183 if (RT_SUCCESS(rc))
2184 {
2185 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2186 rc = GVMMR0SchedHalt(pGVM, &pGVM->aCpus[idCpu], u64ExpireGipTime);
2187 }
2188 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2189 return rc;
2190}
2191
2192
2193
2194/**
2195 * Worker for GVMMR0SchedWakeUp and GVMMR0SchedWakeUpAndPokeCpus that wakes up
2196 * a sleeping EMT.
2197 *
2198 * @retval VINF_SUCCESS if successfully woken up.
2199 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2200 *
2201 * @param pGVM The global (ring-0) VM structure.
2202 * @param pGVCpu The global (ring-0) VCPU structure.
2203 */
2204DECLINLINE(int) gvmmR0SchedWakeUpOne(PGVM pGVM, PGVMCPU pGVCpu)
2205{
2206 pGVM->gvmm.s.StatsSched.cWakeUpCalls++;
2207
2208 /*
2209 * Signal the semaphore regardless of whether the EMT is currently blocked on it.
2210 *
2211 * The reason for this is that there is absolutely no way we can be 100%
2212 * certain that it isn't *about* to go to sleep on it and just got
2213 * delayed a bit en route. So, we will always signal the semaphore when
2214 * the EMT is flagged as halted in the VMM.
2215 */
2216/** @todo we can optimize some of that by means of the pVCpu->enmState now. */
2217 int rc;
2218 if (pGVCpu->gvmm.s.u64HaltExpire)
2219 {
2220 rc = VINF_SUCCESS;
2221 ASMAtomicWriteU64(&pGVCpu->gvmm.s.u64HaltExpire, 0);
2222 }
2223 else
2224 {
2225 rc = VINF_GVM_NOT_BLOCKED;
2226 pGVM->gvmm.s.StatsSched.cWakeUpNotHalted++;
2227 }
2228
2229 int rc2 = RTSemEventMultiSignal(pGVCpu->gvmm.s.HaltEventMulti);
2230 AssertRC(rc2);
2231
2232 return rc;
2233}
2234
2235
2236/**
2237 * Wakes up the halted EMT thread so it can service a pending request.
2238 *
2239 * @returns VBox status code.
2240 * @retval VINF_SUCCESS if successfully woken up.
2241 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2242 *
2243 * @param pGVM The global (ring-0) VM structure.
2244 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2245 * @param fTakeUsedLock Take the used lock or not
2246 * @thread Any but EMT(idCpu).
2247 */
2248GVMMR0DECL(int) GVMMR0SchedWakeUpEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2249{
2250 GVMM_CHECK_SMAP_SETUP();
2251 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2252
2253 /*
2254 * Validate input and take the UsedLock.
2255 */
2256 PGVMM pGVMM;
2257 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2258 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2259 if (RT_SUCCESS(rc))
2260 {
2261 if (idCpu < pGVM->cCpus)
2262 {
2263 /*
2264 * Do the actual job.
2265 */
2266 rc = gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2267 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2268
2269 if (fTakeUsedLock && pGVMM->fDoEarlyWakeUps)
2270 {
2271 /*
2272 * While we're here, do a round of scheduling.
2273 */
2274 Assert(ASMGetFlags() & X86_EFL_IF);
2275 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2276 pGVM->gvmm.s.StatsSched.cWakeUpWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2277 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2278 }
2279 }
2280 else
2281 rc = VERR_INVALID_CPU_ID;
2282
2283 if (fTakeUsedLock)
2284 {
2285 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2286 AssertRC(rc2);
2287 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2288 }
2289 }
2290
2291 LogFlow(("GVMMR0SchedWakeUpEx: returns %Rrc\n", rc));
2292 return rc;
2293}
2294
2295
2296/**
2297 * Wakes up the halted EMT thread so it can service a pending request.
2298 *
2299 * @returns VBox status code.
2300 * @retval VINF_SUCCESS if successfully woken up.
2301 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2302 *
2303 * @param pGVM The global (ring-0) VM structure.
2304 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2305 * @thread Any but EMT(idCpu).
2306 */
2307GVMMR0DECL(int) GVMMR0SchedWakeUp(PGVM pGVM, VMCPUID idCpu)
2308{
2309 return GVMMR0SchedWakeUpEx(pGVM, idCpu, true /* fTakeUsedLock */);
2310}
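2311
/*
 * Usage sketch (illustrative only; the request-queueing scenario and the function
 * name are assumptions): a non-EMT worker that has queued work for EMT(idCpu) kicks
 * it out of a ring-0 halt; VINF_GVM_NOT_BLOCKED only means the EMT was not sleeping
 * at that moment.
 */
static void gvmmR0ExampleKickEmt(PGVM pGVM, VMCPUID idCpu)
{
    int rc = GVMMR0SchedWakeUp(pGVM, idCpu);
    AssertMsg(rc == VINF_SUCCESS || rc == VINF_GVM_NOT_BLOCKED, ("%Rrc\n", rc));
    NOREF(rc);
}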
2311
2312
2313/**
2314 * Wakes up the halted EMT thread so it can service a pending request, no GVM
2315 * parameter and no used locking.
2316 *
2317 * @returns VBox status code.
2318 * @retval VINF_SUCCESS if successfully woken up.
2319 * @retval VINF_GVM_NOT_BLOCKED if the EMT wasn't blocked.
2320 *
2321 * @param pGVM The global (ring-0) VM structure.
2322 * @param idCpu The Virtual CPU ID of the EMT to wake up.
2323 * @thread Any but EMT(idCpu).
2324 * @deprecated Don't use in new code if possible! Use the GVM variant.
2325 */
2326GVMMR0DECL(int) GVMMR0SchedWakeUpNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2327{
2328 GVMM_CHECK_SMAP_SETUP();
2329 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2330 PGVMM pGVMM;
2331 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2332 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2333 if (RT_SUCCESS(rc))
2334 rc = GVMMR0SchedWakeUpEx(pGVM, idCpu, false /*fTakeUsedLock*/);
2335 return rc;
2336}
2337
2338
2339/**
2340 * Worker common to GVMMR0SchedPoke and GVMMR0SchedWakeUpAndPokeCpus that pokes
2341 * the Virtual CPU if it's still busy executing guest code.
2342 *
2343 * @returns VBox status code.
2344 * @retval VINF_SUCCESS if poked successfully.
2345 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2346 *
2347 * @param pGVM The global (ring-0) VM structure.
2348 * @param pVCpu The cross context virtual CPU structure.
2349 */
2350DECLINLINE(int) gvmmR0SchedPokeOne(PGVM pGVM, PVMCPUCC pVCpu)
2351{
2352 pGVM->gvmm.s.StatsSched.cPokeCalls++;
2353
2354 RTCPUID idHostCpu = pVCpu->idHostCpu;
2355 if ( idHostCpu == NIL_RTCPUID
2356 || VMCPU_GET_STATE(pVCpu) != VMCPUSTATE_STARTED_EXEC)
2357 {
2358 pGVM->gvmm.s.StatsSched.cPokeNotBusy++;
2359 return VINF_GVM_NOT_BUSY_IN_GC;
2360 }
2361
2362 /* Note: this function is not implemented on Darwin and Linux (kernel < 2.6.19) */
2363 RTMpPokeCpu(idHostCpu);
2364 return VINF_SUCCESS;
2365}
2366
2367
2368/**
2369 * Pokes an EMT if it's still busy running guest code.
2370 *
2371 * @returns VBox status code.
2372 * @retval VINF_SUCCESS if poked successfully.
2373 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2374 *
2375 * @param pGVM The global (ring-0) VM structure.
2376 * @param idCpu The ID of the virtual CPU to poke.
2377 * @param fTakeUsedLock Take the used lock or not
2378 */
2379GVMMR0DECL(int) GVMMR0SchedPokeEx(PGVM pGVM, VMCPUID idCpu, bool fTakeUsedLock)
2380{
2381 /*
2382 * Validate input and take the UsedLock.
2383 */
2384 PGVMM pGVMM;
2385 int rc = gvmmR0ByGVM(pGVM, &pGVMM, fTakeUsedLock);
2386 if (RT_SUCCESS(rc))
2387 {
2388 if (idCpu < pGVM->cCpus)
2389 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2390 else
2391 rc = VERR_INVALID_CPU_ID;
2392
2393 if (fTakeUsedLock)
2394 {
2395 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2396 AssertRC(rc2);
2397 }
2398 }
2399
2400 LogFlow(("GVMMR0SchedPokeEx: returns %Rrc\n", rc));
2401 return rc;
2402}
2403
2404
2405/**
2406 * Pokes an EMT if it's still busy running guest code.
2407 *
2408 * @returns VBox status code.
2409 * @retval VINF_SUCCESS if poked successfully.
2410 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2411 *
2412 * @param pGVM The global (ring-0) VM structure.
2413 * @param idCpu The ID of the virtual CPU to poke.
2414 */
2415GVMMR0DECL(int) GVMMR0SchedPoke(PGVM pGVM, VMCPUID idCpu)
2416{
2417 return GVMMR0SchedPokeEx(pGVM, idCpu, true /* fTakeUsedLock */);
2418}
2419
2420
2421/**
2422 * Pokes an EMT if it's still busy running guest code, no GVM parameter and no
2423 * used locking.
2424 *
2425 * @returns VBox status code.
2426 * @retval VINF_SUCCESS if poked successfully.
2427 * @retval VINF_GVM_NOT_BUSY_IN_GC if the EMT wasn't busy in GC.
2428 *
2429 * @param pGVM The global (ring-0) VM structure.
2430 * @param idCpu The ID of the virtual CPU to poke.
2431 *
2432 * @deprecated Don't use in new code if possible! Use the GVM variant.
2433 */
2434GVMMR0DECL(int) GVMMR0SchedPokeNoGVMNoLock(PGVM pGVM, VMCPUID idCpu)
2435{
2436 PGVMM pGVMM;
2437 int rc = gvmmR0ByGVM(pGVM, &pGVMM, false /*fTakeUsedLock*/);
2438 if (RT_SUCCESS(rc))
2439 {
2440 if (idCpu < pGVM->cCpus)
2441 rc = gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2442 else
2443 rc = VERR_INVALID_CPU_ID;
2444 }
2445 return rc;
2446}
2447
2448
2449/**
2450 * Wakes up a set of halted EMT threads so they can service pending requests.
2451 *
2452 * @returns VBox status code, no informational stuff.
2453 *
2454 * @param pGVM The global (ring-0) VM structure.
2455 * @param pSleepSet The set of sleepers to wake up.
2456 * @param pPokeSet The set of CPUs to poke.
2457 */
2458GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpus(PGVM pGVM, PCVMCPUSET pSleepSet, PCVMCPUSET pPokeSet)
2459{
2460 AssertPtrReturn(pSleepSet, VERR_INVALID_POINTER);
2461 AssertPtrReturn(pPokeSet, VERR_INVALID_POINTER);
2462 GVMM_CHECK_SMAP_SETUP();
2463 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2464 RTNATIVETHREAD hSelf = RTThreadNativeSelf();
2465
2466 /*
2467 * Validate input and take the UsedLock.
2468 */
2469 PGVMM pGVMM;
2470 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /* fTakeUsedLock */);
2471 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2472 if (RT_SUCCESS(rc))
2473 {
2474 rc = VINF_SUCCESS;
2475 VMCPUID idCpu = pGVM->cCpus;
2476 while (idCpu-- > 0)
2477 {
2478 /* Don't try to poke or wake up ourselves. */
2479 if (pGVM->aCpus[idCpu].hEMT == hSelf)
2480 continue;
2481
2482 /* just ignore errors for now. */
2483 if (VMCPUSET_IS_PRESENT(pSleepSet, idCpu))
2484 {
2485 gvmmR0SchedWakeUpOne(pGVM, &pGVM->aCpus[idCpu]);
2486 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2487 }
2488 else if (VMCPUSET_IS_PRESENT(pPokeSet, idCpu))
2489 {
2490 gvmmR0SchedPokeOne(pGVM, &pGVM->aCpus[idCpu]);
2491 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2492 }
2493 }
2494
2495 int rc2 = GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2496 AssertRC(rc2);
2497 GVMM_CHECK_SMAP_CHECK2(pGVM, RT_NOTHING);
2498 }
2499
2500 LogFlow(("GVMMR0SchedWakeUpAndPokeCpus: returns %Rrc\n", rc));
2501 return rc;
2502}
2503
2504
2505/**
2506 * VMMR0 request wrapper for GVMMR0SchedWakeUpAndPokeCpus.
2507 *
2508 * @returns see GVMMR0SchedWakeUpAndPokeCpus.
2509 * @param pGVM The global (ring-0) VM structure.
2510 * @param pReq Pointer to the request packet.
2511 */
2512GVMMR0DECL(int) GVMMR0SchedWakeUpAndPokeCpusReq(PGVM pGVM, PGVMMSCHEDWAKEUPANDPOKECPUSREQ pReq)
2513{
2514 /*
2515 * Validate input and pass it on.
2516 */
2517 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2518 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2519
2520 return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &pReq->SleepSet, &pReq->PokeSet);
2521}
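2522
/*
 * Usage sketch (illustrative only; the set building below is an assumption, not a
 * pattern taken from this file): wake up every other EMT in one go.  CPUs known to
 * be executing guest code would go into the poke set instead; when a CPU is in both
 * sets, only the wake-up path is taken.
 */
static int gvmmR0ExampleWakeAllButSelf(PGVM pGVM, VMCPUID idCpuSelf)
{
    VMCPUSET SleepSet, PokeSet;
    VMCPUSET_EMPTY(&SleepSet);
    VMCPUSET_EMPTY(&PokeSet);
    for (VMCPUID idCpu = 0; idCpu < pGVM->cCpus; idCpu++)
        if (idCpu != idCpuSelf)
            VMCPUSET_ADD(&SleepSet, idCpu); /* signalling the halt semaphore is harmless if not halted */
    return GVMMR0SchedWakeUpAndPokeCpus(pGVM, &SleepSet, &PokeSet);
}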
2522
2523
2524
2525/**
2526 * Poll the schedule to see if someone else should get a chance to run.
2527 *
2528 * This is a bit hackish and will not work too well if the machine is
2529 * under heavy load from non-VM processes.
2530 *
2531 * @returns VINF_SUCCESS if not yielded.
2532 * VINF_GVM_YIELDED if an attempt to switch to a different VM task was made.
2533 * @param pGVM The global (ring-0) VM structure.
2534 * @param idCpu The Virtual CPU ID of the calling EMT.
2535 * @param fYield Whether to yield or not.
2536 * This is for when we're spinning in the halt loop.
2537 * @thread EMT(idCpu).
2538 */
2539GVMMR0DECL(int) GVMMR0SchedPoll(PGVM pGVM, VMCPUID idCpu, bool fYield)
2540{
2541 /*
2542 * Validate input.
2543 */
2544 PGVMM pGVMM;
2545 int rc = gvmmR0ByGVMandEMT(pGVM, idCpu, &pGVMM);
2546 if (RT_SUCCESS(rc))
2547 {
2548 /*
2549 * We currently only implement helping doing wakeups (fYield = false), so don't
2550 * bother taking the lock if gvmmR0SchedDoWakeUps is not going to do anything.
2551 */
2552 if (!fYield && pGVMM->fDoEarlyWakeUps)
2553 {
2554 rc = GVMMR0_USED_SHARED_LOCK(pGVMM); AssertRC(rc);
2555 pGVM->gvmm.s.StatsSched.cPollCalls++;
2556
2557 Assert(ASMGetFlags() & X86_EFL_IF);
2558 const uint64_t u64Now = RTTimeNanoTS(); /* (GIP time) */
2559
2560 pGVM->gvmm.s.StatsSched.cPollWakeUps += gvmmR0SchedDoWakeUps(pGVMM, u64Now);
2561
2562 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2563 }
2564 /*
2565 * Not quite sure what we could do here...
2566 */
2567 else if (fYield)
2568 rc = VERR_NOT_IMPLEMENTED; /** @todo implement this... */
2569 else
2570 rc = VINF_SUCCESS;
2571 }
2572
2573 LogFlow(("GVMMR0SchedPoll: returns %Rrc\n", rc));
2574 return rc;
2575}
2576
2577
2578#ifdef GVMM_SCHED_WITH_PPT
2579/**
2580 * Timer callback for the periodic preemption timer.
2581 *
2582 * @param pTimer The timer handle.
2583 * @param pvUser Pointer to the per cpu structure.
2584 * @param iTick The current tick.
2585 */
2586static DECLCALLBACK(void) gvmmR0SchedPeriodicPreemptionTimerCallback(PRTTIMER pTimer, void *pvUser, uint64_t iTick)
2587{
2588 PGVMMHOSTCPU pCpu = (PGVMMHOSTCPU)pvUser;
2589 NOREF(pTimer); NOREF(iTick);
2590
2591 /*
2592 * Termination check
2593 */
2594 if (pCpu->u32Magic != GVMMHOSTCPU_MAGIC)
2595 return;
2596
2597 /*
2598 * Do the house keeping.
2599 */
2600 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2601
2602 if (++pCpu->Ppt.iTickHistorization >= pCpu->Ppt.cTicksHistoriziationInterval)
2603 {
2604 /*
2605 * Historicize the max frequency.
2606 */
2607 uint32_t iHzHistory = ++pCpu->Ppt.iHzHistory % RT_ELEMENTS(pCpu->Ppt.aHzHistory);
2608 pCpu->Ppt.aHzHistory[iHzHistory] = pCpu->Ppt.uDesiredHz;
2609 pCpu->Ppt.iTickHistorization = 0;
2610 pCpu->Ppt.uDesiredHz = 0;
2611
2612 /*
2613 * Check if the current timer frequency needs to change.
2614 */
2615 uint32_t uHistMaxHz = 0;
2616 for (uint32_t i = 0; i < RT_ELEMENTS(pCpu->Ppt.aHzHistory); i++)
2617 if (pCpu->Ppt.aHzHistory[i] > uHistMaxHz)
2618 uHistMaxHz = pCpu->Ppt.aHzHistory[i];
2619 if (uHistMaxHz == pCpu->Ppt.uTimerHz)
2620 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2621 else if (uHistMaxHz)
2622 {
2623 /*
2624 * Reprogram it.
2625 */
2626 pCpu->Ppt.cChanges++;
2627 pCpu->Ppt.iTickHistorization = 0;
2628 pCpu->Ppt.uTimerHz = uHistMaxHz;
2629 uint32_t const cNsInterval = RT_NS_1SEC / uHistMaxHz;
2630 pCpu->Ppt.cNsInterval = cNsInterval;
2631 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2632 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2633 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2634 / cNsInterval;
2635 else
2636 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2637 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2638
2639 /*SUPR0Printf("Cpu%u: change to %u Hz / %u ns\n", pCpu->idxCpuSet, uHistMaxHz, cNsInterval);*/
2640 RTTimerChangeInterval(pTimer, cNsInterval);
2641 }
2642 else
2643 {
2644 /*
2645 * Stop it.
2646 */
2647 pCpu->Ppt.fStarted = false;
2648 pCpu->Ppt.uTimerHz = 0;
2649 pCpu->Ppt.cNsInterval = 0;
2650 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2651
2652 /*SUPR0Printf("Cpu%u: stopping (%u Hz)\n", pCpu->idxCpuSet, uHistMaxHz);*/
2653 RTTimerStop(pTimer);
2654 }
2655 }
2656 else
2657 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2658}
2659#endif /* GVMM_SCHED_WITH_PPT */
2660
2661
2662/**
2663 * Updates the periodic preemption timer for the calling CPU.
2664 *
2665 * The caller must have disabled preemption!
2666 * The caller must check that the host can do high resolution timers.
2667 *
2668 * @param pGVM The global (ring-0) VM structure.
2669 * @param idHostCpu The current host CPU id.
2670 * @param uHz The desired frequency.
2671 */
2672GVMMR0DECL(void) GVMMR0SchedUpdatePeriodicPreemptionTimer(PGVM pGVM, RTCPUID idHostCpu, uint32_t uHz)
2673{
2674 NOREF(pGVM);
2675#ifdef GVMM_SCHED_WITH_PPT
2676 Assert(!RTThreadPreemptIsEnabled(NIL_RTTHREAD));
2677 Assert(RTTimerCanDoHighResolution());
2678
2679 /*
2680 * Resolve the per CPU data.
2681 */
2682 uint32_t iCpu = RTMpCpuIdToSetIndex(idHostCpu);
2683 PGVMM pGVMM = g_pGVMM;
2684 if ( !RT_VALID_PTR(pGVMM)
2685 || pGVMM->u32Magic != GVMM_MAGIC)
2686 return;
2687 AssertMsgReturnVoid(iCpu < pGVMM->cHostCpus, ("iCpu=%d cHostCpus=%d\n", iCpu, pGVMM->cHostCpus));
2688 PGVMMHOSTCPU pCpu = &pGVMM->aHostCpus[iCpu];
2689 AssertMsgReturnVoid( pCpu->u32Magic == GVMMHOSTCPU_MAGIC
2690 && pCpu->idCpu == idHostCpu,
2691 ("u32Magic=%#x idCpu=% idHostCpu=%d\n", pCpu->u32Magic, pCpu->idCpu, idHostCpu));
2692
2693 /*
2694 * Check whether we need to do anything about the timer.
2695 * We have to be a little bit careful since we might race the timer
2696 * callback here.
2697 */
2698 if (uHz > 16384)
2699 uHz = 16384; /** @todo add a query method for this! */
2700 if (RT_UNLIKELY( uHz > ASMAtomicReadU32(&pCpu->Ppt.uDesiredHz)
2701 && uHz >= pCpu->Ppt.uMinHz
2702 && !pCpu->Ppt.fStarting /* solaris paranoia */))
2703 {
2704 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2705
2706 pCpu->Ppt.uDesiredHz = uHz;
2707 uint32_t cNsInterval = 0;
2708 if (!pCpu->Ppt.fStarted)
2709 {
2710 pCpu->Ppt.cStarts++;
2711 pCpu->Ppt.fStarted = true;
2712 pCpu->Ppt.fStarting = true;
2713 pCpu->Ppt.iTickHistorization = 0;
2714 pCpu->Ppt.uTimerHz = uHz;
2715 pCpu->Ppt.cNsInterval = cNsInterval = RT_NS_1SEC / uHz;
2716 if (cNsInterval < GVMMHOSTCPU_PPT_HIST_INTERVAL_NS)
2717 pCpu->Ppt.cTicksHistoriziationInterval = ( GVMMHOSTCPU_PPT_HIST_INTERVAL_NS
2718 + GVMMHOSTCPU_PPT_HIST_INTERVAL_NS / 2 - 1)
2719 / cNsInterval;
2720 else
2721 pCpu->Ppt.cTicksHistoriziationInterval = 1;
2722 }
2723
2724 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2725
2726 if (cNsInterval)
2727 {
2728 RTTimerChangeInterval(pCpu->Ppt.pTimer, cNsInterval);
2729 int rc = RTTimerStart(pCpu->Ppt.pTimer, cNsInterval);
2730 AssertRC(rc);
2731
2732 RTSpinlockAcquire(pCpu->Ppt.hSpinlock);
2733 if (RT_FAILURE(rc))
2734 pCpu->Ppt.fStarted = false;
2735 pCpu->Ppt.fStarting = false;
2736 RTSpinlockRelease(pCpu->Ppt.hSpinlock);
2737 }
2738 }
2739#else /* !GVMM_SCHED_WITH_PPT */
2740 NOREF(idHostCpu); NOREF(uHz);
2741#endif /* !GVMM_SCHED_WITH_PPT */
2742}
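2743
/*
 * Worked example (illustrative only; the 20 ms historization interval is an assumed
 * value of GVMMHOSTCPU_PPT_HIST_INTERVAL_NS): a VM hinting uHz = 2000 gives
 * cNsInterval = RT_NS_1SEC / 2000 = 500000 ns, so the timer callback above then
 * historizes the max frequency roughly every (20000000 + 10000000 - 1) / 500000 = 59
 * ticks, keeping the history window at about the configured interval.
 */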
2743
2744
2745/**
2746 * Calls @a pfnCallback for each VM in the system.
2747 *
2748 * This will enumerate the VMs while holding the global VM used list lock in
2749 * shared mode. So, only suitable for simple work. If more expensive work
2750 * needs doing, a different approach must be taken as using this API would
2751 * otherwise block VM creation and destruction.
2752 *
2753 * @returns VBox status code.
2754 * @param pfnCallback The callback function.
2755 * @param pvUser User argument to the callback.
2756 */
2757GVMMR0DECL(int) GVMMR0EnumVMs(PFNGVMMR0ENUMCALLBACK pfnCallback, void *pvUser)
2758{
2759 PGVMM pGVMM;
2760 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2761
2762 int rc = VINF_SUCCESS;
2763 GVMMR0_USED_SHARED_LOCK(pGVMM);
2764 for (unsigned i = pGVMM->iUsedHead, cLoops = 0;
2765 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2766 i = pGVMM->aHandles[i].iNext, cLoops++)
2767 {
2768 PGVM pGVM = pGVMM->aHandles[i].pGVM;
2769 if ( RT_VALID_PTR(pGVM)
2770 && RT_VALID_PTR(pGVMM->aHandles[i].pvObj)
2771 && pGVM->u32Magic == GVM_MAGIC)
2772 {
2773 rc = pfnCallback(pGVM, pvUser);
2774 if (rc != VINF_SUCCESS)
2775 break;
2776 }
2777
2778 AssertBreak(cLoops < RT_ELEMENTS(pGVMM->aHandles) * 4); /* paranoia */
2779 }
2780 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2781 return rc;
2782}
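2783
/*
 * Usage sketch (illustrative only; the counting callback and wrapper are hypothetical):
 * enumerate all VMs to sum up their EMT counts.  The callback runs with the used list
 * held in shared mode, so it must stay cheap (see the note above).
 */
static DECLCALLBACK(int) gvmmR0ExampleCountEmtsCallback(PGVM pGVM, void *pvUser)
{
    *(uint32_t *)pvUser += pGVM->cCpus;
    return VINF_SUCCESS;                /* anything else stops the enumeration */
}

static uint32_t gvmmR0ExampleCountAllEmts(void)
{
    uint32_t cEMTs = 0;
    GVMMR0EnumVMs(gvmmR0ExampleCountEmtsCallback, &cEMTs);
    return cEMTs;
}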
2783
2784
2785/**
2786 * Retrieves the GVMM statistics visible to the caller.
2787 *
2788 * @returns VBox status code.
2789 *
2790 * @param pStats Where to put the statistics.
2791 * @param pSession The current session.
2792 * @param pGVM The GVM to obtain statistics for. Optional.
2793 */
2794GVMMR0DECL(int) GVMMR0QueryStatistics(PGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2795{
2796 LogFlow(("GVMMR0QueryStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2797
2798 /*
2799 * Validate input.
2800 */
2801 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2802 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2803 pStats->cVMs = 0; /* (crash before taking the sem...) */
2804
2805 /*
2806 * Take the lock and get the VM statistics.
2807 */
2808 PGVMM pGVMM;
2809 if (pGVM)
2810 {
2811 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2812 if (RT_FAILURE(rc))
2813 return rc;
2814 pStats->SchedVM = pGVM->gvmm.s.StatsSched;
2815 }
2816 else
2817 {
2818 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2819 memset(&pStats->SchedVM, 0, sizeof(pStats->SchedVM));
2820
2821 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2822 AssertRCReturn(rc, rc);
2823 }
2824
2825 /*
2826 * Enumerate the VMs and add up the statistics of the ones visible to the caller.
2827 */
2828 pStats->cVMs = 0;
2829 pStats->cEMTs = 0;
2830 memset(&pStats->SchedSum, 0, sizeof(pStats->SchedSum));
2831
2832 for (unsigned i = pGVMM->iUsedHead;
2833 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2834 i = pGVMM->aHandles[i].iNext)
2835 {
2836 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2837 void *pvObj = pGVMM->aHandles[i].pvObj;
2838 if ( RT_VALID_PTR(pvObj)
2839 && RT_VALID_PTR(pOtherGVM)
2840 && pOtherGVM->u32Magic == GVM_MAGIC
2841 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2842 {
2843 pStats->cVMs++;
2844 pStats->cEMTs += pOtherGVM->cCpus;
2845
2846 pStats->SchedSum.cHaltCalls += pOtherGVM->gvmm.s.StatsSched.cHaltCalls;
2847 pStats->SchedSum.cHaltBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltBlocking;
2848 pStats->SchedSum.cHaltTimeouts += pOtherGVM->gvmm.s.StatsSched.cHaltTimeouts;
2849 pStats->SchedSum.cHaltNotBlocking += pOtherGVM->gvmm.s.StatsSched.cHaltNotBlocking;
2850 pStats->SchedSum.cHaltWakeUps += pOtherGVM->gvmm.s.StatsSched.cHaltWakeUps;
2851
2852 pStats->SchedSum.cWakeUpCalls += pOtherGVM->gvmm.s.StatsSched.cWakeUpCalls;
2853 pStats->SchedSum.cWakeUpNotHalted += pOtherGVM->gvmm.s.StatsSched.cWakeUpNotHalted;
2854 pStats->SchedSum.cWakeUpWakeUps += pOtherGVM->gvmm.s.StatsSched.cWakeUpWakeUps;
2855
2856 pStats->SchedSum.cPokeCalls += pOtherGVM->gvmm.s.StatsSched.cPokeCalls;
2857 pStats->SchedSum.cPokeNotBusy += pOtherGVM->gvmm.s.StatsSched.cPokeNotBusy;
2858
2859 pStats->SchedSum.cPollCalls += pOtherGVM->gvmm.s.StatsSched.cPollCalls;
2860 pStats->SchedSum.cPollHalts += pOtherGVM->gvmm.s.StatsSched.cPollHalts;
2861 pStats->SchedSum.cPollWakeUps += pOtherGVM->gvmm.s.StatsSched.cPollWakeUps;
2862 }
2863 }
2864
2865 /*
2866 * Copy out the per host CPU statistics.
2867 */
2868 uint32_t iDstCpu = 0;
2869 uint32_t cSrcCpus = pGVMM->cHostCpus;
2870 for (uint32_t iSrcCpu = 0; iSrcCpu < cSrcCpus; iSrcCpu++)
2871 {
2872 if (pGVMM->aHostCpus[iSrcCpu].idCpu != NIL_RTCPUID)
2873 {
2874 pStats->aHostCpus[iDstCpu].idCpu = pGVMM->aHostCpus[iSrcCpu].idCpu;
2875 pStats->aHostCpus[iDstCpu].idxCpuSet = pGVMM->aHostCpus[iSrcCpu].idxCpuSet;
2876#ifdef GVMM_SCHED_WITH_PPT
2877 pStats->aHostCpus[iDstCpu].uDesiredHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uDesiredHz;
2878 pStats->aHostCpus[iDstCpu].uTimerHz = pGVMM->aHostCpus[iSrcCpu].Ppt.uTimerHz;
2879 pStats->aHostCpus[iDstCpu].cChanges = pGVMM->aHostCpus[iSrcCpu].Ppt.cChanges;
2880 pStats->aHostCpus[iDstCpu].cStarts = pGVMM->aHostCpus[iSrcCpu].Ppt.cStarts;
2881#else
2882 pStats->aHostCpus[iDstCpu].uDesiredHz = 0;
2883 pStats->aHostCpus[iDstCpu].uTimerHz = 0;
2884 pStats->aHostCpus[iDstCpu].cChanges = 0;
2885 pStats->aHostCpus[iDstCpu].cStarts = 0;
2886#endif
2887 iDstCpu++;
2888 if (iDstCpu >= RT_ELEMENTS(pStats->aHostCpus))
2889 break;
2890 }
2891 }
2892 pStats->cHostCpus = iDstCpu;
2893
2894 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
2895
2896 return VINF_SUCCESS;
2897}
2898
2899
2900/**
2901 * VMMR0 request wrapper for GVMMR0QueryStatistics.
2902 *
2903 * @returns see GVMMR0QueryStatistics.
2904 * @param pGVM The global (ring-0) VM structure. Optional.
2905 * @param pReq Pointer to the request packet.
2906 * @param pSession The current session.
2907 */
2908GVMMR0DECL(int) GVMMR0QueryStatisticsReq(PGVM pGVM, PGVMMQUERYSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
2909{
2910 /*
2911 * Validate input and pass it on.
2912 */
2913 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
2914 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
2915 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
2916
2917 return GVMMR0QueryStatistics(&pReq->Stats, pSession, pGVM);
2918}
2919
2920
2921/**
2922 * Resets the specified GVMM statistics.
2923 *
2924 * @returns VBox status code.
2925 *
2926 * @param pStats Which statistics to reset, that is, non-zero fields indicates which to reset.
2927 * @param pSession The current session.
2928 * @param pGVM The GVM to reset statistics for. Optional.
2929 */
2930GVMMR0DECL(int) GVMMR0ResetStatistics(PCGVMMSTATS pStats, PSUPDRVSESSION pSession, PGVM pGVM)
2931{
2932 LogFlow(("GVMMR0ResetStatistics: pStats=%p pSession=%p pGVM=%p\n", pStats, pSession, pGVM));
2933
2934 /*
2935 * Validate input.
2936 */
2937 AssertPtrReturn(pSession, VERR_INVALID_POINTER);
2938 AssertPtrReturn(pStats, VERR_INVALID_POINTER);
2939
2940 /*
2941 * Take the lock and get the VM statistics.
2942 */
2943 PGVMM pGVMM;
2944 if (pGVM)
2945 {
2946 int rc = gvmmR0ByGVM(pGVM, &pGVMM, true /*fTakeUsedLock*/);
2947 if (RT_FAILURE(rc))
2948 return rc;
2949# define MAYBE_RESET_FIELD(field) \
2950 do { if (pStats->SchedVM. field ) { pGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2951 MAYBE_RESET_FIELD(cHaltCalls);
2952 MAYBE_RESET_FIELD(cHaltBlocking);
2953 MAYBE_RESET_FIELD(cHaltTimeouts);
2954 MAYBE_RESET_FIELD(cHaltNotBlocking);
2955 MAYBE_RESET_FIELD(cHaltWakeUps);
2956 MAYBE_RESET_FIELD(cWakeUpCalls);
2957 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2958 MAYBE_RESET_FIELD(cWakeUpWakeUps);
2959 MAYBE_RESET_FIELD(cPokeCalls);
2960 MAYBE_RESET_FIELD(cPokeNotBusy);
2961 MAYBE_RESET_FIELD(cPollCalls);
2962 MAYBE_RESET_FIELD(cPollHalts);
2963 MAYBE_RESET_FIELD(cPollWakeUps);
2964# undef MAYBE_RESET_FIELD
2965 }
2966 else
2967 {
2968 GVMM_GET_VALID_INSTANCE(pGVMM, VERR_GVMM_INSTANCE);
2969
2970 int rc = GVMMR0_USED_SHARED_LOCK(pGVMM);
2971 AssertRCReturn(rc, rc);
2972 }
2973
2974 /*
2975 * Enumerate the VMs and reset the statistics of the ones visible to the caller.
2976 */
2977 if (!ASMMemIsZero(&pStats->SchedSum, sizeof(pStats->SchedSum)))
2978 {
2979 for (unsigned i = pGVMM->iUsedHead;
2980 i != NIL_GVM_HANDLE && i < RT_ELEMENTS(pGVMM->aHandles);
2981 i = pGVMM->aHandles[i].iNext)
2982 {
2983 PGVM pOtherGVM = pGVMM->aHandles[i].pGVM;
2984 void *pvObj = pGVMM->aHandles[i].pvObj;
2985 if ( RT_VALID_PTR(pvObj)
2986 && RT_VALID_PTR(pOtherGVM)
2987 && pOtherGVM->u32Magic == GVM_MAGIC
2988 && RT_SUCCESS(SUPR0ObjVerifyAccess(pvObj, pSession, NULL)))
2989 {
2990# define MAYBE_RESET_FIELD(field) \
2991 do { if (pStats->SchedSum. field ) { pOtherGVM->gvmm.s.StatsSched. field = 0; } } while (0)
2992 MAYBE_RESET_FIELD(cHaltCalls);
2993 MAYBE_RESET_FIELD(cHaltBlocking);
2994 MAYBE_RESET_FIELD(cHaltTimeouts);
2995 MAYBE_RESET_FIELD(cHaltNotBlocking);
2996 MAYBE_RESET_FIELD(cHaltWakeUps);
2997 MAYBE_RESET_FIELD(cWakeUpCalls);
2998 MAYBE_RESET_FIELD(cWakeUpNotHalted);
2999 MAYBE_RESET_FIELD(cWakeUpWakeUps);
3000 MAYBE_RESET_FIELD(cPokeCalls);
3001 MAYBE_RESET_FIELD(cPokeNotBusy);
3002 MAYBE_RESET_FIELD(cPollCalls);
3003 MAYBE_RESET_FIELD(cPollHalts);
3004 MAYBE_RESET_FIELD(cPollWakeUps);
3005# undef MAYBE_RESET_FIELD
3006 }
3007 }
3008 }
3009
3010 GVMMR0_USED_SHARED_UNLOCK(pGVMM);
3011
3012 return VINF_SUCCESS;
3013}
3014
3015
3016/**
3017 * VMMR0 request wrapper for GVMMR0ResetStatistics.
3018 *
3019 * @returns see GVMMR0ResetStatistics.
3020 * @param pGVM The global (ring-0) VM structure. Optional.
3021 * @param pReq Pointer to the request packet.
3022 * @param pSession The current session.
3023 */
3024GVMMR0DECL(int) GVMMR0ResetStatisticsReq(PGVM pGVM, PGVMMRESETSTATISTICSSREQ pReq, PSUPDRVSESSION pSession)
3025{
3026 /*
3027 * Validate input and pass it on.
3028 */
3029 AssertPtrReturn(pReq, VERR_INVALID_POINTER);
3030 AssertMsgReturn(pReq->Hdr.cbReq == sizeof(*pReq), ("%#x != %#x\n", pReq->Hdr.cbReq, sizeof(*pReq)), VERR_INVALID_PARAMETER);
3031 AssertReturn(pReq->pSession == pSession, VERR_INVALID_PARAMETER);
3032
3033 return GVMMR0ResetStatistics(&pReq->Stats, pSession, pGVM);
3034}
3035